From e2520df296cc7f1ac42a746ec54ebc16dbf3d8b1 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Mon, 11 Nov 2024 10:17:13 +0100 Subject: [PATCH 01/40] fix: Ensure seed is based on RNG State (#1193) --- mteb/abstasks/AbsTaskClassification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py index 58b4441a13..683d42b336 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskClassification.py @@ -193,7 +193,8 @@ def _undersample_data(self, X, y, samples_per_label: int, idxs=None): y_sampled = [] if idxs is None: idxs = np.arange(len(y)) - np.random.shuffle(idxs) + rng_state = np.random.default_rng(self.seed) + rng_state.shuffle(idxs) label_counter = defaultdict(int) for i in idxs: if label_counter[y[i]] < samples_per_label: From 9c58518080f6640f1e644c18fe8f174cdb2530e6 Mon Sep 17 00:00:00 2001 From: Orion Weller <31665361+orionw@users.noreply.github.com> Date: Wed, 13 Nov 2024 11:30:38 -0500 Subject: [PATCH 02/40] Consolidate Retrieval/Reranking/Instruction Variants (#1359) * update * merged retrieval; working * update tasks; working multilingual * everything working except instructions * working instructions; just need cleanup * add metadata for all but MindSmall * faster evaluation; mindsmall can compute in reasonable time * fix bad merge of docs * lint * fix test * qa * updated mindsmall * lint * fix debug * Update mteb/abstasks/dataloaders.py Co-authored-by: Roman Solomatin * lint --------- Co-authored-by: Roman Solomatin --- README.md | 1 + mteb/abstasks/AbsTaskInstructionRetrieval.py | 744 ------------------ mteb/abstasks/AbsTaskReranking.py | 260 +++--- mteb/abstasks/AbsTaskRetrieval.py | 323 +++----- mteb/abstasks/TaskMetadata.py | 1 + mteb/abstasks/__init__.py | 1 - mteb/abstasks/dataloaders.py | 324 ++++++++ .../descriptive_stats/Retrieval/NFCorpus.json | 14 + mteb/evaluation/MTEB.py | 22 +- .../InstructionRetrievalEvaluator.py | 52 
-- .../evaluators/RerankingEvaluator.py | 563 ------------- .../evaluators/RetrievalEvaluator.py | 525 +----------- mteb/evaluation/evaluators/__init__.py | 1 - mteb/evaluation/evaluators/model_classes.py | 568 +++++++++++++ mteb/evaluation/evaluators/utils.py | 422 +++++++++- mteb/model_meta.py | 2 +- mteb/models/sentence_transformers_models.py | 17 + mteb/overview.py | 30 +- .../IndicSentimentClassification.py | 2 +- mteb/tasks/InstructionReranking/__init__.py | 6 + .../eng/Core17InstructionRetrieval.py | 10 +- .../eng/News21InstructionRetrieval.py | 10 +- .../eng/Robust04InstructionRetrieval.py | 10 +- .../eng}/__init__.py | 0 .../multilingual/__init__.py | 0 .../multilingual/mFollowIR.py | 121 +-- mteb/tasks/InstructionRetrieval/__init__.py | 5 +- .../InstructionRetrieval/eng/InstructIR.py | 49 ++ mteb/tasks/Reranking/__init__.py | 1 + .../Reranking/eng/AskUbuntuDupQuestions.py | 2 +- .../tasks/Reranking/eng/MindSmallReranking.py | 176 ++++- mteb/tasks/Reranking/eng/NevIR.py | 37 + mteb/tasks/Reranking/eng/SciDocsReranking.py | 2 +- .../eng/StackOverflowDupQuestions.py | 2 +- .../eng/WebLINXCandidatesReranking.py | 23 +- mteb/tasks/Reranking/fra/AlloprofReranking.py | 5 +- mteb/tasks/Reranking/fra/SyntecReranking.py | 5 +- mteb/tasks/Reranking/jpn/MMarcoReranking.py | 17 +- .../Reranking/multilingual/ESCIReranking.py | 4 +- .../Reranking/multilingual/MIRACLReranking.py | 149 +++- .../WikipediaRerankingMultilingual.py | 4 +- mteb/tasks/Reranking/rus/RuBQReranking.py | 2 +- mteb/tasks/Reranking/zho/CMTEBReranking.py | 8 +- mteb/tasks/__init__.py | 1 + scripts/running_model/check_results.py | 1 + scripts/running_model/create_slurm_jobs.py | 1 + tests/test_benchmark/mock_tasks.py | 506 +++++++----- tests/test_benchmark/task_grid.py | 12 +- tests/test_benchmark/test_benchmark.py | 8 +- .../test_InstructionRetrievalEvaluator.py | 36 +- .../test_RerankingEvaluator.py | 58 -- .../test_RetrievalEvaluator.py | 7 +- tests/test_reproducible_workflow.py | 1 + 
tests/test_tasks/test_all_abstasks.py | 4 +- 54 files changed, 2500 insertions(+), 2655 deletions(-) delete mode 100644 mteb/abstasks/AbsTaskInstructionRetrieval.py create mode 100644 mteb/abstasks/dataloaders.py create mode 100644 mteb/descriptive_stats/Retrieval/NFCorpus.json delete mode 100644 mteb/evaluation/evaluators/InstructionRetrievalEvaluator.py delete mode 100644 mteb/evaluation/evaluators/RerankingEvaluator.py create mode 100644 mteb/evaluation/evaluators/model_classes.py create mode 100644 mteb/tasks/InstructionReranking/__init__.py rename mteb/tasks/{InstructionRetrieval => InstructionReranking}/eng/Core17InstructionRetrieval.py (79%) rename mteb/tasks/{InstructionRetrieval => InstructionReranking}/eng/News21InstructionRetrieval.py (79%) rename mteb/tasks/{InstructionRetrieval => InstructionReranking}/eng/Robust04InstructionRetrieval.py (79%) rename mteb/tasks/{InstructionRetrieval/multilingual => InstructionReranking/eng}/__init__.py (100%) create mode 100644 mteb/tasks/InstructionReranking/multilingual/__init__.py rename mteb/tasks/{InstructionRetrieval => InstructionReranking}/multilingual/mFollowIR.py (60%) create mode 100644 mteb/tasks/InstructionRetrieval/eng/InstructIR.py create mode 100644 mteb/tasks/Reranking/eng/NevIR.py delete mode 100644 tests/test_evaluators/test_RerankingEvaluator.py diff --git a/README.md b/README.md index ef87ec4370..ec402579ad 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,7 @@ In prompts the key can be: 8. `STS` 9. `Summarization` 10. `InstructionRetrieval` + 11. `InstructionReranking` 3. Pair of task type and prompt type like `Retrival-query` - these prompts will be used in all classification tasks 4. Task name - these prompts will be used in the specific task 5. 
Pair of task name and prompt type like `NFCorpus-query` - these prompts will be used in the specific task diff --git a/mteb/abstasks/AbsTaskInstructionRetrieval.py b/mteb/abstasks/AbsTaskInstructionRetrieval.py deleted file mode 100644 index bdbe5cd6c5..0000000000 --- a/mteb/abstasks/AbsTaskInstructionRetrieval.py +++ /dev/null @@ -1,744 +0,0 @@ -from __future__ import annotations - -import json -import logging -import os -from collections import defaultdict -from time import time -from typing import Any - -import tqdm -from datasets import Dataset, Features, Value, load_dataset - -from mteb.encoder_interface import Encoder - -from ..evaluation.evaluators import utils -from ..evaluation.evaluators.InstructionRetrievalEvaluator import ( - InstructionRetrievalEvaluator, -) -from .AbsTask import AbsTask -from .AbsTaskRetrieval import HFDataLoader -from .TaskMetadata import DescriptiveStatistics - -logger = logging.getLogger(__name__) - - -# Adapted from https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/datasets/data_loader_hf.py#L10 -class HFDataLoaderInstructions(HFDataLoader): - def __init__( - self, - hf_repo: str | None = None, - hf_repo_qrels: str | None = None, - data_folder: str | None = None, - prefix: str | None = None, - corpus_file: str = "corpus.jsonl", - query_file: str = "queries.jsonl", - qrels_folder: str = "qrels", - qrels_file: str = "", - streaming: bool = False, - keep_in_memory: bool = False, - ): - self.corpus = {} - self.queries = {} - self.qrels = {} - self.og_instructions = {} - self.changed_instructions = {} - self.top_ranked = {} - self.hf_repo = hf_repo - if hf_repo: - # By default fetch qrels from same repo not a second repo with "-qrels" like in original - self.hf_repo_qrels = hf_repo_qrels if hf_repo_qrels else hf_repo - else: - # data folder would contain these files: - # (1) fiqa/corpus.jsonl (format: jsonlines) - # (2) fiqa/queries.jsonl (format: jsonlines) - # (3) fiqa/qrels/test.tsv (format: tsv 
("\t")) - if prefix: - query_file = prefix + "-" + query_file - qrels_folder = prefix + "-" + qrels_folder - - self.corpus_file = ( - os.path.join(data_folder, corpus_file) if data_folder else corpus_file - ) - self.query_file = ( - os.path.join(data_folder, query_file) if data_folder else query_file - ) - self.qrels_folder = ( - os.path.join(data_folder, qrels_folder) if data_folder else None - ) - self.qrels_file = qrels_file - self.streaming = streaming - self.keep_in_memory = keep_in_memory - - def load( - self, split="test" - ) -> tuple[ - Dataset, - Dataset, - dict[str, dict[str, int]], - dict[str, dict[str, int]], - Dataset, - ]: - if not self.hf_repo: - self.og_qrels_file = os.path.join(self.qrels_folder + "_og", split + ".tsv") - self.changed_qrels_file = os.path.join( - self.qrels_folder + "_changed", split + ".tsv" - ) - self.check(fIn=self.corpus_file, ext="jsonl") - self.check(fIn=self.query_file, ext="jsonl") - self.check(fIn=self.og_qrels_file, ext="tsv") - self.check(fIn=self.changed_qrels_file, ext="tsv") - - if not len(self.corpus): - logger.info("Loading Corpus...") - self._load_corpus() - logger.info("Loaded %d %s Documents.", len(self.corpus), split.upper()) - logger.info("Doc Example: %s", self.corpus[0]) - - if not len(self.queries): - logger.info("Loading Queries...") - self._load_queries() - - self._load_qrels(split, changed=False) - self._load_qrels(split, changed=True) - # filter queries with no qrels - og_qrels_dict = defaultdict(dict) - changed_qrels_dict = defaultdict(dict) - - def qrels_dict_init(row): - og_qrels_dict[row["query-id"]][row["corpus-id"]] = int(row["score"]) - - def qrels_changed_dict_init(row): - changed_qrels_dict[row["query-id"]][row["corpus-id"]] = int(row["score"]) - - self.changed_qrels.map(qrels_dict_init) - self.og_qrels.map(qrels_changed_dict_init) - self.og_qrels = og_qrels_dict - self.changed_qrels = changed_qrels_dict - self.queries = self.queries.filter(lambda x: x["id"] in self.og_qrels) - 
logger.info("Loaded %d %s Queries.", len(self.queries), split.upper()) - logger.info("Query Example: %s", self.queries[0]) - - # load top_ranked - self.load_top_ranked() - - return ( - self.corpus, - self.queries, - self.og_qrels, - self.changed_qrels, - self.top_ranked, - ) - - def load_top_ranked(self) -> None: - if self.hf_repo: - top_ranked_ds = load_dataset( - self.hf_repo, - "top_ranked", - keep_in_memory=self.keep_in_memory, - streaming=self.streaming, - ) - else: - top_ranked_ds = load_dataset( - "json", - data_files=self.top_ranked_file, - streaming=self.streaming, - keep_in_memory=self.keep_in_memory, - ) - top_ranked_ds = next(iter(top_ranked_ds.values())) # get first split - top_ranked_ds = top_ranked_ds.cast_column("qid", Value("string")) - top_ranked_ds = top_ranked_ds.cast_column("pid", Value("string")) - top_ranked_ds = top_ranked_ds.remove_columns( - [col for col in top_ranked_ds.column_names if col not in ["qid", "pid"]] - ) - self.top_ranked = top_ranked_ds - - def _load_queries(self): - if self.hf_repo: - queries_ds = load_dataset( - self.hf_repo, - "queries", - keep_in_memory=self.keep_in_memory, - streaming=self.streaming, - ) - else: - queries_ds = load_dataset( - "json", - data_files=self.query_file, - streaming=self.streaming, - keep_in_memory=self.keep_in_memory, - ) - queries_ds = next(iter(queries_ds.values())) # get first split - queries_ds = queries_ds.cast_column("_id", Value("string")) - queries_ds = queries_ds.rename_column("_id", "id") - queries_ds = queries_ds.remove_columns( - [ - col - for col in queries_ds.column_names - if col - not in [ - "id", - "text", - "instruction_og", - "instruction_changed", - "keywords", - "short_query", - ] - ] - ) - self.queries = queries_ds - - def _load_qrels(self, split, changed=False): - if self.hf_repo: - qrels_ds = load_dataset( - self.hf_repo_qrels, - "qrels_og" if not changed else "qrels_changed", - keep_in_memory=self.keep_in_memory, - streaming=self.streaming, - )[split] - else: - 
qrels_file = self.og_qrels_file if not changed else self.changed_qrels_file - qrels_ds = load_dataset( - "csv", - data_files=qrels_file, - delimiter="\t", - keep_in_memory=self.keep_in_memory, - ) - features = Features( - { - "query-id": Value("string"), - "corpus-id": Value("string"), - "score": Value("float"), - } - ) - qrels_ds = qrels_ds.cast(features) - - if changed: - self.changed_qrels = qrels_ds - else: - self.og_qrels = qrels_ds - - -class InstructionRetrievalDescriptiveStatistics(DescriptiveStatistics): - """Descriptive statistics for Instruction Retrieval tasks - - Attributes: - num_samples: Number of samples - num_queries: Number of queries - num_docs: Number of documents - number_of_characters: Total number of symbols in the dataset - average_document_length: Average length of documents - average_query_length: Average length of queries - average_instruction_length: Average length of instructions - average_changed_instruction_length: Average length of changed instructions - average_relevant_docs_per_query: Average number of relevant docs per query - average_top_ranked_per_query: Average number of top ranked docs per query - """ - - num_samples: int - num_queries: int - num_docs: int - number_of_characters: int - average_document_length: float - average_query_length: float - average_instruction_length: float - average_changed_instruction_length: float - average_relevant_docs_per_query: float - average_top_ranked_per_query: float - - -class AbsTaskInstructionRetrieval(AbsTask): - """Abstract class for retrieval tasks that use instructions. An example from Core17 would be - query: What is the ongoing status of The Three Gorges Project? - instruction: A relevant document will provide the projected or actual date of completion of the project, its estimated or actual total cost, or the estimated or ongoing electrical output of the finished project. Discussions of the social, political, or ecological impact of the project are not relevant. 
- - Child-classes must implement the following properties: - self.corpus = dict[corpus_id, dict[str, str]] #id => dict with document datas like title and text - self.queries = dict[query_id, str] #id => query - self.relevant_docs = dict[query_id, dict[corpus_id, int]] - self.og_instructions = dict[str, str] query => original instruction - self.changed_instructions = dict[str, str] query => changed instruction - self.top_ranked = dict[query_id, list[corpus_id]] #id => list of top ranked document ids - - See https://arxiv.org/abs/2403.15246 for more details - """ - - abstask_prompt = "Retrieve text based on user query." - - def __init__( - self, - **kwargs, - ): - super().__init__(**kwargs) - self.do_length_ablation = kwargs.get("do_length_ablation", False) - if self.do_length_ablation: - logger.info("Running length ablation also...") - - def load_data(self, **kwargs): - if self.data_loaded: - return - self.corpus, self.queries, self.og_relevant_docs, self.changed_relevant_docs = ( - {}, - {}, - {}, - {}, - ) - self.og_instructions, self.changed_instructions = {}, {} - self.top_ranked = {} - if self.do_length_ablation: - self.keywords, self.short_instructions = {}, {} - - dataset_path = self.metadata_dict["dataset"]["path"] - hf_repo_qrels = ( - dataset_path + "-qrels" if "clarin-knext" in dataset_path else None - ) - for split in kwargs.get("eval_splits", self.metadata_dict["eval_splits"]): - ( - corpus, - queries, - og_relevant_docs, - changed_relevant_docs, - top_ranked_init, - ) = HFDataLoaderInstructions( - hf_repo=dataset_path, - hf_repo_qrels=hf_repo_qrels, - streaming=False, - keep_in_memory=False, - ).load(split=split) - - # Conversion from DataSet - top_ranked = defaultdict(list) - [ - top_ranked[cur_inst["qid"]].append(cur_inst["pid"]) - for cur_inst in top_ranked_init - ] - og_instructions = { - query["text"]: query["instruction_og"] for query in queries - } - changed_instructions = { - query["text"]: query["instruction_changed"] for query in queries - } 
- if self.do_length_ablation: - keywords = {query["text"]: query["keywords"] for query in queries} - short_instructions = { - query["text"]: query["short_query"] for query in queries - } - queries = {query["id"]: query["text"] for query in queries} - corpus = { - doc["id"]: {"title": doc["title"], "text": doc["text"]} - for doc in corpus - } - assert ( - len(top_ranked) == len(queries) - ), f"Top ranked not loaded properly! Expected {len(self.queries)} but got {len(self.top_ranked)}." - - ( - self.corpus[split], - self.queries[split], - self.og_relevant_docs[split], - self.changed_relevant_docs[split], - ) = corpus, queries, og_relevant_docs, changed_relevant_docs - self.changed_instructions[split], self.og_instructions[split] = ( - changed_instructions, - og_instructions, - ) - self.top_ranked[split] = top_ranked - - if self.do_length_ablation: - self.keywords[split], self.short_instructions[split] = ( - keywords, - short_instructions, - ) - - self.data_loaded = True - - def _evaluate_subset_lang( - self, - retriever: InstructionRetrievalEvaluator, - corpus: dict, - queries: dict, - og_relevant_docs: dict, - changed_relevant_docs: dict, - og_instructions: dict, - changed_instructions: dict, - top_ranked: dict, - lang: str, - split: str, - keywords: dict | None = None, - short_instructions: dict | None = None, - **kwargs, - ) -> dict[str, dict[str, float] | float]: - corpus, queries = corpus[split], queries[split] - og_relevant_docs, changed_relevant_docs = ( - og_relevant_docs[split], - changed_relevant_docs[split], - ) - og_instructions, changed_instructions = ( - og_instructions[split], - changed_instructions[split], - ) - - top_ranked = top_ranked[split] - kwargs["prediction_name"] = "og" # for naming predictions, as needed - scores_og, results_og = self._evaluate_subset( - retriever, - corpus, - queries, - og_relevant_docs, - og_instructions, - top_ranked, - lang, - **kwargs, - ) - kwargs["prediction_name"] = "changed" # for naming predictions, as needed - 
scores_changed, results_changed = self._evaluate_subset( - retriever, - corpus, - queries, - changed_relevant_docs, - changed_instructions, - top_ranked, - lang, - **kwargs, - ) - - newly_irrelevant_qrels = self.create_qrel_diff( - og_relevant_docs, - changed_relevant_docs, - ) - overall_changed_scores = utils.evaluate_change( - results_og, results_changed, newly_irrelevant_qrels - ) - - overall_changed_scores["individual"] = { - "original": scores_og, - "changed": scores_changed, - } - - if self.do_length_ablation: - keywords, short_instructions = ( - keywords[split], - short_instructions[split], - ) - kwargs["prediction_name"] = "base" # for naming predictions, as needed - scores_base, results_base = self._evaluate_subset( - retriever, - corpus, - queries, - og_relevant_docs, - defaultdict(str), - top_ranked, - lang, - **kwargs, - ) - kwargs["prediction_name"] = "keywords" # for naming predictions, as needed - scores_w_keywords_scores, scores_w_keywords_results = self._evaluate_subset( - retriever, - corpus, - queries, - og_relevant_docs, - keywords, - top_ranked, - lang, - **kwargs, - ) - kwargs["prediction_name"] = ( - "short_instr" # for naming predictions, as needed - ) - ( - scores_w_short_instr_scores, - scores_w_short_instr_result, - ) = self._evaluate_subset( - retriever, - corpus, - queries, - og_relevant_docs, - short_instructions, - top_ranked, - lang, - **kwargs, - ) - overall_changed_scores["length_ablation"] = { - "keywords": scores_w_keywords_scores, - "short_instructions": scores_w_short_instr_scores, - "base": scores_base, - } - - return overall_changed_scores - - def evaluate( - self, - model: Encoder, - split: str = "test", - *, - encode_kwargs: dict[str, Any] = {}, - **kwargs, - ) -> dict[str, dict[str, Any]]: - retriever = InstructionRetrievalEvaluator( - retriever=model, - task_name=self.metadata.name, - encode_kwargs=encode_kwargs, - **kwargs, - ) - scores = {} - if self.is_multilingual: - for lang in self.hf_subsets: - 
logger.info(f"Language: {lang}") - scores[lang] = self._evaluate_subset_lang( - retriever, - corpus=self.corpus[lang], - queries=self.queries[lang], - og_relevant_docs=self.og_relevant_docs[lang], - changed_relevant_docs=self.changed_relevant_docs[lang], - og_instructions=self.og_instructions[lang], - changed_instructions=self.changed_instructions[lang], - top_ranked=self.top_ranked[lang], - lang=lang, - split=split, - keywords=self.keywords[lang] if self.do_length_ablation else None, - short_instructions=self.short_instructions[lang] - if self.do_length_ablation - else None, - **kwargs, - ) - self._add_main_score(scores[lang]) - else: - lang = "default" - scores[lang] = self._evaluate_subset_lang( - retriever, - corpus=self.corpus, - queries=self.queries, - og_relevant_docs=self.og_relevant_docs, - changed_relevant_docs=self.changed_relevant_docs, - og_instructions=self.og_instructions, - changed_instructions=self.changed_instructions, - top_ranked=self.top_ranked, - lang=lang, - split=split, - keywords=self.keywords if self.do_length_ablation else None, - short_instructions=self.short_instructions - if self.do_length_ablation - else None, - **kwargs, - ) - self._add_main_score(scores[lang]) - - return scores - - def _add_main_score(self, scores: dict[str, dict[str, float]]) -> None: - scores["main_score"] = scores[self.metadata.main_score] - - def _evaluate_subset( - self, - retriever: InstructionRetrievalEvaluator, - corpus: dict[str, dict[str, str]], - queries: dict[str, str], - relevant_docs: dict[str, dict[str, int]], - instructions: dict[str, str], - top_ranked: dict[str, list[str]], - lang=None, - **kwargs, - ) -> tuple[dict[str, float], dict[str, dict[str, float]]]: - start_time = time() - - # do the results by query and relevant docs only - all_results = [] - for query_id in tqdm.tqdm(list(queries.keys()), leave=False, desc="Retrieving"): - cur_queries = {query_id: queries[query_id]} - cur_instructions = {queries[query_id]: 
instructions[queries[query_id]]} - cur_docs = { - key: value - for (key, value) in corpus.items() - if key in top_ranked[query_id] - } - all_results.append( - retriever( - cur_docs, cur_queries, instructions=cur_instructions, qid=query_id - ) - ) - - # combine all the results (which are {'qid' -> {'doc_id' -> score} mappings) - # we know all are unique qids, so we can smash together - results = {k: v for d in all_results for k, v in d.items()} - - end_time = time() - logger.info(f"Time taken to retrieve: {end_time - start_time:.2f} seconds") - - if kwargs.get("save_predictions", False): - output_folder = kwargs.get("output_folder", "results") - if not os.path.isdir(output_folder): - os.makedirs(output_folder) - top_k = kwargs.get("top_k", None) - if top_k is not None: - for qid in list(results.keys()): - doc_ids = set( - sorted( - results[qid], key=lambda x: results[qid][x], reverse=True - )[:top_k] - ) - results[qid] = { - k: v for k, v in results[qid].items() if k in doc_ids - } - if lang is None: - qrels_save_path = ( - f"{output_folder}/{self.metadata_dict['name']}_predictions.json" - ) - else: - qrels_save_path = f"{output_folder}/{self.metadata_dict['name']}_{lang}_predictions.json" - - if kwargs.get("prediction_name", None): - qrels_save_path = qrels_save_path.replace( - ".json", f"_{kwargs['prediction_name']}.json" - ) - - with open(qrels_save_path, "w") as f: - json.dump(results, f) - - ndcg, _map, recall, precision, naucs = retriever.evaluate( - relevant_docs, - results, - retriever.k_values, - ignore_identical_ids=kwargs.get("ignore_identical_ids", True), - ) - mrr, naucs = retriever.evaluate_custom( - relevant_docs, results, retriever.k_values, "mrr" - ) - scores = { - **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()}, - **{f"map_at_{k.split('@')[1]}": v for (k, v) in _map.items()}, - **{f"recall_at_{k.split('@')[1]}": v for (k, v) in recall.items()}, - **{f"precision_at_{k.split('@')[1]}": v for (k, v) in precision.items()}, - 
**{f"naucs_at_{k.split('@')[1]}": v for (k, v) in naucs.items()}, - **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in mrr.items()}, - } - return scores, results - - def create_qrel_diff(self, og_qrels, changed_qrels): - newly_irrelevant_qrels = {} - for qid in og_qrels: - newly_irrelevant_qrels[qid] = [] - for doc_id in og_qrels[qid]: - if changed_qrels[qid][doc_id] != og_qrels[qid][doc_id]: - newly_irrelevant_qrels[qid].append(doc_id) - - return newly_irrelevant_qrels - - def _calculate_metrics_from_split( - self, split: str, hf_subset: str | None = None, compute_overall: bool = False - ) -> InstructionRetrievalDescriptiveStatistics: - if hf_subset: - corpus = self.corpus[hf_subset][split] - queries = self.queries[hf_subset][split] - relevant_docs = self.og_relevant_docs[hf_subset][split] - og_instructions = self.og_instructions[hf_subset][split] - changed_instructions = self.changed_instructions[hf_subset][split] - top_ranked = self.top_ranked[hf_subset][split] - elif compute_overall: - corpus = {} - queries = {} - relevant_docs = {} - og_instructions = {} - changed_instructions = {} - top_ranked = {} - for hf_subset in self.metadata.eval_langs: - corpus.update(process_docs(self.corpus, hf_subset, split)) - queries.update(process_docs(self.queries, hf_subset, split)) - relevant_docs.update( - process_relevant_docs(self.og_relevant_docs, hf_subset, split) - ) - og_instructions.update( - process_docs( - self.og_instructions, - hf_subset, - split, - ) - ) - changed_instructions.update( - process_docs(self.changed_instructions, hf_subset, split) - ) - top_ranked.update(process_top_ranked(self.top_ranked, hf_subset, split)) - else: - corpus = self.corpus[split] - queries = self.queries[split] - relevant_docs = self.og_relevant_docs[split] - og_instructions = self.og_instructions[split] - changed_instructions = self.changed_instructions[split] - top_ranked = self.top_ranked[split] - - total_corpus_len = sum( - [len(doc.get("title", "")) + len(doc["text"]) for doc in 
corpus.values()] - ) - total_queries_len = sum([len(query) for query in queries.values()]) - total_instructions_len = sum( - [len(instruction) for instruction in og_instructions.values()] - ) - total_changed_instructions_len = sum( - [len(instruction) for instruction in changed_instructions.values()] - ) - num_qrels_non_zero = sum( - sum(1 for doc_id in docs if docs[doc_id] != 0) - for docs in relevant_docs.values() - ) - qrels_per_doc = num_qrels_non_zero / len(relevant_docs) if len(queries) else 0 - top_ranked_per_query = ( - sum(len(docs) for docs in top_ranked.values()) / len(queries) - if len(queries) - else 0 - ) - return InstructionRetrievalDescriptiveStatistics( - num_samples=len(queries) + len(corpus), - num_docs=len(corpus), - num_queries=len(queries), - number_of_characters=total_corpus_len - + total_queries_len - + total_instructions_len - + total_changed_instructions_len, - average_document_length=( - total_corpus_len / len(corpus) if len(corpus) else 0 - ), - average_query_length=( - total_queries_len / len(queries) if len(queries) else 0 - ), - average_instruction_length=( - total_instructions_len / len(queries) if len(queries) else 0 - ), - average_changed_instruction_length=( - total_changed_instructions_len / len(queries) if len(queries) else 0 - ), - average_relevant_docs_per_query=qrels_per_doc, - average_top_ranked_per_query=top_ranked_per_query, - ) - - -def process_docs( - collection: dict[str, dict[str, dict[str, str]]], hf_subset: str, split: str -) -> dict[str, str]: - """Collections can contain overlapping ids in different splits. Prepend split to avoid this""" - return { - f"{split}_{hf_subset}_{k}": v for k, v in collection[hf_subset][split].items() - } - - -def process_relevant_docs( - collection: dict[str, dict[str, dict[str, dict[str, int]]]], - hf_subset: str, - split: str, -) -> dict[str, dict[str, int]]: - """Collections can contain overlapping ids in different splits. 
Prepend split to avoid this""" - return_collection = {} - for query_id, relevant in collection[hf_subset][split].items(): - return_collection[f"{split}_{hf_subset}_{query_id}"] = { - f"{split}_{hf_subset}_{doc_id}": value for doc_id, value in relevant.items() - } - return return_collection - - -def process_top_ranked( - collection: dict[str, dict[str, dict[str, list[str]]]], hf_subset: str, split: str -) -> dict[str, list[str]]: - return_collection = {} - for query_id, docs_id in collection[hf_subset][split].items(): - return_collection[f"{split}_{hf_subset}_{query_id}"] = [ - f"{split}_{hf_subset}_{doc_id}" for doc_id in docs_id - ] - return return_collection diff --git a/mteb/abstasks/AbsTaskReranking.py b/mteb/abstasks/AbsTaskReranking.py index 3703b5a3c2..b4a5cffd25 100644 --- a/mteb/abstasks/AbsTaskReranking.py +++ b/mteb/abstasks/AbsTaskReranking.py @@ -1,121 +1,167 @@ from __future__ import annotations -from typing import Any +import logging +from collections import defaultdict +import datasets from datasets import Dataset -from mteb.encoder_interface import Encoder -from mteb.load_results.task_results import ScoresDict - -from ..evaluation.evaluators import RerankingEvaluator -from .AbsTask import AbsTask -from .TaskMetadata import DescriptiveStatistics - - -class RerankingDescriptiveStatistics(DescriptiveStatistics): - """Descriptive statistics for Reranking - - Attributes: - num_samples: number of samples in the dataset. - number_of_characters: Total number of symbols in the dataset. 
- num_positive: Number of positive examples - num_negative: Number of negative examples - avg_query_len: Average length of queries - avg_positive_len: Average length of positive examples - avg_negative_len: Average length of negative examples - """ - - num_samples: int - number_of_characters: int - num_positive: int - num_negative: int - avg_query_len: float - avg_positive_len: float - avg_negative_len: float +from .AbsTaskRetrieval import AbsTaskRetrieval + +logger = logging.getLogger(__name__) + +OLD_FORMAT_RERANKING_TASKS = [ + "AskUbuntuDupQuestions", + "MindSmallReranking", + "SciDocsRR", + "StackOverflowDupQuestions", + "WebLINXCandidatesReranking", + "AlloprofReranking", + "SyntecReranking", + "VoyageMMarcoReranking", + "ESCIReranking", + "MIRACLReranking", + "WikipediaRerankingMultilingual", + "RuBQReranking", + "T2Reranking", + "MMarcoReranking", + "CMedQAv1-reranking", + "CMedQAv2-reranking", +] + + +class AbsTaskReranking(AbsTaskRetrieval): + """Abstract class for re-ranking experiments. This is mostly the same as the RetrievalEvaluator, but here to adapt the old format to the new format. TODO: update these tasks to the new format and delete this class.""" + def __init__(self, **kwargs): + super(AbsTaskRetrieval, self).__init__(**kwargs) -class AbsTaskReranking(AbsTask): - """Abstract class for re-ranking experiments. + def load_data(self, **kwargs): + if self.data_loaded: + return - self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. 
It must contain the following columns: - query: str - positive: list[str] - negative: list[str] - """ + if self.metadata.name in OLD_FORMAT_RERANKING_TASKS: + self.transform_old_dataset_format() + else: + # use AbsTaskRetrieval default to load the data + return super().load_data(**kwargs) + + def process_example(self, example: dict, split: str, query_idx: int) -> dict: + """Process a single example from the dataset.""" + query = example["query"] + positive_docs = example["positive"] + negative_docs = example["negative"] + + query_id = f"{split}_query{query_idx}" + + # Initialize the structures for this example + example_data = { + "query_id": query_id, + "query": query, + "doc_ids": [], + "doc_texts": [], + "relevance_scores": [], + } + + for i, pos_doc in enumerate(positive_docs): + # format i as a five digit number + formatted_i = str(i).zfill(5) + # have "a" in front so that positives are first, then negatives + # this shouldn't matter except for ties, and the previous reranking results + # had the positives first + doc_id = f"apositive_{query_id}_{formatted_i}" + example_data["doc_ids"].append(doc_id) + example_data["doc_texts"].append(pos_doc) + example_data["relevance_scores"].append(1) + + for i, neg_doc in enumerate(negative_docs): + formatted_i = str(i).zfill(5) + doc_id = f"negative_{query_id}_{formatted_i}" + example_data["doc_ids"].append(doc_id) + example_data["doc_texts"].append(neg_doc) + example_data["relevance_scores"].append(0) + + return example_data + + def transform_old_dataset_format(self, given_dataset=None): + """Transform the old format to the new format using HF datasets mapping. This is a one-time transformation for datasets which are in the old format. + + Args: + given_dataset (Dataset, optional): The dataset to transform. Defaults to None. This is helpful for some older datasets which are loaded with custom code, but need to be transformed still. 
+ + """ + if self.metadata.name not in OLD_FORMAT_RERANKING_TASKS: + return + + logging.info( + f"Transforming old format to standard format for {self.metadata.name}" + ) - abstask_prompt = "Retrieve text based on user query." + self.corpus = defaultdict(lambda: defaultdict(dict)) + self.queries = defaultdict(lambda: defaultdict(dict)) + self.relevant_docs = defaultdict(lambda: defaultdict(lambda: defaultdict(dict))) + self.top_ranked = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) + + hf_subsets = list(self.hf_subsets) if self.is_multilingual else ["default"] + + for hf_subset in hf_subsets: + if given_dataset: + cur_dataset = given_dataset + elif "name" in self.metadata_dict["dataset"]: + cur_dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) # type: ignore + assert ( + hf_subset == "default" + ), f"Only default subset is supported for {self.metadata.name} since `name` is given in the metadata." + else: + cur_dataset = datasets.load_dataset( + **self.metadata_dict["dataset"], name=hf_subset + ) # type: ignore + + for split in cur_dataset: + # Create an enumerated dataset to pass indices + enumerated_dataset = Dataset.from_dict( + { + "index": range(len(cur_dataset[split])), + "query": cur_dataset[split]["query"], + "positive": cur_dataset[split]["positive"], + "negative": cur_dataset[split]["negative"], + } + ) - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def _evaluate_subset( - self, - model: Encoder, - data_split: Dataset, - *, - encode_kwargs: dict[str, Any] = {}, - **kwargs: Any, - ) -> ScoresDict: - evaluator = RerankingEvaluator( - data_split, - task_name=self.metadata.name, - encode_kwargs=encode_kwargs, - **kwargs, - ) - scores = evaluator(model) - - self._add_main_score(scores) - return scores - - def _add_main_score(self, scores: ScoresDict) -> None: - scores["main_score"] = scores[self.metadata.main_score] - - def _calculate_metrics_from_split( - self, split: str, hf_subset: str | None = None, 
compute_overall: bool = False - ) -> RerankingDescriptiveStatistics: - if hf_subset: - query = self.dataset[hf_subset][split]["query"] - positive = transform_reranking_data( - self.dataset[hf_subset][split]["positive"] - ) - negative = transform_reranking_data( - self.dataset[hf_subset][split]["negative"] - ) - elif compute_overall: - query = [] - positive = [] - negative = [] - for hf_subset in self.metadata.eval_langs: - query.extend(self.dataset[hf_subset][split]["query"]) - positive.extend( - transform_reranking_data(self.dataset[hf_subset][split]["positive"]) + # first, filter out the ones that have no positive or no negatives + enumerated_dataset = enumerated_dataset.filter( + lambda example: len(example["positive"]) > 0 + and len(example["negative"]) > 0 ) - negative.extend( - transform_reranking_data(self.dataset[hf_subset][split]["negative"]) + + logger.info( + f"Filtered out {len(cur_dataset[split]) - len(enumerated_dataset)} examples with no positive or no negative examples. {len(enumerated_dataset)} examples remaining." 
) - else: - query = self.dataset[split]["query"] - positive = transform_reranking_data(self.dataset[split]["positive"]) - negative = transform_reranking_data(self.dataset[split]["negative"]) - - total_len_query = sum([len(q) for q in query]) - total_len_positive = sum([len(p) for p in positive]) - total_len_negative = sum([len(n) for n in negative]) - return RerankingDescriptiveStatistics( - num_samples=len(query), - number_of_characters=total_len_query - + total_len_positive - + total_len_negative, - num_positive=len(positive), - num_negative=len(negative), - avg_query_len=total_len_query / len(query), - avg_positive_len=total_len_positive / len(positive), - avg_negative_len=total_len_negative / len(negative), - ) + # Map the transformation function over the dataset + processed_dataset = enumerated_dataset.map( + lambda example, idx: self.process_example(example, split, idx), + with_indices=True, + remove_columns=enumerated_dataset.column_names, + ) -def transform_reranking_data(data: list[list[str]] | list[str]) -> list[str]: - """Transforms a list of lists of strings into a list of strings""" - if isinstance(data[0], str): - return data - return [item for sublist in data for item in sublist] + # Populate the data structures + for item in processed_dataset: + query_id = item["query_id"] + self.queries[hf_subset][split][query_id] = item["query"] + + # Add documents and relevance information + for doc_id, doc_text, relevance in zip( + item["doc_ids"], item["doc_texts"], item["relevance_scores"] + ): + self.corpus[hf_subset][split][doc_id] = { + "text": doc_text, + "_id": doc_id, + } + self.top_ranked[hf_subset][split][query_id].append(doc_id) + self.relevant_docs[hf_subset][split][query_id][doc_id] = ( + relevance + ) + + self.instructions = None + self.data_loaded = True diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index 078979b6bf..c980adad45 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ 
b/mteb/abstasks/AbsTaskRetrieval.py @@ -3,201 +3,22 @@ import json import logging import os -from collections import defaultdict from pathlib import Path from time import time from typing import Any -from datasets import Features, Value, load_dataset - from mteb.abstasks.TaskMetadata import HFSubset from ..evaluation.evaluators import RetrievalEvaluator +from ..evaluation.evaluators.utils import make_score_dict from ..load_results.task_results import ScoresDict from .AbsTask import AbsTask +from .dataloaders import HFDataLoader from .TaskMetadata import DescriptiveStatistics logger = logging.getLogger(__name__) -# Adapted from https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/datasets/data_loader_hf.py#L10 -class HFDataLoader: - def __init__( - self, - hf_repo: str | None = None, - hf_repo_qrels: str | None = None, - data_folder: str | None = None, - prefix: str | None = None, - corpus_file: str = "corpus.jsonl", - query_file: str = "queries.jsonl", - qrels_folder: str = "qrels", - qrels_file: str = "", - streaming: bool = False, - keep_in_memory: bool = False, - trust_remote_code: bool = False, - ): - self.corpus = {} - self.queries = {} - self.qrels = {} - self.hf_repo = hf_repo - if hf_repo: - # By default fetch qrels from same repo not a second repo with "-qrels" like in original - self.hf_repo_qrels = hf_repo_qrels if hf_repo_qrels else hf_repo - else: - # data folder would contain these files: - # (1) fiqa/corpus.jsonl (format: jsonlines) - # (2) fiqa/queries.jsonl (format: jsonlines) - # (3) fiqa/qrels/test.tsv (format: tsv ("\t")) - if prefix: - query_file = prefix + "-" + query_file - qrels_folder = prefix + "-" + qrels_folder - - self.corpus_file = ( - os.path.join(data_folder, corpus_file) if data_folder else corpus_file - ) - self.query_file = ( - os.path.join(data_folder, query_file) if data_folder else query_file - ) - self.qrels_folder = ( - os.path.join(data_folder, qrels_folder) if data_folder else None - ) - 
self.qrels_file = qrels_file - self.streaming = streaming - self.keep_in_memory = keep_in_memory - self.trust_remote_code = trust_remote_code - - @staticmethod - def check(fIn: str, ext: str): - if not os.path.exists(fIn): - raise ValueError(f"File {fIn} not present! Please provide accurate file.") - - if not fIn.endswith(ext): - raise ValueError(f"File {fIn} must be present with extension {ext}") - - def load( - self, split="test" - ) -> tuple[dict[str, dict[str, str]], dict[str, str], dict[str, dict[str, int]]]: - if not self.hf_repo: - self.qrels_file = os.path.join(self.qrels_folder, split + ".tsv") - self.check(fIn=self.corpus_file, ext="jsonl") - self.check(fIn=self.query_file, ext="jsonl") - self.check(fIn=self.qrels_file, ext="tsv") - - if not len(self.corpus): - logger.info("Loading Corpus...") - self._load_corpus() - logger.info("Loaded %d %s Documents.", len(self.corpus), split.upper()) - logger.info("Doc Example: %s", self.corpus[0]) - - if not len(self.queries): - logger.info("Loading Queries...") - self._load_queries() - - self._load_qrels(split) - # filter queries with no qrels - qrels_dict = defaultdict(dict) - - def qrels_dict_init(row): - qrels_dict[row["query-id"]][row["corpus-id"]] = int(row["score"]) - - self.qrels.map(qrels_dict_init) - self.qrels = qrels_dict - self.queries = self.queries.filter(lambda x: x["id"] in self.qrels) - logger.info("Loaded %d %s Queries.", len(self.queries), split.upper()) - logger.info("Query Example: %s", self.queries[0]) - - return self.corpus, self.queries, self.qrels - - def load_corpus(self) -> dict[str, dict[str, str]]: - if not self.hf_repo: - self.check(fIn=self.corpus_file, ext="jsonl") - - if not len(self.corpus): - logger.info("Loading Corpus...") - self._load_corpus() - logger.info("Loaded %d %s Documents.", len(self.corpus)) - logger.info("Doc Example: %s", self.corpus[0]) - - return self.corpus - - def _load_corpus(self): - if self.hf_repo: - corpus_ds = load_dataset( - self.hf_repo, - "corpus", - 
keep_in_memory=self.keep_in_memory, - streaming=self.streaming, - trust_remote_code=self.trust_remote_code, - ) - else: - corpus_ds = load_dataset( - "json", - data_files=self.corpus_file, - streaming=self.streaming, - keep_in_memory=self.keep_in_memory, - ) - corpus_ds = next(iter(corpus_ds.values())) # get first split - corpus_ds = corpus_ds.cast_column("_id", Value("string")) - corpus_ds = corpus_ds.rename_column("_id", "id") - corpus_ds = corpus_ds.remove_columns( - [ - col - for col in corpus_ds.column_names - if col not in ["id", "text", "title"] - ] - ) - self.corpus = corpus_ds - - def _load_queries(self): - if self.hf_repo: - queries_ds = load_dataset( - self.hf_repo, - "queries", - keep_in_memory=self.keep_in_memory, - streaming=self.streaming, - trust_remote_code=self.trust_remote_code, - ) - else: - queries_ds = load_dataset( - "json", - data_files=self.query_file, - streaming=self.streaming, - keep_in_memory=self.keep_in_memory, - ) - queries_ds = next(iter(queries_ds.values())) # get first split - queries_ds = queries_ds.cast_column("_id", Value("string")) - queries_ds = queries_ds.rename_column("_id", "id") - queries_ds = queries_ds.remove_columns( - [col for col in queries_ds.column_names if col not in ["id", "text"]] - ) - self.queries = queries_ds - - def _load_qrels(self, split): - if self.hf_repo: - qrels_ds = load_dataset( - self.hf_repo_qrels, - keep_in_memory=self.keep_in_memory, - streaming=self.streaming, - trust_remote_code=self.trust_remote_code, - )[split] - else: - qrels_ds = load_dataset( - "csv", - data_files=self.qrels_file, - delimiter="\t", - keep_in_memory=self.keep_in_memory, - ) - features = Features( - { - "query-id": Value("string"), - "corpus-id": Value("string"), - "score": Value("float"), - } - ) - qrels_ds = qrels_ds.cast(features) - self.qrels = qrels_ds - - class RetrievalDescriptiveStatistics(DescriptiveStatistics): """Descriptive statistics for Retrieval @@ -209,6 +30,9 @@ class 
RetrievalDescriptiveStatistics(DescriptiveStatistics): average_document_length: Average length of documents average_query_length: Average length of queries average_relevant_docs_per_query: Average number of relevant documents per query + average_instruction_length: Average length of instructions + num_instructions: Number of instructions + average_top_ranked_per_query: Average number of top ranked documents per query """ num_samples: int @@ -218,6 +42,11 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): average_document_length: float average_query_length: float average_relevant_docs_per_query: float + # these are for datasets with instructions + average_instruction_length: float + num_instructions: int + # this is for datasets that do reranking + average_top_ranked_per_query: float class AbsTaskRetrieval(AbsTask): @@ -237,24 +66,38 @@ class AbsTaskRetrieval(AbsTask): self.relevant_docs: dict[str, dict[str, dict[str, int]]] Semantically, it should contain dict[split_name, dict[sample_id, dict[doc_id, score]]] E.g.: {"test": {"q1": {"document_one": 1}}} + + Child classes may optionally implement the following properties (top_ranked for reranking and instructions if needed): + + self.top_ranked: dict[str, dict[str, list[str]]] or dict[str, dict[str, dict[str, float]]] + Semantically, it should contain dict[split_name, dict[sample_id, list[doc_id]]] or dict[split_name, dict[sample_id, dict[doc_id, score]]] + E.g.: {"test": {"q1": ["document_one", "document_two"]}} or {"test": {"q1": {"document_one": 1, "document_two": 0.5}}} + + self.instructions: dict[str, dict[str, str]] or dict[str, dict[str, list[str]]] + Semantically, it should contain dict[split_name, dict[sample_id, str]]. If there are multiple instructions per query, please duplicate the queries and give them unique ids for consolidation. + E.g. {"test": {"query-id1": "instruction text"}} """ ignore_identical_ids: bool = False abstask_prompt = "Retrieve text based on user query." 
def __init__(self, **kwargs): - super().__init__(**kwargs) + self.top_ranked = None + self.instructions = None + # there could be multiple options, so do this even if multilingual + super(AbsTaskRetrieval, self).__init__(**kwargs) # noqa def load_data(self, **kwargs): if self.data_loaded: return self.corpus, self.queries, self.relevant_docs = {}, {}, {} + self.instructions, self.top_ranked = None, None dataset_path = self.metadata_dict["dataset"]["path"] hf_repo_qrels = ( dataset_path + "-qrels" if "clarin-knext" in dataset_path else None ) for split in kwargs.get("eval_splits", self.metadata_dict["eval_splits"]): - corpus, queries, qrels = HFDataLoader( + corpus, queries, qrels, instructions, top_ranked = HFDataLoader( hf_repo=dataset_path, hf_repo_qrels=hf_repo_qrels, streaming=False, @@ -274,6 +117,18 @@ def load_data(self, **kwargs): qrels, ) + # optional args + if instructions: + self.instructions = { + split: { + inst["query-id"]: inst["instruction"] for inst in instructions + } + } + if top_ranked: + self.top_ranked = { + split: {tr["query-id"]: tr["corpus-ids"] for tr in top_ranked} + } + self.data_loaded = True def evaluate( @@ -297,18 +152,27 @@ def evaluate( for hf_subset in hf_subsets: logger.info(f"Subset: {hf_subset}") - if hf_subset == "default": + if hf_subset == "default" and "default" not in self.corpus: corpus, queries, relevant_docs = ( self.corpus[split], self.queries[split], self.relevant_docs[split], ) + if self.top_ranked is not None: + kwargs["top_ranked"] = self.top_ranked[split] + if self.instructions is not None: + kwargs["instructions"] = self.instructions[split] else: corpus, queries, relevant_docs = ( self.corpus[hf_subset][split], self.queries[hf_subset][split], self.relevant_docs[hf_subset][split], ) + if self.top_ranked is not None: + kwargs["top_ranked"] = self.top_ranked[hf_subset][split] + if self.instructions is not None: + kwargs["instructions"] = self.instructions[hf_subset][split] + scores[hf_subset] = self._evaluate_subset( 
retriever, corpus, queries, relevant_docs, hf_subset, **kwargs ) @@ -317,14 +181,20 @@ def evaluate( def _evaluate_subset( self, retriever, corpus, queries, relevant_docs, hf_subset: str, **kwargs ) -> ScoresDict: - start_time = time() - results = retriever(corpus, queries) - end_time = time() - logger.info(f"Time taken to retrieve: {end_time - start_time:.2f} seconds") + if "results" in kwargs: + # reranking has already been done + results = kwargs["results"] + else: + # perform the retrieval here + start_time = time() + results = retriever(corpus, queries, **kwargs) + end_time = time() + logger.info(f"Time taken to retrieve: {end_time - start_time:.2f} seconds") save_predictions = kwargs.get("save_predictions", False) export_errors = kwargs.get("export_errors", False) - if save_predictions or export_errors: + save_qrels = kwargs.get("save_qrels", False) + if save_predictions or export_errors or save_qrels: output_folder = Path(kwargs.get("output_folder", "results")) if not os.path.isdir(output_folder): os.makedirs(output_folder) @@ -348,30 +218,26 @@ def _evaluate_subset( with open(qrels_save_path, "w") as f: json.dump(results, f) - ndcg, _map, recall, precision, naucs = retriever.evaluate( + if save_qrels: + with open( + output_folder / f"{self.metadata.name}_{hf_subset}_qrels.json", "w" + ) as f: + json.dump(relevant_docs, f) + + ndcg, _map, recall, precision, naucs, task_scores = retriever.evaluate( relevant_docs, results, retriever.k_values, ignore_identical_ids=self.ignore_identical_ids, + task_name=self.metadata.name, ) + mrr, naucs_mrr = retriever.evaluate_custom( relevant_docs, results, retriever.k_values, "mrr" ) - scores = { - **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()}, - **{f"map_at_{k.split('@')[1]}": v for (k, v) in _map.items()}, - **{f"recall_at_{k.split('@')[1]}": v for (k, v) in recall.items()}, - **{f"precision_at_{k.split('@')[1]}": v for (k, v) in precision.items()}, - **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in 
mrr.items()}, - **{ - k.replace("@", "_at_").replace("_P", "_precision").lower(): v - for k, v in naucs.items() - }, - **{ - k.replace("@", "_at_").replace("_P", "_precision").lower(): v - for k, v in naucs_mrr.items() - }, - } + scores = make_score_dict( + ndcg, _map, recall, precision, mrr, naucs, naucs_mrr, task_scores + ) self._add_main_score(scores) if export_errors: @@ -413,24 +279,40 @@ def _add_main_score(self, scores: ScoresDict) -> None: def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> RetrievalDescriptiveStatistics: - if hf_subset: + if hf_subset and hf_subset in self.queries: queries = self.queries[hf_subset][split] corpus = self.corpus[hf_subset][split] relevant_docs = self.relevant_docs[hf_subset][split] + if self.instructions is not None: + instructions = self.instructions[hf_subset][split] + if self.top_ranked is not None: + top_ranked = self.top_ranked[hf_subset][split] elif compute_overall: queries = {} corpus = {} relevant_docs = {} + instructions = {} + top_ranked = {} for hf_subset in self.metadata.eval_langs: queries.update(process_docs(self.queries, hf_subset, split)) corpus.update(process_docs(self.corpus, hf_subset, split)) relevant_docs.update( process_relevant_docs(self.relevant_docs, hf_subset, split) ) + if self.instructions is not None: + instructions.update( + process_docs(self.instructions, hf_subset, split) + ) + if self.top_ranked is not None: + top_ranked.update(process_docs(self.top_ranked, hf_subset, split)) else: queries = self.queries[split] corpus = self.corpus[split] relevant_docs = self.relevant_docs[split] + if self.instructions is not None: + instructions = self.instructions[split] + if self.top_ranked is not None: + top_ranked = self.top_ranked[split] query_len, doc_len = calculate_length(queries, corpus) num_documents = len(corpus) @@ -442,6 +324,25 @@ def _calculate_metrics_from_split( for docs in relevant_docs.values() ) qrels_per_doc = 
num_qrels_non_zero / len(relevant_docs) if num_queries else 0 + + if self.instructions is not None: + total_instructions_len = sum( + [len(instruction) for instruction in instructions.values()] + ) + num_instructions = len(instructions) + else: + total_instructions_len = 0 + num_instructions = 0 + + if self.top_ranked is not None: + top_ranked_per_query = ( + sum(len(docs) for docs in top_ranked.values()) / num_queries + if num_queries + else 0 + ) + else: + top_ranked_per_query = 0 + return RetrievalDescriptiveStatistics( number_of_characters=query_len + doc_len, num_samples=num_documents + num_queries, @@ -450,6 +351,11 @@ def _calculate_metrics_from_split( average_document_length=doc_len / num_documents, average_query_length=query_len / num_queries, average_relevant_docs_per_query=qrels_per_doc, + average_instruction_length=total_instructions_len / num_instructions + if num_instructions + else 0, + num_instructions=num_instructions, + average_top_ranked_per_query=top_ranked_per_query, ) @@ -465,7 +371,10 @@ def calculate_length( queries_lens.extend([len(turn) for turn in query]) for doc in corpus.values(): - doc_lens.append(len(doc)) + if isinstance(doc, dict): + doc_lens.append(len(doc["text"])) + else: + doc_lens.append(len(doc)) doc_len = sum(doc_lens) / len(doc_lens) if doc_lens else 0 query_len = sum(queries_lens) / len(queries_lens) if queries_lens else 0 diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 07c4f97a04..d1fd1fd4df 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -94,6 +94,7 @@ "STS", "Summarization", "InstructionRetrieval", + "InstructionReranking", "Speed", ] diff --git a/mteb/abstasks/__init__.py b/mteb/abstasks/__init__.py index ef3e8853d7..086866b997 100644 --- a/mteb/abstasks/__init__.py +++ b/mteb/abstasks/__init__.py @@ -5,7 +5,6 @@ from .AbsTaskBitextMining import * from .AbsTaskClassification import * from .AbsTaskClustering import * -from .AbsTaskInstructionRetrieval 
import * from .AbsTaskMultilabelClassification import * from .AbsTaskPairClassification import * from .AbsTaskReranking import * diff --git a/mteb/abstasks/dataloaders.py b/mteb/abstasks/dataloaders.py new file mode 100644 index 0000000000..0b6505c5ee --- /dev/null +++ b/mteb/abstasks/dataloaders.py @@ -0,0 +1,324 @@ +from __future__ import annotations + +import logging +import os +from collections import defaultdict + +from datasets import Features, Sequence, Value, get_dataset_config_names, load_dataset + +logger = logging.getLogger(__name__) + + +# Adapted from https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/datasets/data_loader_hf.py#L10 +class HFDataLoader: + """This dataloader handles the dataloading for retrieval-oriented tasks, including standard retrieval, reranking, and instruction-based variants of the above. + + If the `hf_repo` is provided, the dataloader will fetch the data from the HuggingFace hub. Otherwise, it will look for the data in the specified `data_folder`. + + Required files include the corpus, queries, and qrels files. Optionally, the dataloader can also load instructions and top-ranked (for reranking) files. 
+ """ + + def __init__( + self, + hf_repo: str | None = None, + hf_repo_qrels: str | None = None, + data_folder: str | None = None, + prefix: str | None = None, + corpus_file: str = "corpus.jsonl", + query_file: str = "queries.jsonl", + qrels_folder: str = "qrels", + qrels_file: str = "", + streaming: bool = False, + keep_in_memory: bool = False, + trust_remote_code: bool = False, + ): + self.corpus = {} + self.queries = {} + self.qrels = {} + self.instructions = {} + self.top_ranked = {} + self.hf_repo = hf_repo + if hf_repo: + # By default fetch qrels from same repo not a second repo with "-qrels" like in original + self.hf_repo_qrels = hf_repo_qrels if hf_repo_qrels else hf_repo + else: + # data folder would contain these files: + # (1) fiqa/corpus.jsonl (format: jsonlines) + # (2) fiqa/queries.jsonl (format: jsonlines) + # (3) fiqa/qrels/test.tsv (format: tsv ("\t")) + if prefix: + query_file = prefix + "-" + query_file + qrels_folder = prefix + "-" + qrels_folder + + self.corpus_file = ( + os.path.join(data_folder, corpus_file) if data_folder else corpus_file + ) + self.query_file = ( + os.path.join(data_folder, query_file) if data_folder else query_file + ) + self.qrels_folder = ( + os.path.join(data_folder, qrels_folder) if data_folder else None + ) + self.qrels_file = qrels_file + self.top_ranked_file = ( + os.path.join(data_folder, "top_ranked.jsonl") + if data_folder + else "top_ranked.jsonl" + ) + self.top_ranked_file = ( + None + if not os.path.exists(self.top_ranked_file) + else self.top_ranked_file + ) + self.instructions_file = ( + os.path.join(data_folder, "instructions.jsonl") + if data_folder + else "instructions.jsonl" + ) + self.instructions_file = ( + None + if not os.path.exists(self.instructions_file) + else self.instructions_file + ) + self.streaming = streaming + self.keep_in_memory = keep_in_memory + self.trust_remote_code = trust_remote_code + + @staticmethod + def check(fIn: str, ext: str): + if not os.path.exists(fIn): + raise 
ValueError(f"File {fIn} not present! Please provide accurate file.") + + if not fIn.endswith(ext): + raise ValueError(f"File {fIn} must be present with extension {ext}") + + def load( + self, split: str = "test" + ) -> tuple[ + dict[str, dict[str, str]], # corpus + dict[str, str | list[str]], # queries + dict[str, dict[str, int]], # qrels/relevant_docs + dict[str, str | list[str]] | None, # instructions (optional) + dict[str, list[str]] + | dict[str, dict[str, float]] + | None, # top_ranked (optional) + ]: + if not self.hf_repo: + self.qrels_file = os.path.join(self.qrels_folder, split + ".tsv") + self.check(fIn=self.corpus_file, ext="jsonl") + self.check(fIn=self.query_file, ext="jsonl") + self.check(fIn=self.qrels_file, ext="tsv") + if self.top_ranked_file: + self.check(fIn=self.top_ranked_file, ext="jsonl") + if self.instructions_file: + self.check(fIn=self.instructions_file, ext="jsonl") + configs = [] + else: + configs = get_dataset_config_names(self.hf_repo) + + if not len(self.corpus): + logger.info("Loading Corpus...") + self._load_corpus() + logger.info("Loaded %d %s Documents.", len(self.corpus), split.upper()) + logger.info("Doc Example: %s", self.corpus[0]) + + if not len(self.queries): + logger.info("Loading Queries...") + self._load_queries() + + if "top_ranked" in configs or (not self.hf_repo and self.top_ranked_file): + logger.info("Loading Top Ranked") + self._load_top_ranked() + logger.info( + f"Top ranked loaded: {len(self.top_ranked) if self.top_ranked else 0}" + ) + else: + self.top_ranked = None + + if "instruction" in configs or (not self.hf_repo and self.instructions_file): + logger.info("Loading Instructions") + self._load_instructions() + logger.info( + f"Instructions loaded: {len(self.instructions) if self.instructions else 0}" + ) + else: + self.instructions = None + + self._load_qrels(split) + # filter queries with no qrels + qrels_dict = defaultdict(dict) + + def qrels_dict_init(row): + qrels_dict[row["query-id"]][row["corpus-id"]] = 
int(row["score"]) + + self.qrels.map(qrels_dict_init) + self.qrels = qrels_dict + self.queries = self.queries.filter(lambda x: x["id"] in self.qrels) + logger.info("Loaded %d %s Queries.", len(self.queries), split.upper()) + logger.info("Query Example: %s", self.queries[0]) + + return self.corpus, self.queries, self.qrels, self.instructions, self.top_ranked + + def load_corpus(self) -> dict[str, dict[str, str]]: + if not self.hf_repo: + self.check(fIn=self.corpus_file, ext="jsonl") + + if not len(self.corpus): + logger.info("Loading Corpus...") + self._load_corpus() + logger.info("Loaded %d %s Documents.", len(self.corpus)) + logger.info("Doc Example: %s", self.corpus[0]) + + return self.corpus + + def _load_corpus(self): + if self.hf_repo: + corpus_ds = load_dataset( + self.hf_repo, + "corpus", + keep_in_memory=self.keep_in_memory, + streaming=self.streaming, + trust_remote_code=self.trust_remote_code, + ) + else: + corpus_ds = load_dataset( + "json", + data_files=self.corpus_file, + streaming=self.streaming, + keep_in_memory=self.keep_in_memory, + trust_remote_code=self.trust_remote_code, + ) + corpus_ds = next(iter(corpus_ds.values())) # get first split + corpus_ds = corpus_ds.cast_column("_id", Value("string")) + corpus_ds = corpus_ds.rename_column("_id", "id") + corpus_ds = corpus_ds.remove_columns( + [ + col + for col in corpus_ds.column_names + if col not in ["id", "text", "title"] + ] + ) + self.corpus = corpus_ds + + def _load_queries(self): + if self.hf_repo: + queries_ds = load_dataset( + self.hf_repo, + "queries", + keep_in_memory=self.keep_in_memory, + streaming=self.streaming, + trust_remote_code=self.trust_remote_code, + ) + else: + queries_ds = load_dataset( + "json", + data_files=self.query_file, + streaming=self.streaming, + keep_in_memory=self.keep_in_memory, + ) + queries_ds = next(iter(queries_ds.values())) # get first split + queries_ds = queries_ds.cast_column("_id", Value("string")) + queries_ds = queries_ds.rename_column("_id", "id") + 
queries_ds = queries_ds.remove_columns( + [col for col in queries_ds.column_names if col not in ["id", "text"]] + ) + self.queries = queries_ds + + def _load_qrels(self, split): + if self.hf_repo: + qrels_ds = load_dataset( + self.hf_repo_qrels, + keep_in_memory=self.keep_in_memory, + streaming=self.streaming, + trust_remote_code=self.trust_remote_code, + )[split] + else: + qrels_ds = load_dataset( + "csv", + data_files=self.qrels_file, + delimiter="\t", + keep_in_memory=self.keep_in_memory, + ) + features = Features( + { + "query-id": Value("string"), + "corpus-id": Value("string"), + "score": Value("float"), + } + ) + qrels_ds = qrels_ds.cast(features) + self.qrels = qrels_ds + + def _load_top_ranked(self): + if self.hf_repo: + top_ranked_ds = load_dataset( + self.hf_repo, + "top_ranked", + keep_in_memory=self.keep_in_memory, + streaming=self.streaming, + trust_remote_code=self.trust_remote_code, + ) + else: + top_ranked_ds = load_dataset( + "json", + data_files=self.top_ranked_file, + streaming=self.streaming, + keep_in_memory=self.keep_in_memory, + ) + top_ranked_ds = next(iter(top_ranked_ds.values())) # get first split + if ( + "query-id" in top_ranked_ds.column_names + and "corpus-ids" in top_ranked_ds.column_names + ): + # is a {query-id: str, corpus-ids: list[str]} format + top_ranked_ds = top_ranked_ds.cast_column("query-id", Value("string")) + top_ranked_ds = top_ranked_ds.cast_column( + "corpus-ids", Sequence(Value("string")) + ) + else: + # is a {"query-id": {"corpus-id": score}} format, let's change it + top_ranked_ds = top_ranked_ds.map( + lambda x: {"query-id": x["query-id"], "corpus-ids": list(x.keys())}, + remove_columns=[ + col for col in top_ranked_ds.column_names if col != "query-id" + ], + ) + + top_ranked_ds = top_ranked_ds.remove_columns( + [ + col + for col in top_ranked_ds.column_names + if col not in ["query-id", "corpus-ids"] + ] + ) + self.top_ranked = top_ranked_ds + + def _load_instructions(self): + if self.hf_repo: + instructions_ds = 
load_dataset( + self.hf_repo, + "instruction", + keep_in_memory=self.keep_in_memory, + streaming=self.streaming, + trust_remote_code=self.metadata_dict["dataset"].get( + "trust_remote_code", False + ), + ) + else: + instructions_ds = load_dataset( + "json", + data_files=self.instructions_file, + streaming=self.streaming, + keep_in_memory=self.keep_in_memory, + ) + instructions_ds = next(iter(instructions_ds.values())) + instructions_ds = instructions_ds.cast_column("query-id", Value("string")) + instructions_ds = instructions_ds.cast_column("instruction", Value("string")) + instructions_ds = instructions_ds.remove_columns( + [ + col + for col in instructions_ds.column_names + if col not in ["query-id", "instruction"] + ] + ) + self.instructions = instructions_ds diff --git a/mteb/descriptive_stats/Retrieval/NFCorpus.json b/mteb/descriptive_stats/Retrieval/NFCorpus.json new file mode 100644 index 0000000000..edaaf460c7 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NFCorpus.json @@ -0,0 +1,14 @@ +{ + "test": { + "number_of_characters": 1612.5486310130989, + "num_samples": 3956, + "num_queries": 323, + "num_documents": 3633, + "average_document_length": 0.43787060972495073, + "average_query_length": 0.06738299034784193, + "average_relevant_docs_per_query": 38.18575851393189, + "average_instruction_length": 0, + "num_instructions": 0, + "average_top_ranked_per_query": 0 + } +} \ No newline at end of file diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index 05a3c02ba4..64d5e9071b 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -20,7 +20,7 @@ from mteb.models import model_meta_from_sentence_transformers from ..abstasks import * -from ..abstasks import AbsTask +from ..abstasks import AbsTask, AbsTaskReranking from ..load_results.task_results import TaskResult from ..models.sentence_transformer_wrapper import SentenceTransformerWrapper from ..models.wrapper import Wrapper @@ -222,13 +222,19 @@ def print_selected_tasks(self): def 
select_tasks(self, **kwargs): """Select the tasks to be evaluated.""" # Get all existing tasks - tasks_categories_cls = list(AbsTask.__subclasses__()) - self.tasks_cls = [ - cls(hf_subsets=self._task_langs, **kwargs) - for cat_cls in tasks_categories_cls - for cls in cat_cls.__subclasses__() - if cat_cls.__name__.startswith("AbsTask") - ] + # reranking subclasses retrieval to share methods, but is an abstract task + tasks_categories_cls = list(AbsTask.__subclasses__()) + [AbsTaskReranking] + all_task_classes = [] + for cat_cls in tasks_categories_cls: + for cls in cat_cls.__subclasses__(): + if ( + cat_cls.__name__.startswith("AbsTask") + and cls.__name__ != "AbsTaskReranking" + ): + task = cls(hf_subsets=self._task_langs, **kwargs) + all_task_classes.append(task) + + self.tasks_cls = all_task_classes # If `task_list` is specified, select list of tasks if self._tasks is not None: diff --git a/mteb/evaluation/evaluators/InstructionRetrievalEvaluator.py b/mteb/evaluation/evaluators/InstructionRetrievalEvaluator.py deleted file mode 100644 index f17dad9872..0000000000 --- a/mteb/evaluation/evaluators/InstructionRetrievalEvaluator.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import annotations - -import logging - -from .RetrievalEvaluator import ( - RetrievalEvaluator, -) - -logger = logging.getLogger(__name__) - - -class InstructionRetrievalEvaluator(RetrievalEvaluator): - # only added to extend the RetrievalEvaluator to pass along the instructions - def __call__( - self, - corpus: dict[str, dict[str, str]], - queries: dict[str, str], - instructions: dict[str, str], - qid: str | None = None, - **kwargs, - ) -> dict[str, dict[str, float]]: - if not self.retriever: - raise ValueError("Model/Technique has not been provided!") - - if self.is_cross_encoder: - return self.retriever.search_cross_encoder( - corpus, queries, self.top_k, instructions=instructions, **kwargs - ) - elif ( - hasattr(self.retriever.model, "mteb_model_meta") - and 
self.retriever.model.mteb_model_meta.name == "bm25s" - ): - return self.retriever.model.search( - corpus, - queries, - self.top_k, - self.score_function, - task_name=self.task_name, # type: ignore - instructions=instructions, - **kwargs, - ) - else: - return self.retriever.search( - corpus, - queries, - self.top_k, - self.score_function, - instructions=instructions, - request_qid=qid, - task_name=self.task_name, - **kwargs, - ) diff --git a/mteb/evaluation/evaluators/RerankingEvaluator.py b/mteb/evaluation/evaluators/RerankingEvaluator.py deleted file mode 100644 index 62d741ee0c..0000000000 --- a/mteb/evaluation/evaluators/RerankingEvaluator.py +++ /dev/null @@ -1,563 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any - -import numpy as np -import torch -import tqdm -from sklearn.metrics import average_precision_score - -from mteb.evaluation.evaluators.RetrievalEvaluator import RetrievalEvaluator - -from ...encoder_interface import Encoder, PromptType -from .Evaluator import Evaluator -from .utils import confidence_scores, cos_sim, nAUC - -logger = logging.getLogger(__name__) - - -class RerankingEvaluator(Evaluator): - """This class evaluates a SentenceTransformer model for the task of re-ranking. - Given a query and a list of documents, it computes the score [query, doc_i] for all possible - documents and sorts them in decreasing order. Then, MRR@10 and MAP is compute to measure the quality of the ranking. - :param samples: Must be a list and each element is of the form: - - {'query': '', 'positive': [], 'negative': []}. Query is the search query, positive is a list of positive - (relevant) documents, negative is a list of negative (irrelevant) documents. - - {'query': [], 'positive': [], 'negative': []}. Where query is a list of strings, which embeddings we average - to get the query embedding. 
- """ - - def __init__( - self, - samples, - task_name: str | None = None, - mrr_at_k: int = 10, - name: str = "", - similarity_fct=cos_sim, - encode_kwargs: dict[str, Any] = {}, - use_batched_encoding: bool = True, - limit: int | None = None, - k_values: list[int] = [1, 3, 5, 10, 20, 100, 1000], - evaluator_type: str = "standard", - **kwargs, - ): - super().__init__(**kwargs) - if limit: - samples = samples.train_test_split(limit)["test"] - self.samples = samples - self.name = name - self.mrr_at_k = mrr_at_k - self.similarity_fct = similarity_fct - self.use_batched_encoding = use_batched_encoding - self.task_name = task_name - self.k_values = k_values - self.evaluator_type = evaluator_type - self.encode_kwargs = encode_kwargs - - if "batch_size" not in self.encode_kwargs: - self.encode_kwargs["batch_size"] = 512 - - if isinstance(self.samples, dict): - self.samples = list(self.samples.values()) - - ### Remove sample with empty positive / negative set - self.samples = [ - sample - for sample in self.samples - if len(sample["positive"]) > 0 and len(sample["negative"]) > 0 - ] - - def __call__(self, model: Encoder): - scores = self.compute_metrics(model) - return scores - - def compute_metrics(self, model: Encoder): - return ( - self.compute_metrics_batched(model) - if self.use_batched_encoding - else self.compute_metrics_individual(model) - ) - - def compute_metrics_batched(self, model: Encoder): - """Computes the metrices in a batched way, by batching all queries and - all documents together - """ - logger.info("Encoding queries...") - if isinstance(self.samples[0]["query"], str): - all_query_embs = np.asarray( - model.encode( - [sample["query"] for sample in self.samples], - task_name=self.task_name, - prompt_type=PromptType.query, - **self.encode_kwargs, - ) - ) - elif isinstance(self.samples[0]["query"], list): - # In case the query is a list of strings, we get the most similar embedding to any of the queries - all_query_flattened = [ - q for sample in 
self.samples for q in sample["query"] - ] - all_query_embs = self._encode_unique_texts( - all_query_flattened, - model, - task_name=self.task_name, - prompt_type=PromptType.query, - **self.encode_kwargs, - ) - else: - raise ValueError( - f"Query must be a string or a list of strings but is {type(self.samples[0]['query'])}" - ) - - if self.evaluator_type == "standard": - results = self._encode_candidates( - model=model, - batched=True, - all_query_embs=all_query_embs, - ) - elif self.evaluator_type == "miracl": - results = self._encode_candidates_miracl( - model=model, - batched=True, - all_query_embs=all_query_embs, - ) - return results - - def compute_metrics_individual(self, model: Encoder): - """Embeds every (query, positive, negative) tuple individually. - Is slower than the batched version, but saves memory as only the - embeddings for one tuple are needed. Useful when you have - a really large test set - """ - if self.evaluator_type == "standard": - results = self._encode_candidates( - model=model, - batched=False, - ) - elif self.evaluator_type == "miracl": - results = self._encode_candidates_miracl( - model=model, - batched=False, - ) - return results - - def _encode_candidates(self, model: Encoder, batched: bool, all_query_embs=None): - all_mrr_scores = [] - all_ap_scores = [] - all_conf_scores = [] - logger.info("Encoding candidates...") - if batched: - self._encode_candidates_batched( - model=model, - all_query_embs=all_query_embs, - all_mrr_scores=all_mrr_scores, - all_ap_scores=all_ap_scores, - all_conf_scores=all_conf_scores, - ) - else: - self._encode_candidates_individual( - model=model, - all_mrr_scores=all_mrr_scores, - all_ap_scores=all_ap_scores, - all_conf_scores=all_conf_scores, - ) - scores = self._collect_results(all_mrr_scores, all_ap_scores, all_conf_scores) - return scores - - def _encode_candidates_batched( - self, - all_query_embs, - model: Encoder, - all_mrr_scores, - all_ap_scores, - all_conf_scores, - ): - all_docs = [] - for sample 
in self.samples: - all_docs.extend(sample["positive"]) - all_docs.extend(sample["negative"]) - - all_docs_embs = self._encode_unique_texts( - all_docs, - model, - task_name=self.task_name, - prompt_type=PromptType.passage, - **self.encode_kwargs, - ) - - # Compute scores and confidence scores - logger.info("Evaluating...") - query_idx, docs_idx = 0, 0 - for instance in self.samples: - num_subqueries = ( - len(instance["query"]) if isinstance(instance["query"], list) else 1 - ) - query_emb = all_query_embs[query_idx : query_idx + num_subqueries] - query_idx += num_subqueries - - num_pos = len(instance["positive"]) - num_neg = len(instance["negative"]) - docs_emb = all_docs_embs[docs_idx : docs_idx + num_pos + num_neg] - docs_idx += num_pos + num_neg - - if num_pos == 0 or num_neg == 0: - continue - is_relevant = [True] * num_pos + [False] * num_neg - self._apply_sim_scores( - query_emb, - docs_emb, - is_relevant, - all_mrr_scores, - all_ap_scores, - all_conf_scores, - ) - - def _encode_candidates_individual( - self, - model: Encoder, - all_mrr_scores, - all_ap_scores, - all_conf_scores, - ): - for instance in tqdm.tqdm(self.samples, desc="Samples"): - query = instance["query"] - positive = list(instance["positive"]) - negative = list(instance["negative"]) - - if len(positive) == 0 or len(negative) == 0: - continue - - docs = positive + negative - is_relevant = [True] * len(positive) + [False] * len(negative) - - if isinstance(query, str): - # .encoding interface requires list[str] as input - query = [query] - query_emb = np.asarray( - model.encode( - query, - task_name=self.task_name, - prompt_type=PromptType.query, - **self.encode_kwargs, - ) - ) - docs_emb = np.asarray( - model.encode( - docs, - task_name=self.task_name, - prompt_type=PromptType.passage, - **self.encode_kwargs, - ) - ) - self._apply_sim_scores( - query_emb, - docs_emb, - is_relevant, - all_mrr_scores, - all_ap_scores, - all_conf_scores, - ) - - def _collect_results(self, all_mrr_scores, 
all_ap_scores, all_conf_scores): - mean_ap = np.mean(all_ap_scores) - mean_mrr = np.mean(all_mrr_scores) - - # Compute nAUCs - naucs_map = self.nAUC_scores(all_conf_scores, all_ap_scores, "map") - naucs_mrr = self.nAUC_scores(all_conf_scores, all_mrr_scores, "mrr") - - return {**{"map": mean_ap, "mrr": mean_mrr}, **naucs_map, **naucs_mrr} - - def _encode_candidates_miracl( - self, - model: Encoder, - batched, - all_query_embs=None, - ): - if batched: - return self._encode_candidates_miracl_batched( - model=model, all_query_embs=all_query_embs - ) - else: - return self._encode_candidates_miracl_individual( - model=model, - ) - - def _encode_candidates_miracl_batched(self, all_query_embs, model: Encoder): - all_docs = [] - for sample in self.samples: - all_docs.extend(sample["candidates"]) - - all_docs_embs = np.asarray( - model.encode( - all_docs, - task_name=self.task_name, - prompt_type=PromptType.passage, - **self.encode_kwargs, - ) - ) - - # Compute scores - logger.info("Evaluating...") - query_idx, docs_idx = 0, 0 - results, qrels = {}, {} - for instance in self.samples: - num_subqueries = ( - len(instance["query"]) if isinstance(instance["query"], list) else 1 - ) - query_emb = all_query_embs[query_idx : query_idx + num_subqueries] - query_idx += num_subqueries - - positive = instance["positive"] - docs = instance["candidates"] - num_doc = len(docs) - docs_emb = all_docs_embs[docs_idx : docs_idx + num_doc] - docs_idx += num_doc - - fake_qid = str(query_idx) - results[fake_qid] = self.rerank(query_emb, docs_emb) - qrels[fake_qid] = { - str(i): 1 if doc in positive else 0 for i, doc in enumerate(docs) - } - - scores_miracl = self._collect_miracl_results(results, qrels) - return scores_miracl - - def _encode_candidates_miracl_individual(self, model: Encoder): - results, qrels = {}, {} - for i, instance in enumerate(tqdm.tqdm(self.samples, desc="Samples")): - query = instance["query"] - positive = set(instance["positive"]) - docs = list(instance["candidates"]) - - 
if isinstance(query, str): - # .encoding interface requires list[str] as input - query_emb = np.asarray( - model.encode( - [query], - task_name=self.task_name, - prompt_type=PromptType.query, - **self.encode_kwargs, - ) - ) - docs_emb = np.asarray( - model.encode( - docs, - task_name=self.task_name, - prompt_type=PromptType.passage, - **self.encode_kwargs, - ) - ) - - fake_qid = str(i) - results[fake_qid] = self.rerank(query_emb, docs_emb) - qrels[fake_qid] = { - str(i): 1 if doc in positive else 0 for i, doc in enumerate(docs) - } - - scores_miracl = self._collect_miracl_results(results, qrels) - return scores_miracl - - def _collect_miracl_results(self, results, qrels): - ndcg, _map, recall, precision, naucs = RetrievalEvaluator.evaluate( - qrels=qrels, - results=results, - k_values=self.k_values, - ignore_identical_ids=False, - ) - scores = {**ndcg, **_map, **recall, **precision, **naucs} - scores_miracl = {f"{k}(MIRACL)": v for k, v in scores.items()} - return scores_miracl - - def rerank( - self, query_emb: torch.Tensor, docs_emb: torch.Tensor - ) -> dict[str, float]: - """Rerank documents (docs_emb) given the query (query_emb) - - Args: - query_emb: Query embedding of shape `(num_queries, hidden_size)`) - if `num_queries` > 0: we take the closest document to any of the queries - docs_emb: Candidates documents embeddings of shape `(num_pos+num_neg, hidden_size)`) - - Returns: - similarity_scores: - """ - if not query_emb.shape[0]: - raise ValueError("Empty query embedding") - - if not docs_emb.shape[0]: - return {"empty-docid": 0} - - pred_scores = self.similarity_fct(query_emb, docs_emb) - if len(pred_scores.shape) > 1: - pred_scores = torch.amax(pred_scores, dim=0) - - return { - str(i): score.detach().numpy().item() for i, score in enumerate(pred_scores) - } - - def _apply_sim_scores( - self, - query_emb, - docs_emb, - is_relevant, - all_mrr_scores, - all_ap_scores, - all_conf_scores, - ): - sim_scores = self._compute_sim_scores_instance(query_emb, 
docs_emb) - scores = self._compute_metrics_instance(sim_scores, is_relevant) - conf_scores = self.conf_scores(sim_scores.tolist()) - - all_mrr_scores.append(scores["mrr"]) - all_ap_scores.append(scores["ap"]) - all_conf_scores.append(conf_scores) - - @staticmethod - def _encode_unique_texts( - all_texts: list[str], - model: Encoder, - task_name: str | None, - prompt_type: PromptType | None, - **encode_kwargs: Any, - ): - index_map, all_unique_texts, all_texts_indexes = {}, [], [] - for text in all_texts: - text_hash = hash(text) - if text_hash not in index_map: - index_map[text_hash] = len(all_unique_texts) - all_unique_texts.append(text) - all_texts_indexes.append(index_map[text_hash]) - logger.warning( - f"A total on {len(all_texts) - len(all_unique_texts)}/{len(all_texts)} duplicate texts were found during encoding. Only encoding unique text and duplicating embeddings across." - ) - all_unique_texts_embs = np.asarray( - model.encode( - all_unique_texts, - task_name=task_name, - prompt_type=prompt_type, - **encode_kwargs, - ) - ) - return all_unique_texts_embs[all_texts_indexes] - - def _compute_sim_scores_instance( - self, query_emb: torch.Tensor, docs_emb: torch.Tensor - ) -> torch.Tensor: - """Computes similarity scores for a single instance = (query, positives, negatives) - - Args: - query_emb: Query embedding, with shape `(num_queries, hidden_size)` - if `num_queries` > 0: we take the closest document to any of the queries - docs_emb: Candidates documents embeddings, with shape `(num_pos+num_neg, hidden_size)` - - Returns: - sim_scores: Query-documents similarity scores, with shape `(num_pos+num_neg,)` - """ - sim_scores = self.similarity_fct(query_emb, docs_emb) - if len(sim_scores.shape) > 1: - sim_scores = torch.amax(sim_scores, dim=0) - - return sim_scores - - def _compute_metrics_instance( - self, sim_scores: torch.Tensor, is_relevant: list[bool] - ) -> dict[str, float]: - """Computes metrics for a single instance = (query, positives, negatives) - - 
Args: - sim_scores: Query-documents similarity scores, with shape `(num_pos+num_neg,)` - is_relevant: True if the document is relevant, with length `num_pos+num_neg` - - Returns: - scores: - - `mrr`: Mean Reciprocal Rank @ `self.mrr_at_k` - - `ap`: Average Precision - """ - pred_scores_argsort = torch.argsort(-sim_scores) # Sort in decreasing order - mrr = self.mrr_at_k_score(is_relevant, pred_scores_argsort, self.mrr_at_k) - ap = self.ap_score(is_relevant, sim_scores.cpu().tolist()) - return {"mrr": mrr, "ap": ap} - - @staticmethod - def conf_scores(sim_scores: torch.Tensor) -> dict[str, float]: - """Computes confidence scores for a single instance = (query, positives, negatives) - - Args: - sim_scores: Query-documents similarity scores, with shape `(num_pos+num_neg,)` - - Returns: - conf_scores: - - `max`: Maximum similarity score - - `std`: Standard deviation of similarity scores - - `diff1`: Difference between highest and second highest similarity scores - """ - return confidence_scores(sim_scores) - - @staticmethod - def nAUC_scores( - all_conf_scores: list[dict[str, float]], - metrics: list[float], - metric_name: str, - ) -> dict[str, float]: - """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997 - - Args: - all_conf_scores: Confidence scores for all instances, with length `len(samples)` - metrics: Metric scores for all instances, with length `len(samples)` - metric_name: Name of the metric (mrr or ap) - - Returns: - naucs: nAUCs for each confidence function - """ - conf_fcts = list(all_conf_scores[0].keys()) - all_conf_scores = { - fct: np.array([x[fct] for x in all_conf_scores]) for fct in conf_fcts - } - metrics = np.array(metrics) - naucs = { - f"nAUC_{metric_name}_{fct}": nAUC(all_conf_scores[fct], metrics) - for fct in conf_fcts - } - return naucs - - @staticmethod - def mrr_at_k_score( - is_relevant: list[bool], pred_ranking: list[int], k: int - ) -> float: - """Computes 
MRR@k score - - Args: - is_relevant: True if the document is relevant - pred_ranking: Indices of the documents sorted in decreasing order - of the similarity score - k: Top-k documents to consider - - Returns: - The MRR@k score - """ - mrr_score = 0 - for rank, index in enumerate(pred_ranking[:k]): - if is_relevant[index]: - mrr_score = 1 / (rank + 1) - break - - return mrr_score - - @staticmethod - def ap_score(is_relevant, pred_scores): - """Computes AP score - - Args: - is_relevant (`list[bool]` of length `num_pos+num_neg`): True if the document is relevant - pred_scores (`list[float]` of length `num_pos+num_neg`): Predicted similarity scores - - Returns: - ap_score (`float`): AP score - """ - # preds = np.array(is_relevant)[pred_scores_argsort] - # precision_at_k = np.mean(preds[:k]) - # ap = np.mean([np.mean(preds[: k + 1]) for k in range(len(preds)) if preds[k]]) - ap = average_precision_score(is_relevant, pred_scores) - return ap diff --git a/mteb/evaluation/evaluators/RetrievalEvaluator.py b/mteb/evaluation/evaluators/RetrievalEvaluator.py index 4b2596c4d5..8dcac9ab00 100644 --- a/mteb/evaluation/evaluators/RetrievalEvaluator.py +++ b/mteb/evaluation/evaluators/RetrievalEvaluator.py @@ -1,32 +1,21 @@ from __future__ import annotations -import heapq -import json import logging -import os -from collections import defaultdict -from pathlib import Path from typing import Any -import numpy as np -import pytrec_eval -import torch -import tqdm -from sentence_transformers import CrossEncoder, SentenceTransformer - -from mteb.encoder_interface import Encoder, PromptType -from mteb.model_meta import ModelMeta +from mteb.evaluation.evaluators.model_classes import ( + DenseRetrievalExactSearch, + DRESModel, + is_cross_encoder_compatible, +) from .Evaluator import Evaluator from .utils import ( - confidence_scores, - convert_conv_history_to_query, - cos_sim, - dot_score, - download, + add_task_specific_scores, + calculate_retrieval_scores, + evaluate_abstention, hole, 
mrr, - nAUC, recall_cap, top_k_accuracy, ) @@ -34,404 +23,6 @@ logger = logging.getLogger(__name__) -def corpus_to_str( - corpus: list[dict[str, str]] | dict[str, list[str]] | list[str], -) -> list[str]: - if isinstance(corpus, dict): - sentences = [ - (corpus["title"][i] + " " + corpus["text"][i]).strip() - if "title" in corpus - else corpus["text"][i].strip() - for i in range(len(corpus["text"])) - ] - elif isinstance(corpus, list) and isinstance(corpus[0], dict): - sentences = [ - (doc["title"] + " " + doc["text"]).strip() - if "title" in doc - else doc["text"].strip() - for doc in corpus - ] - else: - sentences = corpus - return sentences - - -# Adapted from https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/retrieval/search/dense/exact_search.py#L12 -class DenseRetrievalExactSearch: - def __init__( - self, - model: Encoder, - encode_kwargs: dict[str, Any] = {}, - corpus_chunk_size: int = 50000, - previous_results: str | Path | None = None, - **kwargs: Any, - ): - # Model is class that provides encode_corpus() and encode_queries() - self.model = model - self.encode_kwargs = encode_kwargs - - if "batch_size" not in encode_kwargs: - encode_kwargs["batch_size"] = 128 - if "show_progress_bar" not in encode_kwargs: - encode_kwargs["show_progress_bar"] = True - if "convert_to_tensor" not in encode_kwargs: - encode_kwargs["convert_to_tensor"] = True - - self.score_functions = {"cos_sim": cos_sim, "dot": dot_score} - self.score_function_desc = { - "cos_sim": "Cosine Similarity", - "dot": "Dot Product", - } - self.corpus_chunk_size = corpus_chunk_size - if isinstance(previous_results, Path): - self.previous_results = str(previous_results) - else: - self.previous_results = previous_results - self.batch_size = encode_kwargs.get("batch_size") - self.show_progress_bar = encode_kwargs.get("show_progress_bar") - self.save_corpus_embeddings = kwargs.get("save_corpus_embeddings", False) - self.corpus_embeddings = defaultdict(list) - 
self.results = {} - - if self.previous_results is not None: - self.previous_results = self.load_results_file() - - if isinstance(self.model, CrossEncoder): - # load the predict instance from the CrossEncoder - # custom functions can be used by extending the DenseRetrievalExactSearch class - self.predict = self.model.predict - - def search( - self, - corpus: dict[str, dict[str, str]], - queries: dict[str, str | list[str]], - top_k: int, - score_function: str, - task_name: str, - instructions: dict[str, str] | None = None, - request_qid: str | None = None, - return_sorted: bool = False, - **kwargs, - ) -> dict[str, dict[str, float]]: - # Create embeddings for all queries using model.encode - # Runs semantic search against the corpus embeddings - # Returns a ranked list with the corpus ids - if score_function not in self.score_functions: - raise ValueError( - f"score function: {score_function} must be either (cos_sim) for cosine similarity or (dot) for dot product" - ) - - logger.info("Encoding Queries.") - query_ids = list(queries.keys()) - self.results = {qid: {} for qid in query_ids} - queries = [queries[qid] for qid in queries] # type: ignore - if instructions: - queries = [f"{query} {instructions[query]}".strip() for query in queries] - if isinstance(queries[0], list): # type: ignore - query_embeddings = self.encode_conversations( - model=self.model, - conversations=queries, # type: ignore - task_name=task_name, - **self.encode_kwargs, - ) - else: - query_embeddings = self.model.encode( - queries, # type: ignore - task_name=task_name, - prompt_type=PromptType.query, - **self.encode_kwargs, - ) - - logger.info("Sorting Corpus by document length (Longest first)...") - corpus_ids = sorted( - corpus, - reverse=True, - ) - corpus = [corpus[cid] for cid in corpus_ids] # type: ignore - - logger.info("Encoding Corpus in batches... 
Warning: This might take a while!") - logger.info( - f"Scoring Function: {self.score_function_desc[score_function]} ({score_function})" - ) - - itr = range(0, len(corpus), self.corpus_chunk_size) - - result_heaps = { - qid: [] for qid in query_ids - } # Keep only the top-k docs for each query - for batch_num, corpus_start_idx in enumerate(itr): - logger.info(f"Encoding Batch {batch_num + 1}/{len(itr)}...") - corpus_end_idx = min(corpus_start_idx + self.corpus_chunk_size, len(corpus)) - - # Encode chunk of corpus - if ( - self.save_corpus_embeddings - and request_qid - and len(self.corpus_embeddings[request_qid]) - ): - sub_corpus_embeddings = torch.tensor( - self.corpus_embeddings[request_qid][batch_num] - ) - else: - # Encode chunk of corpus - sub_corpus_embeddings = self.model.encode( - corpus[corpus_start_idx:corpus_end_idx], # type: ignore - task_name=task_name, - prompt_type=PromptType.passage, - request_qid=request_qid, - **self.encode_kwargs, - ) - if self.save_corpus_embeddings and request_qid: - self.corpus_embeddings[request_qid].append(sub_corpus_embeddings) - - # Compute similarites using either cosine-similarity or dot product - cos_scores = self.score_functions[score_function]( - query_embeddings, sub_corpus_embeddings - ) - is_nan = torch.isnan(cos_scores) - if is_nan.sum() > 0: - logger.warning( - f"Found {is_nan.sum()} NaN values in the similarity scores. Replacing NaN values with -1." 
- ) - cos_scores[is_nan] = -1 - - # Get top-k values - cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk( - cos_scores, - min( - top_k + 1, - len(cos_scores[1]) if len(cos_scores) > 1 else len(cos_scores[-1]), - ), - dim=1, - largest=True, - sorted=return_sorted, - ) - cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist() - cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist() - - for query_itr in range(len(query_embeddings)): - query_id = query_ids[query_itr] - for sub_corpus_id, score in zip( - cos_scores_top_k_idx[query_itr], cos_scores_top_k_values[query_itr] - ): - corpus_id = corpus_ids[corpus_start_idx + sub_corpus_id] - if len(result_heaps[query_id]) < top_k: - # Push item on the heap - heapq.heappush(result_heaps[query_id], (score, corpus_id)) - else: - # If item is larger than the smallest in the heap, push it on the heap then pop the smallest element - heapq.heappushpop(result_heaps[query_id], (score, corpus_id)) - - for qid in result_heaps: - for score, corpus_id in result_heaps[qid]: - self.results[qid][corpus_id] = score - - return self.results - - def load_results_file(self): - # load the first stage results from file in format {qid: {doc_id: score}} - if "https://" in self.previous_results: - # download the file - if not os.path.exists(self.previous_results): - url_descriptor = self.previous_results.split("https://")[-1].replace( - "/", "--" - ) - dest_file = os.path.join( - "results", f"cached_predictions--{url_descriptor}" - ) - os.makedirs(os.path.dirname(os.path.abspath(dest_file)), exist_ok=True) - download(self.previous_results, dest_file) - logger.info( - f"Downloaded the previous results at {self.previous_results} to {dest_file}" - ) - self.previous_results = dest_file - - with open(self.previous_results) as f: - previous_results = json.load(f) - assert isinstance(previous_results, dict) - assert isinstance(previous_results[list(previous_results.keys())[0]], dict) - return previous_results - - def 
search_cross_encoder( - self, - corpus: dict[str, dict[str, str]], - queries: dict[str, str | list[str]], - top_k: int, - instructions: dict[str, str] | None = None, - **kwargs, - ) -> dict[str, dict[str, float]]: - """This function provides support for reranker (or cross-encoder) models that encoder query and document at the same time (typically with attention). - Some notable examples include MonoBERT, MonoT5, RankLlama, etc. - Note: you must provide the path to the results to rerank to the __init__ function as `previous_results` or else rerank all documents in the corpus - """ - pairs = [] # create the pairs for reranking - for qid in queries.keys(): - if self.previous_results is None: - # try to use all of them - logging.logging( - f"previous_results is None. Using all the documents to rerank: {len(corpus)}" - ) - q_results = {doc_id: 0.0 for doc_id in corpus.keys()} - else: - q_results = self.previous_results[qid] - # take the top-k only - q_results_sorted = dict( - sorted(q_results.items(), key=lambda item: item[1], reverse=True) - ) - top_n = [k for k, v in list(q_results_sorted.items())[:top_k]] - query = queries[qid] - query = ( - self.convert_conv_history_to_query(self.model, [query])[0] - if isinstance(query, list) - else query - ) - for doc_id in top_n: - pairs.append( - ( - query, - corpus[doc_id], - instructions[query] if instructions is not None else None, - qid, - doc_id, - ) - ) - - logger.info(f"Reranking the top {top_k} in batches... 
This might take a while!") - itr = range(0, len(pairs), self.batch_size) - - results = {qid: {} for qid in queries.keys()} - for batch_num, corpus_start_idx in enumerate( - tqdm.tqdm(itr, leave=False, disable=not self.show_progress_bar) - ): - corpus_end_idx = min(corpus_start_idx + self.batch_size, len(pairs)) - cur_batch = pairs[corpus_start_idx:corpus_end_idx] - - ( - queries_in_pair, - corpus_in_pair, - instructions_in_pair, - query_ids, - corpus_ids, - ) = zip(*cur_batch) - - assert ( - len(queries_in_pair) == len(corpus_in_pair) == len(instructions_in_pair) - ) - - if isinstance(self.model.model, CrossEncoder): - # can't take instructions, so add them here - queries_in_pair = [ - f"{q} {i}".strip() - for i, q in zip(instructions_in_pair, queries_in_pair) - ] - scores = self.model.predict(list(zip(queries_in_pair, corpus_in_pair))) # type: ignore - else: - # may use the instructions in a unique way, so give them also - scores = self.model.predict( # type: ignore - list(zip(queries_in_pair, corpus_in_pair, instructions_in_pair)) - ) - - for i, score in enumerate(scores): - results[query_ids[i]][corpus_ids[i]] = float(score) - - return results - - def predict(self, queries, passages, **kwargs): - raise NotImplementedError( - "You must implement a predict method for your reranker model" - ) - - def encode_conversations( - self, - model: Encoder, - conversations: list[list[str]], - task_name: str, - **kwargs, - ): - if callable(getattr(self.model, "encode_conversations", None)): - return model.encode_conversations( # type: ignore - conversations, task_name=task_name, **kwargs - ) - logger.warning( - "Model doesn't have encode_conversations fallback to default implementation" - ) - queries = self.convert_conv_history_to_query(model, conversations) # type: ignore - return model.encode( - queries, task_name=task_name, prompt_type=PromptType.query, **kwargs - ) # type: ignore - - @staticmethod - def convert_conv_history_to_query( - model: Encoder, conversations: 
list[list[str]] - ) -> str: - if callable(getattr(model, "convert_conv_history_to_query", None)): - return model.convert_conv_history_to_query(conversations) # type: ignore - return convert_conv_history_to_query(conversations) # type: ignore - - -class DRESModel: - """Dense Retrieval Exact Search (DRES). - This class converts a model with just an .encode method into DRES format. - """ - - mteb_model_meta: ModelMeta | None - - def __init__(self, model, **kwargs): - self.model = model - self.use_sbert_model = isinstance(model, SentenceTransformer) - self.save_corpus_embeddings = kwargs.get("save_corpus_embeddings", False) - self.corpus_embeddings = {} - - def encode_corpus( - self, - corpus: list[dict[str, str]], - task_name: str, - batch_size: int, - prompt_type: PromptType = PromptType.passage, - request_qid: str | None = None, - **kwargs, - ): - if ( - request_qid - and self.save_corpus_embeddings - and len(self.corpus_embeddings) > 0 - ): - return self.corpus_embeddings[request_qid] - - sentences = corpus_to_str(corpus) - corpus_embeddings = self.model.encode( - sentences, - task_name=task_name, - prompt_type=prompt_type, - batch_size=batch_size, - **kwargs, - ) - - if self.save_corpus_embeddings and request_qid: - self.corpus_embeddings[request_qid] = corpus_embeddings - return corpus_embeddings - - def encode( - self, - sentences: list[str], - task_name: str, - prompt_type: PromptType | None = None, - **kwargs, - ): - if prompt_type and prompt_type == PromptType.passage: - return self.encode_corpus( - sentences, task_name, prompt_type=prompt_type, **kwargs - ) - return self.model.encode( - sentences, task_name=task_name, prompt_type=prompt_type, **kwargs - ) - - -def is_cross_encoder_compatible(model) -> bool: - op = getattr(model.model, "predict", None) - return callable(op) - - # Adapted from https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/retrieval/evaluation.py#L9 class RetrievalEvaluator(Evaluator): def __init__( @@ 
-461,19 +52,36 @@ def __init__( self.top_k = ( max(k_values) if "top_k" not in kwargs else kwargs["top_k"] ) # can lower it if reranking - self.score_function = score_function + self.score_function = ( + retriever.mteb_model_meta.similarity_fn_name + if ( + hasattr(retriever, "mteb_model_meta") + and retriever.mteb_model_meta.similarity_fn_name + ) + else score_function + ) self.task_name = task_name def __call__( self, corpus: dict[str, dict[str, str]], - queries: dict[str, str | list[str]], + queries: dict[str, str], + instructions: dict[str, str] | None = None, + qid: str | None = None, + **kwargs, ) -> dict[str, dict[str, float]]: if not self.retriever: raise ValueError("Model/Technique has not been provided!") + # allow kwargs top-k to override the class top-k + if "top_k" in kwargs: + self.top_k = kwargs["top_k"] + del kwargs["top_k"] + if self.is_cross_encoder: - return self.retriever.search_cross_encoder(corpus, queries, self.top_k) + return self.retriever.search_cross_encoder( + corpus, queries, self.top_k, instructions=instructions, **kwargs + ) elif ( hasattr(self.retriever.model, "mteb_model_meta") and self.retriever.model.mteb_model_meta.name == "bm25s" @@ -484,6 +92,8 @@ def __call__( self.top_k, self.score_function, task_name=self.task_name, # type: ignore + instructions=instructions, + **kwargs, ) else: return self.retriever.search( @@ -491,7 +101,10 @@ def __call__( queries, self.top_k, self.score_function, - task_name=self.task_name, # type: ignore + instructions=instructions, + request_qid=qid, + task_name=self.task_name, + **kwargs, ) @staticmethod @@ -500,6 +113,7 @@ def evaluate( results: dict[str, dict[str, float]], k_values: list[int], ignore_identical_ids: bool = False, + task_name: str = None, ) -> tuple[ dict[str, float], dict[str, float], @@ -521,48 +135,14 @@ def evaluate( "For evaluation, we DO NOT ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=True`` to ignore this." 
) - all_ndcgs, all_aps, all_recalls, all_precisions = {}, {}, {}, {} - - for k in k_values: - all_ndcgs[f"NDCG@{k}"] = [] - all_aps[f"MAP@{k}"] = [] - all_recalls[f"Recall@{k}"] = [] - all_precisions[f"P@{k}"] = [] - - map_string = "map_cut." + ",".join([str(k) for k in k_values]) - ndcg_string = "ndcg_cut." + ",".join([str(k) for k in k_values]) - recall_string = "recall." + ",".join([str(k) for k in k_values]) - precision_string = "P." + ",".join([str(k) for k in k_values]) - evaluator = pytrec_eval.RelevanceEvaluator( - qrels, {map_string, ndcg_string, recall_string, precision_string} + all_scores, ndcg, _map, recall, precision, naucs = calculate_retrieval_scores( + results, qrels, k_values ) - scores = evaluator.evaluate(results) - - for query_id in scores.keys(): - for k in k_values: - all_ndcgs[f"NDCG@{k}"].append(scores[query_id]["ndcg_cut_" + str(k)]) - all_aps[f"MAP@{k}"].append(scores[query_id]["map_cut_" + str(k)]) - all_recalls[f"Recall@{k}"].append(scores[query_id]["recall_" + str(k)]) - all_precisions[f"P@{k}"].append(scores[query_id]["P_" + str(k)]) - - ndcg, _map, recall, precision = ( - all_ndcgs.copy(), - all_aps.copy(), - all_recalls.copy(), - all_precisions.copy(), + task_scores = add_task_specific_scores( + all_scores, qrels, results, task_name, k_values ) - for k in k_values: - ndcg[f"NDCG@{k}"] = round(sum(ndcg[f"NDCG@{k}"]) / len(scores), 5) - _map[f"MAP@{k}"] = round(sum(_map[f"MAP@{k}"]) / len(scores), 5) - recall[f"Recall@{k}"] = round(sum(recall[f"Recall@{k}"]) / len(scores), 5) - precision[f"P@{k}"] = round(sum(precision[f"P@{k}"]) / len(scores), 5) - - naucs = RetrievalEvaluator.evaluate_abstention( - results, {**all_ndcgs, **all_aps, **all_recalls, **all_precisions} - ) - - return ndcg, _map, recall, precision, naucs + return ndcg, _map, recall, precision, naucs, task_scores @staticmethod def evaluate_custom( @@ -590,30 +170,7 @@ def evaluate_custom( ]: metric_scores = top_k_accuracy(qrels, results, k_values, output_type) - naucs = 
RetrievalEvaluator.evaluate_abstention(results, metric_scores) + naucs = evaluate_abstention(results, metric_scores) metric_scores_avg = {k: sum(v) / len(v) for k, v in metric_scores.items()} return metric_scores_avg, naucs - - @staticmethod - def evaluate_abstention( - results: dict[str, dict[str, float]], - metric_scores: dict[str, list[float]], - ) -> dict[str, float]: - """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997""" - all_sim_scores = [list(results[qid].values()) for qid in list(results.keys())] - all_conf_scores = [ - confidence_scores(sim_scores) for sim_scores in all_sim_scores - ] - conf_fcts = list(all_conf_scores[0].keys()) - all_conf_scores = { - fct: np.array([x[fct] for x in all_conf_scores]) for fct in conf_fcts - } - metric_scores = {k: np.array(v) for k, v in metric_scores.items()} - naucs = {} - - for metric_name, scores in metric_scores.items(): - for fct, conf_scores in all_conf_scores.items(): - naucs[f"nAUC_{metric_name}_{fct}"] = nAUC(conf_scores, scores) - - return naucs diff --git a/mteb/evaluation/evaluators/__init__.py b/mteb/evaluation/evaluators/__init__.py index a1dc8faaa5..fc293a3448 100644 --- a/mteb/evaluation/evaluators/__init__.py +++ b/mteb/evaluation/evaluators/__init__.py @@ -4,7 +4,6 @@ from .ClassificationEvaluator import * from .ClusteringEvaluator import * from .PairClassificationEvaluator import * -from .RerankingEvaluator import * from .RetrievalEvaluator import * from .STSEvaluator import * from .SummarizationEvaluator import * diff --git a/mteb/evaluation/evaluators/model_classes.py b/mteb/evaluation/evaluators/model_classes.py new file mode 100644 index 0000000000..7c984b6348 --- /dev/null +++ b/mteb/evaluation/evaluators/model_classes.py @@ -0,0 +1,568 @@ +from __future__ import annotations + +import heapq +import json +import logging +import os +from pathlib import Path +from typing import Any + +import torch +import tqdm +from 
sentence_transformers import CrossEncoder, SentenceTransformer + +from mteb.encoder_interface import Encoder, PromptType +from mteb.model_meta import ModelMeta + +from .utils import convert_conv_history_to_query, cos_sim, dot_score, download + +logger = logging.getLogger(__name__) + + +def corpus_to_str( + corpus: list[dict[str, str]] | dict[str, list[str]] | list[str], +) -> list[str]: + if isinstance(corpus, dict): + sentences = [ + (corpus["title"][i] + " " + corpus["text"][i]).strip() + if "title" in corpus + else corpus["text"][i].strip() + for i in range(len(corpus["text"])) + ] + elif isinstance(corpus, list) and isinstance(corpus[0], dict): + sentences = [ + (doc["title"] + " " + doc["text"]).strip() + if "title" in doc + else doc["text"].strip() + for doc in corpus + ] + else: + sentences = corpus + return sentences + + +# Adapted from https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/retrieval/search/dense/exact_search.py#L12 +class DenseRetrievalExactSearch: + def __init__( + self, + model: Encoder, + encode_kwargs: dict[str, Any] = {}, + corpus_chunk_size: int = 50000, + previous_results: str | Path | None = None, + **kwargs: Any, + ): + # Model is class that provides encode_corpus() and encode_queries() + self.model = model + self.encode_kwargs = encode_kwargs + + if "batch_size" not in encode_kwargs: + encode_kwargs["batch_size"] = 128 + if "show_progress_bar" not in encode_kwargs: + encode_kwargs["show_progress_bar"] = True + + self.score_functions = {"cos_sim": cos_sim, "dot": dot_score, "cosine": cos_sim} + self.score_function_desc = { + "cos_sim": "Cosine Similarity", + "cosine": "Cosine Similarity", + "dot": "Dot Product", + } + self.corpus_chunk_size = corpus_chunk_size + if isinstance(previous_results, Path): + self.previous_results = str(previous_results) + else: + self.previous_results = previous_results + self.batch_size = encode_kwargs.get("batch_size") + self.show_progress_bar = 
encode_kwargs.get("show_progress_bar") + self.results = {} + + if self.previous_results is not None: + self.previous_results = self.load_results_file() + + if isinstance(self.model, CrossEncoder): + # load the predict instance from the CrossEncoder + # custom functions can be used by extending the DenseRetrievalExactSearch class + self.predict = self.model.predict + + def search( + self, + corpus: dict[str, dict[str, str]], + queries: dict[str, str], + top_k: int, + score_function: str, + task_name: str, + instructions: dict[str, str] | None = None, + request_qid: str | None = None, + return_sorted: bool = False, + top_ranked: dict[str, list[str]] | None = None, + **kwargs, + ) -> dict[str, dict[str, float]]: + """Perform semantic search (retrieval or reranking). + + Args: + corpus: Dictionary mapping corpus IDs to document dictionaries + queries: Dictionary mapping query IDs to query strings + top_k: Number of top results to return + score_function: Scoring function to use ('cos_sim' or 'dot') + task_name: Name of the task + instructions: Optional instructions to append to queries + request_qid: Optional request query ID + return_sorted: Whether to return results sorted + top_ranked: Optional dict mapping query IDs to lists of pre-ranked corpus IDs + **kwargs: Additional keyword arguments passed to the underlying model + """ + if score_function not in self.score_functions: + raise ValueError( + f"score function: {score_function} must be either (cos_sim) for cosine similarity or (dot) for dot product" + ) + + logger.info("Encoding Queries.") + query_ids = list(queries.keys()) + self.results = {qid: {} for qid in query_ids} + query_ids, queries = zip(*queries.items()) + + if instructions: + new_queries = [] + for q_idx, qid in enumerate(query_ids): + new_queries.append( + f"{queries[q_idx].strip()} {instructions[qid]}".strip() + ) + queries = new_queries + + # Create mapping of unique queries to their indices + unique_queries = [] + query_to_idx = {} + 
query_idx_mapping = [] + + for query in queries: + query_key = tuple(query) if isinstance(query, list) else query + if query_key not in query_to_idx: + query_to_idx[query_key] = len(unique_queries) + unique_queries.append(query) + query_idx_mapping.append(query_to_idx[query_key]) + + # Encode only unique queries + if isinstance(queries[0], list): + unique_query_embeddings = self.encode_conversations( + model=self.model, + conversations=unique_queries, + task_name=task_name, + **self.encode_kwargs, + ) + else: + unique_query_embeddings = self.model.encode( + unique_queries, + task_name=task_name, + prompt_type=PromptType.query, + **self.encode_kwargs, + ) + + # Map back to original order but reuse embeddings + query_embeddings = unique_query_embeddings[query_idx_mapping] + + logger.info( + f"Scoring Function: {self.score_function_desc[score_function]} ({score_function})" + ) + + if top_ranked is not None: + logger.info("Performing reranking on pre-ranked documents...") + result_heaps = self._rerank_documents( + query_ids=query_ids, + query_embeddings=query_embeddings, + corpus=corpus, + top_ranked=top_ranked, + top_k=top_k, + score_function=score_function, + task_name=task_name, + request_qid=request_qid, + return_sorted=return_sorted, + ) + else: + logger.info("Performing full corpus search...") + result_heaps = self._full_corpus_search( + query_ids=query_ids, + query_embeddings=query_embeddings, + corpus=corpus, + top_k=top_k, + score_function=score_function, + task_name=task_name, + request_qid=request_qid, + return_sorted=return_sorted, + ) + + for qid in result_heaps: + for score, corpus_id in result_heaps[qid]: + self.results[qid][corpus_id] = score + + return self.results + + def _rerank_documents( + self, + query_ids: list[str], + query_embeddings: torch.Tensor, + corpus: dict[str, dict[str, str]], + top_ranked: dict[str, list[str]], + top_k: int, + score_function: str, + task_name: str, + request_qid: str | None = None, + return_sorted: bool = False, + ) -> 
dict[str, list[tuple[float, str]]]: + """Rerank documents for each query using top_ranked.""" + # Determine device + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + logger.info(f"Using device: {device}") + + # Move query embeddings to appropriate device + query_embeddings = torch.as_tensor(query_embeddings).to(device) + + result_heaps = {qid: [] for qid in query_ids} + + # Get unique document IDs across all queries + unique_doc_ids = list( + { + doc_id + for qid in query_ids + if qid in top_ranked + for doc_id in top_ranked[qid] + } + ) + + # Create mapping from unique doc IDs to their index in the embedding matrix + doc_id_to_idx = {doc_id: idx for idx, doc_id in enumerate(unique_doc_ids)} + + # Encode unique documents only once + unique_docs = [corpus[doc_id] for doc_id in unique_doc_ids] + all_doc_embeddings = self.model.encode( + unique_docs, + task_name=task_name, + prompt_type=PromptType.passage, + request_qid=request_qid, + **self.encode_kwargs, + ) + + # Let's make sure we don't get the warnings for the tokenizer here via torch.compile + if hasattr(torch, "compile"): + os.environ["TOKENIZERS_PARALLELISM"] = "false" # we don't need it anymore + + # Process each query + for query_idx, query_id in enumerate(tqdm.tqdm(query_ids)): + if query_id not in top_ranked: + logger.warning(f"No pre-ranked documents found for query {query_id}") + continue + + ranked_ids = top_ranked[query_id] + doc_indices = torch.tensor([doc_id_to_idx[doc_id] for doc_id in ranked_ids]) + query_doc_embeddings = torch.as_tensor(all_doc_embeddings[doc_indices]).to( + device + ) + + # Ensure query embedding is on the correct device and has correct shape + query_embedding = query_embeddings[query_idx].unsqueeze(0) + + with torch.inference_mode(): + scores = self.score_functions[score_function]( + query_embedding, + query_doc_embeddings, + ) + + # Handle NaN values + is_nan = torch.isnan(scores) + if is_nan.sum() > 0: + raise ValueError( + f"NaN values detected in the 
similarity scores: {is_nan.sum()}" + ) + + # Compute top-k scores + scores_top_k_values, scores_top_k_idx = torch.topk( + scores, + min(top_k, len(ranked_ids)), + dim=1, + largest=True, + sorted=return_sorted, + ) + + # Move results back to CPU for heap operations + scores_top_k_values = scores_top_k_values.cpu() + scores_top_k_idx = scores_top_k_idx.cpu() + + # Build result heap + for doc_idx, score in zip( + scores_top_k_idx[0].tolist(), + scores_top_k_values[0].tolist(), + ): + corpus_id = ranked_ids[doc_idx] + heapq.heappush(result_heaps[query_id], (score, corpus_id)) + + # Clear CUDA cache after processing + if device.type == "cuda": + torch.cuda.empty_cache() + + return result_heaps + + def _full_corpus_search( + self, + query_ids: list[str], + query_embeddings: torch.Tensor, + corpus: dict[str, dict[str, str]], + top_k: int, + score_function: str, + task_name: str, + request_qid: str | None = None, + return_sorted: bool = False, + ) -> dict[str, list[tuple[float, str]]]: + """Perform full corpus search using batched processing.""" + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + logger.info(f"Using device: {device}") + + logger.info("Sorting Corpus by document length (Longest first)...") + corpus_ids = sorted(corpus, reverse=True) + corpus = [corpus[cid] for cid in corpus_ids] + + logger.info("Encoding Corpus in batches... 
Warning: This might take a while!") + itr = range(0, len(corpus), self.corpus_chunk_size) + + result_heaps = {qid: [] for qid in query_ids} + for batch_num, corpus_start_idx in enumerate(itr): + logger.info(f"Encoding Batch {batch_num + 1}/{len(itr)}...") + corpus_end_idx = min(corpus_start_idx + self.corpus_chunk_size, len(corpus)) + # Encode chunk of corpus + sub_corpus_embeddings = self.model.encode( + corpus[corpus_start_idx:corpus_end_idx], # type: ignore + task_name=task_name, + prompt_type=PromptType.passage, + request_qid=request_qid, + **self.encode_kwargs, + ) + + # Compute similarites using either cosine-similarity or dot product + logging.info("Computing Similarities...") + query_embeddings = torch.as_tensor(query_embeddings).to(device) + sub_corpus_embeddings = torch.as_tensor(sub_corpus_embeddings).to(device) + with torch.inference_mode(): + cos_scores = self.score_functions[score_function]( + query_embeddings, sub_corpus_embeddings + ) + + is_nan = torch.isnan(cos_scores) + if is_nan.sum() > 0: + raise ValueError( + f"NaN values detected in the similarity scores: {is_nan.sum()}" + ) + + # get top-k values + cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk( + cos_scores, + min( + top_k + 1, + len(cos_scores[1]) if len(cos_scores) > 1 else len(cos_scores[-1]), + ), + dim=1, + largest=True, + sorted=return_sorted, + ) + + for query_itr in range(len(query_embeddings)): + query_id = query_ids[query_itr] + for sub_corpus_id, score in zip( + cos_scores_top_k_idx[query_itr].cpu().tolist(), + cos_scores_top_k_values[query_itr].cpu().tolist(), + ): + corpus_id = corpus_ids[corpus_start_idx + sub_corpus_id] + if len(result_heaps[query_id]) < top_k: + # push item on the heap + heapq.heappush(result_heaps[query_id], (score, corpus_id)) + else: + # If item is larger than the smallest in the heap, push it on the heap then pop the smallest element + heapq.heappushpop(result_heaps[query_id], (score, corpus_id)) + + return result_heaps + + def 
load_results_file(self): + # load the first stage results from file in format {qid: {doc_id: score}} + if "https://" in self.previous_results: + # download the file + if not os.path.exists(self.previous_results): + url_descriptor = self.previous_results.split("https://")[-1].replace( + "/", "--" + ) + dest_file = os.path.join( + "results", f"cached_predictions--{url_descriptor}" + ) + os.makedirs(os.path.dirname(os.path.abspath(dest_file)), exist_ok=True) + download(self.previous_results, dest_file) + logger.info( + f"Downloaded the previous results at {self.previous_results} to {dest_file}" + ) + self.previous_results = dest_file + + with open(self.previous_results) as f: + previous_results = json.load(f) + assert isinstance(previous_results, dict) + assert isinstance(previous_results[list(previous_results.keys())[0]], dict) + return previous_results + + def search_cross_encoder( + self, + corpus: dict[str, dict[str, str]], + queries: dict[str, str | list[str]], + top_k: int, + instructions: dict[str, str] | None = None, + **kwargs, + ) -> dict[str, dict[str, float]]: + """This function provides support for reranker (or cross-encoder) models that encoder query and document at the same time (typically with attention). + Some notable examples include MonoBERT, MonoT5, RankLlama, etc. + Note: you must provide the path to the results to rerank to the __init__ function as `previous_results` or else rerank all documents in the corpus + """ + pairs = [] # create the pairs for reranking + for qid in queries.keys(): + if self.previous_results is None: + # try to use all of them + logging.logging( + f"previous_results is None. 
Using all the documents to rerank: {len(corpus)}" + ) + q_results = {doc_id: 0.0 for doc_id in corpus.keys()} + else: + q_results = self.previous_results[qid] + # take the top-k only + q_results_sorted = dict( + sorted(q_results.items(), key=lambda item: item[1], reverse=True) + ) + top_n = [k for k, v in list(q_results_sorted.items())[:top_k]] + query = queries[qid] + query = ( + self.convert_conv_history_to_query(self.model, [query])[0] + if isinstance(query, list) + else query + ) + for doc_id in top_n: + pairs.append( + ( + query, + corpus[doc_id], + instructions[qid] if instructions is not None else None, + qid, + doc_id, + ) + ) + + logger.info(f"Reranking the top {top_k} in batches... This might take a while!") + itr = range(0, len(pairs), self.batch_size) + + results = {qid: {} for qid in queries.keys()} + for batch_num, corpus_start_idx in enumerate( + tqdm.tqdm(itr, leave=False, disable=not self.show_progress_bar) + ): + corpus_end_idx = min(corpus_start_idx + self.batch_size, len(pairs)) + cur_batch = pairs[corpus_start_idx:corpus_end_idx] + + ( + queries_in_pair, + corpus_in_pair, + instructions_in_pair, + query_ids, + corpus_ids, + ) = zip(*cur_batch) + + assert ( + len(queries_in_pair) == len(corpus_in_pair) == len(instructions_in_pair) + ) + + if isinstance(self.model.model, CrossEncoder): + # can't take instructions, so add them here + queries_in_pair = [ + f"{q} {i}".strip() + for i, q in zip(instructions_in_pair, queries_in_pair) + ] + scores = self.model.predict(list(zip(queries_in_pair, corpus_in_pair))) # type: ignore + else: + # may use the instructions in a unique way, so give them also + scores = self.model.predict( # type: ignore + list(zip(queries_in_pair, corpus_in_pair, instructions_in_pair)) + ) + + for i, score in enumerate(scores): + results[query_ids[i]][corpus_ids[i]] = float(score) + + return results + + def predict(self, queries, passages, **kwargs): + raise NotImplementedError( + "You must implement a predict method for your 
reranker model" + ) + + def encode_conversations( + self, + model: Encoder, + conversations: list[list[str]], + task_name: str, + **kwargs, + ): + if callable(getattr(self.model, "encode_conversations", None)): + return model.encode_conversations( # type: ignore + conversations, task_name=task_name, **kwargs + ) + logger.warning( + "Model doesn't have encode_conversations fallback to default implementation" + ) + queries = self.convert_conv_history_to_query(model, conversations) # type: ignore + return model.encode( + queries, task_name=task_name, prompt_type=PromptType.query, **kwargs + ) # type: ignore + + @staticmethod + def convert_conv_history_to_query( + model: Encoder, conversations: list[list[str]] + ) -> str: + if callable(getattr(model, "convert_conv_history_to_query", None)): + return model.convert_conv_history_to_query(conversations) # type: ignore + return convert_conv_history_to_query(conversations) # type: ignore + + +class DRESModel: + """Dense Retrieval Exact Search (DRES). + This class converts a model with just an .encode method into DRES format. 
+ """ + + mteb_model_meta: ModelMeta | None + + def __init__(self, model, **kwargs): + self.model = model + self.use_sbert_model = isinstance(model, SentenceTransformer) + + def encode_corpus( + self, + corpus: list[dict[str, str]], + task_name: str, + batch_size: int, + prompt_type: PromptType = PromptType.passage, + **kwargs, + ): + sentences = corpus_to_str(corpus) + corpus_embeddings = self.model.encode( + sentences, + task_name=task_name, + prompt_type=prompt_type, + batch_size=batch_size, + **kwargs, + ) + return corpus_embeddings + + def encode( + self, + sentences: list[str], + task_name: str, + prompt_type: PromptType | None = None, + **kwargs, + ): + if prompt_type and prompt_type == PromptType.passage: + return self.encode_corpus( + sentences, task_name, prompt_type=prompt_type, **kwargs + ) + return self.model.encode( + sentences, task_name=task_name, prompt_type=prompt_type, **kwargs + ) + + +def is_cross_encoder_compatible(model) -> bool: + op = getattr(model.model, "predict", None) + return callable(op) diff --git a/mteb/evaluation/evaluators/utils.py b/mteb/evaluation/evaluators/utils.py index 95d84bd2f2..e01e0ec463 100644 --- a/mteb/evaluation/evaluators/utils.py +++ b/mteb/evaluation/evaluators/utils.py @@ -1,56 +1,102 @@ from __future__ import annotations import logging +from collections import defaultdict import numpy as np import pandas as pd +import pytrec_eval import requests import torch import tqdm +from datasets import load_dataset from packaging.version import Version from sklearn.metrics import auc +logger = logging.getLogger(__name__) -def cos_sim(a, b): - """Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j. 
+ +try: + # speeds up computation if available + torch.set_float32_matmul_precision("high") + logger.info("Setting torch float32 matmul precision to high for a speedup") +except Exception: + pass + + +def use_torch_compile(): + gpu_ok = False + if torch.cuda.is_available(): + device_cap = torch.cuda.get_device_capability() + if device_cap in ((7, 0), (8, 0), (9, 0)): + gpu_ok = True + + return gpu_ok + + +def cos_sim(a: torch.Tensor, b: torch.Tensor): + """Calculate pairwise cosine similarities between two sets of vectors. + + Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j. Return: Matrix with res[i][j] = cos_sim(a[i], b[j]) - """ # noqa: D402 + """ + # Move tensor conversion outside the compiled function + # since compile works better with pure tensor operations if not isinstance(a, torch.Tensor): a = torch.tensor(a) - if not isinstance(b, torch.Tensor): b = torch.tensor(b) - if len(a.shape) == 1: - a = a.unsqueeze(0) - - if len(b.shape) == 1: - b = b.unsqueeze(0) - - a_norm = torch.nn.functional.normalize(a, p=2, dim=1) - b_norm = torch.nn.functional.normalize(b, p=2, dim=1) - return torch.mm(a_norm, b_norm.transpose(0, 1)) + # The actual function to compile + def _cos_sim_core(a_tensor, b_tensor): + if len(a_tensor.shape) == 1: + a_tensor = a_tensor.unsqueeze(0) + if len(b_tensor.shape) == 1: + b_tensor = b_tensor.unsqueeze(0) + + a_norm = torch.nn.functional.normalize(a_tensor, p=2, dim=1) + b_norm = torch.nn.functional.normalize(b_tensor, p=2, dim=1) + return torch.mm(a_norm, b_norm.transpose(0, 1)) + + # Compile the core function once + if ( + hasattr(torch, "compile") and use_torch_compile() + ): # Check if torch.compile is available + _cos_sim_core_compiled = torch.compile(_cos_sim_core) + return _cos_sim_core_compiled(a, b) + else: + return _cos_sim_core(a, b) def dot_score(a: torch.Tensor, b: torch.Tensor): """Computes the dot-product dot_prod(a[i], b[j]) for all i and j. 
:return: Matrix with res[i][j] = dot_prod(a[i], b[j]) """ + # Move tensor conversion outside the compiled function if not isinstance(a, torch.Tensor): a = torch.tensor(a) - if not isinstance(b, torch.Tensor): b = torch.tensor(b) - if len(a.shape) == 1: - a = a.unsqueeze(0) - - if len(b.shape) == 1: - b = b.unsqueeze(0) - - return torch.mm(a, b.transpose(0, 1)) + # The actual function to compile + def _dot_score_core(a_tensor, b_tensor): + if len(a_tensor.shape) == 1: + a_tensor = a_tensor.unsqueeze(0) + if len(b_tensor.shape) == 1: + b_tensor = b_tensor.unsqueeze(0) + + return torch.mm(a_tensor, b_tensor.transpose(0, 1)) + + # Compile the core function once + if ( + hasattr(torch, "compile") and use_torch_compile() + ): # Check if torch.compile is available + _dot_score_core_compiled = torch.compile(_dot_score_core) + return _dot_score_core_compiled(a, b) + else: + return _dot_score_core(a, b) # From https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/retrieval/custom_metrics.py#L4 @@ -234,15 +280,14 @@ def get_rank_from_dict( return len(sorted_by_score) + 1, 0 -def evaluate_change( - original_run: dict[str, dict[str, float]], - new_run: dict[str, dict[str, float]], - changed_qrels: dict[str, list[str]], -) -> dict[str, float]: +def calculate_pmrr(original_run, new_run, changed_qrels): changes = [] for qid in changed_qrels.keys(): - original_qid_run = original_run[qid] - new_qid_run = new_run[qid] + if qid + "-og" not in original_run or qid + "-changed" not in new_run: + logging.warning(f"Query {qid} not found in the runs for calculating p-MRR") + continue + original_qid_run = original_run[qid + "-og"] + new_qid_run = new_run[qid + "-changed"] for idx, changed_doc in enumerate(changed_qrels[qid]): original_rank, original_score = get_rank_from_dict( original_qid_run, changed_doc @@ -266,10 +311,86 @@ def evaluate_change( changes_df = pd.DataFrame(changes) changes_df["p-MRR"] = changes_df.apply(lambda x: rank_score(x), axis=1) 
qid_wise = changes_df.groupby("qid").agg({"p-MRR": "mean"}) - return { - "p-MRR": qid_wise["p-MRR"].mean(), + return qid_wise["p-MRR"].mean() + + +def evaluate_p_mrr_change( + results: dict[str, dict[str, float]], + qrels: dict[str, dict[str, float]], + task_name: str, + k_values: list[int], +) -> dict[str, float]: + """Computes the scores needed for FollowIR datasets, including p-MRR (measuring change in instruction) and + details about the original instruction run and changed instruction run. + """ + followir_scores = defaultdict(dict) + # load the qrel_diff from the dataset + TASK_TO_HF_DATASET = { + "Core17InstructionRetrieval": ("jhu-clsp/core17-instructions-mteb", False), + "Robust04InstructionRetrieval": ("jhu-clsp/robust04-instructions-mteb", False), + "News21InstructionRetrieval": ("jhu-clsp/news21-instructions-mteb", False), + "mFollowIR": ("jhu-clsp/mfollowir-parquet-mteb", True), + "mFollowIRCrossLingual": ( + "jhu-clsp/mfollowir-cross-lingual-parquet-mteb", + True, + ), + } + hf_path, is_multilingual = TASK_TO_HF_DATASET[task_name] + if is_multilingual: + # figure out which of the languages this is: ["zho", "rus", "fas"] + # gather the changed_qrels for each, and store the keys as a check + for lang in ["zho", "rus", "fas"]: + config_name = f"qrel_diff-{lang}" + changed_qrels = { + item["query-id"]: item["corpus-ids"] + for item in load_dataset(hf_path, config_name)["qrel_diff"] + } + potential_keys = {item + "-og" for item in changed_qrels.keys()} | { + item + "-changed" for item in changed_qrels.keys() + } + if ( + potential_keys == set(qrels.keys()) + or len(potential_keys - set(qrels.keys())) <= 2 + ): # there are about two skipped + break # this is the right qrels + + else: + changed_qrels = { + item["query-id"]: item["corpus-ids"] + for item in load_dataset(hf_path, "qrel_diff")["qrel_diff"] + } + + qrels_sep = { + "og": {k: v for k, v in qrels.items() if k.endswith("-og")}, + "changed": {k: v for k, v in qrels.items() if not k.endswith("-og")}, 
} + original_run = {} + new_run = {} + # make original run from the results file with all "-og" items only and vice versa + for qid, docs in results.items(): + if qid.endswith("-og"): + original_run[qid] = docs + else: + new_run[qid] = docs + + p_mrr = calculate_pmrr(original_run, new_run, changed_qrels) + followir_scores["p-MRR"] = p_mrr + + # unfortunately, have to re-compute scores here to get only og and changed scores + followir_scores["og"] = {} + followir_scores["changed"] = {} + for name, group in [("og", original_run), ("changed", new_run)]: + _, ndcg, _map, recall, precision, naucs = calculate_retrieval_scores( + group, qrels_sep[name], k_values + ) + # add these to the followir_scores with name prefix + scores_dict = make_score_dict(ndcg, _map, recall, precision, {}, naucs, {}, {}) + for key, value in scores_dict.items(): + followir_scores[name][key] = value + + return followir_scores + def rank_score(x: dict[str, float]) -> float: if x["og_rank"] >= x["new_rank"]: @@ -359,7 +480,6 @@ def confidence_scores(sim_scores: list[float]) -> dict[str, float]: cs_diff1 = 0.0 conf_scores = {"max": cs_max, "std": cs_std, "diff1": cs_diff1} - return conf_scores @@ -426,3 +546,243 @@ def abstention_curve( abst_nauc = (abst_auc - flat_auc) / (or_auc - flat_auc) return abst_nauc + + +def add_task_specific_scores( + scores: dict[str, float], + qrels: dict[str, dict[str, int]], + results: dict[str, dict[str, float]], + task_name: str, + k_values: list[int], +) -> dict[str, float]: + """Add task-specific scores to the scores dictionary, that are not needed for all results but require additional computation.""" + task_scores = {} + if task_name in ["NevIR"]: + paired_score = paired_accuracy(qrels, results, scores) + task_scores["paired_accuracy"] = paired_score + + if task_name in ["InstructIR"]: + robustness_at_10_score = robustness_at_10(qrels, results, scores) + task_scores["robustness_at_10"] = robustness_at_10_score + + if task_name in [ + "mFollowIR", + 
"mFollowIRCrossLingual", + "Robust04InstructionRetrieval", + "Core17InstructionRetrieval", + "News21InstructionRetrieval", + ]: + p_mrr_and_consolidated_scores = evaluate_p_mrr_change( + results, qrels, task_name, k_values + ) + task_scores.update(p_mrr_and_consolidated_scores) + + if task_name in ["MindSmallReranking"]: + take_max_over_subqueries = max_over_subqueries(qrels, results, k_values) + task_scores.update(take_max_over_subqueries) + + return task_scores + + +def paired_accuracy( + qrels: dict[str, dict[str, float]], + results: dict[str, dict[str, float]], + scores: dict[str, float], +) -> float: + """Computes the paired accuracy. This means both queries for an instance have to be correct for it to count. + This is because models will prefer one passage all the time, giving it 50% automatically unless we correct for this. + For more details, see https://arxiv.org/abs/2305.07614 + + Args: + qrels: Ground truth relevance judgments for the queries + results: Predicted relevance scores for the queries + scores: The scores for the queries, to extract top_1 recall for each query + """ + # group the queries by the query id + query_keys = set() + for key in qrels.keys(): + query_keys.add(key.split("_")[0]) + + paired_scores = [] + for key in query_keys: + # get recall_at_1 for both q1 and q2 + q1_recall_at_1 = scores[f"{key}_q1"]["recall_1"] + q2_recall_at_1 = scores[f"{key}_q2"]["recall_1"] + + # the score is 1 if both are 1, 0 otherwise + paired_scores.append(1 if q1_recall_at_1 == 1 and q2_recall_at_1 == 1 else 0) + + return sum(paired_scores) / len(paired_scores) + + +def robustness_at_10( + qrels: dict[str, dict[str, float]], + results: dict[str, dict[str, float]], + scores: dict[str, float], +) -> float: + """Computes the robustness at 10. This computes the lowest ndcg@10 over all instructions. 
Taken from https://arxiv.org/abs/2402.14334 + + Args: + qrels: Ground truth relevance judgments for the queries + results: Predicted relevance scores for the queries + scores: The scores for the queries, to extract ndcg@10 for each query + """ + query_keys = defaultdict(list) + for key in qrels.keys(): + query_keys[key.split("_")[0]].append(key) + + robustness_scores = [] + for _, keys in query_keys.items(): + # get the ndcg@10 for each query + current_scores = [] + for key in keys: + current_scores.append(scores[key]["ndcg_cut_10"]) + + # get the lowest ndcg@10 + robustness_scores.append(min(current_scores)) + + return sum(robustness_scores) / len(robustness_scores) + + +def make_score_dict(ndcg, _map, recall, precision, mrr, naucs, naucs_mrr, task_scores): + scores = { + **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()}, + **{f"map_at_{k.split('@')[1]}": v for (k, v) in _map.items()}, + **{f"recall_at_{k.split('@')[1]}": v for (k, v) in recall.items()}, + **{f"precision_at_{k.split('@')[1]}": v for (k, v) in precision.items()}, + **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in mrr.items()}, + **{ + k.replace("@", "_at_").replace("_P", "_precision").lower(): v + for k, v in naucs.items() + }, + **{ + k.replace("@", "_at_").replace("_P", "_precision").lower(): v + for k, v in naucs_mrr.items() + }, + **task_scores, + } + return scores + + +def parse_metrics_from_scores(scores, k_values): + all_ndcgs, all_aps, all_recalls, all_precisions = {}, {}, {}, {} + for k in k_values: + all_ndcgs[f"NDCG@{k}"] = [] + all_aps[f"MAP@{k}"] = [] + all_recalls[f"Recall@{k}"] = [] + all_precisions[f"P@{k}"] = [] + + for query_id in scores.keys(): + for k in k_values: + all_ndcgs[f"NDCG@{k}"].append(scores[query_id]["ndcg_cut_" + str(k)]) + all_aps[f"MAP@{k}"].append(scores[query_id]["map_cut_" + str(k)]) + all_recalls[f"Recall@{k}"].append(scores[query_id]["recall_" + str(k)]) + all_precisions[f"P@{k}"].append(scores[query_id]["P_" + str(k)]) + + ndcg, _map, recall, 
precision = ( + all_ndcgs.copy(), + all_aps.copy(), + all_recalls.copy(), + all_precisions.copy(), + ) + + for k in k_values: + ndcg[f"NDCG@{k}"] = round(sum(ndcg[f"NDCG@{k}"]) / len(scores), 5) + _map[f"MAP@{k}"] = round(sum(_map[f"MAP@{k}"]) / len(scores), 5) + recall[f"Recall@{k}"] = round(sum(recall[f"Recall@{k}"]) / len(scores), 5) + precision[f"P@{k}"] = round(sum(precision[f"P@{k}"]) / len(scores), 5) + + return ( + ndcg, + _map, + recall, + precision, + all_ndcgs, + all_aps, + all_recalls, + all_precisions, + ) + + +def max_over_subqueries(qrels, results, k_values): + """Computes the max over subqueries scores when merging. + + Args: + qrels: Ground truth relevance judgments for the queries + results: Predicted relevance scores for the queries + k_values: The k values for which to compute the scores + """ + query_keys = defaultdict(list) + for key in qrels.keys(): + query_keys["_".join(key.split("_")[:-1])].append(key) + + new_results = {} + new_qrels = {} + for query_id_base, query_ids in query_keys.items(): + doc_scores = defaultdict(float) + for query_id_full in query_ids: + for doc_id, score in results[query_id_full].items(): + if doc_id not in doc_scores: + doc_scores[doc_id] = score + else: + doc_scores[doc_id] = max(score, doc_scores[doc_id]) + + new_results[query_id_base] = doc_scores + new_qrels[query_id_base] = qrels[query_id_full] # all the same + + # now we have the new results, we can compute the scores + _, ndcg, _map, recall, precision, naucs = calculate_retrieval_scores( + new_results, new_qrels, k_values + ) + score_dict = make_score_dict(ndcg, _map, recall, precision, {}, naucs, {}, {}) + return {"max_over_subqueries_" + k: v for k, v in score_dict.items()} + + +def calculate_retrieval_scores(results, qrels, k_values): + map_string = "map_cut." + ",".join([str(k) for k in k_values]) + ndcg_string = "ndcg_cut." + ",".join([str(k) for k in k_values]) + recall_string = "recall." + ",".join([str(k) for k in k_values]) + precision_string = "P." 
+ ",".join([str(k) for k in k_values]) + + evaluator = pytrec_eval.RelevanceEvaluator( + qrels, {map_string, ndcg_string, recall_string, precision_string} + ) + scores = evaluator.evaluate(results) + + ( + ndcg, + _map, + recall, + precision, + all_ndcgs, + all_aps, + all_recalls, + all_precisions, + ) = parse_metrics_from_scores(scores, k_values) + + naucs = evaluate_abstention( + results, {**all_ndcgs, **all_aps, **all_recalls, **all_precisions} + ) + + return scores, ndcg, _map, recall, precision, naucs + + +def evaluate_abstention( + results: dict[str, dict[str, float]], + metric_scores: dict[str, list[float]], +) -> dict[str, float]: + """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997""" + all_sim_scores = [list(results[qid].values()) for qid in list(results.keys())] + all_conf_scores = [confidence_scores(sim_scores) for sim_scores in all_sim_scores] + conf_fcts = list(all_conf_scores[0].keys()) + all_conf_scores = { + fct: np.array([x[fct] for x in all_conf_scores]) for fct in conf_fcts + } + metric_scores = {k: np.array(v) for k, v in metric_scores.items()} + naucs = {} + + for metric_name, scores in metric_scores.items(): + for fct, conf_scores in all_conf_scores.items(): + naucs[f"nAUC_{metric_name}_{fct}"] = nAUC(conf_scores, scores) + + return naucs diff --git a/mteb/model_meta.py b/mteb/model_meta.py index 4a8146b3de..9595dd79a9 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -26,7 +26,7 @@ "API", "Tevatron", ] -DISTANCE_METRICS = Literal["cosine"] +DISTANCE_METRICS = Literal["cosine", "dot"] def sentence_transformers_loader( diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index 7a3116e667..e546203019 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -127,3 +127,20 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=False, ) + 
+contriever = ModelMeta( + name="facebook/contriever-msmarco", + languages=["eng-Latn"], + open_weights=True, + revision="abe8c1493371369031bcb1e02acb754cf4e162fa", + release_date="2022-06-25", # release date of model on HF + n_parameters=150_000_000, + memory_usage=None, + embed_dim=768, + license=None, + max_tokens=512, + reference="https://huggingface.co/facebook/contriever-msmarco", + similarity_fn_name="dot", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, +) diff --git a/mteb/overview.py b/mteb/overview.py index 7b1bfbb426..91723ec4df 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -8,7 +8,7 @@ import pandas as pd -from mteb.abstasks import AbsTask +from mteb.abstasks import AbsTask, AbsTaskReranking from mteb.abstasks.TaskMetadata import TASK_CATEGORY, TASK_DOMAIN, TASK_TYPE from mteb.languages import ( ISO_TO_LANGUAGE, @@ -25,19 +25,31 @@ def create_task_list() -> list[type[AbsTask]]: - tasks_categories_cls = list(AbsTask.__subclasses__()) - tasks = [ - cls - for cat_cls in tasks_categories_cls - for cls in cat_cls.__subclasses__() - if cat_cls.__name__.startswith("AbsTask") - ] + # reranking subclasses retrieval to share methods, but is an abstract task + tasks_categories_cls = list(AbsTask.__subclasses__()) + [AbsTaskReranking] + tasks = [] + for cat_cls in tasks_categories_cls: + for cls in cat_cls.__subclasses__(): + if ( + cat_cls.__name__.startswith("AbsTask") + and cls.__name__ != "AbsTaskReranking" + ): + tasks.append(cls) return tasks def create_name_to_task_mapping() -> dict[str, type[AbsTask]]: tasks = create_task_list() - return {cls.metadata.name: cls for cls in tasks} + metadata_names = {} + for cls in tasks: + if cls.metadata.name in metadata_names: + raise ValueError( + f"Duplicate task name found: {cls.metadata.name}. Please make sure that all task names are unique." 
+ ) + if "AbsTask" in cls.__name__: + continue + metadata_names[cls.metadata.name] = cls + return metadata_names TASKS_REGISTRY = create_name_to_task_mapping() diff --git a/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py b/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py index 2687422935..e70acd9850 100644 --- a/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py +++ b/mteb/tasks/Classification/multilingual/IndicSentimentClassification.py @@ -21,7 +21,7 @@ } -class IndicSentimentClassification(MultilingualTask, AbsTaskClassification): +class IndicSentimentClassification(AbsTaskClassification, MultilingualTask): fast_loading = True metadata = TaskMetadata( name="IndicSentimentClassification", diff --git a/mteb/tasks/InstructionReranking/__init__.py b/mteb/tasks/InstructionReranking/__init__.py new file mode 100644 index 0000000000..f5e812247d --- /dev/null +++ b/mteb/tasks/InstructionReranking/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .eng.Core17InstructionRetrieval import * +from .eng.News21InstructionRetrieval import * +from .eng.Robust04InstructionRetrieval import * +from .multilingual.mFollowIR import * diff --git a/mteb/tasks/InstructionRetrieval/eng/Core17InstructionRetrieval.py b/mteb/tasks/InstructionReranking/eng/Core17InstructionRetrieval.py similarity index 79% rename from mteb/tasks/InstructionRetrieval/eng/Core17InstructionRetrieval.py rename to mteb/tasks/InstructionReranking/eng/Core17InstructionRetrieval.py index 14fa5b45b9..368e4a5699 100644 --- a/mteb/tasks/InstructionRetrieval/eng/Core17InstructionRetrieval.py +++ b/mteb/tasks/InstructionReranking/eng/Core17InstructionRetrieval.py @@ -2,19 +2,19 @@ from mteb.abstasks.TaskMetadata import TaskMetadata -from ....abstasks.AbsTaskInstructionRetrieval import AbsTaskInstructionRetrieval +from ....abstasks.AbsTaskReranking import AbsTaskReranking -class Core17InstructionRetrieval(AbsTaskInstructionRetrieval): 
+class Core17InstructionRetrieval(AbsTaskReranking): metadata = TaskMetadata( name="Core17InstructionRetrieval", description="Measuring retrieval instruction following ability on Core17 narratives for the FollowIR benchmark.", reference="https://arxiv.org/abs/2403.15246", dataset={ - "path": "jhu-clsp/core17-instructions", - "revision": "e39ff896cf3efbbdeeb950e6bd7c79f266995b07", + "path": "jhu-clsp/core17-instructions-mteb", + "revision": "7030c7efc3585d9020f243b12862997889243b78", }, - type="InstructionRetrieval", + type="InstructionReranking", category="s2p", modalities=["text"], eval_splits=["test"], diff --git a/mteb/tasks/InstructionRetrieval/eng/News21InstructionRetrieval.py b/mteb/tasks/InstructionReranking/eng/News21InstructionRetrieval.py similarity index 79% rename from mteb/tasks/InstructionRetrieval/eng/News21InstructionRetrieval.py rename to mteb/tasks/InstructionReranking/eng/News21InstructionRetrieval.py index 3973ca0b75..c2e0c9a4dc 100644 --- a/mteb/tasks/InstructionRetrieval/eng/News21InstructionRetrieval.py +++ b/mteb/tasks/InstructionReranking/eng/News21InstructionRetrieval.py @@ -2,19 +2,19 @@ from mteb.abstasks.TaskMetadata import TaskMetadata -from ....abstasks.AbsTaskInstructionRetrieval import AbsTaskInstructionRetrieval +from ....abstasks.AbsTaskReranking import AbsTaskReranking -class News21InstructionRetrieval(AbsTaskInstructionRetrieval): +class News21InstructionRetrieval(AbsTaskReranking): metadata = TaskMetadata( name="News21InstructionRetrieval", description="Measuring retrieval instruction following ability on News21 narratives for the FollowIR benchmark.", reference="https://arxiv.org/abs/2403.15246", dataset={ - "path": "jhu-clsp/news21-instructions", - "revision": "e0144086b45fe31ac125e9ac1a83b6a409bb6ca6", + "path": "jhu-clsp/news21-instructions-mteb", + "revision": "39db677749b3b783bb277d0e2d4712f5f133f52b", }, - type="InstructionRetrieval", + type="InstructionReranking", category="s2p", modalities=["text"], 
eval_splits=["test"], diff --git a/mteb/tasks/InstructionRetrieval/eng/Robust04InstructionRetrieval.py b/mteb/tasks/InstructionReranking/eng/Robust04InstructionRetrieval.py similarity index 79% rename from mteb/tasks/InstructionRetrieval/eng/Robust04InstructionRetrieval.py rename to mteb/tasks/InstructionReranking/eng/Robust04InstructionRetrieval.py index 1d3cb5c923..6fe69a57ba 100644 --- a/mteb/tasks/InstructionRetrieval/eng/Robust04InstructionRetrieval.py +++ b/mteb/tasks/InstructionReranking/eng/Robust04InstructionRetrieval.py @@ -2,19 +2,19 @@ from mteb.abstasks.TaskMetadata import TaskMetadata -from ....abstasks.AbsTaskInstructionRetrieval import AbsTaskInstructionRetrieval +from ....abstasks.AbsTaskReranking import AbsTaskReranking -class Robust04InstructionRetrieval(AbsTaskInstructionRetrieval): +class Robust04InstructionRetrieval(AbsTaskReranking): metadata = TaskMetadata( name="Robust04InstructionRetrieval", description="Measuring retrieval instruction following ability on Robust04 narratives for the FollowIR benchmark.", reference="https://arxiv.org/abs/2403.15246", dataset={ - "path": "jhu-clsp/robust04-instructions", - "revision": "a5a1c4fe2bc528ac12e83f8cdf82178da85d2f1d", + "path": "jhu-clsp/robust04-instructions-mteb", + "revision": "0a3efedfcac0a7f859c46cff3a0fac0f8747b28f", }, - type="InstructionRetrieval", + type="InstructionReranking", category="s2p", modalities=["text"], eval_splits=["test"], diff --git a/mteb/tasks/InstructionRetrieval/multilingual/__init__.py b/mteb/tasks/InstructionReranking/eng/__init__.py similarity index 100% rename from mteb/tasks/InstructionRetrieval/multilingual/__init__.py rename to mteb/tasks/InstructionReranking/eng/__init__.py diff --git a/mteb/tasks/InstructionReranking/multilingual/__init__.py b/mteb/tasks/InstructionReranking/multilingual/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mteb/tasks/InstructionRetrieval/multilingual/mFollowIR.py 
b/mteb/tasks/InstructionReranking/multilingual/mFollowIR.py similarity index 60% rename from mteb/tasks/InstructionRetrieval/multilingual/mFollowIR.py rename to mteb/tasks/InstructionReranking/multilingual/mFollowIR.py index 9452beb8de..7a2974dea2 100644 --- a/mteb/tasks/InstructionRetrieval/multilingual/mFollowIR.py +++ b/mteb/tasks/InstructionReranking/multilingual/mFollowIR.py @@ -1,13 +1,11 @@ from __future__ import annotations -from collections import defaultdict - import datasets from mteb.abstasks.MultilingualTask import MultilingualTask from mteb.abstasks.TaskMetadata import TaskMetadata -from ....abstasks.AbsTaskInstructionRetrieval import AbsTaskInstructionRetrieval +from ....abstasks.AbsTaskReranking import AbsTaskReranking _LANGUAGES = { "fas": ["fas-Arab"], @@ -51,8 +49,8 @@ def load_data( ): corpus = {lang: {EVAL_SPLIT: {}} for lang in langs} queries = {lang: {EVAL_SPLIT: {}} for lang in langs} - og_relevant_docs = {lang: {EVAL_SPLIT: {}} for lang in langs} - changed_relevant_docs = {lang: {EVAL_SPLIT: {}} for lang in langs} + relevant_docs = {lang: {EVAL_SPLIT: {}} for lang in langs} + instructions = {lang: {EVAL_SPLIT: {}} for lang in langs} top_ranked = {lang: {EVAL_SPLIT: {}} for lang in langs} for lang in langs: @@ -60,6 +58,7 @@ def load_data( loading_lang = lang.split("-")[1] # don't care about the eng part else: loading_lang = lang + print(f"Loading data for {lang} from {loading_lang}") # Load corpus data corpus_data = datasets.load_dataset( @@ -67,7 +66,6 @@ def load_data( f"corpus-{loading_lang}", cache_dir=cache_dir, revision=revision, - trust_remote_code=True, ) corpus[lang][EVAL_SPLIT] = { row["_id"]: {"title": row["title"], "text": row["text"]} @@ -80,54 +78,39 @@ def load_data( f"queries-{loading_lang}", cache_dir=cache_dir, revision=revision, - trust_remote_code=True, ) queries[lang][EVAL_SPLIT] = { - row["_id"]: { - "text": row["text"], - "instruction_og": row["instruction_og"], - "instruction_changed": row["instruction_changed"], - 
"keywords": row["keywords"] if "keywords" in row else None, - "short_query": row["short_query"] if "short_query" in row else None, - } - for row in queries_data["queries"] + row["_id"]: row["text"] for row in queries_data["queries"] } - # Load qrels_og data - qrels_og_data = datasets.load_dataset( + # Load instructions data + instructions_data = datasets.load_dataset( path, - f"qrels_og-{loading_lang}", + f"instruction-{loading_lang}", cache_dir=cache_dir, revision=revision, - trust_remote_code=True, ) - for row in qrels_og_data[EVAL_SPLIT]: - if row["query-id"] not in og_relevant_docs[lang][EVAL_SPLIT]: - og_relevant_docs[lang][EVAL_SPLIT][row["query-id"]] = { - row["corpus-id"]: int(row["score"]) - } - else: - og_relevant_docs[lang][EVAL_SPLIT][row["query-id"]][ - row["corpus-id"] - ] = int(row["score"]) + instructions[lang][EVAL_SPLIT] = { + row["query-id"]: row["instruction"] + for row in instructions_data["instruction"] + } - # Load qrels_changed data - qrels_changed_data = datasets.load_dataset( + # Load qrels_og data + qrels_og_data = datasets.load_dataset( path, - f"qrels_changed-{loading_lang}", + f"default-{loading_lang}", cache_dir=cache_dir, revision=revision, - trust_remote_code=True, ) - for row in qrels_changed_data[EVAL_SPLIT]: - if row["query-id"] not in changed_relevant_docs[lang][EVAL_SPLIT]: - changed_relevant_docs[lang][EVAL_SPLIT][row["query-id"]] = { + for row in qrels_og_data[EVAL_SPLIT]: + if row["query-id"] not in relevant_docs[lang][EVAL_SPLIT]: + relevant_docs[lang][EVAL_SPLIT][row["query-id"]] = { row["corpus-id"]: int(row["score"]) } else: - changed_relevant_docs[lang][EVAL_SPLIT][row["query-id"]][ - row["corpus-id"] - ] = int(row["score"]) + relevant_docs[lang][EVAL_SPLIT][row["query-id"]][row["corpus-id"]] = ( + int(row["score"]) + ) # Load top_ranked data top_ranked_data = datasets.load_dataset( @@ -135,51 +118,21 @@ def load_data( f"top_ranked-{loading_lang}", cache_dir=cache_dir, revision=revision, - trust_remote_code=True, ) for 
row in top_ranked_data["top_ranked"]: - if row["qid"] not in top_ranked[lang][EVAL_SPLIT]: - top_ranked[lang][EVAL_SPLIT][row["qid"]] = [row["pid"]] - else: - top_ranked[lang][EVAL_SPLIT][row["qid"]].append(row["pid"]) + top_ranked[lang][EVAL_SPLIT][row["query-id"]] = row["corpus-ids"] - # make og_instructions and changed_instructions from queries and then turn queries into just queries - og_instructions = {lang: {EVAL_SPLIT: defaultdict(dict)} for lang in queries} - changed_instructions = {lang: {EVAL_SPLIT: defaultdict(dict)} for lang in queries} - queries_only = {lang: {EVAL_SPLIT: {}} for lang in queries} - for lang in queries: - for split in queries[lang]: - for qid in queries[lang][split]: - text = queries[lang][split][qid]["text"] - og_instructions[lang][split][text] = queries[lang][split][qid][ - "instruction_og" - ] - changed_instructions[lang][split][text] = queries[lang][split][qid][ - "instruction_changed" - ] - queries_only[lang][split][qid] = text - - queries = queries_only - - return ( - corpus, - queries, - og_instructions, - changed_instructions, - og_relevant_docs, - changed_relevant_docs, - top_ranked, - ) + return (corpus, queries, instructions, relevant_docs, top_ranked) -class mFollowIRCrossLingual(MultilingualTask, AbsTaskInstructionRetrieval): +class mFollowIRCrossLingual(MultilingualTask, AbsTaskReranking): metadata = TaskMetadata( - name="mFollowIRCrossLingualInstructionRetrieval", + name="mFollowIRCrossLingual", description="This tasks measures retrieval instruction following ability on NeuCLIR narratives for the mFollowIR benchmark on the Farsi, Russian, and Chinese languages with English queries/instructions.", reference="https://neuclir.github.io/", dataset={ - "path": "jhu-clsp/mFollowIR-cross-lingual-parquet", - "revision": "7a82814a53229d3c8f18b2e18762a1a959dc5ff6", + "path": "jhu-clsp/mFollowIR-cross-lingual-parquet-mteb", + "revision": "6b01566619233a0c35d135123510b6b02c258ff5", }, type="Retrieval", category="s2p", @@ -209,10 
+162,8 @@ def load_data(self, **kwargs): ( self.corpus, self.queries, - self.og_instructions, - self.changed_instructions, - self.og_relevant_docs, - self.changed_relevant_docs, + self.instructions, + self.relevant_docs, self.top_ranked, ) = load_data( path=self.metadata_dict["dataset"]["path"], @@ -225,14 +176,14 @@ def load_data(self, **kwargs): self.data_loaded = True -class mFollowIR(MultilingualTask, AbsTaskInstructionRetrieval): +class mFollowIR(MultilingualTask, AbsTaskReranking): metadata = TaskMetadata( - name="mFollowIRInstructionRetrieval", + name="mFollowIR", description="This tasks measures retrieval instruction following ability on NeuCLIR narratives for the mFollowIR benchmark on the Farsi, Russian, and Chinese languages.", reference="https://neuclir.github.io/", dataset={ - "path": "jhu-clsp/mFollowIR-parquet", - "revision": "2c5cdcb438eff9de6412803768ac7304d4771cdc", + "path": "jhu-clsp/mFollowIR-parquet-mteb", + "revision": "09eecbe45c54b4a6dfb8e68e345cae77337768e2", }, type="Retrieval", category="s2p", @@ -262,10 +213,8 @@ def load_data(self, **kwargs): ( self.corpus, self.queries, - self.og_instructions, - self.changed_instructions, - self.og_relevant_docs, - self.changed_relevant_docs, + self.instructions, + self.relevant_docs, self.top_ranked, ) = load_data( path=self.metadata_dict["dataset"]["path"], diff --git a/mteb/tasks/InstructionRetrieval/__init__.py b/mteb/tasks/InstructionRetrieval/__init__.py index f5e812247d..c8454858dd 100644 --- a/mteb/tasks/InstructionRetrieval/__init__.py +++ b/mteb/tasks/InstructionRetrieval/__init__.py @@ -1,6 +1,3 @@ from __future__ import annotations -from .eng.Core17InstructionRetrieval import * -from .eng.News21InstructionRetrieval import * -from .eng.Robust04InstructionRetrieval import * -from .multilingual.mFollowIR import * +from .eng.InstructIR import * diff --git a/mteb/tasks/InstructionRetrieval/eng/InstructIR.py b/mteb/tasks/InstructionRetrieval/eng/InstructIR.py new file mode 100644 index 
0000000000..910a3a5bae --- /dev/null +++ b/mteb/tasks/InstructionRetrieval/eng/InstructIR.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from mteb.abstasks.TaskMetadata import TaskMetadata + +from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval + + +class InstructIR(AbsTaskRetrieval): + metadata = TaskMetadata( + name="InstructIR", + description='A benchmark specifically designed to evaluate the instruction following ability in information retrieval models. Our approach focuses on user-aligned instructions tailored to each query instance, reflecting the diverse characteristics inherent in real-world search scenarios. NOTE: scores on this may differ unless you include instruction first, then "[SEP]" and then the query.', + reference="https://github.com/kaistAI/InstructIR/tree/main", + dataset={ + "path": "mteb/InstructIR-mteb", + "revision": "42c3afabe480643b755a7099dbf0f9ebeedaf6ca", + }, + type="Reranking", + category="s2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="robustness_at_10", + date=("2024-02-05", "2024-02-06"), + domains=["Web"], + task_subtypes=["Article retrieval"], + license="mit", + annotations_creators="human-annotated", + dialect=[], + sample_creation="created", + bibtex_citation="""@article{oh2024instructir, + title={{INSTRUCTIR: A Benchmark for Instruction Following of Information Retrieval Models}}, + author={{Hanseok Oh and Hyunji Lee and Seonghyeon Ye and Haebin Shin and Hansol Jang and Changwook Jun and Minjoon Seo}}, + year={{2024}}, + eprint={{2402.14334}}, + archivePrefix={{arXiv}}, + primaryClass={{cs.CL}} +}""", + descriptive_stats={ + "n_samples": {"test": 2255}, + "test": { + "num_samples": 375, + "num_positive": 375, + "num_negative": 375, + "avg_query_len": 50.205333333333336, + "avg_positive_len": 6.013333333333334, + "avg_negative_len": 13.986666666666666, + }, + }, + ) diff --git a/mteb/tasks/Reranking/__init__.py b/mteb/tasks/Reranking/__init__.py index 
a4b302a17f..18dbd53f43 100644 --- a/mteb/tasks/Reranking/__init__.py +++ b/mteb/tasks/Reranking/__init__.py @@ -2,6 +2,7 @@ from .eng.AskUbuntuDupQuestions import * from .eng.MindSmallReranking import * +from .eng.NevIR import * from .eng.SciDocsReranking import * from .eng.StackOverflowDupQuestions import * from .eng.WebLINXCandidatesReranking import * diff --git a/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py b/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py index 90fe689cdd..51d39c770c 100644 --- a/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py +++ b/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py @@ -19,7 +19,7 @@ class AskUbuntuDupQuestions(AbsTaskReranking): modalities=["text"], eval_splits=["test"], eval_langs=["eng-Latn"], - main_score="map", + main_score="map_at_1000", date=None, domains=None, task_subtypes=None, diff --git a/mteb/tasks/Reranking/eng/MindSmallReranking.py b/mteb/tasks/Reranking/eng/MindSmallReranking.py index bdc01edbcb..5c57e3f77d 100644 --- a/mteb/tasks/Reranking/eng/MindSmallReranking.py +++ b/mteb/tasks/Reranking/eng/MindSmallReranking.py @@ -1,8 +1,17 @@ from __future__ import annotations +import logging +from collections import defaultdict + +import tqdm + from mteb.abstasks.TaskMetadata import TaskMetadata from ....abstasks.AbsTaskReranking import AbsTaskReranking +from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) class MindSmallReranking(AbsTaskReranking): @@ -19,7 +28,7 @@ class MindSmallReranking(AbsTaskReranking): modalities=["text"], eval_splits=["test"], eval_langs=["eng-Latn"], - main_score="map", + main_score="max_over_subqueries_map_at_1000", date=("2019-10-12", "2019-11-22"), domains=["News", "Written"], task_subtypes=[], @@ -48,3 +57,168 @@ class MindSmallReranking(AbsTaskReranking): language models can effectively improve the performance of news recommendation. 
The MIND dataset will be available at https://msnews.github.io}.", }""", ) + + def process_example( + self, example: dict, split: str, query_idx: int, subquery_idx: int + ) -> dict: # Added subquery_idx parameter + """Process a single example from the dataset.""" + query = example["query"] + positive_docs = example["positive"] + negative_docs = example["negative"] + + # Modified query_id to include subquery index + query_id = f"{split}_query{query_idx}_{subquery_idx}" + + # Rest of the method remains the same + example_data = { + "query_id": query_id, + "query": query, + "doc_ids": [], + "doc_texts": [], + "relevance_scores": [], + } + + def get_doc_hash(text: str) -> str: + import hashlib + + return hashlib.md5(text.encode()).hexdigest() + + # Process positive documents + for i, pos_doc in enumerate(positive_docs): + doc_hash = get_doc_hash(pos_doc) + if pos_doc in self.doc_text_to_id[split]: + doc_id = self.doc_text_to_id[split][pos_doc] + else: + formatted_i = str(i).zfill(5) + doc_id = f"apositive_{doc_hash}_{formatted_i}" + self.doc_text_to_id[split][pos_doc] = doc_id + + example_data["doc_ids"].append(doc_id) + example_data["doc_texts"].append(pos_doc) + example_data["relevance_scores"].append(1) + + # Process negative documents + for i, neg_doc in enumerate(negative_docs): + doc_hash = get_doc_hash(neg_doc) + if neg_doc in self.doc_text_to_id[split]: + doc_id = self.doc_text_to_id[split][neg_doc] + else: + formatted_i = str(i).zfill(5) + doc_id = f"negative_{doc_hash}_{formatted_i}" + self.doc_text_to_id[split][neg_doc] = doc_id + + example_data["doc_ids"].append(doc_id) + example_data["doc_texts"].append(neg_doc) + example_data["relevance_scores"].append(0) + + return example_data + + def load_data(self, **kwargs): + """Load and transform the dataset with efficient deduplication.""" + if self.data_loaded: + return + + # Call parent class method + super(AbsTaskRetrieval, self).load_data(**kwargs) + + logging.info( + f"Transforming old format to standard 
format for {self.metadata.name}" + ) + + self.corpus = defaultdict(lambda: defaultdict(dict)) + self.queries = defaultdict(lambda: defaultdict(dict)) + self.relevant_docs = defaultdict(lambda: defaultdict(dict)) + self.top_ranked = defaultdict(lambda: defaultdict(list)) + self.doc_text_to_id = defaultdict(dict) + + # Process each split + for split in self.dataset: + logging.info(f"Processing split {split}") + + # Pre-allocate lists for batch processing + all_queries = [] + all_positives = [] + all_negatives = [] + all_instance_indices = [] + all_subquery_indices = [] + + # First pass: expand queries while maintaining relationships + current_instance_idx = 0 + for instance in tqdm.tqdm(self.dataset[split]): + queries = instance["query"] + positives = instance.get("positive", []) + negatives = instance.get("negative", []) + + # For each query in this instance + for subquery_idx, query in enumerate(queries): + all_queries.append(query) + all_positives.append(positives) # Same positives for each subquery + all_negatives.append(negatives) # Same negatives for each subquery + all_instance_indices.append(current_instance_idx) + all_subquery_indices.append(subquery_idx) + + current_instance_idx += 1 + + # Filter valid examples + valid_examples = [] + valid_instance_indices = [] + valid_subquery_indices = [] + + # Filter while maintaining relationships + for idx, (pos, neg) in enumerate(zip(all_positives, all_negatives)): + if len(pos) > 0 and len(neg) > 0: + valid_examples.append(idx) + valid_instance_indices.append(all_instance_indices[idx]) + valid_subquery_indices.append(all_subquery_indices[idx]) + + total_instances = len(set(all_instance_indices)) + valid_unique_instances = len(set(valid_instance_indices)) + logging.info( + f"Found {total_instances} total instances, {valid_unique_instances} valid instances" + ) + logging.info( + f"Filtered {len(all_queries) - len(valid_examples)} invalid examples. {len(valid_examples)} remaining." 
+ ) + + # Process valid examples in batches + batch_size = 1000 + for batch_start in tqdm.tqdm(range(0, len(valid_examples), batch_size)): + batch_end = min(batch_start + batch_size, len(valid_examples)) + batch_indices = valid_examples[batch_start:batch_end] + + # Process batch + for i, example_idx in enumerate(batch_indices): + instance_idx = valid_instance_indices[batch_start + i] + subquery_idx = valid_subquery_indices[batch_start + i] + + example_data = self.process_example( + { + "query": all_queries[example_idx], + "positive": all_positives[example_idx], + "negative": all_negatives[example_idx], + }, + split, + instance_idx, + subquery_idx, + ) + + # Populate data structures + query_id = example_data["query_id"] + self.queries[split][query_id] = example_data["query"] + + for doc_id, doc_text, relevance in zip( + example_data["doc_ids"], + example_data["doc_texts"], + example_data["relevance_scores"], + ): + if doc_id not in self.corpus[split]: + self.corpus[split][doc_id] = { + "text": doc_text, + "_id": doc_id, + } + + self.top_ranked[split][query_id].append(doc_id) + self.relevant_docs[split][query_id][doc_id] = relevance + + self.instructions = None + self.data_loaded = True diff --git a/mteb/tasks/Reranking/eng/NevIR.py b/mteb/tasks/Reranking/eng/NevIR.py new file mode 100644 index 0000000000..d37b5a363a --- /dev/null +++ b/mteb/tasks/Reranking/eng/NevIR.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.TaskMetadata import TaskMetadata + +from ....abstasks.AbsTaskReranking import AbsTaskReranking + + +class NevIR(AbsTaskReranking): + metadata = TaskMetadata( + name="NevIR", + description="Paired evaluation of real world negation in retrieval, with questions and passages. 
Since models generally prefer one passage over the other always, there are two questions that the model must get right to understand the negation (hence the `paired_accuracy` metric).", + reference="https://github.com/orionw/NevIR", + dataset={ + "path": "orionweller/NevIR-mteb", + "revision": "eab99575c01c6a8e39f8d2adc6e3c3adcfe84413", + }, + type="Reranking", + category="s2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="paired_accuracy", + date=("2023-05-12", "2023-09-28"), + domains=["Web"], + task_subtypes=["Article retrieval"], + license="mit", + annotations_creators="human-annotated", + dialect=[], + sample_creation="created", + bibtex_citation="""@inproceedings{Weller2023NevIRNI, + title={{NevIR: Negation in Neural Information Retrieval}}, + author={{Orion Weller and Dawn J Lawrie and Benjamin Van Durme}}, + booktitle={{Conference of the European Chapter of the Association for Computational Linguistics}}, + year={{2023}}, + url={{https://api.semanticscholar.org/CorpusID:258676146}} +}""", + ) diff --git a/mteb/tasks/Reranking/eng/SciDocsReranking.py b/mteb/tasks/Reranking/eng/SciDocsReranking.py index 183566cfe6..620e4471a2 100644 --- a/mteb/tasks/Reranking/eng/SciDocsReranking.py +++ b/mteb/tasks/Reranking/eng/SciDocsReranking.py @@ -19,7 +19,7 @@ class SciDocsReranking(AbsTaskReranking): modalities=["text"], eval_splits=["test"], eval_langs=["eng-Latn"], - main_score="map", + main_score="map_at_1000", date=("2000-01-01", "2020-12-31"), # best guess domains=["Academic", "Non-fiction", "Written"], task_subtypes=["Scientific Reranking"], diff --git a/mteb/tasks/Reranking/eng/StackOverflowDupQuestions.py b/mteb/tasks/Reranking/eng/StackOverflowDupQuestions.py index 9e47461620..d51e63b609 100644 --- a/mteb/tasks/Reranking/eng/StackOverflowDupQuestions.py +++ b/mteb/tasks/Reranking/eng/StackOverflowDupQuestions.py @@ -19,7 +19,7 @@ class StackOverflowDupQuestions(AbsTaskReranking): modalities=["text"], 
eval_splits=["test"], eval_langs=["eng-Latn"], - main_score="map", + main_score="map_at_1000", date=None, domains=None, task_subtypes=None, diff --git a/mteb/tasks/Reranking/eng/WebLINXCandidatesReranking.py b/mteb/tasks/Reranking/eng/WebLINXCandidatesReranking.py index 981dfa4eef..c0874c7bbb 100644 --- a/mteb/tasks/Reranking/eng/WebLINXCandidatesReranking.py +++ b/mteb/tasks/Reranking/eng/WebLINXCandidatesReranking.py @@ -1,7 +1,5 @@ from __future__ import annotations -import datasets - from mteb.abstasks.TaskMetadata import TaskMetadata from ....abstasks.AbsTaskReranking import AbsTaskReranking @@ -29,7 +27,7 @@ class WebLINXCandidatesReranking(AbsTaskReranking): "test_web", ], eval_langs=["eng-Latn"], - main_score="mrr", + main_score="mrr_at_10", date=("2023-03-01", "2023-10-30"), domains=["Academic", "Web", "Written"], task_subtypes=["Code retrieval", "Conversational retrieval"], @@ -48,22 +46,3 @@ class WebLINXCandidatesReranking(AbsTaskReranking): } """, ) - - def load_data(self, **kwargs): - if self.data_loaded: - return - - self._datasets = {} - - for split in self.metadata.eval_splits: - self._datasets[split] = datasets.load_dataset( - split=split, **self.metadata_dict["dataset"] - ) - - self.dataset = datasets.DatasetDict( - {split: self._datasets[split] for split in self.metadata.eval_splits} - ) - - self.dataset_transform() - - self.data_loaded = True diff --git a/mteb/tasks/Reranking/fra/AlloprofReranking.py b/mteb/tasks/Reranking/fra/AlloprofReranking.py index 20d24f03ec..150d2b314b 100644 --- a/mteb/tasks/Reranking/fra/AlloprofReranking.py +++ b/mteb/tasks/Reranking/fra/AlloprofReranking.py @@ -21,7 +21,7 @@ class AlloprofReranking(AbsTaskReranking): modalities=["text"], eval_splits=["test"], eval_langs=["fra-Latn"], - main_score="map", + main_score="map_at_1000", date=("2020-01-01", "2023-04-14"), # supposition domains=["Web", "Academic", "Written"], task_subtypes=None, @@ -66,4 +66,7 @@ def load_data(self, **kwargs): self.dataset_transform() + # 
now convert to the new format + self.transform_old_dataset_format(self.dataset) + self.data_loaded = True diff --git a/mteb/tasks/Reranking/fra/SyntecReranking.py b/mteb/tasks/Reranking/fra/SyntecReranking.py index 3f9188bd33..8b74f2115a 100644 --- a/mteb/tasks/Reranking/fra/SyntecReranking.py +++ b/mteb/tasks/Reranking/fra/SyntecReranking.py @@ -21,7 +21,7 @@ class SyntecReranking(AbsTaskReranking): modalities=["text"], eval_splits=["test"], eval_langs=["fra-Latn"], - main_score="map", + main_score="map_at_1000", date=("2022-12-01", "2022-12-02"), domains=["Legal", "Written"], task_subtypes=None, @@ -64,4 +64,7 @@ def load_data(self, **kwargs): self.dataset_transform() + # now convert to the new format + self.transform_old_dataset_format(self.dataset) + self.data_loaded = True diff --git a/mteb/tasks/Reranking/jpn/MMarcoReranking.py b/mteb/tasks/Reranking/jpn/MMarcoReranking.py index dd37f16af7..61ca4bf29e 100644 --- a/mteb/tasks/Reranking/jpn/MMarcoReranking.py +++ b/mteb/tasks/Reranking/jpn/MMarcoReranking.py @@ -1,6 +1,7 @@ from __future__ import annotations from mteb.abstasks.AbsTaskReranking import AbsTaskReranking +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval from mteb.abstasks.TaskMetadata import TaskMetadata @@ -18,7 +19,7 @@ class VoyageMMarcoReranking(AbsTaskReranking): modalities=["text"], eval_splits=["test"], eval_langs=["jpn-Jpan"], - main_score="map", + main_score="map_at_1000", date=("2016-12-01", "2023-12-23"), domains=["Academic", "Non-fiction", "Written"], task_subtypes=["Scientific Reranking"], @@ -35,7 +36,14 @@ class VoyageMMarcoReranking(AbsTaskReranking): archivePrefix={arXiv},}""", ) - def dataset_transform(self): + def load_data(self, **kwargs): + if self.data_loaded: + return + + # since AbsTaskReranking has no `load_data` method, we call the parent class method + super(AbsTaskRetrieval, self).load_data(**kwargs) + + # now fix the column names self.dataset = self.dataset.rename_column( "positives", "positive" 
).rename_column("negatives", "negative") @@ -43,3 +51,8 @@ def dataset_transform(self): self.dataset["test"] = self.dataset.pop("train").train_test_split( test_size=2048, seed=self.seed )["test"] + + # now convert to the new format + self.transform_old_dataset_format(self.dataset) + + self.data_loaded = True diff --git a/mteb/tasks/Reranking/multilingual/ESCIReranking.py b/mteb/tasks/Reranking/multilingual/ESCIReranking.py index 03c6608f27..e3587dcb03 100644 --- a/mteb/tasks/Reranking/multilingual/ESCIReranking.py +++ b/mteb/tasks/Reranking/multilingual/ESCIReranking.py @@ -24,7 +24,7 @@ }""" -class ESCIReranking(MultilingualTask, AbsTaskReranking): +class ESCIReranking(AbsTaskReranking, MultilingualTask): metadata = TaskMetadata( name="ESCIReranking", description="", @@ -38,7 +38,7 @@ class ESCIReranking(MultilingualTask, AbsTaskReranking): modalities=["text"], eval_splits=[_EVAL_SPLIT], eval_langs=_LANGUAGES, - main_score="map", + main_score="map_at_1000", date=("2022-06-14", "2022-06-14"), domains=["Written"], task_subtypes=[], diff --git a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py index 4d90ce641d..e9e2677b3b 100644 --- a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py +++ b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py @@ -1,15 +1,12 @@ from __future__ import annotations import logging -from typing import Any +from collections import defaultdict -from datasets import Dataset +import datasets from mteb.abstasks.MultilingualTask import MultilingualTask from mteb.abstasks.TaskMetadata import TaskMetadata -from mteb.encoder_interface import Encoder -from mteb.evaluation.evaluators import RerankingEvaluator -from mteb.load_results.task_results import ScoresDict from ....abstasks.AbsTaskReranking import AbsTaskReranking @@ -50,7 +47,7 @@ }""" -class MIRACLReranking(MultilingualTask, AbsTaskReranking): +class MIRACLReranking(AbsTaskReranking, MultilingualTask): metadata = TaskMetadata( 
def process_example(self, example: dict, split: str, query_idx: int) -> dict:
    """Convert one MIRACL row into query/document/relevance lists.

    Every entry of ``example["candidates"]`` becomes a document whose ID is
    derived from the query ID and the candidate's position. A candidate is
    scored 1 when its text also occurs in ``example["positive"]`` and 0
    otherwise.

    Args:
        example: Row with the keys ``"query"`` (str), ``"positive"`` and
            ``"candidates"`` (sequences of document texts).
        split: Split name, used as the prefix of the query ID.
        query_idx: Row position, appended to the query ID without padding.

    Returns:
        Dict with ``"query_id"``, ``"query"`` and the parallel lists
        ``"doc_ids"``, ``"doc_texts"`` and ``"relevance_scores"``.

    Raises:
        AssertionError: If the query is not a string.
    """
    query = example["query"]
    assert isinstance(query, str)
    positive_docs = set(example["positive"])
    candidate_docs = list(example["candidates"])

    # NOTE: the index is not zero-padded, so query IDs do not sort
    # lexicographically in row order.
    query_id = f"{split}_query{query_idx}"

    return {
        "query_id": query_id,
        "query": query,
        # The candidate position is rendered as a five-digit number.
        "doc_ids": [
            f"candidate_{query_id}_{position:05d}"
            for position in range(len(candidate_docs))
        ],
        "doc_texts": candidate_docs,
        # Scoring every non-positive candidate as 0 is not technically
        # correct, but it mirrors what the original evaluator did.
        "relevance_scores": [int(doc in positive_docs) for doc in candidate_docs],
    }


def load_data(self, **kwargs):
    """Load every subset/split and convert it to the retrieval-style format.

    Fills ``self.corpus``, ``self.queries``, ``self.relevant_docs`` and
    ``self.top_ranked`` (all keyed ``[hf_subset][split]``) from rows that
    carry ``query``, ``positive``, ``negative`` and ``candidates`` columns.
    Rows without at least one positive and one negative are dropped. Does
    nothing when the data has already been loaded.

    Raises:
        AssertionError: If the dataset metadata pins a ``name`` while a
            non-default subset is requested, or if a fallback negative
            document is not a string.
    """
    if self.data_loaded:
        return

    logger.info(f"Transforming old format to standard format for {self.metadata.name}")

    self.corpus = defaultdict(lambda: defaultdict(dict))
    self.queries = defaultdict(lambda: defaultdict(dict))
    self.relevant_docs = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    self.top_ranked = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    dataset_kwargs = self.metadata_dict["dataset"]
    hf_subsets = list(self.hf_subsets) if self.is_multilingual else ["default"]
    for hf_subset in hf_subsets:
        if "name" in dataset_kwargs:
            cur_dataset = datasets.load_dataset(**dataset_kwargs)  # type: ignore
            assert (
                hf_subset == "default"
            ), f"Only default subset is supported for {self.metadata.name} since `name` is given in the metadata."
        else:
            cur_dataset = datasets.load_dataset(
                **dataset_kwargs, name=hf_subset
            )  # type: ignore

        for split in cur_dataset:
            split_data = cur_dataset[split]

            # Rebuild the split with only the columns that are needed.
            enumerated_dataset = datasets.Dataset.from_dict(
                {
                    "index": range(len(split_data)),
                    "query": split_data["query"],
                    "positive": split_data["positive"],
                    "negative": split_data["negative"],
                    "candidates": split_data["candidates"],
                }
            )

            # Only keep rows that have both positives and negatives.
            enumerated_dataset = enumerated_dataset.filter(
                lambda example: len(example["positive"]) > 0
                and len(example["negative"]) > 0
            )

            logger.info(
                f"Filtered out {len(split_data) - len(enumerated_dataset)} examples. {len(enumerated_dataset)} examples remaining."
            )

            # One output row per input row, so positions stay aligned with
            # `enumerated_dataset`.
            processed_dataset = enumerated_dataset.map(
                lambda example, idx: self.process_example(example, split, idx),
                with_indices=True,
                remove_columns=enumerated_dataset.column_names,
            )

            for idx, item in enumerate(processed_dataset):
                query_id = item["query_id"]
                self.queries[hf_subset][split][query_id] = item["query"]

                for doc_id, doc_text, relevance in zip(
                    item["doc_ids"], item["doc_texts"], item["relevance_scores"]
                ):
                    self.corpus[hf_subset][split][doc_id] = {
                        "text": doc_text,
                        "_id": doc_id,
                    }
                    self.top_ranked[hf_subset][split][query_id].append(doc_id)
                    self.relevant_docs[hf_subset][split][query_id][doc_id] = relevance

                if not self.top_ranked[hf_subset][split][query_id]:
                    # Give the query a negative even though its qrels stay
                    # empty, since that is how the original did it.
                    # `idx` is a position in the *filtered* dataset, so the
                    # row must be read from there; the unfiltered split is
                    # shifted as soon as any row was dropped.
                    neg_doc = enumerated_dataset[idx]["negative"][0]
                    assert isinstance(
                        neg_doc, str
                    ), f"Negative document is not a string: {neg_doc}"
                    neg_doc_id = f"negative_{query_id}"
                    self.top_ranked[hf_subset][split][query_id].append(neg_doc_id)
                    self.corpus[hf_subset][split][neg_doc_id] = {
                        "text": neg_doc,
                        "_id": neg_doc_id,
                    }
                    assert self.relevant_docs[hf_subset][split][query_id] == {}
                    logger.warning(
                        f"Query {query_id} has no relevant documents. Adding a negative example."
                    )

    self.instructions = None
    self.data_loaded = True
+53,7 @@ class MMarcoReranking(AbsTaskReranking): modalities=["text"], eval_splits=["dev"], eval_langs=["cmn-Hans"], - main_score="map", + main_score="map_at_1000", date=None, form=None, domains=None, @@ -89,7 +89,7 @@ class CMedQAv1(AbsTaskReranking): modalities=["text"], eval_splits=["test"], eval_langs=["cmn-Hans"], - main_score="map", + main_score="map_at_1000", date=("2017-01-01", "2017-07-26"), domains=["Medical", "Written"], task_subtypes=[], @@ -125,7 +125,7 @@ class CMedQAv2(AbsTaskReranking): modalities=["text"], eval_splits=["test"], eval_langs=["cmn-Hans"], - main_score="map", + main_score="map_at_1000", date=None, form=None, domains=None, diff --git a/mteb/tasks/__init__.py b/mteb/tasks/__init__.py index dfe568bb89..72c357606f 100644 --- a/mteb/tasks/__init__.py +++ b/mteb/tasks/__init__.py @@ -3,6 +3,7 @@ from .BitextMining import * from .Classification import * from .Clustering import * +from .InstructionReranking import * from .InstructionRetrieval import * from .MultiLabelClassification import * from .PairClassification import * diff --git a/scripts/running_model/check_results.py b/scripts/running_model/check_results.py index c410fb5be7..09f95fcd22 100644 --- a/scripts/running_model/check_results.py +++ b/scripts/running_model/check_results.py @@ -119,6 +119,7 @@ def normalize_results(results): "Summarization", "Clustering", "InstructionRetrieval", + "InstructionReranking", "Retrieval", ] ) diff --git a/scripts/running_model/create_slurm_jobs.py b/scripts/running_model/create_slurm_jobs.py index 606630d9e5..ce3a03df92 100644 --- a/scripts/running_model/create_slurm_jobs.py +++ b/scripts/running_model/create_slurm_jobs.py @@ -100,6 +100,7 @@ def run_slurm_jobs(files: list[Path]) -> None: "Reranking", "Retrieval", "InstructionRetrieval", + "InstructionReranking", "STS", "Summarization", ], diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index 2ac0096da8..f599a21990 100644 --- a/tests/test_benchmark/mock_tasks.py 
class MockRerankingTask(AbsTaskReranking):
    """In-memory reranking fixture: two queries, each ranked over two documents."""

    # Reference descriptive statistics for the "test" split of this fixture.
    expected_stats = dict(
        test=dict(
            average_document_length=13.5,
            average_query_length=13.0,
            average_instruction_length=0,
            num_documents=2,
            num_queries=2,
            average_top_ranked_per_query=2.0,
            num_instructions=0,
            average_relevant_docs_per_query=1.0,
            number_of_characters=53.0,
            num_samples=4,
        )
    )

    metadata = TaskMetadata(
        type="Reranking",
        name="MockRerankingTask",
        main_score="map_at_1000",
        **general_args,  # type: ignore
    )

    def load_data(self, **kwargs):
        """Install hard-coded queries, corpus, qrels and candidate lists."""
        split = "test"
        query_texts = {
            "q1": "This is a test sentence",
            "q2": "This is another test sentence",
        }
        doc_texts = {
            "d1": "This is a positive sentence",
            "d2": "This is a negative sentence",
        }
        # Each query has exactly one relevant document.
        judgements = {
            "q1": {"d1": 1, "d2": 0},
            "q2": {"d1": 0, "d2": 1},
        }
        # Candidate documents per query.
        candidates = {
            "q1": ["d1", "d2"],
            "q2": ["d2", "d1"],
        }

        self.queries = {split: query_texts}
        self.corpus = {split: doc_texts}
        self.relevant_docs = {split: judgements}
        self.top_ranked = {split: candidates}
        self.instructions = None
        self.data_loaded = True
multilingual_eval_langs def load_data(self, **kwargs): - query = ["This is a test sentence", "This is another test sentence"] - positive = [ - "This is a positive sentence", - "This is another positive sentence", - ] - negative = [ - "This is a negative sentence", - "This is another negative sentence", - ] - data = { - "test": Dataset.from_dict( - { - "query": query, - "positive": positive, - "negative": negative, - } - ), + queries = { + "test": { + "q1": "This is a test sentence", + "q2": "This is another test sentence", + } } - self.dataset = DatasetDict( - { - "eng": data, - "fra": data, + self.queries = {"eng": queries, "fra": queries} + corpus = { + "test": { + "d1": "This is a positive sentence", + "d2": "This is another positive sentence", } - ) + } + self.corpus = {"eng": corpus, "fra": corpus} + + relevant_docs = { + "test": { + "q1": {"d1": 1, "d2": 0}, + "q2": {"d1": 0, "d2": 1}, + }, + } + self.relevant_docs = { + "eng": relevant_docs, + "fra": relevant_docs, + } + top_ranked = { + "test": { + "q1": ["d1", "d2"], + "q2": ["d2", "d1"], + }, + } + self.top_ranked = { + "eng": top_ranked, + "fra": top_ranked, + } + self.instructions = None self.data_loaded = True class MockRetrievalTask(AbsTaskRetrieval): expected_stats = { "test": { - "num_samples": 4, - "number_of_characters": 56.0, "average_document_length": 15.0, "average_query_length": 13.0, "num_documents": 2, "num_queries": 2, "average_relevant_docs_per_query": 1.0, + "average_top_ranked_per_query": 0, + "average_instruction_length": 0, + "num_instructions": 0, + "num_samples": 4, + "number_of_characters": 56.0, } } @@ -1019,37 +1051,48 @@ def load_data(self, **kwargs): "q2": {"d1": 0, "d2": 1}, }, } + self.top_ranked = None + self.instructions = None self.data_loaded = True class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): expected_stats = { "test": { - "number_of_characters": 56.0, - "num_samples": 8, - "num_queries": 4, - "num_documents": 4, "average_document_length": 7.5, 
"average_query_length": 6.5, + "num_documents": 4, + "num_queries": 4, "average_relevant_docs_per_query": 1.0, + "num_instructions": 0, + "average_top_ranked_per_query": 0, + "average_instruction_length": 0, + "num_samples": 8, + "number_of_characters": 56.0, "hf_subset_descriptive_stats": { "eng": { - "number_of_characters": 56.0, - "num_samples": 4, - "num_queries": 2, - "num_documents": 2, "average_document_length": 15.0, "average_query_length": 13.0, + "num_documents": 2, + "num_queries": 2, "average_relevant_docs_per_query": 1.0, + "average_top_ranked_per_query": 0, + "average_instruction_length": 0, + "num_instructions": 0, + "num_samples": 4, + "number_of_characters": 56.0, }, "fra": { - "number_of_characters": 56.0, - "num_samples": 4, - "num_queries": 2, - "num_documents": 2, "average_document_length": 15.0, "average_query_length": 13.0, + "num_documents": 2, + "num_queries": 2, "average_relevant_docs_per_query": 1.0, + "average_top_ranked_per_query": 0, + "average_instruction_length": 0, + "num_instructions": 0, + "num_samples": 4, + "number_of_characters": 56.0, }, }, } @@ -1089,6 +1132,8 @@ def load_data(self, **kwargs): "eng": relevant_docs, "fra": relevant_docs, } + self.top_ranked = None + self.instructions = None self.data_loaded = True @@ -1202,27 +1247,26 @@ def load_data(self, **kwargs): self.data_loaded = True -class MockInstructionRetrival(AbsTaskInstructionRetrieval): - do_length_ablation = True +class MockInstructionRetrieval(AbsTaskRetrieval): expected_stats = { "test": { - "average_changed_instruction_length": 37.0, - "average_document_length": 30.0, + "num_documents": 2, + "num_queries": 2, + "average_document_length": 15.0, + "average_query_length": 13.0, "average_instruction_length": 29.0, - "average_query_length": 26.0, "average_relevant_docs_per_query": 1.0, - "average_top_ranked_per_query": 2.0, - "num_docs": 2, - "num_queries": 2, + "average_top_ranked_per_query": 0, + "num_instructions": 2, "num_samples": 4, - 
"number_of_characters": 244, + "number_of_characters": 56.0, } } metadata = TaskMetadata( type="InstructionRetrieval", - name="MockInstructionRetrival", - main_score="p-MRR", + name="MockInstructionRetrieval", + main_score="ndcg_at_10", **general_args, # type: ignore ) @@ -1235,98 +1279,122 @@ def load_data(self, **kwargs): } self.corpus = { "test": { - "d1": {"text": "This is a positive sentence"}, - "d2": {"text": "This is another positive sentence"}, + "d1": "This is a positive sentence", + "d2": "This is another positive sentence", } } - self.og_relevant_docs = { + self.relevant_docs = { "test": { "q1": {"d1": 1, "d2": 0}, "q2": {"d1": 0, "d2": 1}, }, } - self.og_instructions = { + self.instructions = { "test": { - "This is a test sentence": "This is a test instruction", - "This is another test sentence": "This is another test instruction", + "q1": "This is a test instruction", + "q2": "This is another test instruction", } } - self.changed_instructions = { + self.top_ranked = None + self.data_loaded = True + + +class MockInstructionReranking(AbsTaskReranking): + expected_stats = { + "test": { + "num_documents": 2, + "num_queries": 2, + "num_instructions": 2, + "average_document_length": 15.0, + "average_query_length": 13.0, + "average_instruction_length": 29.0, + "average_relevant_docs_per_query": 1.0, + "average_top_ranked_per_query": 2.0, + "num_samples": 4, + "number_of_characters": 56.0, + } + } + + metadata = TaskMetadata( + type="InstructionReranking", + name="MockInstructionReranking", + main_score="ndcg_at_10", + **general_args, # type: ignore + ) + + def load_data(self, **kwargs): + self.queries = { "test": { - "This is a test sentence": "This is a changed test instruction", - "This is another test sentence": "This is changed another test instruction", + "q1": "This is a test sentence", + "q2": "This is another test sentence", } } - self.changed_relevant_docs = { + self.corpus = { "test": { - "q1": {"d1": 0, "d2": 1}, - "q2": {"d1": 1, "d2": 0}, + 
"d1": "This is a positive sentence", + "d2": "This is another positive sentence", } } - self.top_ranked = { + self.relevant_docs = { "test": { - "q1": ["d1", "d2"], - "q2": ["d2", "d1"], - } + "q1": {"d1": 1, "d2": 0}, + "q2": {"d1": 0, "d2": 1}, + }, } - - self.keywords = { + self.instructions = { "test": { - "This is a test sentence": "test1", - "This is another test sentence": "test2", + "q1": "This is a test instruction", + "q2": "This is another test instruction", } } - self.short_instructions = { + self.top_ranked = { "test": { - "This is a test sentence": "short1", - "This is another test sentence": "short2", + "q1": ["d1", "d2"], + "q2": ["d2", "d1"], } } self.data_loaded = True -class MockMultilingualInstructionRetrival( - AbsTaskInstructionRetrieval, MultilingualTask -): - do_length_ablation = True +class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): expected_stats = { "test": { - "num_samples": 8, - "num_docs": 4, + "num_documents": 4, "num_queries": 4, - "number_of_characters": 488, - "average_document_length": 30.0, - "average_query_length": 26.0, + "num_instructions": 4, + "average_document_length": 7.5, + "average_query_length": 6.5, "average_instruction_length": 29.0, - "average_changed_instruction_length": 37.0, "average_relevant_docs_per_query": 1.0, - "average_top_ranked_per_query": 2.0, + "average_top_ranked_per_query": 0, + "num_samples": 8, + "number_of_characters": 56.0, "hf_subset_descriptive_stats": { "eng": { - "num_samples": 4, - "num_docs": 2, + "num_documents": 2, "num_queries": 2, - "number_of_characters": 244, - "average_document_length": 30.0, - "average_query_length": 26.0, + "num_instructions": 2, + "average_document_length": 15.0, + "average_query_length": 13.0, "average_instruction_length": 29.0, - "average_changed_instruction_length": 37.0, "average_relevant_docs_per_query": 1.0, - "average_top_ranked_per_query": 2.0, + "average_top_ranked_per_query": 0, + "num_samples": 4, + "number_of_characters": 
class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask):
    """In-memory multilingual instruction-reranking fixture ("eng" and "fra")."""

    # Reference descriptive statistics: aggregate figures for the "test"
    # split, plus one identical entry per language subset.
    expected_stats = {
        "test": {
            "num_documents": 4,
            "num_queries": 4,
            "num_instructions": 4,
            "average_document_length": 7.5,
            "average_query_length": 6.5,
            "average_instruction_length": 29.0,
            "average_relevant_docs_per_query": 1.0,
            "average_top_ranked_per_query": 2.0,
            "num_samples": 8,
            "number_of_characters": 56.0,
            "hf_subset_descriptive_stats": {
                language: {
                    "num_documents": 2,
                    "num_queries": 2,
                    "num_instructions": 2,
                    "average_document_length": 15.0,
                    "average_query_length": 13.0,
                    "average_instruction_length": 29.0,
                    "average_relevant_docs_per_query": 1.0,
                    "average_top_ranked_per_query": 2.0,
                    "num_samples": 4,
                    "number_of_characters": 56.0,
                }
                for language in ("eng", "fra")
            },
        }
    }

    metadata = TaskMetadata(
        type="InstructionReranking",
        name="MockMultilingualInstructionReranking",
        main_score="ndcg_at_10",
        **general_args,  # type: ignore
    )
    metadata.eval_langs = multilingual_eval_langs

    def load_data(self, **kwargs):
        """Install the same hard-coded "test" split under both language subsets."""
        languages = ("eng", "fra")

        queries = {
            "test": {
                "q1": "This is a test sentence",
                "q2": "This is another test sentence",
            }
        }
        corpus = {
            "test": {
                "d1": "This is a positive sentence",
                "d2": "This is another positive sentence",
            }
        }
        relevant_docs = {
            "test": {
                "q1": {"d1": 1, "d2": 0},
                "q2": {"d1": 0, "d2": 1},
            },
        }
        instructions = {
            "test": {
                "q1": "This is a test instruction",
                "q2": "This is another test instruction",
            }
        }
        top_ranked = {
            "test": {
                "q1": ["d1", "d2"],
                "q2": ["d2", "d1"],
            }
        }

        # dict.fromkeys maps every language to the very same object, exactly
        # like the literal {"eng": x, "fra": x}.
        self.queries = dict.fromkeys(languages, queries)
        self.corpus = dict.fromkeys(languages, corpus)
        self.relevant_docs = dict.fromkeys(languages, relevant_docs)
        self.instructions = dict.fromkeys(languages, instructions)
        self.top_ranked = dict.fromkeys(languages, top_ranked)
        self.data_loaded = True
MockMultilingualSummarizationTask(), - MockInstructionRetrival(), - MockMultilingualInstructionRetrival(), + MockInstructionRetrieval(), + MockMultilingualInstructionRetrieval(), + MockMultilingualInstructionReranking(), + MockInstructionReranking(), ] MOCK_TASK_TEST_GRID_AS_STRING = [ diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index ff3e1d5c86..660dd50c80 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -23,8 +23,8 @@ MockTorchEncoder, ) from .mock_tasks import ( - MockInstructionRetrival, - MockMultilingualInstructionRetrival, + MockInstructionRetrieval, + MockMultilingualInstructionRetrieval, MockMultilingualRerankingTask, MockMultilingualRetrievalTask, MockRerankingTask, @@ -268,8 +268,8 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): [ MockRerankingTask(), MockMultilingualRerankingTask(), - MockInstructionRetrival(), - MockMultilingualInstructionRetrival(), + MockInstructionRetrieval(), + MockMultilingualInstructionRetrieval(), MockRetrievalTask(), MockMultilingualRetrievalTask(), ], diff --git a/tests/test_evaluators/test_InstructionRetrievalEvaluator.py b/tests/test_evaluators/test_InstructionRetrievalEvaluator.py index 9fe1cb13c0..42bc23b48f 100644 --- a/tests/test_evaluators/test_InstructionRetrievalEvaluator.py +++ b/tests/test_evaluators/test_InstructionRetrievalEvaluator.py @@ -1,18 +1,18 @@ from __future__ import annotations from mteb import SentenceTransformerWrapper -from mteb.evaluation.evaluators import InstructionRetrievalEvaluator, utils +from mteb.evaluation.evaluators import RetrievalEvaluator, utils from tests.test_benchmark.mock_models import MockNumpyEncoder -class TestInstructionRetrievalEvaluator: +class TestInstructionMetricsEvaluation: def setup_method(self): """Setup any state tied to the execution of the given method in a class. setup_method is invoked for every test method of a class. 
""" # checks that it loads - self.evaluator = InstructionRetrievalEvaluator.InstructionRetrievalEvaluator( + self.evaluator = RetrievalEvaluator( SentenceTransformerWrapper(MockNumpyEncoder()), task_name="test" ) @@ -23,41 +23,43 @@ def test_p_mrr(self): # these are the query: {"doc_id": score} original_run = { - "a": {"0": 1, "1": 2, "2": 3, "3": 4}, + "a-og": {"0": 1, "1": 2, "2": 3, "3": 4}, } new_run = { - "a": {"0": 1, "1": 2, "2": 3, "3": 4}, + "a-changed": {"0": 1, "1": 2, "2": 3, "3": 4}, } - results = utils.evaluate_change( + score = utils.calculate_pmrr( original_run, new_run, changed_qrels, ) - - assert results["p-MRR"] == 0.0 + assert score == 0.0 # test with a change new_run = { - "a": {"0": 4, "1": 1, "2": 2, "3": 3}, + "a-changed": {"0": 4, "1": 1, "2": 2, "3": 3}, } - results = utils.evaluate_change( + score = utils.calculate_pmrr( original_run, new_run, changed_qrels, ) + assert score == -0.75 - assert results["p-MRR"] == -0.75 - - # test with a positive change - - results = utils.evaluate_change( + # test with a positive change, flipping them + new_run = { + "a-og": {"0": 4, "1": 1, "2": 2, "3": 3}, + } + original_run = { + "a-changed": {"0": 1, "1": 2, "2": 3, "3": 4}, + } + score = utils.calculate_pmrr( new_run, original_run, changed_qrels, ) - - assert results["p-MRR"] == 0.75 + assert score == 0.75 diff --git a/tests/test_evaluators/test_RerankingEvaluator.py b/tests/test_evaluators/test_RerankingEvaluator.py deleted file mode 100644 index 19b21e5721..0000000000 --- a/tests/test_evaluators/test_RerankingEvaluator.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import annotations - -import pytest - -from mteb.evaluation.evaluators import RerankingEvaluator - -TOL = 0.0001 - - -class TestRerankingEvaluator: - def setup_method(self): - """Setup any state tied to the execution of the given method in a class. - - setup_method is invoked for every test method of a class. 
- """ - self.evaluator = RerankingEvaluator([]) - - def test_mrr_at_k(self): - is_relevant = [1, 1, 1, 0, 0, 0, 0, 0, 0] - pred_ranking = [5, 2, 6, 1, 3, 4, 7, 8, 9] - - assert self.evaluator.mrr_at_k_score( - is_relevant, pred_ranking, 10 - ) == pytest.approx(0.5, TOL) - assert self.evaluator.mrr_at_k_score( - is_relevant, pred_ranking, 3 - ) == pytest.approx(0.5, TOL) - assert self.evaluator.mrr_at_k_score( - is_relevant, pred_ranking, 1 - ) == pytest.approx(0, TOL) - - def test_map(self): - is_relevant = [1, 1, 1, 0, 0] - pred_scores = [0.75, 0.93, 0.85, 0.76, 0.75] - - assert self.evaluator.ap_score(is_relevant, pred_scores) == pytest.approx( - 0.86666, TOL - ) - - def test_nAUC(self): - is_relevant = [[1, 1, 0, 0, 0], [1, 0, 0], [1, 1, 1, 0], [1, 0], [1, 1, 0, 0]] - pred_scores = [ - [0.8, 0.3, 0.4, 0.6, 0.5], - [0.5, 0.8, 0.4], - [0.9, 0.3, 0.3, 0.1], - [0.1, 0.2], - [0.5, 0.4, 0.5, 0.2], - ] - - ap_scores = [ - self.evaluator.ap_score(y, x) for x, y in zip(pred_scores, is_relevant) - ] - conf_scores = [self.evaluator.conf_scores(x) for x in pred_scores] - nauc_scores_map = self.evaluator.nAUC_scores(conf_scores, ap_scores, "map") - - assert nauc_scores_map["nAUC_map_max"] == pytest.approx(0.8694, TOL) - assert nauc_scores_map["nAUC_map_std"] == pytest.approx(0.94065, TOL) - assert nauc_scores_map["nAUC_map_diff1"] == pytest.approx(0.85460, TOL) diff --git a/tests/test_evaluators/test_RetrievalEvaluator.py b/tests/test_evaluators/test_RetrievalEvaluator.py index 01a4747969..1d4714aca4 100644 --- a/tests/test_evaluators/test_RetrievalEvaluator.py +++ b/tests/test_evaluators/test_RetrievalEvaluator.py @@ -38,6 +38,7 @@ def setup_method(self): "map": {"MAP@1": 0.75, "MAP@2": 1.0, "MAP@3": 1.0}, "recall": {"Recall@1": 0.75, "Recall@2": 1.0, "Recall@3": 1.0}, "precision": {"P@1": 1.0, "P@2": 0.75, "P@3": 0.5}, + "task_specific": {}, }, ), # Test no self retrieval @@ -57,6 +58,7 @@ def setup_method(self): "map": {"MAP@1": 0.25, "MAP@2": 0.25, "MAP@3": 0.25}, 
"recall": {"Recall@1": 0.25, "Recall@2": 0.25, "Recall@3": 0.25}, "precision": {"P@1": 0.5, "P@2": 0.25, "P@3": 0.16667}, + "task_specific": {}, }, ), ], @@ -71,12 +73,13 @@ def test_metrics_at_k( ignore_identical_ids=ignore_identical_ids, ) - ndcg, _map, recall, precision, nauc = output + ndcg, _map, recall, precision, nauc, task_specific = output assert ndcg == expected_metrics["ndcg"] assert _map == expected_metrics["map"] assert recall == expected_metrics["recall"] assert precision == expected_metrics["precision"] + assert task_specific == expected_metrics["task_specific"] @pytest.mark.parametrize( "ignore_identical_ids, expected_naucs", @@ -115,7 +118,7 @@ def test_nAUC(self, ignore_identical_ids, expected_naucs): "4": {"0": 0.5, "1": 0.4, "2": 0.5}, } - _, _, _, _, naucs = self.evaluator.evaluate( + _, _, _, _, naucs, _ = self.evaluator.evaluate( relevant_docs, results, [1, 2, 3], diff --git a/tests/test_reproducible_workflow.py b/tests/test_reproducible_workflow.py index 8f912b9998..308153d2a9 100644 --- a/tests/test_reproducible_workflow.py +++ b/tests/test_reproducible_workflow.py @@ -46,6 +46,7 @@ def test_reproducibility_workflow(task_name: str, model_name: str, model_revisio "STS", "Summarization", "InstructionRetrieval", + "InstructionReranking", "Speed", ], ) diff --git a/tests/test_tasks/test_all_abstasks.py b/tests/test_tasks/test_all_abstasks.py index 6c00a2d5e0..208e7221c3 100644 --- a/tests/test_tasks/test_all_abstasks.py +++ b/tests/test_tasks/test_all_abstasks.py @@ -10,7 +10,7 @@ import mteb from mteb import MTEB from mteb.abstasks import AbsTask -from mteb.abstasks.AbsTaskInstructionRetrieval import AbsTaskInstructionRetrieval +from mteb.abstasks.AbsTaskReranking import AbsTaskReranking from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval from mteb.abstasks.AbsTaskSpeedTask import AbsTaskSpeedTask from mteb.abstasks.MultiSubsetLoader import MultiSubsetLoader @@ -34,7 +34,7 @@ def test_load_data( # TODO: We skip because this load_data 
is completely different. if ( isinstance(task, AbsTaskRetrieval) - or isinstance(task, AbsTaskInstructionRetrieval) + or isinstance(task, AbsTaskReranking) or isinstance(task, MultiSubsetLoader) or isinstance(task, AbsTaskSpeedTask) ): From 2a8a37044f0c07fd22b80a898bc9ad076077826b Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Thu, 14 Nov 2024 11:34:10 +0100 Subject: [PATCH 03/40] fix: Ensure TaskResults can handle runtime and version being unspecified --- mteb/load_results/task_results.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mteb/load_results/task_results.py b/mteb/load_results/task_results.py index ce2e979654..202ed9b5f5 100644 --- a/mteb/load_results/task_results.py +++ b/mteb/load_results/task_results.py @@ -156,9 +156,9 @@ class TaskResult(BaseModel): dataset_revision: str task_name: str - mteb_version: str + mteb_version: str | None scores: dict[Split, list[ScoresDict]] - evaluation_time: float + evaluation_time: float | None kg_co2_emissions: float | None = None @classmethod @@ -290,6 +290,9 @@ def from_disk(cls, path: Path, load_historic_data: bool = True) -> TaskResult: f"Error loading TaskResult from disk. You can try to load historic data by setting `load_historic_data=True`. 
Error: {e}" ) + if data["mteb_version"] is None: + data.pop("mteb_version") + pre_1_11_load = ( ( "mteb_version" in data From 23d6cb2fd6dd6ab1e6aec489819185c14469522b Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Thu, 14 Nov 2024 11:50:28 +0100 Subject: [PATCH 04/40] fix: remove NaN handling for retrieval --- mteb/evaluation/evaluators/model_classes.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mteb/evaluation/evaluators/model_classes.py b/mteb/evaluation/evaluators/model_classes.py index 7c984b6348..60dea56385 100644 --- a/mteb/evaluation/evaluators/model_classes.py +++ b/mteb/evaluation/evaluators/model_classes.py @@ -343,12 +343,6 @@ def _full_corpus_search( query_embeddings, sub_corpus_embeddings ) - is_nan = torch.isnan(cos_scores) - if is_nan.sum() > 0: - raise ValueError( - f"NaN values detected in the similarity scores: {is_nan.sum()}" - ) - # get top-k values cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk( cos_scores, From 5470c886938b55c4490cd5390f96495cfcc20360 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Fri, 15 Nov 2024 01:26:15 +0500 Subject: [PATCH 05/40] fix: Merge main into v2 (#1454) * fix: Count unique texts, data leaks in calculate metrics (#1438) * add more stat * add more stat * update statistics * fix: update task metadata to allow for null (#1448) * Update tasks table * 1.19.5 Automatically generated by python-semantic-release * base * sync with main --------- Co-authored-by: Kenneth Enevoldsen Co-authored-by: github-actions[bot] Co-authored-by: github-actions --- docs/tasks.md | 100 +- mteb/abstasks/AbsTask.py | 12 +- mteb/abstasks/AbsTaskBitextMining.py | 39 +- mteb/abstasks/AbsTaskClassification.py | 29 +- mteb/abstasks/AbsTaskClustering.py | 26 +- mteb/abstasks/AbsTaskClusteringFast.py | 22 +- .../AbsTaskMultilabelClassification.py | 42 +- mteb/abstasks/AbsTaskPairClassification.py | 45 +- mteb/abstasks/AbsTaskRetrieval.py | 116 +- mteb/abstasks/AbsTaskSTS.py | 37 +- 
mteb/abstasks/AbsTaskSummarization.py | 76 +- .../BitextMining/BUCC.v2.json | 69 + .../BitextMining/BornholmBitextMining.json | 11 +- .../BitextMining/IN22ConvBitextMining.json | 5577 +++- .../BitextMining/IN22GenBitextMining.json | 6595 ++++ .../BitextMining/IWSLT2017BitextMining.json | 329 + .../IndicGenBenchFloresBitextMining.json | 1540 + .../BitextMining/NTREXBitextMining.json | 24925 ++++++++++++++++ .../BitextMining/NollySentiBitextMining.json | 69 + .../NorwegianCourtsBitextMining.json | 15 + .../NusaTranslationBitextMining.json | 132 +- .../BitextMining/PhincBitextMining.json | 30 + .../TbilisiCityHallBitextMining.json | 43 + .../BitextMining/VieMedEVBitextMining.json | 15 + .../LanguageClassification.json | 76 + .../SlovakHateSpeechClassification.json | 22 + .../ArXivHierarchicalClusteringP2P.json | 4 + .../Clustering/BiorxivClusteringS2S.json | 5 + .../Clustering/MedrxivClusteringP2P.v2.json | 168 + .../Clustering/MedrxivClusteringS2S.v2.json | 168 + .../Clustering/RedditClusteringP2P.v2.json | 1335 + .../RuSciBenchGRNTIClusteringP2P.json | 4 + .../TwentyNewsgroupsClustering.v2.json | 75 + .../Clustering/WikiClusteringP2P.json | 75 + .../Core17InstructionRetrieval.json | 18 +- .../CEDRClassification.json | 43 +- .../MultiEURLEXMultilabelClassification.json | 1732 -- .../PawsXPairClassification.json | 160 +- .../PairClassification/TwitterURLCorpus.json | 10 +- .../PairClassification/XNLI.json | 300 +- .../Reranking/AskUbuntuDupQuestions.json | 15 +- .../Reranking/ESCIReranking.json | 60 +- .../WikipediaRerankingMultilingual.json | 255 +- .../Retrieval/AppsRetrieval.json | 17 +- .../Retrieval/BelebeleRetrieval.json | 6413 +++- .../Retrieval/COIRCodeSearchNetRetrieval.json | 117 +- .../Retrieval/CodeEditSearchRetrieval.json | 236 +- .../Retrieval/CodeFeedbackMT.json | 17 +- .../Retrieval/CodeFeedbackST.json | 17 +- .../Retrieval/CodeSearchNetCCRetrieval.json | 117 +- .../Retrieval/CodeSearchNetRetrieval.json | 117 +- .../Retrieval/CodeTransOceanContest.json 
| 17 +- .../Retrieval/CodeTransOceanDL.json | 17 +- mteb/descriptive_stats/Retrieval/CosQA.json | 17 +- .../Retrieval/JaqketRetrieval.json | 17 +- .../Retrieval/StackOverflowQA.json | 17 +- .../Retrieval/SyntheticText2SQL.json | 17 +- .../Retrieval/Touche2020.json | 17 +- .../Retrieval/Touche2020Retrieval.v3.json | 17 +- ...lowIRCrossLingualInstructionRetrieval.json | 70 +- .../mFollowIRInstructionRetrieval.json | 70 +- mteb/descriptive_stats/STS/STS12.json | 10 +- mteb/descriptive_stats/STS/STS17.json | 118 +- .../Summarization/SummEval.json | 53 +- mteb/leaderboard/figures.py | 10 +- mteb/leaderboard/table.py | 2 +- pyproject.toml | 2 +- tests/test_benchmark/mock_tasks.py | 893 +- tests/test_tasks/test_metadata.py | 6 +- 69 files changed, 47907 insertions(+), 4933 deletions(-) create mode 100644 mteb/descriptive_stats/BitextMining/BUCC.v2.json create mode 100644 mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/NTREXBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/PhincBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json create mode 100644 mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json delete mode 100644 
mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json diff --git a/docs/tasks.md b/docs/tasks.md index 164daf7102..fd61d8af39 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -25,24 +25,24 @@ The following tables give you an overview of the tasks in MTEB. | [AmazonPolarityClassification](https://huggingface.co/datasets/amazon_polarity) (Julian McAuley, 2013) | ['eng'] | Classification | p2p | [Reviews, Written] | None | None | | [AmazonReviewsClassification](https://arxiv.org/abs/2010.02573) (Phillip Keung, 2020) | ['cmn', 'deu', 'eng', 'fra', 'jpn', 'spa'] | Classification | s2s | [Reviews, Written] | None | None | | [AngryTweetsClassification](https://aclanthology.org/2021.nodalida-main.53/) (Pauli et al., 2021) | ['dan'] | Classification | s2s | [Social, Written] | None | None | -| [AppsRetrieval](https://arxiv.org/abs/2105.09938) (Dan Hendrycks, 2021) | ['eng', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 12530} | {'test': {'number_of_characters': 2245.84, 'num_samples': 12530, 'num_queries': 3765, 'num_documents': 8765, 'average_document_length': 0.07, 'average_query_length': 0.44, 'average_relevant_docs_per_query': 1.0}} | +| [AppsRetrieval](https://arxiv.org/abs/2105.09938) (Dan Hendrycks, 2021) | ['eng', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 12530} | {'test': {'number_of_characters': 11335620, 'num_samples': 12530, 'num_queries': 3765, 'num_documents': 8765, 'min_document_length': 152, 'average_document_length': 717.27, 'max_document_length': 5742, 'unique_documents': 8765, 'min_query_length': 6, 'average_query_length': 1340.96, 'max_query_length': 289049, 'unique_queries': 3765, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3765}} | | [ArEntail](https://link.springer.com/article/10.1007/s10579-024-09731-1) (Obeidat et al., 2024) | ['ara'] | PairClassification | s2s | [News, Written] | None | 
None | -| [ArXivHierarchicalClusteringP2P](https://www.kaggle.com/Cornell-University/arxiv) | ['eng'] | Clustering | p2p | [Academic, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 2065284, 'average_text_length': 1008.44, 'average_labels_per_text': 1.46, 'unique_labels': 129, 'labels': {'cs': {'count': 356}, 'math': {'count': 381}, 'OC': {'count': 11}, 'hep-lat': {'count': 13}, 'hep': {'count': 98}, 'astro-ph': {'count': 213}, 'eess': {'count': 76}, 'quant-ph': {'count': 135}, 'DC': {'count': 5}, 'cond-mat': {'count': 274}, 'hep-th': {'count': 66}, 'SP': {'count': 33}, 'hep-ph': {'count': 69}, 'FA': {'count': 6}, 'nucl-th': {'count': 17}, 'q-bio': {'count': 80}, 'HE': {'count': 22}, 'HC': {'count': 2}, 'stat': {'count': 60}, 'ML': {'count': 16}, 'IV': {'count': 13}, 'stat-mech': {'count': 47}, 'DS': {'count': 14}, 'ME': {'count': 12}, 'CC': {'count': 2}, 'mtrl-sci': {'count': 22}, 'PE': {'count': 16}, 'NT': {'count': 11}, 'SC': {'count': 6}, 'AG': {'count': 13}, 'physics': {'count': 81}, 'ins-det': {'count': 9}, 'GA': {'count': 18}, 'BM': {'count': 6}, 'GN': {'count': 17}, 'NA': {'count': 15}, 'app-ph': {'count': 7}, 'RT': {'count': 6}, 'other': {'count': 37}, 'soft': {'count': 15}, 'CO': {'count': 33}, 'supr-con': {'count': 21}, 'chem-ph': {'count': 3}, 'DM': {'count': 2}, 'MN': {'count': 12}, 'q-fin': {'count': 27}, 'PM': {'count': 2}, 'AP': {'count': 27}, 'gr-qc': {'count': 15}, 'quant-gas': {'count': 8}, 'mes-hall': {'count': 33}, 'IT': {'count': 19}, 'SI': {'count': 6}, 'SG': {'count': 3}, 'bio-ph': {'count': 2}, 'SR': {'count': 16}, 'soc-ph': {'count': 5}, 'hep-ex': {'count': 15}, 'DG': {'count': 11}, 'NE': {'count': 5}, 'CR': {'count': 6}, 'CL': {'count': 12}, 'RM': {'count': 3}, 'econ': {'count': 17}, 'nlin': {'count': 5}, 'PS': {'count': 1}, 'LG': {'count': 26}, 'QA': {'count': 9}, 'str-el': {'count': 26}, 'CV': {'count': 34}, 'MF': {'count': 6}, 'IM': {'count': 7}, 'EM': {'count': 6}, 'TH': {'count': 5}, 'PR': {'count': 
20}, 'AT': {'count': 4}, 'OA': {'count': 4}, 'CP': {'count': 6}, 'LO': {'count': 14}, 'flu-dyn': {'count': 6}, 'atom-ph': {'count': 8}, 'class-ph': {'count': 1}, 'SY': {'count': 20}, 'IR': {'count': 1}, 'plasm-ph': {'count': 8}, 'CE': {'count': 2}, 'AO': {'count': 1}, 'comp-ph': {'count': 3}, 'optics': {'count': 12}, 'MG': {'count': 4}, 'ST': {'count': 6}, 'nucl-ex': {'count': 6}, 'CY': {'count': 9}, 'ao-ph': {'count': 2}, 'DB': {'count': 1}, 'math-ph': {'count': 10}, 'NC': {'count': 13}, 'GT': {'count': 11}, 'TO': {'count': 2}, 'AI': {'count': 9}, 'NI': {'count': 2}, 'gen-ph': {'count': 4}, 'OT': {'count': 4}, 'SD': {'count': 2}, 'dis-nn': {'count': 4}, 'RO': {'count': 7}, 'CA': {'count': 6}, 'FL': {'count': 1}, 'SE': {'count': 5}, 'EP': {'count': 9}, 'hist-ph': {'count': 1}, 'QM': {'count': 9}, 'ed-ph': {'count': 2}, 'GR': {'count': 4}, 'MS': {'count': 1}, 'CD': {'count': 1}, 'ET': {'count': 1}, 'acc-ph': {'count': 5}, 'AC': {'count': 2}, 'OH': {'count': 1}, 'EC': {'count': 2}, 'DL': {'count': 1}, 'AS': {'count': 3}, 'geo-ph': {'count': 2}, 'CG': {'count': 3}, 'CB': {'count': 1}, 'AR': {'count': 1}, 'TR': {'count': 1}, 'atm-clus': {'count': 1}}}} | +| [ArXivHierarchicalClusteringP2P](https://www.kaggle.com/Cornell-University/arxiv) | ['eng'] | Clustering | p2p | [Academic, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 2065284, 'min_text_length': 103, 'average_text_length': 1008.44, 'max_text_length': 2103, 'min_labels_per_text': 1, 'average_labels_per_text': 1.46, 'max_labels_per_text': 381, 'unique_labels': 129, 'labels': {'cs': {'count': 356}, 'math': {'count': 381}, 'OC': {'count': 11}, 'hep-lat': {'count': 13}, 'hep': {'count': 98}, 'astro-ph': {'count': 213}, 'eess': {'count': 76}, 'quant-ph': {'count': 135}, 'DC': {'count': 5}, 'cond-mat': {'count': 274}, 'hep-th': {'count': 66}, 'SP': {'count': 33}, 'hep-ph': {'count': 69}, 'FA': {'count': 6}, 'nucl-th': {'count': 17}, 'q-bio': {'count': 80}, 'HE': {'count': 22}, 'HC': 
{'count': 2}, 'stat': {'count': 60}, 'ML': {'count': 16}, 'IV': {'count': 13}, 'stat-mech': {'count': 47}, 'DS': {'count': 14}, 'ME': {'count': 12}, 'CC': {'count': 2}, 'mtrl-sci': {'count': 22}, 'PE': {'count': 16}, 'NT': {'count': 11}, 'SC': {'count': 6}, 'AG': {'count': 13}, 'physics': {'count': 81}, 'ins-det': {'count': 9}, 'GA': {'count': 18}, 'BM': {'count': 6}, 'GN': {'count': 17}, 'NA': {'count': 15}, 'app-ph': {'count': 7}, 'RT': {'count': 6}, 'other': {'count': 37}, 'soft': {'count': 15}, 'CO': {'count': 33}, 'supr-con': {'count': 21}, 'chem-ph': {'count': 3}, 'DM': {'count': 2}, 'MN': {'count': 12}, 'q-fin': {'count': 27}, 'PM': {'count': 2}, 'AP': {'count': 27}, 'gr-qc': {'count': 15}, 'quant-gas': {'count': 8}, 'mes-hall': {'count': 33}, 'IT': {'count': 19}, 'SI': {'count': 6}, 'SG': {'count': 3}, 'bio-ph': {'count': 2}, 'SR': {'count': 16}, 'soc-ph': {'count': 5}, 'hep-ex': {'count': 15}, 'DG': {'count': 11}, 'NE': {'count': 5}, 'CR': {'count': 6}, 'CL': {'count': 12}, 'RM': {'count': 3}, 'econ': {'count': 17}, 'nlin': {'count': 5}, 'PS': {'count': 1}, 'LG': {'count': 26}, 'QA': {'count': 9}, 'str-el': {'count': 26}, 'CV': {'count': 34}, 'MF': {'count': 6}, 'IM': {'count': 7}, 'EM': {'count': 6}, 'TH': {'count': 5}, 'PR': {'count': 20}, 'AT': {'count': 4}, 'OA': {'count': 4}, 'CP': {'count': 6}, 'LO': {'count': 14}, 'flu-dyn': {'count': 6}, 'atom-ph': {'count': 8}, 'class-ph': {'count': 1}, 'SY': {'count': 20}, 'IR': {'count': 1}, 'plasm-ph': {'count': 8}, 'CE': {'count': 2}, 'AO': {'count': 1}, 'comp-ph': {'count': 3}, 'optics': {'count': 12}, 'MG': {'count': 4}, 'ST': {'count': 6}, 'nucl-ex': {'count': 6}, 'CY': {'count': 9}, 'ao-ph': {'count': 2}, 'DB': {'count': 1}, 'math-ph': {'count': 10}, 'NC': {'count': 13}, 'GT': {'count': 11}, 'TO': {'count': 2}, 'AI': {'count': 9}, 'NI': {'count': 2}, 'gen-ph': {'count': 4}, 'OT': {'count': 4}, 'SD': {'count': 2}, 'dis-nn': {'count': 4}, 'RO': {'count': 7}, 'CA': {'count': 6}, 'FL': {'count': 1}, 'SE': 
{'count': 5}, 'EP': {'count': 9}, 'hist-ph': {'count': 1}, 'QM': {'count': 9}, 'ed-ph': {'count': 2}, 'GR': {'count': 4}, 'MS': {'count': 1}, 'CD': {'count': 1}, 'ET': {'count': 1}, 'acc-ph': {'count': 5}, 'AC': {'count': 2}, 'OH': {'count': 1}, 'EC': {'count': 2}, 'DL': {'count': 1}, 'AS': {'count': 3}, 'geo-ph': {'count': 2}, 'CG': {'count': 3}, 'CB': {'count': 1}, 'AR': {'count': 1}, 'TR': {'count': 1}, 'atm-clus': {'count': 1}}}} | | [ArXivHierarchicalClusteringS2S](https://www.kaggle.com/Cornell-University/arxiv) | ['eng'] | Clustering | p2p | [Academic, Written] | None | None | | [ArguAna](http://argumentation.bplaced.net/arguana/data) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | [Medical, Written] | None | None | | [ArguAna-PL](https://huggingface.co/datasets/clarin-knext/arguana-pl) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | | [ArmenianParaphrasePC](https://github.com/ivannikov-lab/arpa-paraphrase-corpus) (Arthur Malajyan, 2020) | ['hye'] | PairClassification | s2s | [News, Written] | None | None | | [ArxivClassification](https://ieeexplore.ieee.org/document/8675939) (He et al., 2019) | ['eng'] | Classification | s2s | [Academic, Written] | None | None | -| [AskUbuntuDupQuestions](https://github.com/taolei87/askubuntu) | ['eng'] | Reranking | s2s | | {'test': 375} | {'test': {'num_samples': 375, 'number_of_characters': 413674, 'num_positive': 2255, 'num_negative': 5245, 'avg_query_len': 50.21, 'avg_positive_len': 52.54, 'avg_negative_len': 52.69}} | +| [AskUbuntuDupQuestions](https://github.com/taolei87/askubuntu) | ['eng'] | Reranking | s2s | | {'test': 375} | {'test': {'num_samples': 375, 'number_of_characters': 413674, 'num_positive': 2255, 'num_negative': 5245, 'min_query_length': 17, 'avg_query_length': 50.21, 'max_query_length': 148, 'unique_query': 374, 'min_positive_length': 15, 'avg_positive_length': 52.54, 'max_positive_length': 152, 'unique_positive': 2165, 'min_negative_length': 15, 'avg_negative_length': 52.69, 
'max_negative_length': 148, 'unique_negative': 5002}} | | [Assin2RTE](https://link.springer.com/chapter/10.1007/978-3-030-41505-1_39) (Real et al., 2020) | ['por'] | PairClassification | s2s | [Written] | None | None | | [Assin2STS](https://link.springer.com/chapter/10.1007/978-3-030-41505-1_39) (Real et al., 2020) | ['por'] | STS | s2s | [Written] | None | None | | [AutoRAGRetrieval](https://arxiv.org/abs/2410.20878) (Dongkyu Kim, 2024) | ['kor'] | Retrieval | s2p | [Government, Medical, Legal, Social] | {'test': 834} | {'test': {'number_of_characters': 894.22, 'num_samples': 834, 'num_queries': 114, 'num_documents': 720, 'average_document_length': 1.15, 'average_query_length': 0.61, 'average_relevant_docs_per_query': 1.0}} | | [BIOSSES](https://tabilab.cmpe.boun.edu.tr/BIOSSES/DataSet.html) (Soğancıoğlu et al., 2017) | ['eng'] | STS | s2s | | None | None | | [BQ](https://aclanthology.org/2021.emnlp-main.357) (Shitao Xiao, 2024) | ['cmn'] | STS | s2s | | None | None | | [BSARDRetrieval](https://huggingface.co/datasets/maastrichtlawtech/bsard) (Louis et al., 2022) | ['fra'] | Retrieval | s2p | [Legal, Spoken] | None | None | -| [BUCC.v2](https://comparable.limsi.fr/bucc2018/bucc2018-task.html) | ['cmn', 'deu', 'eng', 'fra', 'rus'] | BitextMining | s2s | [Written] | None | None | +| [BUCC.v2](https://comparable.limsi.fr/bucc2018/bucc2018-task.html) | ['cmn', 'deu', 'eng', 'fra', 'rus'] | BitextMining | s2s | [Written] | {'test': 35000} | {'test': {'num_samples': 35000, 'number_of_characters': 6640032, 'unique_pairs': 34978, 'min_sentence1_length': 16, 'average_sentence1_length': 99.11, 'max_sentence1_length': 204, 'unique_sentence1': 34978, 'min_sentence2_length': 42, 'average_sentence2_length': 90.61, 'max_sentence2_length': 159, 'unique_sentence2': 25306, 'hf_subset_descriptive_stats': {'de-en': {'num_samples': 9580, 'number_of_characters': 1919197, 'unique_pairs': 9573, 'min_sentence1_length': 50, 'average_sentence1_length': 109.08, 'max_sentence1_length': 204, 
'unique_sentence1': 9573, 'min_sentence2_length': 46, 'average_sentence2_length': 91.25, 'max_sentence2_length': 155, 'unique_sentence2': 9570}, 'fr-en': {'num_samples': 9086, 'number_of_characters': 1677545, 'unique_pairs': 9081, 'min_sentence1_length': 43, 'average_sentence1_length': 99.32, 'max_sentence1_length': 174, 'unique_sentence1': 9081, 'min_sentence2_length': 42, 'average_sentence2_length': 85.31, 'max_sentence2_length': 159, 'unique_sentence2': 9076}, 'ru-en': {'num_samples': 14435, 'number_of_characters': 2808206, 'unique_pairs': 14425, 'min_sentence1_length': 40, 'average_sentence1_length': 101.66, 'max_sentence1_length': 186, 'unique_sentence1': 14425, 'min_sentence2_length': 45, 'average_sentence2_length': 92.88, 'max_sentence2_length': 159, 'unique_sentence2': 14424}, 'zh-en': {'num_samples': 1899, 'number_of_characters': 235084, 'unique_pairs': 1899, 'min_sentence1_length': 16, 'average_sentence1_length': 28.43, 'max_sentence1_length': 40, 'unique_sentence1': 1899, 'min_sentence2_length': 48, 'average_sentence2_length': 95.36, 'max_sentence2_length': 159, 'unique_sentence2': 1899}}}} | | [Banking77Classification](https://arxiv.org/abs/2003.04807) | ['eng'] | Classification | s2s | [Written] | None | None | -| [BelebeleRetrieval](https://arxiv.org/abs/2308.16884) (Lucas Bandarkar, 2023) | ['acm', 'afr', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'azj', 'bam', 'ben', 'bod', 'bul', 'cat', 'ceb', 'ces', 'ckb', 'dan', 'deu', 'ell', 'eng', 'est', 'eus', 'fin', 'fra', 'fuv', 'gaz', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kac', 'kan', 'kat', 'kaz', 'kea', 'khk', 'khm', 'kin', 'kir', 'kor', 'lao', 'lin', 'lit', 'lug', 'luo', 'lvs', 'mal', 'mar', 'mkd', 'mlt', 'mri', 'mya', 'nld', 'nob', 'npi', 'nso', 'nya', 'ory', 'pan', 'pbt', 'pes', 'plt', 'pol', 'por', 'ron', 'rus', 'shn', 'sin', 'slk', 'slv', 'sna', 'snd', 'som', 'sot', 'spa', 'srp', 'ssw', 'sun', 'swe', 'swh', 'tam', 
'tel', 'tgk', 'tgl', 'tha', 'tir', 'tsn', 'tso', 'tur', 'ukr', 'urd', 'uzn', 'vie', 'war', 'wol', 'xho', 'yor', 'zho', 'zsm', 'zul'] | Retrieval | s2p | [Web, News, Written] | {'test': 521866} | {'test': {'number_of_characters': 76.5, 'num_samples': 521866, 'num_queries': 338378, 'num_documents': 183488, 'average_document_length': 0.0, 'average_query_length': 0.0, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'acm_Arab-acm_Arab': {'number_of_characters': 57.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'acm_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-acm_Arab': {'number_of_characters': 57.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'afr_Latn-afr_Latn': {'number_of_characters': 80.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'afr_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-afr_Latn': {'number_of_characters': 80.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'als_Latn-als_Latn': {'number_of_characters': 78.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'als_Latn-eng_Latn': 
{'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-als_Latn': {'number_of_characters': 78.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'amh_Ethi-amh_Ethi': {'number_of_characters': 51.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'amh_Ethi-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-amh_Ethi': {'number_of_characters': 51.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'apc_Arab-apc_Arab': {'number_of_characters': 57.86, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'apc_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-apc_Arab': {'number_of_characters': 57.86, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-arb_Arab': {'number_of_characters': 60.55, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-eng_Latn': {'number_of_characters': 
79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arb_Arab': {'number_of_characters': 60.55, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-arb_Latn': {'number_of_characters': 69.02, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arb_Latn': {'number_of_characters': 69.02, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ars_Arab-ars_Arab': {'number_of_characters': 58.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'ars_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ars_Arab': {'number_of_characters': 58.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'ary_Arab-ary_Arab': {'number_of_characters': 68.02, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ary_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 
'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ary_Arab': {'number_of_characters': 68.02, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arz_Arab-arz_Arab': {'number_of_characters': 59.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'arz_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arz_Arab': {'number_of_characters': 59.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'asm_Beng-asm_Beng': {'number_of_characters': 70.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'asm_Beng-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-asm_Beng': {'number_of_characters': 70.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'azj_Latn-azj_Latn': {'number_of_characters': 75.51, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'azj_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 
'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-azj_Latn': {'number_of_characters': 75.51, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bam_Latn-bam_Latn': {'number_of_characters': 74.34, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bam_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bam_Latn': {'number_of_characters': 74.34, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-ben_Beng': {'number_of_characters': 71.48, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ben_Beng': {'number_of_characters': 71.48, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-ben_Latn': {'number_of_characters': 76.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 
'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ben_Latn': {'number_of_characters': 76.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bod_Tibt-bod_Tibt': {'number_of_characters': 88.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'bod_Tibt-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bod_Tibt': {'number_of_characters': 88.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'bul_Cyrl-bul_Cyrl': {'number_of_characters': 74.89, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bul_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bul_Cyrl': {'number_of_characters': 74.89, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'cat_Latn-cat_Latn': {'number_of_characters': 77.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'cat_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 
'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-cat_Latn': {'number_of_characters': 77.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ceb_Latn-ceb_Latn': {'number_of_characters': 83.2, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ceb_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ceb_Latn': {'number_of_characters': 83.2, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ces_Latn-ces_Latn': {'number_of_characters': 69.73, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ces_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ces_Latn': {'number_of_characters': 69.73, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ckb_Arab-ckb_Arab': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ckb_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ckb_Arab': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'dan_Latn-dan_Latn': {'number_of_characters': 74.97, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'dan_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-dan_Latn': {'number_of_characters': 74.97, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'deu_Latn-deu_Latn': {'number_of_characters': 77.32, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'deu_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-deu_Latn': {'number_of_characters': 77.32, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ell_Grek-ell_Grek': {'number_of_characters': 88.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ell_Grek-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ell_Grek': {'number_of_characters': 88.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'est_Latn-est_Latn': {'number_of_characters': 69.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'est_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-est_Latn': {'number_of_characters': 69.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'eus_Latn-eus_Latn': {'number_of_characters': 76.45, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'eus_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-eus_Latn': {'number_of_characters': 76.45, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'fin_Latn-fin_Latn': {'number_of_characters': 74.51, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'fin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fin_Latn': {'number_of_characters': 74.51, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'fra_Latn-fra_Latn': {'number_of_characters': 92.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'fra_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fra_Latn': {'number_of_characters': 92.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'fuv_Latn-fuv_Latn': {'number_of_characters': 60.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'fuv_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fuv_Latn': {'number_of_characters': 60.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'gaz_Latn-gaz_Latn': {'number_of_characters': 87.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 
'average_relevant_docs_per_query': 1.0}, 'gaz_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-gaz_Latn': {'number_of_characters': 87.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'grn_Latn-grn_Latn': {'number_of_characters': 77.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'grn_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-grn_Latn': {'number_of_characters': 77.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'guj_Gujr-guj_Gujr': {'number_of_characters': 64.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'guj_Gujr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-guj_Gujr': {'number_of_characters': 64.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'hat_Latn-hat_Latn': {'number_of_characters': 72.65, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'hat_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hat_Latn': {'number_of_characters': 72.65, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hau_Latn-hau_Latn': {'number_of_characters': 87.85, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'hau_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hau_Latn': {'number_of_characters': 87.85, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'heb_Hebr-heb_Hebr': {'number_of_characters': 57.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'heb_Hebr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-heb_Hebr': {'number_of_characters': 57.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-hin_Deva': {'number_of_characters': 74.62, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'hin_Deva-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hin_Deva': {'number_of_characters': 74.62, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-hin_Latn': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hin_Latn': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hrv_Latn-hrv_Latn': {'number_of_characters': 70.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hrv_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hrv_Latn': {'number_of_characters': 70.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hun_Latn-hun_Latn': {'number_of_characters': 76.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'hun_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hun_Latn': {'number_of_characters': 76.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hye_Armn-hye_Armn': {'number_of_characters': 77.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hye_Armn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hye_Armn': {'number_of_characters': 77.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ibo_Latn-ibo_Latn': {'number_of_characters': 74.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ibo_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ibo_Latn': {'number_of_characters': 74.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ilo_Latn-ilo_Latn': {'number_of_characters': 87.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 
'average_relevant_docs_per_query': 1.0}, 'ilo_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ilo_Latn': {'number_of_characters': 87.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ind_Latn-ind_Latn': {'number_of_characters': 84.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ind_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ind_Latn': {'number_of_characters': 84.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'isl_Latn-isl_Latn': {'number_of_characters': 79.27, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'isl_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-isl_Latn': {'number_of_characters': 79.27, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ita_Latn-ita_Latn': {'number_of_characters': 85.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'ita_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ita_Latn': {'number_of_characters': 85.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'jav_Latn-jav_Latn': {'number_of_characters': 80.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'jav_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-jav_Latn': {'number_of_characters': 80.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'jpn_Jpan-jpn_Jpan': {'number_of_characters': 37.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'jpn_Jpan-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-jpn_Jpan': {'number_of_characters': 37.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'kac_Latn-kac_Latn': {'number_of_characters': 100.64, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 
'average_relevant_docs_per_query': 1.0}, 'kac_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kac_Latn': {'number_of_characters': 100.64, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'kan_Knda-kan_Knda': {'number_of_characters': 74.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kan_Knda-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kan_Knda': {'number_of_characters': 74.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kat_Geor-kat_Geor': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kat_Geor-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kat_Geor': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kaz_Cyrl-kaz_Cyrl': {'number_of_characters': 72.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'kaz_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kaz_Cyrl': {'number_of_characters': 72.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kea_Latn-kea_Latn': {'number_of_characters': 77.94, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kea_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kea_Latn': {'number_of_characters': 77.94, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khk_Cyrl-khk_Cyrl': {'number_of_characters': 75.33, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khk_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-khk_Cyrl': {'number_of_characters': 75.33, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khm_Khmr-khm_Khmr': {'number_of_characters': 77.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'khm_Khmr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-khm_Khmr': {'number_of_characters': 77.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kin_Latn-kin_Latn': {'number_of_characters': 81.9, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'kin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kin_Latn': {'number_of_characters': 81.9, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'kir_Cyrl-kir_Cyrl': {'number_of_characters': 76.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kir_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kir_Cyrl': {'number_of_characters': 76.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kor_Hang-kor_Hang': {'number_of_characters': 37.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 
'average_relevant_docs_per_query': 1.0}, 'kor_Hang-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kor_Hang': {'number_of_characters': 37.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'lao_Laoo-lao_Laoo': {'number_of_characters': 65.31, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'lao_Laoo-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lao_Laoo': {'number_of_characters': 65.31, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'lin_Latn-lin_Latn': {'number_of_characters': 83.57, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'lin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lin_Latn': {'number_of_characters': 83.57, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'lit_Latn-lit_Latn': {'number_of_characters': 70.7, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'lit_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lit_Latn': {'number_of_characters': 70.7, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'lug_Latn-lug_Latn': {'number_of_characters': 80.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'lug_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lug_Latn': {'number_of_characters': 80.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'luo_Latn-luo_Latn': {'number_of_characters': 75.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'luo_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-luo_Latn': {'number_of_characters': 75.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'lvs_Latn-lvs_Latn': {'number_of_characters': 71.98, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'lvs_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lvs_Latn': {'number_of_characters': 71.98, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mal_Mlym-mal_Mlym': {'number_of_characters': 82.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mal_Mlym-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mal_Mlym': {'number_of_characters': 82.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mar_Deva-mar_Deva': {'number_of_characters': 70.63, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mar_Deva-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mar_Deva': {'number_of_characters': 70.63, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mkd_Cyrl-mkd_Cyrl': {'number_of_characters': 76.01, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'mkd_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mkd_Cyrl': {'number_of_characters': 76.01, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mlt_Latn-mlt_Latn': {'number_of_characters': 77.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mlt_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mlt_Latn': {'number_of_characters': 77.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mri_Latn-mri_Latn': {'number_of_characters': 83.71, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mri_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mri_Latn': {'number_of_characters': 83.71, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mya_Mymr-mya_Mymr': {'number_of_characters': 91.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 
'average_relevant_docs_per_query': 1.0}, 'mya_Mymr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mya_Mymr': {'number_of_characters': 91.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nld_Latn-nld_Latn': {'number_of_characters': 77.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nld_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nld_Latn': {'number_of_characters': 77.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nob_Latn-nob_Latn': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nob_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nob_Latn': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'npi_Deva-npi_Deva': {'number_of_characters': 68.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 
'average_relevant_docs_per_query': 1.0}, 'npi_Deva-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-npi_Deva': {'number_of_characters': 68.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-npi_Latn': {'number_of_characters': 73.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-npi_Latn': {'number_of_characters': 73.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nso_Latn-nso_Latn': {'number_of_characters': 88.77, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nso_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nso_Latn': {'number_of_characters': 88.77, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nya_Latn-nya_Latn': {'number_of_characters': 92.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 
1.0}, 'nya_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nya_Latn': {'number_of_characters': 92.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ory_Orya-ory_Orya': {'number_of_characters': 74.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ory_Orya-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ory_Orya': {'number_of_characters': 74.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pan_Guru-pan_Guru': {'number_of_characters': 75.3, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pan_Guru-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pan_Guru': {'number_of_characters': 75.3, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pbt_Arab-pbt_Arab': {'number_of_characters': 69.67, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pbt_Arab-eng_Latn': 
{'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pbt_Arab': {'number_of_characters': 69.67, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pes_Arab-pes_Arab': {'number_of_characters': 66.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'pes_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pes_Arab': {'number_of_characters': 66.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'plt_Latn-plt_Latn': {'number_of_characters': 97.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'plt_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-plt_Latn': {'number_of_characters': 97.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'pol_Latn-pol_Latn': {'number_of_characters': 76.1, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pol_Latn-eng_Latn': {'number_of_characters': 79.35, 
'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pol_Latn': {'number_of_characters': 76.1, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'por_Latn-por_Latn': {'number_of_characters': 80.12, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'por_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-por_Latn': {'number_of_characters': 80.12, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ron_Latn-ron_Latn': {'number_of_characters': 80.74, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ron_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ron_Latn': {'number_of_characters': 80.74, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'rus_Cyrl-rus_Cyrl': {'number_of_characters': 85.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'rus_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 
'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-rus_Cyrl': {'number_of_characters': 85.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'shn_Mymr-shn_Mymr': {'number_of_characters': 77.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'shn_Mymr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-shn_Mymr': {'number_of_characters': 77.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-sin_Latn': {'number_of_characters': 96.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sin_Latn': {'number_of_characters': 96.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-sin_Sinh': {'number_of_characters': 71.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 
'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sin_Sinh': {'number_of_characters': 71.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slk_Latn-slk_Latn': {'number_of_characters': 70.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slk_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-slk_Latn': {'number_of_characters': 70.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slv_Latn-slv_Latn': {'number_of_characters': 70.8, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slv_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-slv_Latn': {'number_of_characters': 70.8, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'sna_Latn-sna_Latn': {'number_of_characters': 83.31, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'sna_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 
'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sna_Latn': {'number_of_characters': 83.31, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'snd_Arab-snd_Arab': {'number_of_characters': 65.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'snd_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-snd_Arab': {'number_of_characters': 65.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'som_Latn-som_Latn': {'number_of_characters': 92.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'som_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-som_Latn': {'number_of_characters': 92.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sot_Latn-sot_Latn': {'number_of_characters': 85.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'sot_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 
0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sot_Latn': {'number_of_characters': 85.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'spa_Latn-spa_Latn': {'number_of_characters': 84.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'spa_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-spa_Latn': {'number_of_characters': 84.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'srp_Cyrl-srp_Cyrl': {'number_of_characters': 69.5, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'srp_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-srp_Cyrl': {'number_of_characters': 69.5, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ssw_Latn-ssw_Latn': {'number_of_characters': 83.1, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ssw_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 
0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ssw_Latn': {'number_of_characters': 83.1, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'sun_Latn-sun_Latn': {'number_of_characters': 80.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'sun_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sun_Latn': {'number_of_characters': 80.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'swe_Latn-swe_Latn': {'number_of_characters': 70.68, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'swe_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-swe_Latn': {'number_of_characters': 70.68, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'swh_Latn-swh_Latn': {'number_of_characters': 82.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'swh_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-swh_Latn': {'number_of_characters': 82.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tam_Taml-tam_Taml': {'number_of_characters': 83.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tam_Taml-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tam_Taml': {'number_of_characters': 83.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tel_Telu-tel_Telu': {'number_of_characters': 74.19, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tel_Telu-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tel_Telu': {'number_of_characters': 74.19, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tgk_Cyrl-tgk_Cyrl': {'number_of_characters': 76.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tgk_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tgk_Cyrl': {'number_of_characters': 76.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tgl_Latn-tgl_Latn': {'number_of_characters': 84.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tgl_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tgl_Latn': {'number_of_characters': 84.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tha_Thai-tha_Thai': {'number_of_characters': 61.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'tha_Thai-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tha_Thai': {'number_of_characters': 61.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'tir_Ethi-tir_Ethi': {'number_of_characters': 54.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'tir_Ethi-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tir_Ethi': {'number_of_characters': 54.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'tsn_Latn-tsn_Latn': {'number_of_characters': 89.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tsn_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tsn_Latn': {'number_of_characters': 89.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tso_Latn-tso_Latn': {'number_of_characters': 93.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tso_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tso_Latn': {'number_of_characters': 93.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tur_Latn-tur_Latn': {'number_of_characters': 73.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tur_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tur_Latn': {'number_of_characters': 73.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ukr_Cyrl-ukr_Cyrl': {'number_of_characters': 74.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ukr_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ukr_Cyrl': {'number_of_characters': 74.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-urd_Arab': {'number_of_characters': 72.53, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-urd_Arab': {'number_of_characters': 72.53, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-urd_Latn': {'number_of_characters': 92.07, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-urd_Latn': {'number_of_characters': 92.07, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'uzn_Latn-uzn_Latn': {'number_of_characters': 79.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'uzn_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-uzn_Latn': {'number_of_characters': 79.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'vie_Latn-vie_Latn': {'number_of_characters': 75.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'vie_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-vie_Latn': {'number_of_characters': 75.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'war_Latn-war_Latn': {'number_of_characters': 88.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'war_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-war_Latn': {'number_of_characters': 88.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'wol_Latn-wol_Latn': {'number_of_characters': 72.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'wol_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-wol_Latn': {'number_of_characters': 72.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'xho_Latn-xho_Latn': {'number_of_characters': 80.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'xho_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-xho_Latn': {'number_of_characters': 80.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'yor_Latn-yor_Latn': {'number_of_characters': 70.64, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'yor_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-yor_Latn': {'number_of_characters': 70.64, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'zho_Hans-zho_Hans': {'number_of_characters': 23.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zho_Hans-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zho_Hans': {'number_of_characters': 23.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zho_Hant-zho_Hant': {'number_of_characters': 23.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zho_Hant-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zho_Hant': {'number_of_characters': 23.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zsm_Latn-zsm_Latn': {'number_of_characters': 80.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'zsm_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zsm_Latn': {'number_of_characters': 80.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'zul_Latn-zul_Latn': {'number_of_characters': 78.04, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'zul_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zul_Latn': {'number_of_characters': 78.04, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-arb_Latn': {'number_of_characters': 69.02, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-arb_Arab': {'number_of_characters': 60.55, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-ben_Latn': {'number_of_characters': 76.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-ben_Beng': {'number_of_characters': 71.48, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-hin_Latn': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'hin_Latn-hin_Deva': {'number_of_characters': 74.62, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'npi_Deva-npi_Latn': {'number_of_characters': 73.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-npi_Deva': {'number_of_characters': 68.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-sin_Latn': {'number_of_characters': 96.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-sin_Sinh': {'number_of_characters': 71.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-urd_Latn': {'number_of_characters': 92.07, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-urd_Arab': {'number_of_characters': 72.53, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}}}} | +| [BelebeleRetrieval](https://arxiv.org/abs/2308.16884) (Lucas Bandarkar, 2023) | ['acm', 'afr', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'azj', 'bam', 'ben', 'bod', 'bul', 'cat', 'ceb', 'ces', 'ckb', 'dan', 'deu', 'ell', 'eng', 'est', 'eus', 'fin', 'fra', 'fuv', 'gaz', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kac', 'kan', 'kat', 
'kaz', 'kea', 'khk', 'khm', 'kin', 'kir', 'kor', 'lao', 'lin', 'lit', 'lug', 'luo', 'lvs', 'mal', 'mar', 'mkd', 'mlt', 'mri', 'mya', 'nld', 'nob', 'npi', 'nso', 'nya', 'ory', 'pan', 'pbt', 'pes', 'plt', 'pol', 'por', 'ron', 'rus', 'shn', 'sin', 'slk', 'slv', 'sna', 'snd', 'som', 'sot', 'spa', 'srp', 'ssw', 'sun', 'swe', 'swh', 'tam', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tsn', 'tso', 'tur', 'ukr', 'urd', 'uzn', 'vie', 'war', 'wol', 'xho', 'yor', 'zho', 'zsm', 'zul'] | Retrieval | s2p | [Web, News, Written] | {'test': 521866} | {'test': {'number_of_characters': 25574620, 'num_samples': 521866, 'num_queries': 338378, 'num_documents': 183488, 'min_document_length': 4, 'average_document_length': 137.38, 'max_document_length': 237, 'unique_documents': 183488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 338378, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 183488, 'hf_subset_descriptive_stats': {'acm_Arab-acm_Arab': {'number_of_characters': 51232, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 102.98, 'max_document_length': 129, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'acm_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-acm_Arab': 
{'number_of_characters': 51232, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 102.98, 'max_document_length': 129, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'afr_Latn-afr_Latn': {'number_of_characters': 71217, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 143.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'afr_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-afr_Latn': {'number_of_characters': 71217, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 143.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'als_Latn-als_Latn': {'number_of_characters': 69498, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 140.41, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'als_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-als_Latn': {'number_of_characters': 69498, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 140.41, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'amh_Ethi-amh_Ethi': {'number_of_characters': 45221, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 90.67, 'max_document_length': 100, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'amh_Ethi-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-amh_Ethi': {'number_of_characters': 45221, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 90.67, 'max_document_length': 100, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'apc_Arab-apc_Arab': {'number_of_characters': 51248, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 103.02, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'apc_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-apc_Arab': {'number_of_characters': 51248, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 103.02, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Arab-arb_Arab': {'number_of_characters': 53671, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 107.98, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-arb_Arab': {'number_of_characters': 53671, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 107.98, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Latn-arb_Latn': {'number_of_characters': 61298, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 123.61, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Latn-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-arb_Latn': {'number_of_characters': 61298, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 123.61, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ars_Arab-ars_Arab': {'number_of_characters': 51765, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 104.08, 'max_document_length': 119, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ars_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ars_Arab': {'number_of_characters': 51765, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 
'average_document_length': 104.08, 'max_document_length': 119, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ary_Arab-ary_Arab': {'number_of_characters': 60261, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 121.49, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ary_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ary_Arab': {'number_of_characters': 60261, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 121.49, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arz_Arab-arz_Arab': {'number_of_characters': 52403, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 105.38, 'max_document_length': 115, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arz_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-arz_Arab': {'number_of_characters': 52403, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 105.38, 'max_document_length': 115, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'asm_Beng-asm_Beng': {'number_of_characters': 62410, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 4, 'average_document_length': 125.89, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'asm_Beng-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-asm_Beng': {'number_of_characters': 62410, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 4, 'average_document_length': 125.89, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'azj_Latn-azj_Latn': {'number_of_characters': 67137, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.58, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'azj_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-azj_Latn': {'number_of_characters': 67137, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.58, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bam_Latn-bam_Latn': 
{'number_of_characters': 66084, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 133.42, 'max_document_length': 166, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bam_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-bam_Latn': {'number_of_characters': 66084, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 133.42, 'max_document_length': 166, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Beng-ben_Beng': {'number_of_characters': 63512, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 128.15, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Beng-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ben_Beng': {'number_of_characters': 63512, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 128.15, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Latn-ben_Latn': {'number_of_characters': 68285, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 137.93, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ben_Latn': {'number_of_characters': 68285, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 137.93, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bod_Tibt-bod_Tibt': {'number_of_characters': 79188, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.27, 'max_document_length': 213, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bod_Tibt-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-bod_Tibt': {'number_of_characters': 79188, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.27, 'max_document_length': 213, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bul_Cyrl-bul_Cyrl': {'number_of_characters': 66577, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.43, 'max_document_length': 177, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bul_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-bul_Cyrl': {'number_of_characters': 66577, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.43, 'max_document_length': 177, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'cat_Latn-cat_Latn': {'number_of_characters': 68842, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.07, 'max_document_length': 163, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'cat_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-cat_Latn': 
{'number_of_characters': 68842, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.07, 'max_document_length': 163, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ceb_Latn-ceb_Latn': {'number_of_characters': 74053, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 149.75, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ceb_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ceb_Latn': {'number_of_characters': 74053, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 149.75, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ces_Latn-ces_Latn': {'number_of_characters': 61936, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 
'average_document_length': 124.92, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ces_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ces_Latn': {'number_of_characters': 61936, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 124.92, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ckb_Arab-ckb_Arab': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 131.03, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ckb_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ckb_Arab': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 131.03, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'dan_Latn-dan_Latn': {'number_of_characters': 66648, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.57, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'dan_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-dan_Latn': {'number_of_characters': 66648, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.57, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'deu_Latn-deu_Latn': {'number_of_characters': 68768, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 138.92, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'deu_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-deu_Latn': {'number_of_characters': 68768, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 138.92, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ell_Grek-ell_Grek': {'number_of_characters': 79210, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.32, 'max_document_length': 212, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ell_Grek-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ell_Grek': {'number_of_characters': 79210, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.32, 'max_document_length': 212, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'est_Latn-est_Latn': {'number_of_characters': 61779, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.6, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'est_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-est_Latn': {'number_of_characters': 61779, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.6, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eus_Latn-eus_Latn': {'number_of_characters': 67979, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 137.3, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eus_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-eus_Latn': {'number_of_characters': 67979, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 137.3, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fin_Latn-fin_Latn': {'number_of_characters': 66234, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-fin_Latn': {'number_of_characters': 66234, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fra_Latn-fra_Latn': {'number_of_characters': 82464, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 166.98, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fra_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-fra_Latn': {'number_of_characters': 82464, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 166.98, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fuv_Latn-fuv_Latn': {'number_of_characters': 53555, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 107.74, 'max_document_length': 122, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fuv_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-fuv_Latn': 
{'number_of_characters': 53555, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 107.74, 'max_document_length': 122, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'gaz_Latn-gaz_Latn': {'number_of_characters': 78315, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 158.48, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'gaz_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-gaz_Latn': {'number_of_characters': 78315, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 158.48, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'grn_Latn-grn_Latn': {'number_of_characters': 68572, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 
'average_document_length': 138.52, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'grn_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-grn_Latn': {'number_of_characters': 68572, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 138.52, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'guj_Gujr-guj_Gujr': {'number_of_characters': 57007, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 114.82, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'guj_Gujr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-guj_Gujr': {'number_of_characters': 57007, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 114.82, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hat_Latn-hat_Latn': {'number_of_characters': 64558, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.29, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hat_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hat_Latn': {'number_of_characters': 64558, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.29, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hau_Latn-hau_Latn': {'number_of_characters': 78240, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.33, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hau_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hau_Latn': {'number_of_characters': 78240, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.33, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'heb_Hebr-heb_Hebr': {'number_of_characters': 50598, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 101.68, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'heb_Hebr-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-heb_Hebr': {'number_of_characters': 50598, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 101.68, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Deva-hin_Deva': {'number_of_characters': 66332, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.93, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Deva-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hin_Deva': {'number_of_characters': 66332, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 
'average_document_length': 133.93, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Latn-hin_Latn': {'number_of_characters': 68307, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.97, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hin_Latn': {'number_of_characters': 68307, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.97, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hrv_Latn-hrv_Latn': {'number_of_characters': 62928, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.95, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hrv_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hrv_Latn': {'number_of_characters': 62928, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.95, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hun_Latn-hun_Latn': {'number_of_characters': 67941, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 137.22, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hun_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hun_Latn': {'number_of_characters': 67941, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 137.22, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hye_Armn-hye_Armn': {'number_of_characters': 68859, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.1, 'max_document_length': 193, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hye_Armn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hye_Armn': {'number_of_characters': 68859, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.1, 'max_document_length': 193, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ibo_Latn-ibo_Latn': 
{'number_of_characters': 66167, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 133.59, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'ibo_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ibo_Latn': {'number_of_characters': 66167, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 133.59, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'ilo_Latn-ilo_Latn': {'number_of_characters': 78161, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.17, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ilo_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ilo_Latn': {'number_of_characters': 78161, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.17, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ind_Latn-ind_Latn': {'number_of_characters': 74871, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 151.42, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ind_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ind_Latn': {'number_of_characters': 74871, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 151.42, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'isl_Latn-isl_Latn': {'number_of_characters': 70522, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 142.51, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'isl_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-isl_Latn': {'number_of_characters': 70522, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 142.51, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ita_Latn-ita_Latn': {'number_of_characters': 76124, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 153.99, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ita_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ita_Latn': {'number_of_characters': 76124, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 153.99, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jav_Latn-jav_Latn': {'number_of_characters': 71722, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 144.97, 'max_document_length': 174, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jav_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-jav_Latn': 
{'number_of_characters': 71722, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 144.97, 'max_document_length': 174, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jpn_Jpan-jpn_Jpan': {'number_of_characters': 33187, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 66.01, 'max_document_length': 76, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jpn_Jpan-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-jpn_Jpan': {'number_of_characters': 33187, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 66.01, 'max_document_length': 76, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kac_Latn-kac_Latn': {'number_of_characters': 89655, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 181.72, 'max_document_length': 195, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kac_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kac_Latn': {'number_of_characters': 89655, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 181.72, 'max_document_length': 195, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kan_Knda-kan_Knda': {'number_of_characters': 65899, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.04, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kan_Knda-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kan_Knda': {'number_of_characters': 65899, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.04, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kat_Geor-kat_Geor': {'number_of_characters': 68309, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.98, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kat_Geor-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kat_Geor': {'number_of_characters': 68309, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.98, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kaz_Cyrl-kaz_Cyrl': {'number_of_characters': 64657, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.49, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kaz_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kaz_Cyrl': {'number_of_characters': 64657, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.49, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kea_Latn-kea_Latn': {'number_of_characters': 69323, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.06, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kea_Latn-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kea_Latn': {'number_of_characters': 69323, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.06, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khk_Cyrl-khk_Cyrl': {'number_of_characters': 66977, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 135.25, 'max_document_length': 162, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khk_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-khk_Cyrl': {'number_of_characters': 66977, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 
'average_document_length': 135.25, 'max_document_length': 162, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khm_Khmr-khm_Khmr': {'number_of_characters': 69150, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 139.7, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khm_Khmr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-khm_Khmr': {'number_of_characters': 69150, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 139.7, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kin_Latn-kin_Latn': {'number_of_characters': 72803, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 147.19, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'kin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kin_Latn': {'number_of_characters': 72803, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 147.19, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'kir_Cyrl-kir_Cyrl': {'number_of_characters': 67957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 137.26, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kir_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kir_Cyrl': {'number_of_characters': 67957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 137.26, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kor_Hang-kor_Hang': {'number_of_characters': 32708, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 65.02, 'max_document_length': 88, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kor_Hang-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kor_Hang': {'number_of_characters': 32708, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 65.02, 'max_document_length': 88, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lao_Laoo-lao_Laoo': 
{'number_of_characters': 57958, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 116.77, 'max_document_length': 142, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lao_Laoo-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lao_Laoo': {'number_of_characters': 57958, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 116.77, 'max_document_length': 142, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lin_Latn-lin_Latn': {'number_of_characters': 74223, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 150.1, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'lin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lin_Latn': {'number_of_characters': 74223, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 150.1, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'lit_Latn-lit_Latn': {'number_of_characters': 62805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 126.7, 'max_document_length': 167, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lit_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lit_Latn': {'number_of_characters': 62805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 126.7, 'max_document_length': 167, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lug_Latn-lug_Latn': {'number_of_characters': 71566, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 144.65, 'max_document_length': 237, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'lug_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lug_Latn': {'number_of_characters': 71566, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 144.65, 'max_document_length': 237, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'luo_Latn-luo_Latn': {'number_of_characters': 66805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 134.9, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'luo_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-luo_Latn': {'number_of_characters': 66805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 134.9, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lvs_Latn-lvs_Latn': {'number_of_characters': 63957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 129.06, 'max_document_length': 172, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lvs_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lvs_Latn': 
{'number_of_characters': 63957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 129.06, 'max_document_length': 172, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mal_Mlym-mal_Mlym': {'number_of_characters': 73599, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.82, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mal_Mlym-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mal_Mlym': {'number_of_characters': 73599, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.82, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mar_Deva-mar_Deva': {'number_of_characters': 62671, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 15, 
'average_document_length': 126.42, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'mar_Deva-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mar_Deva': {'number_of_characters': 62671, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 126.42, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'mkd_Cyrl-mkd_Cyrl': {'number_of_characters': 67588, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 136.5, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mkd_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mkd_Cyrl': {'number_of_characters': 67588, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 136.5, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mlt_Latn-mlt_Latn': {'number_of_characters': 68480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 138.33, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mlt_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mlt_Latn': {'number_of_characters': 68480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 138.33, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mri_Latn-mri_Latn': {'number_of_characters': 74519, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 150.7, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mri_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mri_Latn': {'number_of_characters': 74519, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 150.7, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mya_Mymr-mya_Mymr': {'number_of_characters': 81331, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 164.66, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mya_Mymr-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mya_Mymr': {'number_of_characters': 81331, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 164.66, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nld_Latn-nld_Latn': {'number_of_characters': 68789, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 138.96, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nld_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nld_Latn': {'number_of_characters': 68789, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 
'average_document_length': 138.96, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nob_Latn-nob_Latn': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 131.03, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nob_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nob_Latn': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 131.03, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Deva-npi_Deva': {'number_of_characters': 61183, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 123.38, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Deva-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-npi_Deva': {'number_of_characters': 61183, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 123.38, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Latn-npi_Latn': {'number_of_characters': 65683, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 132.6, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-npi_Latn': {'number_of_characters': 65683, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 132.6, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nso_Latn-nso_Latn': {'number_of_characters': 79073, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 160.03, 'max_document_length': 235, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nso_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nso_Latn': {'number_of_characters': 79073, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 160.03, 'max_document_length': 235, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nya_Latn-nya_Latn': 
{'number_of_characters': 82685, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.44, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nya_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nya_Latn': {'number_of_characters': 82685, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.44, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ory_Orya-ory_Orya': {'number_of_characters': 66638, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 10, 'average_document_length': 134.55, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ory_Orya-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ory_Orya': {'number_of_characters': 66638, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 10, 'average_document_length': 134.55, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pan_Guru-pan_Guru': {'number_of_characters': 66944, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.18, 'max_document_length': 157, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pan_Guru-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pan_Guru': {'number_of_characters': 66944, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.18, 'max_document_length': 157, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pbt_Arab-pbt_Arab': {'number_of_characters': 61880, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 124.8, 'max_document_length': 155, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pbt_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pbt_Arab': {'number_of_characters': 61880, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 124.8, 'max_document_length': 155, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pes_Arab-pes_Arab': {'number_of_characters': 59252, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 119.42, 'max_document_length': 152, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pes_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pes_Arab': {'number_of_characters': 59252, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 119.42, 'max_document_length': 152, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'plt_Latn-plt_Latn': {'number_of_characters': 86472, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 175.2, 'max_document_length': 222, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'plt_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-plt_Latn': 
{'number_of_characters': 86472, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 175.2, 'max_document_length': 222, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pol_Latn-pol_Latn': {'number_of_characters': 67664, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 136.66, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pol_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pol_Latn': {'number_of_characters': 67664, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 136.66, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'por_Latn-por_Latn': {'number_of_characters': 71281, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 
'average_document_length': 144.07, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'por_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-por_Latn': {'number_of_characters': 71281, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 144.07, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ron_Latn-ron_Latn': {'number_of_characters': 71844, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 145.22, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ron_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ron_Latn': {'number_of_characters': 71844, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 145.22, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'rus_Cyrl-rus_Cyrl': {'number_of_characters': 75823, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 153.38, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'rus_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-rus_Cyrl': {'number_of_characters': 75823, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 153.38, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'shn_Mymr-shn_Mymr': {'number_of_characters': 69288, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 139.98, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'shn_Mymr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-shn_Mymr': {'number_of_characters': 69288, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 139.98, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Latn-sin_Latn': {'number_of_characters': 85996, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 174.22, 'max_document_length': 224, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Latn-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sin_Latn': {'number_of_characters': 85996, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 174.22, 'max_document_length': 224, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Sinh-sin_Sinh': {'number_of_characters': 63902, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 128.95, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Sinh-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sin_Sinh': {'number_of_characters': 63902, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 
'average_document_length': 128.95, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slk_Latn-slk_Latn': {'number_of_characters': 62663, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 126.41, 'max_document_length': 146, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slk_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-slk_Latn': {'number_of_characters': 62663, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 126.41, 'max_document_length': 146, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slv_Latn-slv_Latn': {'number_of_characters': 62895, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.88, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slv_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-slv_Latn': {'number_of_characters': 62895, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.88, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sna_Latn-sna_Latn': {'number_of_characters': 74071, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.78, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sna_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sna_Latn': {'number_of_characters': 74071, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.78, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'snd_Arab-snd_Arab': {'number_of_characters': 58057, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 116.97, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'snd_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-snd_Arab': {'number_of_characters': 58057, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 116.97, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'som_Latn-som_Latn': 
{'number_of_characters': 82838, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.75, 'max_document_length': 201, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'som_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-som_Latn': {'number_of_characters': 82838, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.75, 'max_document_length': 201, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sot_Latn-sot_Latn': {'number_of_characters': 75794, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 153.32, 'max_document_length': 186, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sot_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sot_Latn': {'number_of_characters': 75794, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 153.32, 'max_document_length': 186, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'spa_Latn-spa_Latn': {'number_of_characters': 74920, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 151.52, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'spa_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-spa_Latn': {'number_of_characters': 74920, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 151.52, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'srp_Cyrl-srp_Cyrl': {'number_of_characters': 61657, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.35, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'srp_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-srp_Cyrl': {'number_of_characters': 61657, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.35, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'ssw_Latn-ssw_Latn': {'number_of_characters': 73964, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 149.57, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ssw_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ssw_Latn': {'number_of_characters': 73964, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 149.57, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sun_Latn-sun_Latn': {'number_of_characters': 71320, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 144.15, 'max_document_length': 173, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sun_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sun_Latn': 
{'number_of_characters': 71320, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 144.15, 'max_document_length': 173, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swe_Latn-swe_Latn': {'number_of_characters': 62785, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 126.66, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swe_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-swe_Latn': {'number_of_characters': 62785, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 126.66, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swh_Latn-swh_Latn': {'number_of_characters': 73480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 
'average_document_length': 148.57, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swh_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-swh_Latn': {'number_of_characters': 73480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.57, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tam_Taml-tam_Taml': {'number_of_characters': 73991, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.62, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tam_Taml-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tam_Taml': {'number_of_characters': 73991, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.62, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tel_Telu-tel_Telu': {'number_of_characters': 65945, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 133.13, 'max_document_length': 149, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tel_Telu-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tel_Telu': {'number_of_characters': 65945, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 133.13, 'max_document_length': 149, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgk_Cyrl-tgk_Cyrl': {'number_of_characters': 67829, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 136.99, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgk_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tgk_Cyrl': {'number_of_characters': 67829, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 136.99, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgl_Latn-tgl_Latn': {'number_of_characters': 75087, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 151.87, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgl_Latn-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tgl_Latn': {'number_of_characters': 75087, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 151.87, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tha_Thai-tha_Thai': {'number_of_characters': 54496, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 109.67, 'max_document_length': 123, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tha_Thai-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tha_Thai': {'number_of_characters': 54496, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 
'average_document_length': 109.67, 'max_document_length': 123, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tir_Ethi-tir_Ethi': {'number_of_characters': 47775, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 95.9, 'max_document_length': 110, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tir_Ethi-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tir_Ethi': {'number_of_characters': 47775, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 95.9, 'max_document_length': 110, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tsn_Latn-tsn_Latn': {'number_of_characters': 79391, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 160.69, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tsn_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tsn_Latn': {'number_of_characters': 79391, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 160.69, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tso_Latn-tso_Latn': {'number_of_characters': 83501, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 169.11, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tso_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tso_Latn': {'number_of_characters': 83501, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 169.11, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tur_Latn-tur_Latn': {'number_of_characters': 65382, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 131.98, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tur_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tur_Latn': {'number_of_characters': 65382, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 131.98, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ukr_Cyrl-ukr_Cyrl': 
{'number_of_characters': 65850, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 132.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ukr_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ukr_Cyrl': {'number_of_characters': 65850, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 132.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Arab-urd_Arab': {'number_of_characters': 64450, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 130.07, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-urd_Arab': {'number_of_characters': 64450, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 130.07, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Latn-urd_Latn': {'number_of_characters': 82039, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 166.11, 'max_document_length': 230, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-urd_Latn': {'number_of_characters': 82039, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 166.11, 'max_document_length': 230, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'uzn_Latn-uzn_Latn': {'number_of_characters': 70828, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 143.14, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'uzn_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-uzn_Latn': {'number_of_characters': 70828, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 143.14, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'vie_Latn-vie_Latn': {'number_of_characters': 66724, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 134.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'vie_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-vie_Latn': {'number_of_characters': 66724, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 134.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'war_Latn-war_Latn': {'number_of_characters': 78444, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 158.75, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'war_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-war_Latn': 
{'number_of_characters': 78444, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 158.75, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'wol_Latn-wol_Latn': {'number_of_characters': 64521, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 130.22, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'wol_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-wol_Latn': {'number_of_characters': 64521, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 130.22, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'xho_Latn-xho_Latn': {'number_of_characters': 71629, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 
'average_document_length': 144.78, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'xho_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-xho_Latn': {'number_of_characters': 71629, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 144.78, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'yor_Latn-yor_Latn': {'number_of_characters': 62752, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 126.59, 'max_document_length': 143, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'yor_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-yor_Latn': {'number_of_characters': 62752, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 126.59, 'max_document_length': 143, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hans-zho_Hans': {'number_of_characters': 20549, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 40.11, 'max_document_length': 64, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hans-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zho_Hans': {'number_of_characters': 20549, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 40.11, 'max_document_length': 64, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hant-zho_Hant': {'number_of_characters': 19947, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 38.88, 'max_document_length': 45, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hant-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zho_Hant': {'number_of_characters': 19947, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 38.88, 'max_document_length': 45, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zsm_Latn-zsm_Latn': {'number_of_characters': 72008, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 145.56, 'max_document_length': 210, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zsm_Latn-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zsm_Latn': {'number_of_characters': 72008, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 145.56, 'max_document_length': 210, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zul_Latn-zul_Latn': {'number_of_characters': 69413, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.24, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zul_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zul_Latn': {'number_of_characters': 69413, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 
'average_document_length': 140.24, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Arab-arb_Latn': {'number_of_characters': 61298, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 123.61, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Latn-arb_Arab': {'number_of_characters': 53671, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 107.98, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Beng-ben_Latn': {'number_of_characters': 68285, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 137.93, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Latn-ben_Beng': {'number_of_characters': 63512, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 128.15, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Deva-hin_Latn': {'number_of_characters': 68307, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.97, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Latn-hin_Deva': {'number_of_characters': 66332, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.93, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Deva-npi_Latn': {'number_of_characters': 65683, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 132.6, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Latn-npi_Deva': {'number_of_characters': 61183, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 123.38, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Sinh-sin_Latn': {'number_of_characters': 85996, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 174.22, 'max_document_length': 224, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Latn-sin_Sinh': {'number_of_characters': 63902, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 128.95, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Arab-urd_Latn': {'number_of_characters': 82039, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 166.11, 'max_document_length': 230, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Latn-urd_Arab': {'number_of_characters': 64450, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 130.07, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}}}} | | 
[BengaliDocumentClassification](https://aclanthology.org/2023.eacl-main.4) | ['ben'] | Classification | s2s | [News, Written] | None | None | | [BengaliHateSpeechClassification](https://huggingface.co/datasets/bn_hate_speech) (Karim et al., 2020) | ['ben'] | Classification | s2s | [News, Written] | None | None | | [BengaliSentimentAnalysis](https://data.mendeley.com/datasets/p6zc7krs37/4) (Sazzed et al., 2020) | ['ben'] | Classification | s2s | [Reviews, Written] | None | None | @@ -52,19 +52,19 @@ The following tables give you an overview of the tasks in MTEB. | [BiorxivClusteringS2S.v2](https://api.biorxiv.org/) | ['eng'] | Clustering | s2s | [Academic, Written] | None | None | | [BlurbsClusteringP2P.v2](https://www.inf.uni-hamburg.de/en/inst/ab/lt/resources/data/germeval-2019-hmc.html) (Steffen Remus, 2019) | ['deu'] | Clustering | p2p | [Fiction, Written] | None | None | | [BlurbsClusteringS2S.v2](https://www.inf.uni-hamburg.de/en/inst/ab/lt/resources/data/germeval-2019-hmc.html) (Steffen Remus, 2019) | ['deu'] | Clustering | s2s | [Fiction, Written] | None | None | -| [BornholmBitextMining](https://aclanthology.org/W19-6138/) | ['dan'] | BitextMining | s2s | [Web, Social, Fiction, Written] | {'test': 500} | {'test': {'average_sentence1_length': 49.83, 'average_sentence2_length': 38.89, 'num_samples': 500, 'number_of_characters': 44361}} | +| [BornholmBitextMining](https://aclanthology.org/W19-6138/) | ['dan'] | BitextMining | s2s | [Web, Social, Fiction, Written] | {'test': 500} | {'test': {'num_samples': 500, 'number_of_characters': 44361, 'unique_pairs': 500, 'min_sentence1_length': 1, 'average_sentence1_length': 49.83, 'max_sentence1_length': 555, 'unique_sentence1': 497, 'min_sentence2_length': 5, 'average_sentence2_length': 38.89, 'max_sentence2_length': 453, 'unique_sentence2': 491}} | | [BrazilianToxicTweetsClassification](https://paperswithcode.com/dataset/told-br) (Joao Augusto Leite and Diego F. 
Silva and Kalina Bontcheva and Carolina Scarton, 2020) | ['por'] | MultilabelClassification | s2s | [Constructed, Written] | None | None | | [BrightRetrieval](https://huggingface.co/datasets/xlangai/BRIGHT) (Hongjin Su, 2024) | ['eng'] | Retrieval | s2p | [Non-fiction] | None | None | | [BulgarianStoreReviewSentimentClassfication](https://doi.org/10.7910/DVN/TXIK9P) (Georgieva-Trifonova et al., 2018) | ['bul'] | Classification | s2s | [Reviews, Written] | None | None | | [CBD](http://2019.poleval.pl/files/poleval2019.pdf) | ['pol'] | Classification | s2s | [Written, Social] | None | None | | [CDSC-E](https://aclanthology.org/P17-1073.pdf) | ['pol'] | PairClassification | s2s | [Written] | None | None | | [CDSC-R](https://aclanthology.org/P17-1073.pdf) | ['pol'] | STS | s2s | [Web, Written] | None | None | -| [CEDRClassification](https://www.sciencedirect.com/science/article/pii/S1877050921013247) (Sboev et al., 2021) | ['rus'] | MultilabelClassification | s2s | [Web, Social, Blog, Written] | {'test': 1882} | {'test': {'average_text_length': 91.21, 'number_of_characters': 171649, 'average_label_per_text': 0.62, 'num_samples': 1882, 'unique_labels': 6, 'labels': {'None': {'count': 734}, '3': {'count': 141}, '2': {'count': 170}, '1': {'count': 379}, '0': {'count': 353}, '4': {'count': 125}}}} | +| [CEDRClassification](https://www.sciencedirect.com/science/article/pii/S1877050921013247) (Sboev et al., 2021) | ['rus'] | MultilabelClassification | s2s | [Web, Social, Blog, Written] | {'test': 1882, 'train': 7528} | {'test': {'num_samples': 1882, 'number_of_characters': 171649, 'number_texts_in_train': 7, 'min_text_length': 6, 'average_text_length': 91.21, 'max_text_length': 220, 'unique_texts': 1875, 'min_labels_per_text': 0, 'average_label_per_text': 0.62, 'max_labels_per_text': 2, 'unique_labels': 6, 'labels': {'None': {'count': 734}, '3': {'count': 141}, '2': {'count': 170}, '1': {'count': 379}, '0': {'count': 353}, '4': {'count': 125}}}, 'train': {'num_samples': 
7528, 'number_of_characters': 697322, 'number_texts_in_train': None, 'min_text_length': 5, 'average_text_length': 92.63, 'max_text_length': 280, 'unique_texts': 7500, 'min_labels_per_text': 0, 'average_label_per_text': 0.61, 'max_labels_per_text': 3, 'unique_labels': 6, 'labels': {'None': {'count': 3043}, '2': {'count': 607}, '0': {'count': 1569}, '3': {'count': 589}, '1': {'count': 1417}, '4': {'count': 411}}}} | | [CLSClusteringP2P.v2](https://arxiv.org/abs/2209.05034) (Yudong Li, 2022) | ['cmn'] | Clustering | p2p | [Academic, Written] | None | None | | [CLSClusteringS2S.v2](https://arxiv.org/abs/2209.05034) (Yudong Li, 2022) | ['cmn'] | Clustering | s2s | [Academic, Written] | None | None | | [CMedQAv1-reranking](https://github.com/zhangsheng93/cMedQA) (Zhang et al., 2017) | ['cmn'] | Reranking | s2s | [Medical, Written] | None | None | | [CMedQAv2-reranking](https://github.com/zhangsheng93/cMedQA2) (S. Zhang, 2018) | ['cmn'] | Reranking | s2s | | None | None | -| [COIRCodeSearchNetRetrieval](https://huggingface.co/datasets/code_search_net/) (Husain et al., 2019) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 1056326} | {'test': {'number_of_characters': 664.77, 'num_samples': 1056326, 'num_queries': 52561, 'num_documents': 1003765, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 941.4, 'num_samples': 295228, 'num_queries': 14918, 'num_documents': 280310, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 748.83, 'num_samples': 68145, 'num_queries': 3291, 'num_documents': 64854, 'average_document_length': 0.0, 'average_query_length': 0.23, 'average_relevant_docs_per_query': 1.0}, 'go': {'number_of_characters': 405.38, 'num_samples': 190562, 'num_queries': 8122, 'num_documents': 182440, 
'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 457.44, 'num_samples': 28831, 'num_queries': 1261, 'num_documents': 27570, 'average_document_length': 0.0, 'average_query_length': 0.36, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 588.89, 'num_samples': 191821, 'num_queries': 10955, 'num_documents': 180866, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 578.85, 'num_samples': 281739, 'num_queries': 14014, 'num_documents': 267725, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}}}} | +| [COIRCodeSearchNetRetrieval](https://huggingface.co/datasets/code_search_net/) (Husain et al., 2019) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 1056326} | {'test': {'number_of_characters': 36843313, 'num_samples': 1056326, 'num_queries': 52561, 'num_documents': 1003765, 'min_document_length': 54, 'average_document_length': 34.71, 'max_document_length': 334374, 'unique_documents': 1003765, 'min_query_length': 2, 'average_query_length': 38.19, 'max_query_length': 2, 'unique_queries': 52561, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 52561, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 14574651, 'num_samples': 295228, 'num_queries': 14918, 'num_documents': 280310, 'min_document_length': 95, 'average_document_length': 49.99, 'max_document_length': 14008, 'unique_documents': 280310, 'min_query_length': 2, 'average_query_length': 37.58, 'max_query_length': 2, 'unique_queries': 14918, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14918}, 'javascript': {'number_of_characters': 
2587540, 'num_samples': 68145, 'num_queries': 3291, 'num_documents': 64854, 'min_document_length': 87, 'average_document_length': 37.9, 'max_document_length': 334374, 'unique_documents': 64854, 'min_query_length': 2, 'average_query_length': 39.41, 'max_query_length': 2, 'unique_queries': 3291, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3291}, 'go': {'number_of_characters': 3641108, 'num_samples': 190562, 'num_queries': 8122, 'num_documents': 182440, 'min_document_length': 54, 'average_document_length': 17.96, 'max_document_length': 5280, 'unique_documents': 182440, 'min_query_length': 2, 'average_query_length': 44.92, 'max_query_length': 2, 'unique_queries': 8122, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 8122}, 'ruby': {'number_of_characters': 629446, 'num_samples': 28831, 'num_queries': 1261, 'num_documents': 27570, 'min_document_length': 83, 'average_document_length': 20.83, 'max_document_length': 3992, 'unique_documents': 27570, 'min_query_length': 2, 'average_query_length': 43.73, 'max_query_length': 2, 'unique_queries': 1261, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1261}, 'java': {'number_of_characters': 6791137, 'num_samples': 191821, 'num_queries': 10955, 'num_documents': 180866, 'min_document_length': 77, 'average_document_length': 35.55, 'max_document_length': 7615, 'unique_documents': 180866, 'min_query_length': 2, 'average_query_length': 33.02, 'max_query_length': 2, 'unique_queries': 10955, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10955}, 'php': {'number_of_characters': 8619431, 'num_samples': 281739, 'num_queries': 14014, 'num_documents': 267725, 'min_document_length': 94, 
'average_document_length': 30.2, 'max_document_length': 4904, 'unique_documents': 267725, 'min_query_length': 2, 'average_query_length': 38.21, 'max_query_length': 2, 'unique_queries': 14014, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14014}}}} | | [CPUSpeedTask](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/blob/c8376f967d1294419be1d3eb41217d04cd3a65d3/src/seb/registered_tasks/speed.py#L83-L96) | ['eng'] | Speed | s2s | [Fiction, Written] | None | None | | [CQADupstackAndroidRetrieval](http://nlp.cis.unimelb.edu.au/resources/cqadupstack/) (Hoogeveen et al., 2015) | ['eng'] | Retrieval | s2p | | None | None | | [CQADupstackEnglishRetrieval](http://nlp.cis.unimelb.edu.au/resources/cqadupstack/) (Hoogeveen et al., 2015) | ['eng'] | Retrieval | s2p | | None | None | @@ -125,13 +125,13 @@ The following tables give you an overview of the tasks in MTEB. | [ClimateFEVERHardNegatives](https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | | None | None | | [CmedqaRetrieval](https://aclanthology.org/2022.emnlp-main.357.pdf) | ['cmn'] | Retrieval | s2p | | None | None | | [Cmnli](https://huggingface.co/datasets/clue/viewer/cmnli) | ['cmn'] | PairClassification | s2s | | None | None | -| [CodeEditSearchRetrieval](https://huggingface.co/datasets/cassanof/CodeEditSearch/viewer) (Niklas Muennighoff, 2023) | ['c', 'c++', 'go', 'java', 'javascript', 'php', 'python', 'ruby', 'rust', 'scala', 'shell', 'swift', 'typescript'] | Retrieval | p2p | [Programming, Written] | {'train': 26000} | {'train': {'number_of_characters': 71.99, 'num_samples': 26000, 'num_queries': 13000, 'num_documents': 13000, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 70.52, 'num_samples': 2000, 'num_queries': 
1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 57.88, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'typescript': {'number_of_characters': 61.09, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'go': {'number_of_characters': 71.8, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 67.9, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 63.98, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 62.93, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'c': {'number_of_characters': 98.59, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'c++': {'number_of_characters': 115.48, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'rust': {'number_of_characters': 68.5, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 
1.0}, 'swift': {'number_of_characters': 58.28, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'scala': {'number_of_characters': 65.83, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'shell': {'number_of_characters': 73.06, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}}}} | -| [CodeFeedbackMT](https://arxiv.org/abs/2402.14658) (Tianyu Zheng, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 79660} | {'test': {'number_of_characters': 5894.4, 'num_samples': 79660, 'num_queries': 13277, 'num_documents': 66383, 'average_document_length': 0.02, 'average_query_length': 0.33, 'average_relevant_docs_per_query': 1.0}} | -| [CodeFeedbackST](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 187832} | {'test': {'number_of_characters': 2246.58, 'num_samples': 187832, 'num_queries': 31306, 'num_documents': 156526, 'average_document_length': 0.01, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}} | -| [CodeSearchNetCCRetrieval](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 1058035} | {'test': {'number_of_characters': 390.06, 'num_samples': 1058035, 'num_queries': 52561, 'num_documents': 1005474, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 553.79, 'num_samples': 295570, 'num_queries': 14918, 'num_documents': 280652, 'average_document_length': 0.0, 'average_query_length': 0.04, 
'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 445.71, 'num_samples': 68492, 'num_queries': 3291, 'num_documents': 65201, 'average_document_length': 0.0, 'average_query_length': 0.13, 'average_relevant_docs_per_query': 1.0}, 'go': {'number_of_characters': 235.77, 'num_samples': 190857, 'num_queries': 8122, 'num_documents': 182735, 'average_document_length': 0.0, 'average_query_length': 0.03, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 268.87, 'num_samples': 28849, 'num_queries': 1261, 'num_documents': 27588, 'average_document_length': 0.0, 'average_query_length': 0.21, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 344.53, 'num_samples': 192016, 'num_queries': 10955, 'num_documents': 181061, 'average_document_length': 0.0, 'average_query_length': 0.03, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 338.62, 'num_samples': 282251, 'num_queries': 14014, 'num_documents': 268237, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}}}} | -| [CodeSearchNetRetrieval](https://huggingface.co/datasets/code_search_net/) (Husain et al., 2019) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 12000} | {'test': {'number_of_characters': 325.01, 'num_samples': 12000, 'num_queries': 6000, 'num_documents': 6000, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 467.55, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.47, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 187.02, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.19, 'average_relevant_docs_per_query': 1.0}, 
'go': {'number_of_characters': 126.21, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.13, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 314.82, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.31, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 691.36, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.69, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 163.12, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.16, 'average_relevant_docs_per_query': 1.0}}}} | -| [CodeTransOceanContest](https://arxiv.org/abs/2310.04951) (Weixiang Yan, 2023) | ['c++', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 1229} | {'test': {'number_of_characters': 2520.65, 'num_samples': 1229, 'num_queries': 221, 'num_documents': 1008, 'average_document_length': 1.5, 'average_query_length': 4.58, 'average_relevant_docs_per_query': 1.0}} | -| [CodeTransOceanDL](https://arxiv.org/abs/2310.04951) (Weixiang Yan, 2023) | ['python'] | Retrieval | p2p | [Programming, Written] | {'test': 996} | {'test': {'number_of_characters': 3347.7, 'num_samples': 996, 'num_queries': 180, 'num_documents': 816, 'average_document_length': 1.81, 'average_query_length': 10.38, 'average_relevant_docs_per_query': 1.0}} | +| [CodeEditSearchRetrieval](https://huggingface.co/datasets/cassanof/CodeEditSearch/viewer) (Niklas Muennighoff, 2023) | ['c', 'c++', 'go', 'java', 'javascript', 'php', 'python', 'ruby', 'rust', 'scala', 'shell', 'swift', 'typescript'] | Retrieval | p2p | [Programming, Written] | {'train': 26000} | {'train': {'number_of_characters': 935841, 'num_samples': 26000, 'num_queries': 13000, 'num_documents': 13000, 
'min_document_length': 18, 'average_document_length': 70.99, 'max_document_length': 2532, 'unique_documents': 13000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 13000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 13000, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 70519, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 21, 'average_document_length': 69.52, 'max_document_length': 1811, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'javascript': {'number_of_characters': 57880, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 18, 'average_document_length': 56.88, 'max_document_length': 601, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'typescript': {'number_of_characters': 61092, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 60.09, 'max_document_length': 659, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'go': {'number_of_characters': 71797, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 70.8, 'max_document_length': 1529, 'unique_documents': 1000, 
'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'ruby': {'number_of_characters': 67900, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 20, 'average_document_length': 66.9, 'max_document_length': 751, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'java': {'number_of_characters': 63984, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 23, 'average_document_length': 62.98, 'max_document_length': 807, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'php': {'number_of_characters': 62927, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 21, 'average_document_length': 61.93, 'max_document_length': 766, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'c': {'number_of_characters': 98588, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 20, 'average_document_length': 97.59, 'max_document_length': 1672, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 
1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'c++': {'number_of_characters': 115480, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 22, 'average_document_length': 114.48, 'max_document_length': 1856, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'rust': {'number_of_characters': 68503, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 67.5, 'max_document_length': 2532, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'swift': {'number_of_characters': 58279, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 57.28, 'max_document_length': 727, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'scala': {'number_of_characters': 65833, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 22, 'average_document_length': 64.83, 'max_document_length': 685, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'shell': {'number_of_characters': 73059, 'num_samples': 2000, 'num_queries': 1000, 
'num_documents': 1000, 'min_document_length': 18, 'average_document_length': 72.06, 'max_document_length': 813, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}}} | +| [CodeFeedbackMT](https://arxiv.org/abs/2402.14658) (Tianyu Zheng, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 79660} | {'test': {'number_of_characters': 156266302, 'num_samples': 79660, 'num_queries': 13277, 'num_documents': 66383, 'min_document_length': 127, 'average_document_length': 885.13, 'max_document_length': 32432, 'unique_documents': 66383, 'min_query_length': 2, 'average_query_length': 7344.18, 'max_query_length': 9403, 'unique_queries': 13277, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 13277}} | +| [CodeFeedbackST](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 187832} | {'test': {'number_of_characters': 260957682, 'num_samples': 187832, 'num_queries': 31306, 'num_documents': 156526, 'min_document_length': 26, 'average_document_length': 144.85, 'max_document_length': 13851, 'unique_documents': 156526, 'min_query_length': 1, 'average_query_length': 7611.46, 'max_query_length': 11354, 'unique_queries': 31306, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 31306}} | +| [CodeSearchNetCCRetrieval](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 1058035} | {'test': {'number_of_characters': 22407915, 'num_samples': 1058035, 'num_queries': 52561, 'num_documents': 1005474, 'min_document_length': 23, 
'average_document_length': 20.29, 'max_document_length': 214210, 'unique_documents': 1005474, 'min_query_length': 2, 'average_query_length': 38.26, 'max_query_length': 2, 'unique_queries': 52561, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 52561, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 8792958, 'num_samples': 295570, 'num_queries': 14918, 'num_documents': 280652, 'min_document_length': 38, 'average_document_length': 29.33, 'max_document_length': 8326, 'unique_documents': 280652, 'min_query_length': 2, 'average_query_length': 37.63, 'max_query_length': 2, 'unique_queries': 14918, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14918}, 'javascript': {'number_of_characters': 1590642, 'num_samples': 68492, 'num_queries': 3291, 'num_documents': 65201, 'min_document_length': 40, 'average_document_length': 22.4, 'max_document_length': 214210, 'unique_documents': 65201, 'min_query_length': 2, 'average_query_length': 39.62, 'max_query_length': 2, 'unique_queries': 3291, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3291}, 'go': {'number_of_characters': 2264134, 'num_samples': 190857, 'num_queries': 8122, 'num_documents': 182735, 'min_document_length': 23, 'average_document_length': 10.39, 'max_document_length': 3589, 'unique_documents': 182735, 'min_query_length': 2, 'average_query_length': 45.0, 'max_query_length': 2, 'unique_queries': 8122, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 8122}, 'ruby': {'number_of_characters': 391703, 'num_samples': 28849, 'num_queries': 1261, 'num_documents': 27588, 'min_document_length': 36, 'average_document_length': 12.2, 'max_document_length': 2244, 'unique_documents': 
27588, 'min_query_length': 2, 'average_query_length': 43.76, 'max_query_length': 2, 'unique_queries': 1261, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1261}, 'java': {'number_of_characters': 4114584, 'num_samples': 192016, 'num_queries': 10955, 'num_documents': 181061, 'min_document_length': 38, 'average_document_length': 20.72, 'max_document_length': 5066, 'unique_documents': 181061, 'min_query_length': 2, 'average_query_length': 33.06, 'max_query_length': 2, 'unique_queries': 10955, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10955}, 'php': {'number_of_characters': 5253894, 'num_samples': 282251, 'num_queries': 14014, 'num_documents': 268237, 'min_document_length': 40, 'average_document_length': 17.59, 'max_document_length': 2995, 'unique_documents': 268237, 'min_query_length': 2, 'average_query_length': 38.28, 'max_query_length': 2, 'unique_queries': 14014, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14014}}}} | +| [CodeSearchNetRetrieval](https://huggingface.co/datasets/code_search_net/) (Husain et al., 2019) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 12000} | {'test': {'number_of_characters': 1950074, 'num_samples': 12000, 'num_queries': 6000, 'num_documents': 6000, 'min_document_length': 2, 'average_document_length': 324.01, 'max_document_length': 17533, 'unique_documents': 6000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 6000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 6000, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 467546, 'num_samples': 2000, 'num_queries': 1000, 
'num_documents': 1000, 'min_document_length': 8, 'average_document_length': 466.55, 'max_document_length': 8636, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'javascript': {'number_of_characters': 187018, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 2, 'average_document_length': 186.02, 'max_document_length': 7657, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'go': {'number_of_characters': 126213, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 14, 'average_document_length': 125.21, 'max_document_length': 1501, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'ruby': {'number_of_characters': 314818, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 5, 'average_document_length': 313.82, 'max_document_length': 17533, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'java': {'number_of_characters': 691360, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 2, 'average_document_length': 690.36, 'max_document_length': 6473, 'unique_documents': 1000, 
'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'php': {'number_of_characters': 163119, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 5, 'average_document_length': 162.12, 'max_document_length': 1240, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}}} | +| [CodeTransOceanContest](https://arxiv.org/abs/2310.04951) (Weixiang Yan, 2023) | ['c++', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 1229} | {'test': {'number_of_characters': 1744286, 'num_samples': 1229, 'num_queries': 221, 'num_documents': 1008, 'min_document_length': 8, 'average_document_length': 221.9, 'max_document_length': 4147, 'unique_documents': 1008, 'min_query_length': 8, 'average_query_length': 6880.58, 'max_query_length': 10852, 'unique_queries': 221, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 221}} | +| [CodeTransOceanDL](https://arxiv.org/abs/2310.04951) (Weixiang Yan, 2023) | ['python'] | Retrieval | p2p | [Programming, Written] | {'test': 996} | {'test': {'number_of_characters': 1543912, 'num_samples': 996, 'num_queries': 180, 'num_documents': 816, 'min_document_length': 376, 'average_document_length': 411.98, 'max_document_length': 8285, 'unique_documents': 816, 'min_query_length': 58, 'average_query_length': 6709.67, 'max_query_length': 8469, 'unique_queries': 180, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 180}} | | 
[ContractNLIConfidentialityOfAgreementLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [ContractNLIExplicitIdentificationLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | @@ -146,9 +146,9 @@ The following tables give you an overview of the tasks in MTEB. | [ContractNLISharingWithEmployeesLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [ContractNLISharingWithThirdPartiesLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [ContractNLISurvivalOfObligationsLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | -| [Core17InstructionRetrieval](https://arxiv.org/abs/2403.15246) (Orion Weller, 2024) | ['eng'] | InstructionRetrieval | s2p | [News, Written] | {'test': 19919} | {'test': {'num_samples': 19919, 'num_docs': 19899, 'num_queries': 20, 'number_of_characters': 44450333, 'average_document_length': 2233.03, 'average_query_length': 109.75, 'average_instruction_length': 295.55, 'average_changed_instruction_length': 355.2, 'average_relevant_docs_per_query': 32.7, 'average_top_ranked_per_query': 1000.0}} | +| [Core17InstructionRetrieval](https://arxiv.org/abs/2403.15246) (Orion Weller, 2024) | ['eng'] | InstructionRetrieval | s2p | [News, Written] | {'test': 19919} | {'test': {'num_samples': 19919, 'num_docs': 
19899, 'num_queries': 20, 'number_of_characters': 44450333, 'min_document_length': 7, 'average_document_length': 2233.03, 'max_document_length': 2959, 'unique_docs': 19143, 'min_query_length': 55, 'average_query_length': 109.75, 'max_query_length': 278, 'unique_queries': 20, 'min_instruction_length': 102, 'average_instruction_length': 295.55, 'max_instruction_length': 811, 'unique_instructions': 20, 'min_changed_instruction_length': 151, 'average_changed_instruction_length': 355.2, 'max_changed_instruction_length': 837, 'unique_changed_instructions': 20, 'min_average_relevant_docs_per_query': 4, 'average_relevant_docs_per_query': 32.7, 'max_average_relevant_docs_per_query': 55, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}} | | [CorporateLobbyingLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | -| [CosQA](https://arxiv.org/abs/2105.13239) (Junjie Huang, 2021) | ['eng', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 21104} | {'test': {'number_of_characters': 313.95, 'num_samples': 21104, 'num_queries': 500, 'num_documents': 20604, 'average_document_length': 0.01, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}} | +| [CosQA](https://arxiv.org/abs/2105.13239) (Junjie Huang, 2021) | ['eng', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 21104} | {'test': {'number_of_characters': 5728450, 'num_samples': 21104, 'num_queries': 500, 'num_documents': 20604, 'min_document_length': 18, 'average_document_length': 0.89, 'max_document_length': 83, 'unique_documents': 20604, 'min_query_length': 88, 'average_query_length': 11420.09, 'max_query_length': 6396, 'unique_queries': 500, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 500}} | | 
[CovidRetrieval](https://arxiv.org/abs/2203.03367) | ['cmn'] | Retrieval | s2p | | None | None | | [CrossLingualSemanticDiscriminationWMT19](https://huggingface.co/datasets/Andrianos/clsd_wmt19_21) | ['deu', 'fra'] | Retrieval | s2s | [News, Written] | None | None | | [CrossLingualSemanticDiscriminationWMT21](https://huggingface.co/datasets/Andrianos/clsd_wmt19_21) | ['deu', 'fra'] | Retrieval | s2s | [News, Written] | None | None | @@ -175,7 +175,7 @@ The following tables give you an overview of the tasks in MTEB. | [Diversity6LegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [DuRetrieval](https://aclanthology.org/2022.emnlp-main.357.pdf) (Yifu Qiu, 2022) | ['cmn'] | Retrieval | s2p | | None | None | | [DutchBookReviewSentimentClassification](https://github.com/benjaminvdb/DBRD) (Benjamin et al., 2019) | ['nld'] | Classification | s2s | [Reviews, Written] | None | None | -| [ESCIReranking](https://github.com/amazon-science/esci-data/) (Chandan K. 
Reddy, 2022) | ['eng', 'jpn', 'spa'] | Reranking | s2p | [Written] | {'test': 29285} | {'test': {'num_samples': 29285, 'number_of_characters': 254538331, 'num_positive': 271416, 'num_negative': 44235, 'avg_query_len': 19.69, 'avg_positive_len': 803.92, 'avg_negative_len': 808.5, 'hf_subset_descriptive_stats': {'us': {'num_samples': 21296, 'number_of_characters': 186915609, 'num_positive': 189375, 'num_negative': 25463, 'avg_query_len': 21.44, 'avg_positive_len': 868.37, 'avg_negative_len': 864.45}, 'es': {'num_samples': 3703, 'number_of_characters': 48861389, 'num_positive': 39110, 'num_negative': 10183, 'avg_query_len': 20.68, 'avg_positive_len': 980.96, 'avg_negative_len': 1023.22}, 'jp': {'num_samples': 4286, 'number_of_characters': 18761333, 'num_positive': 42931, 'num_negative': 8589, 'avg_query_len': 10.15, 'avg_positive_len': 358.36, 'avg_negative_len': 388.08}}}} | +| [ESCIReranking](https://github.com/amazon-science/esci-data/) (Chandan K. Reddy, 2022) | ['eng', 'jpn', 'spa'] | Reranking | s2p | [Written] | {'test': 29285} | {'test': {'num_samples': 29285, 'number_of_characters': 254538331, 'num_positive': 271416, 'num_negative': 44235, 'min_query_length': 1, 'avg_query_length': 19.69, 'max_query_length': 151, 'unique_query': 29269, 'min_positive_length': 1, 'avg_positive_length': 803.92, 'max_positive_length': 8640, 'unique_positive': 217712, 'min_negative_length': 1, 'avg_negative_length': 808.5, 'max_negative_length': 4441, 'unique_negative': 39551, 'hf_subset_descriptive_stats': {'us': {'num_samples': 21296, 'number_of_characters': 186915609, 'num_positive': 189375, 'num_negative': 25463, 'min_query_length': 1, 'avg_query_length': 21.44, 'max_query_length': 151, 'unique_query': 21296, 'min_positive_length': 1, 'avg_positive_length': 868.37, 'max_positive_length': 5545, 'unique_positive': 150734, 'min_negative_length': 1, 'avg_negative_length': 864.45, 'max_negative_length': 3779, 'unique_negative': 23073}, 'es': {'num_samples': 3703, 
'number_of_characters': 48861389, 'num_positive': 39110, 'num_negative': 10183, 'min_query_length': 3, 'avg_query_length': 20.68, 'max_query_length': 59, 'unique_query': 3703, 'min_positive_length': 1, 'avg_positive_length': 980.96, 'max_positive_length': 8640, 'unique_positive': 32921, 'min_negative_length': 1, 'avg_negative_length': 1023.22, 'max_negative_length': 4441, 'unique_negative': 9285}, 'jp': {'num_samples': 4286, 'number_of_characters': 18761333, 'num_positive': 42931, 'num_negative': 8589, 'min_query_length': 1, 'avg_query_length': 10.15, 'max_query_length': 60, 'unique_query': 4286, 'min_positive_length': 1, 'avg_positive_length': 358.36, 'max_positive_length': 3488, 'unique_positive': 35165, 'min_negative_length': 1, 'avg_negative_length': 388.08, 'max_negative_length': 3940, 'unique_negative': 7289}}}} | | [EcomRetrieval](https://arxiv.org/abs/2203.03367) | ['cmn'] | Retrieval | s2p | | None | None | | [EightTagsClustering.v2](https://aclanthology.org/2020.lrec-1.207.pdf) | ['pol'] | Clustering | s2s | [Social, Written] | None | None | | [EmotionClassification](https://www.aclweb.org/anthology/D18-1404) | ['eng'] | Classification | s2s | [Social, Written] | None | None | @@ -231,13 +231,13 @@ The following tables give you an overview of the tasks in MTEB. 
| [HotpotQAHardNegatives](https://hotpotqa.github.io/) | ['eng'] | Retrieval | s2p | [Web, Written] | None | None | | [HunSum2AbstractiveRetrieval](https://arxiv.org/abs/2404.03555) (Botond Barta, 2024) | ['hun'] | Retrieval | s2p | [News, Written] | None | None | | [IFlyTek](https://www.cluebenchmarks.com/introduce.html) | ['cmn'] | Classification | s2s | | None | None | -| [IN22ConvBitextMining](https://huggingface.co/datasets/ai4bharat/IN22-Conv) (Jay Gala, 2023) | ['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Social, Spoken, Fiction, Spoken] | {'test': 760518} | {'test': {'average_sentence1_length': 54.33, 'average_sentence2_length': 54.33, 'num_samples': 760518, 'number_of_characters': 82637104, 'hf_subset_descriptive_stats': {'asm_Beng-ben_Beng': {'average_sentence1_length': 53.75, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 155988}, 'asm_Beng-brx_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 162044}, 'asm_Beng-doi_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 167032}, 'asm_Beng-eng_Latn': {'average_sentence1_length': 53.75, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 160716}, 'asm_Beng-gom_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 156282}, 'asm_Beng-guj_Gujr': {'average_sentence1_length': 53.75, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 158269}, 'asm_Beng-hin_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159964}, 'asm_Beng-kan_Knda': {'average_sentence1_length': 53.75, 'average_sentence2_length': 56.14, 
'num_samples': 1503, 'number_of_characters': 165177}, 'asm_Beng-kas_Arab': {'average_sentence1_length': 53.75, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 164681}, 'asm_Beng-mai_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 162408}, 'asm_Beng-mal_Mlym': {'average_sentence1_length': 53.75, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 172838}, 'asm_Beng-mar_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 162747}, 'asm_Beng-mni_Mtei': {'average_sentence1_length': 53.75, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 157316}, 'asm_Beng-npi_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 160906}, 'asm_Beng-ory_Orya': {'average_sentence1_length': 53.75, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 164223}, 'asm_Beng-pan_Guru': {'average_sentence1_length': 53.75, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 160201}, 'asm_Beng-san_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 158093}, 'asm_Beng-sat_Olck': {'average_sentence1_length': 53.75, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 169379}, 'asm_Beng-snd_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 162623}, 'asm_Beng-tam_Taml': {'average_sentence1_length': 53.75, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 174866}, 'asm_Beng-tel_Telu': {'average_sentence1_length': 53.75, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 157690}, 'asm_Beng-urd_Arab': {'average_sentence1_length': 
53.75, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 161305}, 'ben_Beng-asm_Beng': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 155988}, 'ben_Beng-brx_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 156448}, 'ben_Beng-doi_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 161436}, 'ben_Beng-eng_Latn': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 155120}, 'ben_Beng-gom_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 150686}, 'ben_Beng-guj_Gujr': {'average_sentence1_length': 50.03, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 152673}, 'ben_Beng-hin_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 154368}, 'ben_Beng-kan_Knda': {'average_sentence1_length': 50.03, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 159581}, 'ben_Beng-kas_Arab': {'average_sentence1_length': 50.03, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 159085}, 'ben_Beng-mai_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 156812}, 'ben_Beng-mal_Mlym': {'average_sentence1_length': 50.03, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 167242}, 'ben_Beng-mar_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 157151}, 'ben_Beng-mni_Mtei': {'average_sentence1_length': 50.03, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 151720}, 
'ben_Beng-npi_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 155310}, 'ben_Beng-ory_Orya': {'average_sentence1_length': 50.03, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 158627}, 'ben_Beng-pan_Guru': {'average_sentence1_length': 50.03, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 154605}, 'ben_Beng-san_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 152497}, 'ben_Beng-sat_Olck': {'average_sentence1_length': 50.03, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 163783}, 'ben_Beng-snd_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 157027}, 'ben_Beng-tam_Taml': {'average_sentence1_length': 50.03, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 169270}, 'ben_Beng-tel_Telu': {'average_sentence1_length': 50.03, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 152094}, 'ben_Beng-urd_Arab': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 155709}, 'brx_Deva-asm_Beng': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162044}, 'brx_Deva-ben_Beng': {'average_sentence1_length': 54.06, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 156448}, 'brx_Deva-doi_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 167492}, 'brx_Deva-eng_Latn': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161176}, 'brx_Deva-gom_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 50.23, 
'num_samples': 1503, 'number_of_characters': 156742}, 'brx_Deva-guj_Gujr': {'average_sentence1_length': 54.06, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 158729}, 'brx_Deva-hin_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 160424}, 'brx_Deva-kan_Knda': {'average_sentence1_length': 54.06, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 165637}, 'brx_Deva-kas_Arab': {'average_sentence1_length': 54.06, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165141}, 'brx_Deva-mai_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 162868}, 'brx_Deva-mal_Mlym': {'average_sentence1_length': 54.06, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 173298}, 'brx_Deva-mar_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 163207}, 'brx_Deva-mni_Mtei': {'average_sentence1_length': 54.06, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 157776}, 'brx_Deva-npi_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 161366}, 'brx_Deva-ory_Orya': {'average_sentence1_length': 54.06, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 164683}, 'brx_Deva-pan_Guru': {'average_sentence1_length': 54.06, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 160661}, 'brx_Deva-san_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 158553}, 'brx_Deva-sat_Olck': {'average_sentence1_length': 54.06, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 169839}, 'brx_Deva-snd_Deva': {'average_sentence1_length': 
54.06, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 163083}, 'brx_Deva-tam_Taml': {'average_sentence1_length': 54.06, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 175326}, 'brx_Deva-tel_Telu': {'average_sentence1_length': 54.06, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158150}, 'brx_Deva-urd_Arab': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 161765}, 'doi_Deva-asm_Beng': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 167032}, 'doi_Deva-ben_Beng': {'average_sentence1_length': 57.38, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 161436}, 'doi_Deva-brx_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 167492}, 'doi_Deva-eng_Latn': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 166164}, 'doi_Deva-gom_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 161730}, 'doi_Deva-guj_Gujr': {'average_sentence1_length': 57.38, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 163717}, 'doi_Deva-hin_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 165412}, 'doi_Deva-kan_Knda': {'average_sentence1_length': 57.38, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 170625}, 'doi_Deva-kas_Arab': {'average_sentence1_length': 57.38, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 170129}, 'doi_Deva-mai_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 167856}, 
'doi_Deva-mal_Mlym': {'average_sentence1_length': 57.38, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 178286}, 'doi_Deva-mar_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 168195}, 'doi_Deva-mni_Mtei': {'average_sentence1_length': 57.38, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 162764}, 'doi_Deva-npi_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 166354}, 'doi_Deva-ory_Orya': {'average_sentence1_length': 57.38, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 169671}, 'doi_Deva-pan_Guru': {'average_sentence1_length': 57.38, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 165649}, 'doi_Deva-san_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 163541}, 'doi_Deva-sat_Olck': {'average_sentence1_length': 57.38, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 174827}, 'doi_Deva-snd_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 168071}, 'doi_Deva-tam_Taml': {'average_sentence1_length': 57.38, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 180314}, 'doi_Deva-tel_Telu': {'average_sentence1_length': 57.38, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 163138}, 'doi_Deva-urd_Arab': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 166753}, 'eng_Latn-asm_Beng': {'average_sentence1_length': 53.18, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 160716}, 'eng_Latn-ben_Beng': {'average_sentence1_length': 53.18, 'average_sentence2_length': 50.03, 
'num_samples': 1503, 'number_of_characters': 155120}, 'eng_Latn-brx_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 161176}, 'eng_Latn-doi_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 166164}, 'eng_Latn-gom_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 155414}, 'eng_Latn-guj_Gujr': {'average_sentence1_length': 53.18, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 157401}, 'eng_Latn-hin_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159096}, 'eng_Latn-kan_Knda': {'average_sentence1_length': 53.18, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 164309}, 'eng_Latn-kas_Arab': {'average_sentence1_length': 53.18, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 163813}, 'eng_Latn-mai_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 161540}, 'eng_Latn-mal_Mlym': {'average_sentence1_length': 53.18, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 171970}, 'eng_Latn-mar_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 161879}, 'eng_Latn-mni_Mtei': {'average_sentence1_length': 53.18, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 156448}, 'eng_Latn-npi_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 160038}, 'eng_Latn-ory_Orya': {'average_sentence1_length': 53.18, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 163355}, 'eng_Latn-pan_Guru': {'average_sentence1_length': 
53.18, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 159333}, 'eng_Latn-san_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 157225}, 'eng_Latn-sat_Olck': {'average_sentence1_length': 53.18, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 168511}, 'eng_Latn-snd_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161755}, 'eng_Latn-tam_Taml': {'average_sentence1_length': 53.18, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 173998}, 'eng_Latn-tel_Telu': {'average_sentence1_length': 53.18, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 156822}, 'eng_Latn-urd_Arab': {'average_sentence1_length': 53.18, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 160437}, 'gom_Deva-asm_Beng': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 156282}, 'gom_Deva-ben_Beng': {'average_sentence1_length': 50.23, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 150686}, 'gom_Deva-brx_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 156742}, 'gom_Deva-doi_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 161730}, 'gom_Deva-eng_Latn': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 155414}, 'gom_Deva-guj_Gujr': {'average_sentence1_length': 50.23, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 152967}, 'gom_Deva-hin_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 154662}, 
'gom_Deva-kan_Knda': {'average_sentence1_length': 50.23, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 159875}, 'gom_Deva-kas_Arab': {'average_sentence1_length': 50.23, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 159379}, 'gom_Deva-mai_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 157106}, 'gom_Deva-mal_Mlym': {'average_sentence1_length': 50.23, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 167536}, 'gom_Deva-mar_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 157445}, 'gom_Deva-mni_Mtei': {'average_sentence1_length': 50.23, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 152014}, 'gom_Deva-npi_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 155604}, 'gom_Deva-ory_Orya': {'average_sentence1_length': 50.23, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 158921}, 'gom_Deva-pan_Guru': {'average_sentence1_length': 50.23, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 154899}, 'gom_Deva-san_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 152791}, 'gom_Deva-sat_Olck': {'average_sentence1_length': 50.23, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 164077}, 'gom_Deva-snd_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 157321}, 'gom_Deva-tam_Taml': {'average_sentence1_length': 50.23, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 169564}, 'gom_Deva-tel_Telu': {'average_sentence1_length': 50.23, 'average_sentence2_length': 51.16, 
'num_samples': 1503, 'number_of_characters': 152388}, 'gom_Deva-urd_Arab': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 156003}, 'guj_Gujr-asm_Beng': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 158269}, 'guj_Gujr-ben_Beng': {'average_sentence1_length': 51.55, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 152673}, 'guj_Gujr-brx_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 158729}, 'guj_Gujr-doi_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 163717}, 'guj_Gujr-eng_Latn': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 157401}, 'guj_Gujr-gom_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152967}, 'guj_Gujr-hin_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 156649}, 'guj_Gujr-kan_Knda': {'average_sentence1_length': 51.55, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 161862}, 'guj_Gujr-kas_Arab': {'average_sentence1_length': 51.55, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 161366}, 'guj_Gujr-mai_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 159093}, 'guj_Gujr-mal_Mlym': {'average_sentence1_length': 51.55, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 169523}, 'guj_Gujr-mar_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 159432}, 'guj_Gujr-mni_Mtei': 
{'average_sentence1_length': 51.55, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 154001}, 'guj_Gujr-npi_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 157591}, 'guj_Gujr-ory_Orya': {'average_sentence1_length': 51.55, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 160908}, 'guj_Gujr-pan_Guru': {'average_sentence1_length': 51.55, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 156886}, 'guj_Gujr-san_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 154778}, 'guj_Gujr-sat_Olck': {'average_sentence1_length': 51.55, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 166064}, 'guj_Gujr-snd_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 159308}, 'guj_Gujr-tam_Taml': {'average_sentence1_length': 51.55, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 171551}, 'guj_Gujr-tel_Telu': {'average_sentence1_length': 51.55, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 154375}, 'guj_Gujr-urd_Arab': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 157990}, 'hin_Deva-asm_Beng': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 159964}, 'hin_Deva-ben_Beng': {'average_sentence1_length': 52.68, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 154368}, 'hin_Deva-brx_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 160424}, 'hin_Deva-doi_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 57.38, 'num_samples': 1503, 
'number_of_characters': 165412}, 'hin_Deva-eng_Latn': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 159096}, 'hin_Deva-gom_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 154662}, 'hin_Deva-guj_Gujr': {'average_sentence1_length': 52.68, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 156649}, 'hin_Deva-kan_Knda': {'average_sentence1_length': 52.68, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 163557}, 'hin_Deva-kas_Arab': {'average_sentence1_length': 52.68, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 163061}, 'hin_Deva-mai_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 160788}, 'hin_Deva-mal_Mlym': {'average_sentence1_length': 52.68, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 171218}, 'hin_Deva-mar_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 161127}, 'hin_Deva-mni_Mtei': {'average_sentence1_length': 52.68, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 155696}, 'hin_Deva-npi_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 159286}, 'hin_Deva-ory_Orya': {'average_sentence1_length': 52.68, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 162603}, 'hin_Deva-pan_Guru': {'average_sentence1_length': 52.68, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 158581}, 'hin_Deva-san_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 156473}, 'hin_Deva-sat_Olck': {'average_sentence1_length': 52.68, 
'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 167759}, 'hin_Deva-snd_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161003}, 'hin_Deva-tam_Taml': {'average_sentence1_length': 52.68, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 173246}, 'hin_Deva-tel_Telu': {'average_sentence1_length': 52.68, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 156070}, 'hin_Deva-urd_Arab': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 159685}, 'kan_Knda-asm_Beng': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 165177}, 'kan_Knda-ben_Beng': {'average_sentence1_length': 56.14, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 159581}, 'kan_Knda-brx_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 165637}, 'kan_Knda-doi_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 170625}, 'kan_Knda-eng_Latn': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 164309}, 'kan_Knda-gom_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 159875}, 'kan_Knda-guj_Gujr': {'average_sentence1_length': 56.14, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 161862}, 'kan_Knda-hin_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 163557}, 'kan_Knda-kas_Arab': {'average_sentence1_length': 56.14, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 168274}, 
'kan_Knda-mai_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 166001}, 'kan_Knda-mal_Mlym': {'average_sentence1_length': 56.14, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 176431}, 'kan_Knda-mar_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 166340}, 'kan_Knda-mni_Mtei': {'average_sentence1_length': 56.14, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 160909}, 'kan_Knda-npi_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 164499}, 'kan_Knda-ory_Orya': {'average_sentence1_length': 56.14, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 167816}, 'kan_Knda-pan_Guru': {'average_sentence1_length': 56.14, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 163794}, 'kan_Knda-san_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 161686}, 'kan_Knda-sat_Olck': {'average_sentence1_length': 56.14, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 172972}, 'kan_Knda-snd_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 166216}, 'kan_Knda-tam_Taml': {'average_sentence1_length': 56.14, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 178459}, 'kan_Knda-tel_Telu': {'average_sentence1_length': 56.14, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 161283}, 'kan_Knda-urd_Arab': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 164898}, 'kas_Arab-asm_Beng': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.75, 
'num_samples': 1503, 'number_of_characters': 164681}, 'kas_Arab-ben_Beng': {'average_sentence1_length': 55.81, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 159085}, 'kas_Arab-brx_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 165141}, 'kas_Arab-doi_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 170129}, 'kas_Arab-eng_Latn': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 163813}, 'kas_Arab-gom_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 159379}, 'kas_Arab-guj_Gujr': {'average_sentence1_length': 55.81, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 161366}, 'kas_Arab-hin_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 163061}, 'kas_Arab-kan_Knda': {'average_sentence1_length': 55.81, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 168274}, 'kas_Arab-mai_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 165505}, 'kas_Arab-mal_Mlym': {'average_sentence1_length': 55.81, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 175935}, 'kas_Arab-mar_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 165844}, 'kas_Arab-mni_Mtei': {'average_sentence1_length': 55.81, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 160413}, 'kas_Arab-npi_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 164003}, 'kas_Arab-ory_Orya': {'average_sentence1_length': 
55.81, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 167320}, 'kas_Arab-pan_Guru': {'average_sentence1_length': 55.81, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 163298}, 'kas_Arab-san_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 161190}, 'kas_Arab-sat_Olck': {'average_sentence1_length': 55.81, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 172476}, 'kas_Arab-snd_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 165720}, 'kas_Arab-tam_Taml': {'average_sentence1_length': 55.81, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 177963}, 'kas_Arab-tel_Telu': {'average_sentence1_length': 55.81, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 160787}, 'kas_Arab-urd_Arab': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 164402}, 'mai_Deva-asm_Beng': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162408}, 'mai_Deva-ben_Beng': {'average_sentence1_length': 54.3, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 156812}, 'mai_Deva-brx_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 162868}, 'mai_Deva-doi_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 167856}, 'mai_Deva-eng_Latn': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161540}, 'mai_Deva-gom_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 157106}, 
'mai_Deva-guj_Gujr': {'average_sentence1_length': 54.3, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 159093}, 'mai_Deva-hin_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 160788}, 'mai_Deva-kan_Knda': {'average_sentence1_length': 54.3, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 166001}, 'mai_Deva-kas_Arab': {'average_sentence1_length': 54.3, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165505}, 'mai_Deva-mal_Mlym': {'average_sentence1_length': 54.3, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 173662}, 'mai_Deva-mar_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 163571}, 'mai_Deva-mni_Mtei': {'average_sentence1_length': 54.3, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 158140}, 'mai_Deva-npi_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 161730}, 'mai_Deva-ory_Orya': {'average_sentence1_length': 54.3, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 165047}, 'mai_Deva-pan_Guru': {'average_sentence1_length': 54.3, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 161025}, 'mai_Deva-san_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 158917}, 'mai_Deva-sat_Olck': {'average_sentence1_length': 54.3, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 170203}, 'mai_Deva-snd_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 163447}, 'mai_Deva-tam_Taml': {'average_sentence1_length': 54.3, 'average_sentence2_length': 62.59, 'num_samples': 1503, 
'number_of_characters': 175690}, 'mai_Deva-tel_Telu': {'average_sentence1_length': 54.3, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158514}, 'mai_Deva-urd_Arab': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 162129}, 'mal_Mlym-asm_Beng': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 172838}, 'mal_Mlym-ben_Beng': {'average_sentence1_length': 61.24, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 167242}, 'mal_Mlym-brx_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 173298}, 'mal_Mlym-doi_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 178286}, 'mal_Mlym-eng_Latn': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 171970}, 'mal_Mlym-gom_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 167536}, 'mal_Mlym-guj_Gujr': {'average_sentence1_length': 61.24, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 169523}, 'mal_Mlym-hin_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 171218}, 'mal_Mlym-kan_Knda': {'average_sentence1_length': 61.24, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 176431}, 'mal_Mlym-kas_Arab': {'average_sentence1_length': 61.24, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 175935}, 'mal_Mlym-mai_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 173662}, 'mal_Mlym-mar_Deva': {'average_sentence1_length': 61.24, 
'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 174001}, 'mal_Mlym-mni_Mtei': {'average_sentence1_length': 61.24, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 168570}, 'mal_Mlym-npi_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 172160}, 'mal_Mlym-ory_Orya': {'average_sentence1_length': 61.24, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 175477}, 'mal_Mlym-pan_Guru': {'average_sentence1_length': 61.24, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 171455}, 'mal_Mlym-san_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 169347}, 'mal_Mlym-sat_Olck': {'average_sentence1_length': 61.24, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 180633}, 'mal_Mlym-snd_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 173877}, 'mal_Mlym-tam_Taml': {'average_sentence1_length': 61.24, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 186120}, 'mal_Mlym-tel_Telu': {'average_sentence1_length': 61.24, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 168944}, 'mal_Mlym-urd_Arab': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 172559}, 'mar_Deva-asm_Beng': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162747}, 'mar_Deva-ben_Beng': {'average_sentence1_length': 54.53, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 157151}, 'mar_Deva-brx_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 163207}, 
'mar_Deva-doi_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 168195}, 'mar_Deva-eng_Latn': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161879}, 'mar_Deva-gom_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 157445}, 'mar_Deva-guj_Gujr': {'average_sentence1_length': 54.53, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 159432}, 'mar_Deva-hin_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 161127}, 'mar_Deva-kan_Knda': {'average_sentence1_length': 54.53, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 166340}, 'mar_Deva-kas_Arab': {'average_sentence1_length': 54.53, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165844}, 'mar_Deva-mai_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 163571}, 'mar_Deva-mal_Mlym': {'average_sentence1_length': 54.53, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 174001}, 'mar_Deva-mni_Mtei': {'average_sentence1_length': 54.53, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 158479}, 'mar_Deva-npi_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 162069}, 'mar_Deva-ory_Orya': {'average_sentence1_length': 54.53, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 165386}, 'mar_Deva-pan_Guru': {'average_sentence1_length': 54.53, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 161364}, 'mar_Deva-san_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 51.43, 
'num_samples': 1503, 'number_of_characters': 159256}, 'mar_Deva-sat_Olck': {'average_sentence1_length': 54.53, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 170542}, 'mar_Deva-snd_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 163786}, 'mar_Deva-tam_Taml': {'average_sentence1_length': 54.53, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 176029}, 'mar_Deva-tel_Telu': {'average_sentence1_length': 54.53, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158853}, 'mar_Deva-urd_Arab': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 162468}, 'mni_Mtei-asm_Beng': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 157316}, 'mni_Mtei-ben_Beng': {'average_sentence1_length': 50.91, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 151720}, 'mni_Mtei-brx_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 157776}, 'mni_Mtei-doi_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 162764}, 'mni_Mtei-eng_Latn': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 156448}, 'mni_Mtei-gom_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152014}, 'mni_Mtei-guj_Gujr': {'average_sentence1_length': 50.91, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 154001}, 'mni_Mtei-hin_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 155696}, 'mni_Mtei-kan_Knda': 
{'average_sentence1_length': 50.91, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 160909}, 'mni_Mtei-kas_Arab': {'average_sentence1_length': 50.91, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 160413}, 'mni_Mtei-mai_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 158140}, 'mni_Mtei-mal_Mlym': {'average_sentence1_length': 50.91, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 168570}, 'mni_Mtei-mar_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 158479}, 'mni_Mtei-npi_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 156638}, 'mni_Mtei-ory_Orya': {'average_sentence1_length': 50.91, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 159955}, 'mni_Mtei-pan_Guru': {'average_sentence1_length': 50.91, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 155933}, 'mni_Mtei-san_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 153825}, 'mni_Mtei-sat_Olck': {'average_sentence1_length': 50.91, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 165111}, 'mni_Mtei-snd_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 158355}, 'mni_Mtei-tam_Taml': {'average_sentence1_length': 50.91, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 170598}, 'mni_Mtei-tel_Telu': {'average_sentence1_length': 50.91, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 153422}, 'mni_Mtei-urd_Arab': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.57, 'num_samples': 1503, 
'number_of_characters': 157037}, 'npi_Deva-asm_Beng': {'average_sentence1_length': 53.3, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 160906}, 'npi_Deva-ben_Beng': {'average_sentence1_length': 53.3, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 155310}, 'npi_Deva-brx_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 161366}, 'npi_Deva-doi_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 166354}, 'npi_Deva-eng_Latn': {'average_sentence1_length': 53.3, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 160038}, 'npi_Deva-gom_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 155604}, 'npi_Deva-guj_Gujr': {'average_sentence1_length': 53.3, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 157591}, 'npi_Deva-hin_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159286}, 'npi_Deva-kan_Knda': {'average_sentence1_length': 53.3, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 164499}, 'npi_Deva-kas_Arab': {'average_sentence1_length': 53.3, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 164003}, 'npi_Deva-mai_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 161730}, 'npi_Deva-mal_Mlym': {'average_sentence1_length': 53.3, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 172160}, 'npi_Deva-mar_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 162069}, 'npi_Deva-mni_Mtei': {'average_sentence1_length': 53.3, 
'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 156638}, 'npi_Deva-ory_Orya': {'average_sentence1_length': 53.3, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 163545}, 'npi_Deva-pan_Guru': {'average_sentence1_length': 53.3, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 159523}, 'npi_Deva-san_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 157415}, 'npi_Deva-sat_Olck': {'average_sentence1_length': 53.3, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 168701}, 'npi_Deva-snd_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161945}, 'npi_Deva-tam_Taml': {'average_sentence1_length': 53.3, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 174188}, 'npi_Deva-tel_Telu': {'average_sentence1_length': 53.3, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 157012}, 'npi_Deva-urd_Arab': {'average_sentence1_length': 53.3, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 160627}, 'ory_Orya-asm_Beng': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 164223}, 'ory_Orya-ben_Beng': {'average_sentence1_length': 55.51, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 158627}, 'ory_Orya-brx_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 164683}, 'ory_Orya-doi_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 169671}, 'ory_Orya-eng_Latn': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 163355}, 'ory_Orya-gom_Deva': 
{'average_sentence1_length': 55.51, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 158921}, 'ory_Orya-guj_Gujr': {'average_sentence1_length': 55.51, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 160908}, 'ory_Orya-hin_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 162603}, 'ory_Orya-kan_Knda': {'average_sentence1_length': 55.51, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 167816}, 'ory_Orya-kas_Arab': {'average_sentence1_length': 55.51, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 167320}, 'ory_Orya-mai_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 165047}, 'ory_Orya-mal_Mlym': {'average_sentence1_length': 55.51, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 175477}, 'ory_Orya-mar_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 165386}, 'ory_Orya-mni_Mtei': {'average_sentence1_length': 55.51, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 159955}, 'ory_Orya-npi_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 163545}, 'ory_Orya-pan_Guru': {'average_sentence1_length': 55.51, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 162840}, 'ory_Orya-san_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 160732}, 'ory_Orya-sat_Olck': {'average_sentence1_length': 55.51, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 172018}, 'ory_Orya-snd_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.45, 'num_samples': 1503, 
'number_of_characters': 165262}, 'ory_Orya-tam_Taml': {'average_sentence1_length': 55.51, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 177505}, 'ory_Orya-tel_Telu': {'average_sentence1_length': 55.51, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 160329}, 'ory_Orya-urd_Arab': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 163944}, 'pan_Guru-asm_Beng': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 160201}, 'pan_Guru-ben_Beng': {'average_sentence1_length': 52.83, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 154605}, 'pan_Guru-brx_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 160661}, 'pan_Guru-doi_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 165649}, 'pan_Guru-eng_Latn': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 159333}, 'pan_Guru-gom_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 154899}, 'pan_Guru-guj_Gujr': {'average_sentence1_length': 52.83, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 156886}, 'pan_Guru-hin_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 158581}, 'pan_Guru-kan_Knda': {'average_sentence1_length': 52.83, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 163794}, 'pan_Guru-kas_Arab': {'average_sentence1_length': 52.83, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 163298}, 'pan_Guru-mai_Deva': {'average_sentence1_length': 52.83, 
'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 161025}, 'pan_Guru-mal_Mlym': {'average_sentence1_length': 52.83, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 171455}, 'pan_Guru-mar_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 161364}, 'pan_Guru-mni_Mtei': {'average_sentence1_length': 52.83, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 155933}, 'pan_Guru-npi_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 159523}, 'pan_Guru-ory_Orya': {'average_sentence1_length': 52.83, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 162840}, 'pan_Guru-san_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 156710}, 'pan_Guru-sat_Olck': {'average_sentence1_length': 52.83, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 167996}, 'pan_Guru-snd_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161240}, 'pan_Guru-tam_Taml': {'average_sentence1_length': 52.83, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 173483}, 'pan_Guru-tel_Telu': {'average_sentence1_length': 52.83, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 156307}, 'pan_Guru-urd_Arab': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 159922}, 'san_Deva-asm_Beng': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 158093}, 'san_Deva-ben_Beng': {'average_sentence1_length': 51.43, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 152497}, 
'san_Deva-brx_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 158553}, 'san_Deva-doi_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 163541}, 'san_Deva-eng_Latn': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 157225}, 'san_Deva-gom_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152791}, 'san_Deva-guj_Gujr': {'average_sentence1_length': 51.43, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 154778}, 'san_Deva-hin_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 156473}, 'san_Deva-kan_Knda': {'average_sentence1_length': 51.43, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 161686}, 'san_Deva-kas_Arab': {'average_sentence1_length': 51.43, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 161190}, 'san_Deva-mai_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 158917}, 'san_Deva-mal_Mlym': {'average_sentence1_length': 51.43, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 169347}, 'san_Deva-mar_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 159256}, 'san_Deva-mni_Mtei': {'average_sentence1_length': 51.43, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 153825}, 'san_Deva-npi_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 157415}, 'san_Deva-ory_Orya': {'average_sentence1_length': 51.43, 'average_sentence2_length': 55.51, 
'num_samples': 1503, 'number_of_characters': 160732}, 'san_Deva-pan_Guru': {'average_sentence1_length': 51.43, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 156710}, 'san_Deva-sat_Olck': {'average_sentence1_length': 51.43, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 165888}, 'san_Deva-snd_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 159132}, 'san_Deva-tam_Taml': {'average_sentence1_length': 51.43, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 171375}, 'san_Deva-tel_Telu': {'average_sentence1_length': 51.43, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 154199}, 'san_Deva-urd_Arab': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 157814}, 'sat_Olck-asm_Beng': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 169379}, 'sat_Olck-ben_Beng': {'average_sentence1_length': 58.94, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 163783}, 'sat_Olck-brx_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 169839}, 'sat_Olck-doi_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 174827}, 'sat_Olck-eng_Latn': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 168511}, 'sat_Olck-gom_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 164077}, 'sat_Olck-guj_Gujr': {'average_sentence1_length': 58.94, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 166064}, 'sat_Olck-hin_Deva': 
{'average_sentence1_length': 58.94, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 167759}, 'sat_Olck-kan_Knda': {'average_sentence1_length': 58.94, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 172972}, 'sat_Olck-kas_Arab': {'average_sentence1_length': 58.94, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 172476}, 'sat_Olck-mai_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 170203}, 'sat_Olck-mal_Mlym': {'average_sentence1_length': 58.94, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 180633}, 'sat_Olck-mar_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 170542}, 'sat_Olck-mni_Mtei': {'average_sentence1_length': 58.94, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 165111}, 'sat_Olck-npi_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 168701}, 'sat_Olck-ory_Orya': {'average_sentence1_length': 58.94, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 172018}, 'sat_Olck-pan_Guru': {'average_sentence1_length': 58.94, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 167996}, 'sat_Olck-san_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 165888}, 'sat_Olck-snd_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 170418}, 'sat_Olck-tam_Taml': {'average_sentence1_length': 58.94, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 182661}, 'sat_Olck-tel_Telu': {'average_sentence1_length': 58.94, 'average_sentence2_length': 51.16, 'num_samples': 1503, 
'number_of_characters': 165485}, 'sat_Olck-urd_Arab': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 169100}, 'snd_Deva-asm_Beng': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162623}, 'snd_Deva-ben_Beng': {'average_sentence1_length': 54.45, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 157027}, 'snd_Deva-brx_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 163083}, 'snd_Deva-doi_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 168071}, 'snd_Deva-eng_Latn': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161755}, 'snd_Deva-gom_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 157321}, 'snd_Deva-guj_Gujr': {'average_sentence1_length': 54.45, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 159308}, 'snd_Deva-hin_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 161003}, 'snd_Deva-kan_Knda': {'average_sentence1_length': 54.45, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 166216}, 'snd_Deva-kas_Arab': {'average_sentence1_length': 54.45, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165720}, 'snd_Deva-mai_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 163447}, 'snd_Deva-mal_Mlym': {'average_sentence1_length': 54.45, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 173877}, 'snd_Deva-mar_Deva': {'average_sentence1_length': 54.45, 
'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 163786}, 'snd_Deva-mni_Mtei': {'average_sentence1_length': 54.45, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 158355}, 'snd_Deva-npi_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 161945}, 'snd_Deva-ory_Orya': {'average_sentence1_length': 54.45, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 165262}, 'snd_Deva-pan_Guru': {'average_sentence1_length': 54.45, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 161240}, 'snd_Deva-san_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 159132}, 'snd_Deva-sat_Olck': {'average_sentence1_length': 54.45, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 170418}, 'snd_Deva-tam_Taml': {'average_sentence1_length': 54.45, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 175905}, 'snd_Deva-tel_Telu': {'average_sentence1_length': 54.45, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158729}, 'snd_Deva-urd_Arab': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 162344}, 'tam_Taml-asm_Beng': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 174866}, 'tam_Taml-ben_Beng': {'average_sentence1_length': 62.59, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 169270}, 'tam_Taml-brx_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 175326}, 'tam_Taml-doi_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 180314}, 
'tam_Taml-eng_Latn': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 173998}, 'tam_Taml-gom_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 169564}, 'tam_Taml-guj_Gujr': {'average_sentence1_length': 62.59, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 171551}, 'tam_Taml-hin_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 173246}, 'tam_Taml-kan_Knda': {'average_sentence1_length': 62.59, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 178459}, 'tam_Taml-kas_Arab': {'average_sentence1_length': 62.59, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 177963}, 'tam_Taml-mai_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 175690}, 'tam_Taml-mal_Mlym': {'average_sentence1_length': 62.59, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 186120}, 'tam_Taml-mar_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 176029}, 'tam_Taml-mni_Mtei': {'average_sentence1_length': 62.59, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 170598}, 'tam_Taml-npi_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 174188}, 'tam_Taml-ory_Orya': {'average_sentence1_length': 62.59, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 177505}, 'tam_Taml-pan_Guru': {'average_sentence1_length': 62.59, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 173483}, 'tam_Taml-san_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 51.43, 
'num_samples': 1503, 'number_of_characters': 171375}, 'tam_Taml-sat_Olck': {'average_sentence1_length': 62.59, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 182661}, 'tam_Taml-snd_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 175905}, 'tam_Taml-tel_Telu': {'average_sentence1_length': 62.59, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 170972}, 'tam_Taml-urd_Arab': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 174587}, 'tel_Telu-asm_Beng': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 157690}, 'tel_Telu-ben_Beng': {'average_sentence1_length': 51.16, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 152094}, 'tel_Telu-brx_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 158150}, 'tel_Telu-doi_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 163138}, 'tel_Telu-eng_Latn': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 156822}, 'tel_Telu-gom_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152388}, 'tel_Telu-guj_Gujr': {'average_sentence1_length': 51.16, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 154375}, 'tel_Telu-hin_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 156070}, 'tel_Telu-kan_Knda': {'average_sentence1_length': 51.16, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 161283}, 'tel_Telu-kas_Arab': 
{'average_sentence1_length': 51.16, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 160787}, 'tel_Telu-mai_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 158514}, 'tel_Telu-mal_Mlym': {'average_sentence1_length': 51.16, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 168944}, 'tel_Telu-mar_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 158853}, 'tel_Telu-mni_Mtei': {'average_sentence1_length': 51.16, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 153422}, 'tel_Telu-npi_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 157012}, 'tel_Telu-ory_Orya': {'average_sentence1_length': 51.16, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 160329}, 'tel_Telu-pan_Guru': {'average_sentence1_length': 51.16, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 156307}, 'tel_Telu-san_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 154199}, 'tel_Telu-sat_Olck': {'average_sentence1_length': 51.16, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 165485}, 'tel_Telu-snd_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 158729}, 'tel_Telu-tam_Taml': {'average_sentence1_length': 51.16, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 170972}, 'tel_Telu-urd_Arab': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 157411}, 'urd_Arab-asm_Beng': {'average_sentence1_length': 53.57, 'average_sentence2_length': 53.75, 'num_samples': 1503, 
'number_of_characters': 161305}, 'urd_Arab-ben_Beng': {'average_sentence1_length': 53.57, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 155709}, 'urd_Arab-brx_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 161765}, 'urd_Arab-doi_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 166753}, 'urd_Arab-eng_Latn': {'average_sentence1_length': 53.57, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 160437}, 'urd_Arab-gom_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 156003}, 'urd_Arab-guj_Gujr': {'average_sentence1_length': 53.57, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 157990}, 'urd_Arab-hin_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159685}, 'urd_Arab-kan_Knda': {'average_sentence1_length': 53.57, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 164898}, 'urd_Arab-kas_Arab': {'average_sentence1_length': 53.57, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 164402}, 'urd_Arab-mai_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 162129}, 'urd_Arab-mal_Mlym': {'average_sentence1_length': 53.57, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 172559}, 'urd_Arab-mar_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 162468}, 'urd_Arab-mni_Mtei': {'average_sentence1_length': 53.57, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 157037}, 'urd_Arab-npi_Deva': {'average_sentence1_length': 53.57, 
'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 160627}, 'urd_Arab-ory_Orya': {'average_sentence1_length': 53.57, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 163944}, 'urd_Arab-pan_Guru': {'average_sentence1_length': 53.57, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 159922}, 'urd_Arab-san_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 157814}, 'urd_Arab-sat_Olck': {'average_sentence1_length': 53.57, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 169100}, 'urd_Arab-snd_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 162344}, 'urd_Arab-tam_Taml': {'average_sentence1_length': 53.57, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 174587}, 'urd_Arab-tel_Telu': {'average_sentence1_length': 53.57, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 157411}}}} | -| [IN22GenBitextMining](https://huggingface.co/datasets/ai4bharat/IN22-Gen) (Jay Gala, 2023) | ['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Web, Legal, Government, News, Religious, Non-fiction, Written] | None | None | -| [IWSLT2017BitextMining](https://aclanthology.org/2017.iwslt-1.1/) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'jpn', 'kor', 'nld', 'ron'] | BitextMining | s2s | [Non-fiction, Fiction, Written] | None | None | +| [IN22ConvBitextMining](https://huggingface.co/datasets/ai4bharat/IN22-Conv) (Jay Gala, 2023) | ['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Social, Spoken, Fiction, 
Spoken] | {'test': 760518} | {'test': {'num_samples': 760518, 'number_of_characters': 82637104, 'unique_pairs': 759283, 'min_sentence1_length': 3, 'average_sentence1_length': 54.33, 'max_sentence1_length': 239, 'unique_sentence1': 34430, 'min_sentence2_length': 3, 'average_sentence2_length': 54.33, 'max_sentence2_length': 239, 'unique_sentence2': 34430, 'hf_subset_descriptive_stats': {'asm_Beng-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155988, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'asm_Beng-brx_Deva': {'num_samples': 1503, 'number_of_characters': 162044, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'asm_Beng-doi_Deva': {'num_samples': 1503, 'number_of_characters': 167032, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'asm_Beng-eng_Latn': {'num_samples': 1503, 'number_of_characters': 160716, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'asm_Beng-gom_Deva': {'num_samples': 1503, 'number_of_characters': 156282, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 
'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'asm_Beng-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 158269, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'asm_Beng-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159964, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'asm_Beng-kan_Knda': {'num_samples': 1503, 'number_of_characters': 165177, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'asm_Beng-kas_Arab': {'num_samples': 1503, 'number_of_characters': 164681, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'asm_Beng-mai_Deva': {'num_samples': 1503, 'number_of_characters': 162408, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'asm_Beng-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 172838, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 
'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'asm_Beng-mar_Deva': {'num_samples': 1503, 'number_of_characters': 162747, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'asm_Beng-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 157316, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'asm_Beng-npi_Deva': {'num_samples': 1503, 'number_of_characters': 160906, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'asm_Beng-ory_Orya': {'num_samples': 1503, 'number_of_characters': 164223, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'asm_Beng-pan_Guru': {'num_samples': 1503, 'number_of_characters': 160201, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'asm_Beng-san_Deva': {'num_samples': 1503, 'number_of_characters': 158093, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'asm_Beng-sat_Olck': {'num_samples': 1503, 'number_of_characters': 169379, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'asm_Beng-snd_Deva': {'num_samples': 1503, 'number_of_characters': 162623, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'asm_Beng-tam_Taml': {'num_samples': 1503, 'number_of_characters': 174866, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'asm_Beng-tel_Telu': {'num_samples': 1503, 'number_of_characters': 157690, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'asm_Beng-urd_Arab': {'num_samples': 1503, 'number_of_characters': 161305, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'ben_Beng-asm_Beng': {'num_samples': 1503, 'number_of_characters': 155988, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'ben_Beng-brx_Deva': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'ben_Beng-doi_Deva': {'num_samples': 1503, 'number_of_characters': 161436, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'ben_Beng-eng_Latn': {'num_samples': 1503, 'number_of_characters': 155120, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'ben_Beng-gom_Deva': {'num_samples': 1503, 'number_of_characters': 150686, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'ben_Beng-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 152673, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'ben_Beng-hin_Deva': {'num_samples': 1503, 'number_of_characters': 154368, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 
'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'ben_Beng-kan_Knda': {'num_samples': 1503, 'number_of_characters': 159581, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'ben_Beng-kas_Arab': {'num_samples': 1503, 'number_of_characters': 159085, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'ben_Beng-mai_Deva': {'num_samples': 1503, 'number_of_characters': 156812, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'ben_Beng-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 167242, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'ben_Beng-mar_Deva': {'num_samples': 1503, 'number_of_characters': 157151, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'ben_Beng-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 151720, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 
'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'ben_Beng-npi_Deva': {'num_samples': 1503, 'number_of_characters': 155310, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'ben_Beng-ory_Orya': {'num_samples': 1503, 'number_of_characters': 158627, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'ben_Beng-pan_Guru': {'num_samples': 1503, 'number_of_characters': 154605, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'ben_Beng-san_Deva': {'num_samples': 1503, 'number_of_characters': 152497, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'ben_Beng-sat_Olck': {'num_samples': 1503, 'number_of_characters': 163783, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'ben_Beng-snd_Deva': {'num_samples': 1503, 'number_of_characters': 157027, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'ben_Beng-tam_Taml': {'num_samples': 1503, 'number_of_characters': 169270, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'ben_Beng-tel_Telu': {'num_samples': 1503, 'number_of_characters': 152094, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'ben_Beng-urd_Arab': {'num_samples': 1503, 'number_of_characters': 155709, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'brx_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162044, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'brx_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'brx_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 167492, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 
'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'brx_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161176, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'brx_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 156742, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'brx_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'brx_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 160424, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'brx_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 165637, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'brx_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165141, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 
'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'brx_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 162868, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'brx_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 173298, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'brx_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 163207, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'brx_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 157776, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'brx_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'brx_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 164683, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 
'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'brx_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 160661, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'brx_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 158553, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'brx_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 169839, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'brx_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 163083, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'brx_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 175326, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'brx_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158150, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'brx_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 161765, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'doi_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 167032, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'doi_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 161436, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'doi_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 167492, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'doi_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 166164, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'doi_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 
'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'doi_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 163717, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'doi_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 165412, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'doi_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 170625, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'doi_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 170129, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'doi_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 167856, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'doi_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 178286, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 
'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'doi_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 168195, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'doi_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 162764, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'doi_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 166354, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'doi_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 169671, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'doi_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 165649, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'doi_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 163541, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'doi_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 174827, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'doi_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 168071, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'doi_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 180314, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'doi_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 163138, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'doi_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 166753, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'eng_Latn-asm_Beng': {'num_samples': 1503, 'number_of_characters': 160716, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'eng_Latn-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155120, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'eng_Latn-brx_Deva': {'num_samples': 1503, 'number_of_characters': 161176, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'eng_Latn-doi_Deva': {'num_samples': 1503, 'number_of_characters': 166164, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'eng_Latn-gom_Deva': {'num_samples': 1503, 'number_of_characters': 155414, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'eng_Latn-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 157401, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'eng_Latn-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159096, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 
'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'eng_Latn-kan_Knda': {'num_samples': 1503, 'number_of_characters': 164309, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'eng_Latn-kas_Arab': {'num_samples': 1503, 'number_of_characters': 163813, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'eng_Latn-mai_Deva': {'num_samples': 1503, 'number_of_characters': 161540, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'eng_Latn-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 171970, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'eng_Latn-mar_Deva': {'num_samples': 1503, 'number_of_characters': 161879, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'eng_Latn-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 
'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'eng_Latn-npi_Deva': {'num_samples': 1503, 'number_of_characters': 160038, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'eng_Latn-ory_Orya': {'num_samples': 1503, 'number_of_characters': 163355, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'eng_Latn-pan_Guru': {'num_samples': 1503, 'number_of_characters': 159333, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'eng_Latn-san_Deva': {'num_samples': 1503, 'number_of_characters': 157225, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'eng_Latn-sat_Olck': {'num_samples': 1503, 'number_of_characters': 168511, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'eng_Latn-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161755, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'eng_Latn-tam_Taml': {'num_samples': 1503, 'number_of_characters': 173998, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'eng_Latn-tel_Telu': {'num_samples': 1503, 'number_of_characters': 156822, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'eng_Latn-urd_Arab': {'num_samples': 1503, 'number_of_characters': 160437, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'gom_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 156282, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'gom_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 150686, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'gom_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 156742, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'gom_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'gom_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 155414, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'gom_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 152967, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'gom_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 154662, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'gom_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 159875, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'gom_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 159379, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 
'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'gom_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 157106, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'gom_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 167536, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'gom_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 157445, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'gom_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 152014, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'gom_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 155604, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'gom_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 158921, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 
'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'gom_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 154899, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'gom_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 152791, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'gom_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 164077, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'gom_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 157321, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'gom_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 169564, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'gom_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 152388, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'gom_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 156003, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'guj_Gujr-asm_Beng': {'num_samples': 1503, 'number_of_characters': 158269, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'guj_Gujr-ben_Beng': {'num_samples': 1503, 'number_of_characters': 152673, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'guj_Gujr-brx_Deva': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'guj_Gujr-doi_Deva': {'num_samples': 1503, 'number_of_characters': 163717, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'guj_Gujr-eng_Latn': {'num_samples': 1503, 'number_of_characters': 157401, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'guj_Gujr-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152967, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'guj_Gujr-hin_Deva': {'num_samples': 1503, 'number_of_characters': 156649, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'guj_Gujr-kan_Knda': {'num_samples': 1503, 'number_of_characters': 161862, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'guj_Gujr-kas_Arab': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'guj_Gujr-mai_Deva': {'num_samples': 1503, 'number_of_characters': 159093, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'guj_Gujr-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 169523, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 
'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'guj_Gujr-mar_Deva': {'num_samples': 1503, 'number_of_characters': 159432, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'guj_Gujr-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 154001, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'guj_Gujr-npi_Deva': {'num_samples': 1503, 'number_of_characters': 157591, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'guj_Gujr-ory_Orya': {'num_samples': 1503, 'number_of_characters': 160908, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'guj_Gujr-pan_Guru': {'num_samples': 1503, 'number_of_characters': 156886, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'guj_Gujr-san_Deva': {'num_samples': 1503, 'number_of_characters': 154778, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'guj_Gujr-sat_Olck': {'num_samples': 1503, 'number_of_characters': 166064, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'guj_Gujr-snd_Deva': {'num_samples': 1503, 'number_of_characters': 159308, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'guj_Gujr-tam_Taml': {'num_samples': 1503, 'number_of_characters': 171551, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'guj_Gujr-tel_Telu': {'num_samples': 1503, 'number_of_characters': 154375, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'guj_Gujr-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157990, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'hin_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 159964, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'hin_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 154368, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'hin_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 160424, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'hin_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 165412, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'hin_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 159096, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'hin_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 154662, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'hin_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 156649, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'hin_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 163557, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'hin_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 163061, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'hin_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 160788, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'hin_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 171218, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'hin_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 161127, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'hin_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 155696, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 
'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'hin_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 159286, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'hin_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 162603, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'hin_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 158581, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'hin_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 156473, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'hin_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 167759, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'hin_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'hin_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 173246, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'hin_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 156070, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'hin_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 159685, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'kan_Knda-asm_Beng': {'num_samples': 1503, 'number_of_characters': 165177, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'kan_Knda-ben_Beng': {'num_samples': 1503, 'number_of_characters': 159581, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'kan_Knda-brx_Deva': {'num_samples': 1503, 'number_of_characters': 165637, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'kan_Knda-doi_Deva': {'num_samples': 1503, 'number_of_characters': 170625, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'kan_Knda-eng_Latn': {'num_samples': 1503, 'number_of_characters': 164309, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'kan_Knda-gom_Deva': {'num_samples': 1503, 'number_of_characters': 159875, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'kan_Knda-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 161862, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'kan_Knda-hin_Deva': {'num_samples': 1503, 'number_of_characters': 163557, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'kan_Knda-kas_Arab': {'num_samples': 1503, 'number_of_characters': 168274, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 
'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'kan_Knda-mai_Deva': {'num_samples': 1503, 'number_of_characters': 166001, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'kan_Knda-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 176431, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'kan_Knda-mar_Deva': {'num_samples': 1503, 'number_of_characters': 166340, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'kan_Knda-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 160909, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'kan_Knda-npi_Deva': {'num_samples': 1503, 'number_of_characters': 164499, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'kan_Knda-ory_Orya': {'num_samples': 1503, 'number_of_characters': 167816, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 
'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'kan_Knda-pan_Guru': {'num_samples': 1503, 'number_of_characters': 163794, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'kan_Knda-san_Deva': {'num_samples': 1503, 'number_of_characters': 161686, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'kan_Knda-sat_Olck': {'num_samples': 1503, 'number_of_characters': 172972, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'kan_Knda-snd_Deva': {'num_samples': 1503, 'number_of_characters': 166216, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'kan_Knda-tam_Taml': {'num_samples': 1503, 'number_of_characters': 178459, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'kan_Knda-tel_Telu': {'num_samples': 1503, 'number_of_characters': 161283, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'kan_Knda-urd_Arab': {'num_samples': 1503, 'number_of_characters': 164898, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'kas_Arab-asm_Beng': {'num_samples': 1503, 'number_of_characters': 164681, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'kas_Arab-ben_Beng': {'num_samples': 1503, 'number_of_characters': 159085, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'kas_Arab-brx_Deva': {'num_samples': 1503, 'number_of_characters': 165141, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'kas_Arab-doi_Deva': {'num_samples': 1503, 'number_of_characters': 170129, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'kas_Arab-eng_Latn': {'num_samples': 1503, 'number_of_characters': 163813, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'kas_Arab-gom_Deva': {'num_samples': 1503, 'number_of_characters': 159379, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'kas_Arab-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'kas_Arab-hin_Deva': {'num_samples': 1503, 'number_of_characters': 163061, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'kas_Arab-kan_Knda': {'num_samples': 1503, 'number_of_characters': 168274, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'kas_Arab-mai_Deva': {'num_samples': 1503, 'number_of_characters': 165505, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'kas_Arab-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 175935, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 
'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'kas_Arab-mar_Deva': {'num_samples': 1503, 'number_of_characters': 165844, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'kas_Arab-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 160413, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'kas_Arab-npi_Deva': {'num_samples': 1503, 'number_of_characters': 164003, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'kas_Arab-ory_Orya': {'num_samples': 1503, 'number_of_characters': 167320, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'kas_Arab-pan_Guru': {'num_samples': 1503, 'number_of_characters': 163298, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'kas_Arab-san_Deva': {'num_samples': 1503, 'number_of_characters': 161190, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'kas_Arab-sat_Olck': {'num_samples': 1503, 'number_of_characters': 172476, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'kas_Arab-snd_Deva': {'num_samples': 1503, 'number_of_characters': 165720, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'kas_Arab-tam_Taml': {'num_samples': 1503, 'number_of_characters': 177963, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'kas_Arab-tel_Telu': {'num_samples': 1503, 'number_of_characters': 160787, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'kas_Arab-urd_Arab': {'num_samples': 1503, 'number_of_characters': 164402, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mai_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162408, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mai_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 156812, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mai_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 162868, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mai_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 167856, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mai_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161540, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mai_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 157106, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mai_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 159093, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mai_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 160788, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mai_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 166001, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mai_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165505, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mai_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 173662, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'mai_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 163571, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'mai_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 158140, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 
'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'mai_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mai_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 165047, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mai_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 161025, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mai_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 158917, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mai_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 170203, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mai_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 163447, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mai_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 175690, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mai_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158514, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mai_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 162129, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mal_Mlym-asm_Beng': {'num_samples': 1503, 'number_of_characters': 172838, 'unique_pairs': 1498, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mal_Mlym-ben_Beng': {'num_samples': 1503, 'number_of_characters': 167242, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mal_Mlym-brx_Deva': {'num_samples': 1503, 'number_of_characters': 173298, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mal_Mlym-doi_Deva': {'num_samples': 1503, 'number_of_characters': 178286, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mal_Mlym-eng_Latn': {'num_samples': 1503, 'number_of_characters': 171970, 'unique_pairs': 1499, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mal_Mlym-gom_Deva': {'num_samples': 1503, 'number_of_characters': 167536, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mal_Mlym-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 169523, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mal_Mlym-hin_Deva': {'num_samples': 1503, 'number_of_characters': 171218, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mal_Mlym-kan_Knda': {'num_samples': 1503, 'number_of_characters': 176431, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 
'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mal_Mlym-kas_Arab': {'num_samples': 1503, 'number_of_characters': 175935, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mal_Mlym-mai_Deva': {'num_samples': 1503, 'number_of_characters': 173662, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'mal_Mlym-mar_Deva': {'num_samples': 1503, 'number_of_characters': 174001, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'mal_Mlym-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 168570, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'mal_Mlym-npi_Deva': {'num_samples': 1503, 'number_of_characters': 172160, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mal_Mlym-ory_Orya': {'num_samples': 1503, 'number_of_characters': 175477, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 
'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mal_Mlym-pan_Guru': {'num_samples': 1503, 'number_of_characters': 171455, 'unique_pairs': 1498, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mal_Mlym-san_Deva': {'num_samples': 1503, 'number_of_characters': 169347, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mal_Mlym-sat_Olck': {'num_samples': 1503, 'number_of_characters': 180633, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mal_Mlym-snd_Deva': {'num_samples': 1503, 'number_of_characters': 173877, 'unique_pairs': 1499, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mal_Mlym-tam_Taml': {'num_samples': 1503, 'number_of_characters': 186120, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mal_Mlym-tel_Telu': {'num_samples': 1503, 'number_of_characters': 168944, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mal_Mlym-urd_Arab': {'num_samples': 1503, 'number_of_characters': 172559, 'unique_pairs': 1499, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mar_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162747, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mar_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 157151, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mar_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 163207, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mar_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 168195, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mar_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161879, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mar_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 157445, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mar_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 159432, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mar_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 161127, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mar_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 166340, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mar_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165844, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mar_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 163571, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 
'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'mar_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 174001, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'mar_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 158479, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'mar_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 162069, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mar_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 165386, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mar_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 161364, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mar_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 159256, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mar_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 170542, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mar_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 163786, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mar_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 176029, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mar_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158853, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mar_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 162468, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mni_Mtei-asm_Beng': {'num_samples': 1503, 'number_of_characters': 157316, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mni_Mtei-ben_Beng': {'num_samples': 1503, 'number_of_characters': 151720, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mni_Mtei-brx_Deva': {'num_samples': 1503, 'number_of_characters': 157776, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mni_Mtei-doi_Deva': {'num_samples': 1503, 'number_of_characters': 162764, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mni_Mtei-eng_Latn': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mni_Mtei-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152014, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mni_Mtei-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 154001, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mni_Mtei-hin_Deva': {'num_samples': 1503, 'number_of_characters': 155696, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mni_Mtei-kan_Knda': {'num_samples': 1503, 'number_of_characters': 160909, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mni_Mtei-kas_Arab': {'num_samples': 1503, 'number_of_characters': 160413, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mni_Mtei-mai_Deva': {'num_samples': 1503, 'number_of_characters': 158140, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'mni_Mtei-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 168570, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'mni_Mtei-mar_Deva': {'num_samples': 1503, 'number_of_characters': 158479, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 
'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'mni_Mtei-npi_Deva': {'num_samples': 1503, 'number_of_characters': 156638, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mni_Mtei-ory_Orya': {'num_samples': 1503, 'number_of_characters': 159955, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mni_Mtei-pan_Guru': {'num_samples': 1503, 'number_of_characters': 155933, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mni_Mtei-san_Deva': {'num_samples': 1503, 'number_of_characters': 153825, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mni_Mtei-sat_Olck': {'num_samples': 1503, 'number_of_characters': 165111, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mni_Mtei-snd_Deva': {'num_samples': 1503, 'number_of_characters': 158355, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mni_Mtei-tam_Taml': {'num_samples': 1503, 'number_of_characters': 170598, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mni_Mtei-tel_Telu': {'num_samples': 1503, 'number_of_characters': 153422, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mni_Mtei-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157037, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'npi_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 160906, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'npi_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155310, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'npi_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'npi_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 166354, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'npi_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 160038, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'npi_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 155604, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'npi_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 157591, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'npi_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159286, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'npi_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 164499, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 
'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'npi_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 164003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'npi_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'npi_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 172160, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'npi_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 162069, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'npi_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 156638, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'npi_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 163545, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 
'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'npi_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 159523, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'npi_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 157415, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'npi_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 168701, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'npi_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161945, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'npi_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 174188, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'npi_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 157012, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'npi_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 160627, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'ory_Orya-asm_Beng': {'num_samples': 1503, 'number_of_characters': 164223, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'ory_Orya-ben_Beng': {'num_samples': 1503, 'number_of_characters': 158627, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'ory_Orya-brx_Deva': {'num_samples': 1503, 'number_of_characters': 164683, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'ory_Orya-doi_Deva': {'num_samples': 1503, 'number_of_characters': 169671, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'ory_Orya-eng_Latn': {'num_samples': 1503, 'number_of_characters': 163355, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'ory_Orya-gom_Deva': {'num_samples': 1503, 'number_of_characters': 158921, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'ory_Orya-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 160908, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'ory_Orya-hin_Deva': {'num_samples': 1503, 'number_of_characters': 162603, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'ory_Orya-kan_Knda': {'num_samples': 1503, 'number_of_characters': 167816, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'ory_Orya-kas_Arab': {'num_samples': 1503, 'number_of_characters': 167320, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'ory_Orya-mai_Deva': {'num_samples': 1503, 'number_of_characters': 165047, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 
'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'ory_Orya-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 175477, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'ory_Orya-mar_Deva': {'num_samples': 1503, 'number_of_characters': 165386, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'ory_Orya-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 159955, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'ory_Orya-npi_Deva': {'num_samples': 1503, 'number_of_characters': 163545, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'ory_Orya-pan_Guru': {'num_samples': 1503, 'number_of_characters': 162840, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'ory_Orya-san_Deva': {'num_samples': 1503, 'number_of_characters': 160732, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'ory_Orya-sat_Olck': {'num_samples': 1503, 'number_of_characters': 172018, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'ory_Orya-snd_Deva': {'num_samples': 1503, 'number_of_characters': 165262, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'ory_Orya-tam_Taml': {'num_samples': 1503, 'number_of_characters': 177505, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'ory_Orya-tel_Telu': {'num_samples': 1503, 'number_of_characters': 160329, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'ory_Orya-urd_Arab': {'num_samples': 1503, 'number_of_characters': 163944, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'pan_Guru-asm_Beng': {'num_samples': 1503, 'number_of_characters': 160201, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'pan_Guru-ben_Beng': {'num_samples': 1503, 'number_of_characters': 154605, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'pan_Guru-brx_Deva': {'num_samples': 1503, 'number_of_characters': 160661, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'pan_Guru-doi_Deva': {'num_samples': 1503, 'number_of_characters': 165649, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'pan_Guru-eng_Latn': {'num_samples': 1503, 'number_of_characters': 159333, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'pan_Guru-gom_Deva': {'num_samples': 1503, 'number_of_characters': 154899, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'pan_Guru-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 156886, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'pan_Guru-hin_Deva': {'num_samples': 1503, 'number_of_characters': 158581, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'pan_Guru-kan_Knda': {'num_samples': 1503, 'number_of_characters': 163794, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'pan_Guru-kas_Arab': {'num_samples': 1503, 'number_of_characters': 163298, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'pan_Guru-mai_Deva': {'num_samples': 1503, 'number_of_characters': 161025, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'pan_Guru-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 171455, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'pan_Guru-mar_Deva': {'num_samples': 1503, 'number_of_characters': 161364, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 
'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'pan_Guru-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 155933, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'pan_Guru-npi_Deva': {'num_samples': 1503, 'number_of_characters': 159523, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'pan_Guru-ory_Orya': {'num_samples': 1503, 'number_of_characters': 162840, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'pan_Guru-san_Deva': {'num_samples': 1503, 'number_of_characters': 156710, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'pan_Guru-sat_Olck': {'num_samples': 1503, 'number_of_characters': 167996, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'pan_Guru-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161240, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'pan_Guru-tam_Taml': {'num_samples': 1503, 'number_of_characters': 173483, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'pan_Guru-tel_Telu': {'num_samples': 1503, 'number_of_characters': 156307, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'pan_Guru-urd_Arab': {'num_samples': 1503, 'number_of_characters': 159922, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'san_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 158093, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'san_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 152497, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'san_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 158553, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'san_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 163541, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'san_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 157225, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'san_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152791, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'san_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 154778, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'san_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 156473, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'san_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 161686, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 
'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'san_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 161190, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'san_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 158917, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'san_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 169347, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'san_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 159256, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'san_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 153825, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'san_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 157415, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 
'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'san_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 160732, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'san_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 156710, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'san_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 165888, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'san_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 159132, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'san_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 171375, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'san_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 154199, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'san_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157814, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'sat_Olck-asm_Beng': {'num_samples': 1503, 'number_of_characters': 169379, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'sat_Olck-ben_Beng': {'num_samples': 1503, 'number_of_characters': 163783, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'sat_Olck-brx_Deva': {'num_samples': 1503, 'number_of_characters': 169839, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'sat_Olck-doi_Deva': {'num_samples': 1503, 'number_of_characters': 174827, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'sat_Olck-eng_Latn': {'num_samples': 1503, 'number_of_characters': 168511, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'sat_Olck-gom_Deva': {'num_samples': 1503, 'number_of_characters': 164077, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'sat_Olck-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 166064, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'sat_Olck-hin_Deva': {'num_samples': 1503, 'number_of_characters': 167759, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'sat_Olck-kan_Knda': {'num_samples': 1503, 'number_of_characters': 172972, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'sat_Olck-kas_Arab': {'num_samples': 1503, 'number_of_characters': 172476, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'sat_Olck-mai_Deva': {'num_samples': 1503, 'number_of_characters': 170203, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 
'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'sat_Olck-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 180633, 'unique_pairs': 1501, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'sat_Olck-mar_Deva': {'num_samples': 1503, 'number_of_characters': 170542, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'sat_Olck-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 165111, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'sat_Olck-npi_Deva': {'num_samples': 1503, 'number_of_characters': 168701, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'sat_Olck-ory_Orya': {'num_samples': 1503, 'number_of_characters': 172018, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'sat_Olck-pan_Guru': {'num_samples': 1503, 'number_of_characters': 167996, 'unique_pairs': 1501, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 
'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'sat_Olck-san_Deva': {'num_samples': 1503, 'number_of_characters': 165888, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'sat_Olck-snd_Deva': {'num_samples': 1503, 'number_of_characters': 170418, 'unique_pairs': 1501, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'sat_Olck-tam_Taml': {'num_samples': 1503, 'number_of_characters': 182661, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'sat_Olck-tel_Telu': {'num_samples': 1503, 'number_of_characters': 165485, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'sat_Olck-urd_Arab': {'num_samples': 1503, 'number_of_characters': 169100, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'snd_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162623, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'snd_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 157027, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'snd_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 163083, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'snd_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 168071, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'snd_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161755, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'snd_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 157321, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'snd_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 159308, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'snd_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 161003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'snd_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 166216, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'snd_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165720, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'snd_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 163447, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'snd_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 173877, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'snd_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 163786, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 
'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'snd_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 158355, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'snd_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 161945, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'snd_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 165262, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'snd_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 161240, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'snd_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 159132, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'snd_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 170418, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 
'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'snd_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 175905, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'snd_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'snd_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 162344, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'tam_Taml-asm_Beng': {'num_samples': 1503, 'number_of_characters': 174866, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'tam_Taml-ben_Beng': {'num_samples': 1503, 'number_of_characters': 169270, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'tam_Taml-brx_Deva': {'num_samples': 1503, 'number_of_characters': 175326, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'tam_Taml-doi_Deva': {'num_samples': 1503, 'number_of_characters': 180314, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'tam_Taml-eng_Latn': {'num_samples': 1503, 'number_of_characters': 173998, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'tam_Taml-gom_Deva': {'num_samples': 1503, 'number_of_characters': 169564, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'tam_Taml-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 171551, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'tam_Taml-hin_Deva': {'num_samples': 1503, 'number_of_characters': 173246, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'tam_Taml-kan_Knda': {'num_samples': 1503, 'number_of_characters': 178459, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 
'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'tam_Taml-kas_Arab': {'num_samples': 1503, 'number_of_characters': 177963, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'tam_Taml-mai_Deva': {'num_samples': 1503, 'number_of_characters': 175690, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'tam_Taml-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 186120, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'tam_Taml-mar_Deva': {'num_samples': 1503, 'number_of_characters': 176029, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'tam_Taml-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 170598, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'tam_Taml-npi_Deva': {'num_samples': 1503, 'number_of_characters': 174188, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 
'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'tam_Taml-ory_Orya': {'num_samples': 1503, 'number_of_characters': 177505, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'tam_Taml-pan_Guru': {'num_samples': 1503, 'number_of_characters': 173483, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'tam_Taml-san_Deva': {'num_samples': 1503, 'number_of_characters': 171375, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'tam_Taml-sat_Olck': {'num_samples': 1503, 'number_of_characters': 182661, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'tam_Taml-snd_Deva': {'num_samples': 1503, 'number_of_characters': 175905, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'tam_Taml-tel_Telu': {'num_samples': 1503, 'number_of_characters': 170972, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'tam_Taml-urd_Arab': {'num_samples': 1503, 'number_of_characters': 174587, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'tel_Telu-asm_Beng': {'num_samples': 1503, 'number_of_characters': 157690, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'tel_Telu-ben_Beng': {'num_samples': 1503, 'number_of_characters': 152094, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'tel_Telu-brx_Deva': {'num_samples': 1503, 'number_of_characters': 158150, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'tel_Telu-doi_Deva': {'num_samples': 1503, 'number_of_characters': 163138, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'tel_Telu-eng_Latn': {'num_samples': 1503, 'number_of_characters': 156822, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'tel_Telu-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152388, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'tel_Telu-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 154375, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'tel_Telu-hin_Deva': {'num_samples': 1503, 'number_of_characters': 156070, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'tel_Telu-kan_Knda': {'num_samples': 1503, 'number_of_characters': 161283, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'tel_Telu-kas_Arab': {'num_samples': 1503, 'number_of_characters': 160787, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'tel_Telu-mai_Deva': {'num_samples': 1503, 'number_of_characters': 158514, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 
'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'tel_Telu-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 168944, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'tel_Telu-mar_Deva': {'num_samples': 1503, 'number_of_characters': 158853, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'tel_Telu-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 153422, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'tel_Telu-npi_Deva': {'num_samples': 1503, 'number_of_characters': 157012, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'tel_Telu-ory_Orya': {'num_samples': 1503, 'number_of_characters': 160329, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'tel_Telu-pan_Guru': {'num_samples': 1503, 'number_of_characters': 156307, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 
'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'tel_Telu-san_Deva': {'num_samples': 1503, 'number_of_characters': 154199, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'tel_Telu-sat_Olck': {'num_samples': 1503, 'number_of_characters': 165485, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'tel_Telu-snd_Deva': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'tel_Telu-tam_Taml': {'num_samples': 1503, 'number_of_characters': 170972, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'tel_Telu-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157411, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'urd_Arab-asm_Beng': {'num_samples': 1503, 'number_of_characters': 161305, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'urd_Arab-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155709, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'urd_Arab-brx_Deva': {'num_samples': 1503, 'number_of_characters': 161765, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'urd_Arab-doi_Deva': {'num_samples': 1503, 'number_of_characters': 166753, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'urd_Arab-eng_Latn': {'num_samples': 1503, 'number_of_characters': 160437, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'urd_Arab-gom_Deva': {'num_samples': 1503, 'number_of_characters': 156003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'urd_Arab-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 157990, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'urd_Arab-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159685, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'urd_Arab-kan_Knda': {'num_samples': 1503, 'number_of_characters': 164898, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'urd_Arab-kas_Arab': {'num_samples': 1503, 'number_of_characters': 164402, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'urd_Arab-mai_Deva': {'num_samples': 1503, 'number_of_characters': 162129, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'urd_Arab-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 172559, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'urd_Arab-mar_Deva': {'num_samples': 1503, 'number_of_characters': 162468, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 
'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'urd_Arab-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 157037, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'urd_Arab-npi_Deva': {'num_samples': 1503, 'number_of_characters': 160627, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'urd_Arab-ory_Orya': {'num_samples': 1503, 'number_of_characters': 163944, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'urd_Arab-pan_Guru': {'num_samples': 1503, 'number_of_characters': 159922, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'urd_Arab-san_Deva': {'num_samples': 1503, 'number_of_characters': 157814, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'urd_Arab-sat_Olck': {'num_samples': 1503, 'number_of_characters': 169100, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 
'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'urd_Arab-snd_Deva': {'num_samples': 1503, 'number_of_characters': 162344, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'urd_Arab-tam_Taml': {'num_samples': 1503, 'number_of_characters': 174587, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'urd_Arab-tel_Telu': {'num_samples': 1503, 'number_of_characters': 157411, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}}}} | +| [IN22GenBitextMining](https://huggingface.co/datasets/ai4bharat/IN22-Gen) (Jay Gala, 2023) | ['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Web, Legal, Government, News, Religious, Non-fiction, Written] | {'test': 518144} | {'test': {'num_samples': 518144, 'number_of_characters': 162367876, 'unique_pairs': 518101, 'min_sentence1_length': 9, 'average_sentence1_length': 156.68, 'max_sentence1_length': 692, 'unique_sentence1': 23550, 'min_sentence2_length': 9, 'average_sentence2_length': 156.68, 'max_sentence2_length': 692, 'unique_sentence2': 23550, 'hf_subset_descriptive_stats': {'asm_Beng-ben_Beng': {'num_samples': 1024, 'number_of_characters': 310622, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 
'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'asm_Beng-brx_Deva': {'num_samples': 1024, 'number_of_characters': 323609, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'asm_Beng-doi_Deva': {'num_samples': 1024, 'number_of_characters': 319020, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'asm_Beng-eng_Latn': {'num_samples': 1024, 'number_of_characters': 320098, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'asm_Beng-gom_Deva': {'num_samples': 1024, 'number_of_characters': 312594, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'asm_Beng-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 309440, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'asm_Beng-hin_Deva': {'num_samples': 1024, 'number_of_characters': 320106, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 
'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'asm_Beng-kan_Knda': {'num_samples': 1024, 'number_of_characters': 332064, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'asm_Beng-kas_Arab': {'num_samples': 1024, 'number_of_characters': 322764, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'asm_Beng-mai_Deva': {'num_samples': 1024, 'number_of_characters': 308682, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'asm_Beng-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 343636, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'asm_Beng-mar_Deva': {'num_samples': 1024, 'number_of_characters': 321784, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'asm_Beng-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 313134, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 
'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'asm_Beng-npi_Deva': {'num_samples': 1024, 'number_of_characters': 313419, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'asm_Beng-ory_Orya': {'num_samples': 1024, 'number_of_characters': 334226, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'asm_Beng-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306863, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'asm_Beng-san_Deva': {'num_samples': 1024, 'number_of_characters': 318079, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'asm_Beng-sat_Olck': {'num_samples': 1024, 'number_of_characters': 326732, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'asm_Beng-snd_Deva': {'num_samples': 1024, 'number_of_characters': 320421, 'unique_pairs': 1024, 'min_sentence1_length': 13, 
'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'asm_Beng-tam_Taml': {'num_samples': 1024, 'number_of_characters': 348346, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'asm_Beng-tel_Telu': {'num_samples': 1024, 'number_of_characters': 319045, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'asm_Beng-urd_Arab': {'num_samples': 1024, 'number_of_characters': 315134, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'ben_Beng-asm_Beng': {'num_samples': 1024, 'number_of_characters': 310622, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'ben_Beng-brx_Deva': {'num_samples': 1024, 'number_of_characters': 313313, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'ben_Beng-doi_Deva': {'num_samples': 1024, 'number_of_characters': 308724, 'unique_pairs': 1024, 
'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'ben_Beng-eng_Latn': {'num_samples': 1024, 'number_of_characters': 309802, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'ben_Beng-gom_Deva': {'num_samples': 1024, 'number_of_characters': 302298, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'ben_Beng-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 299144, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'ben_Beng-hin_Deva': {'num_samples': 1024, 'number_of_characters': 309810, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'ben_Beng-kan_Knda': {'num_samples': 1024, 'number_of_characters': 321768, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'ben_Beng-kas_Arab': {'num_samples': 1024, 'number_of_characters': 312468, 
'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'ben_Beng-mai_Deva': {'num_samples': 1024, 'number_of_characters': 298386, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'ben_Beng-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 333340, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'ben_Beng-mar_Deva': {'num_samples': 1024, 'number_of_characters': 311488, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'ben_Beng-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 302838, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'ben_Beng-npi_Deva': {'num_samples': 1024, 'number_of_characters': 303123, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'ben_Beng-ory_Orya': {'num_samples': 1024, 'number_of_characters': 
323930, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'ben_Beng-pan_Guru': {'num_samples': 1024, 'number_of_characters': 296567, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'ben_Beng-san_Deva': {'num_samples': 1024, 'number_of_characters': 307783, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'ben_Beng-sat_Olck': {'num_samples': 1024, 'number_of_characters': 316436, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'ben_Beng-snd_Deva': {'num_samples': 1024, 'number_of_characters': 310125, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'ben_Beng-tam_Taml': {'num_samples': 1024, 'number_of_characters': 338050, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'ben_Beng-tel_Telu': {'num_samples': 1024, 
'number_of_characters': 308749, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'ben_Beng-urd_Arab': {'num_samples': 1024, 'number_of_characters': 304838, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'brx_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 323609, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'brx_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 313313, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'brx_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 321711, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'brx_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 322789, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'brx_Deva-gom_Deva': 
{'num_samples': 1024, 'number_of_characters': 315285, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'brx_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 312131, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'brx_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 322797, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'brx_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 334755, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'brx_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 325455, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'brx_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 311373, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 
'brx_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 346327, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'brx_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 324475, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'brx_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 315825, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'brx_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 316110, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'brx_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 336917, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'brx_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 309554, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 
'unique_sentence2': 1024}, 'brx_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 320770, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'brx_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 329423, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'brx_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 323112, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'brx_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 351037, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'brx_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 321736, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'brx_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 317825, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 
'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'doi_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 319020, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'doi_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 308724, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'doi_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 321711, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'doi_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 318200, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'doi_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 310696, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'doi_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 307542, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 
'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'doi_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 318208, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'doi_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 330166, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'doi_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 320866, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'doi_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 306784, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'doi_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 341738, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'doi_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 319886, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 
'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'doi_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 311236, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'doi_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 311521, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'doi_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 332328, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'doi_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 304965, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'doi_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 316181, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'doi_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 324834, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 
'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'doi_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 318523, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'doi_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 346448, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'doi_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 317147, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'doi_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 313236, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'eng_Latn-asm_Beng': {'num_samples': 1024, 'number_of_characters': 320098, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'eng_Latn-ben_Beng': {'num_samples': 1024, 'number_of_characters': 309802, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 
'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'eng_Latn-brx_Deva': {'num_samples': 1024, 'number_of_characters': 322789, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'eng_Latn-doi_Deva': {'num_samples': 1024, 'number_of_characters': 318200, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'eng_Latn-gom_Deva': {'num_samples': 1024, 'number_of_characters': 311774, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'eng_Latn-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 308620, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'eng_Latn-hin_Deva': {'num_samples': 1024, 'number_of_characters': 319286, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'eng_Latn-kan_Knda': {'num_samples': 1024, 'number_of_characters': 331244, 'unique_pairs': 1024, 'min_sentence1_length': 17, 
'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'eng_Latn-kas_Arab': {'num_samples': 1024, 'number_of_characters': 321944, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'eng_Latn-mai_Deva': {'num_samples': 1024, 'number_of_characters': 307862, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'eng_Latn-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 342816, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'eng_Latn-mar_Deva': {'num_samples': 1024, 'number_of_characters': 320964, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'eng_Latn-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 312314, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'eng_Latn-npi_Deva': {'num_samples': 1024, 'number_of_characters': 312599, 'unique_pairs': 1024, 
'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'eng_Latn-ory_Orya': {'num_samples': 1024, 'number_of_characters': 333406, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'eng_Latn-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306043, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'eng_Latn-san_Deva': {'num_samples': 1024, 'number_of_characters': 317259, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'eng_Latn-sat_Olck': {'num_samples': 1024, 'number_of_characters': 325912, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'eng_Latn-snd_Deva': {'num_samples': 1024, 'number_of_characters': 319601, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'eng_Latn-tam_Taml': {'num_samples': 1024, 'number_of_characters': 347526, 
'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'eng_Latn-tel_Telu': {'num_samples': 1024, 'number_of_characters': 318225, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'eng_Latn-urd_Arab': {'num_samples': 1024, 'number_of_characters': 314314, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'gom_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 312594, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'gom_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 302298, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'gom_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 315285, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'gom_Deva-doi_Deva': {'num_samples': 1024, 
'number_of_characters': 310696, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'gom_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 311774, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'gom_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 301116, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'gom_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 311782, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'gom_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 323740, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'gom_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 314440, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'gom_Deva-mai_Deva': 
{'num_samples': 1024, 'number_of_characters': 300358, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'gom_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 335312, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'gom_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 313460, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'gom_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 304810, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'gom_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 305095, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'gom_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 325902, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 
'gom_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 298539, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'gom_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 309755, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'gom_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 318408, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'gom_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 312097, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'gom_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 340022, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'gom_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 310721, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 
'unique_sentence2': 1024}, 'gom_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 306810, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'guj_Gujr-asm_Beng': {'num_samples': 1024, 'number_of_characters': 309440, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'guj_Gujr-ben_Beng': {'num_samples': 1024, 'number_of_characters': 299144, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'guj_Gujr-brx_Deva': {'num_samples': 1024, 'number_of_characters': 312131, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'guj_Gujr-doi_Deva': {'num_samples': 1024, 'number_of_characters': 307542, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'guj_Gujr-eng_Latn': {'num_samples': 1024, 'number_of_characters': 308620, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 
'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'guj_Gujr-gom_Deva': {'num_samples': 1024, 'number_of_characters': 301116, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'guj_Gujr-hin_Deva': {'num_samples': 1024, 'number_of_characters': 308628, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'guj_Gujr-kan_Knda': {'num_samples': 1024, 'number_of_characters': 320586, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'guj_Gujr-kas_Arab': {'num_samples': 1024, 'number_of_characters': 311286, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'guj_Gujr-mai_Deva': {'num_samples': 1024, 'number_of_characters': 297204, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'guj_Gujr-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 332158, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 
'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'guj_Gujr-mar_Deva': {'num_samples': 1024, 'number_of_characters': 310306, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'guj_Gujr-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 301656, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'guj_Gujr-npi_Deva': {'num_samples': 1024, 'number_of_characters': 301941, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'guj_Gujr-ory_Orya': {'num_samples': 1024, 'number_of_characters': 322748, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'guj_Gujr-pan_Guru': {'num_samples': 1024, 'number_of_characters': 295385, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'guj_Gujr-san_Deva': {'num_samples': 1024, 'number_of_characters': 306601, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 
'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'guj_Gujr-sat_Olck': {'num_samples': 1024, 'number_of_characters': 315254, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'guj_Gujr-snd_Deva': {'num_samples': 1024, 'number_of_characters': 308943, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'guj_Gujr-tam_Taml': {'num_samples': 1024, 'number_of_characters': 336868, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'guj_Gujr-tel_Telu': {'num_samples': 1024, 'number_of_characters': 307567, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'guj_Gujr-urd_Arab': {'num_samples': 1024, 'number_of_characters': 303656, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'hin_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 320106, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 
'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'hin_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 309810, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'hin_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 322797, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'hin_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 318208, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'hin_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 319286, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'hin_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 311782, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'hin_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 308628, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 
'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'hin_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 331252, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'hin_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 321952, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'hin_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 307870, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'hin_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 342824, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'hin_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 320972, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'hin_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 312322, 'unique_pairs': 1024, 'min_sentence1_length': 21, 
'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'hin_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 312607, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'hin_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 333414, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'hin_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306051, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'hin_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 317267, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'hin_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 325920, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'hin_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 319609, 'unique_pairs': 1024, 
'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'hin_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 347534, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'hin_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 318233, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'hin_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 314322, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'kan_Knda-asm_Beng': {'num_samples': 1024, 'number_of_characters': 332064, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'kan_Knda-ben_Beng': {'num_samples': 1024, 'number_of_characters': 321768, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'kan_Knda-brx_Deva': {'num_samples': 1024, 'number_of_characters': 334755, 
'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'kan_Knda-doi_Deva': {'num_samples': 1024, 'number_of_characters': 330166, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'kan_Knda-eng_Latn': {'num_samples': 1024, 'number_of_characters': 331244, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'kan_Knda-gom_Deva': {'num_samples': 1024, 'number_of_characters': 323740, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'kan_Knda-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 320586, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'kan_Knda-hin_Deva': {'num_samples': 1024, 'number_of_characters': 331252, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'kan_Knda-kas_Arab': {'num_samples': 1024, 
'number_of_characters': 333910, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'kan_Knda-mai_Deva': {'num_samples': 1024, 'number_of_characters': 319828, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'kan_Knda-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 354782, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'kan_Knda-mar_Deva': {'num_samples': 1024, 'number_of_characters': 332930, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'kan_Knda-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 324280, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'kan_Knda-npi_Deva': {'num_samples': 1024, 'number_of_characters': 324565, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'kan_Knda-ory_Orya': 
{'num_samples': 1024, 'number_of_characters': 345372, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'kan_Knda-pan_Guru': {'num_samples': 1024, 'number_of_characters': 318009, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'kan_Knda-san_Deva': {'num_samples': 1024, 'number_of_characters': 329225, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'kan_Knda-sat_Olck': {'num_samples': 1024, 'number_of_characters': 337878, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'kan_Knda-snd_Deva': {'num_samples': 1024, 'number_of_characters': 331567, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'kan_Knda-tam_Taml': {'num_samples': 1024, 'number_of_characters': 359492, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 
'kan_Knda-tel_Telu': {'num_samples': 1024, 'number_of_characters': 330191, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'kan_Knda-urd_Arab': {'num_samples': 1024, 'number_of_characters': 326280, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'kas_Arab-asm_Beng': {'num_samples': 1024, 'number_of_characters': 322764, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'kas_Arab-ben_Beng': {'num_samples': 1024, 'number_of_characters': 312468, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'kas_Arab-brx_Deva': {'num_samples': 1024, 'number_of_characters': 325455, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'kas_Arab-doi_Deva': {'num_samples': 1024, 'number_of_characters': 320866, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 
'unique_sentence2': 1024}, 'kas_Arab-eng_Latn': {'num_samples': 1024, 'number_of_characters': 321944, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'kas_Arab-gom_Deva': {'num_samples': 1024, 'number_of_characters': 314440, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'kas_Arab-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 311286, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'kas_Arab-hin_Deva': {'num_samples': 1024, 'number_of_characters': 321952, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'kas_Arab-kan_Knda': {'num_samples': 1024, 'number_of_characters': 333910, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'kas_Arab-mai_Deva': {'num_samples': 1024, 'number_of_characters': 310528, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 
'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'kas_Arab-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 345482, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'kas_Arab-mar_Deva': {'num_samples': 1024, 'number_of_characters': 323630, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'kas_Arab-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 314980, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'kas_Arab-npi_Deva': {'num_samples': 1024, 'number_of_characters': 315265, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'kas_Arab-ory_Orya': {'num_samples': 1024, 'number_of_characters': 336072, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'kas_Arab-pan_Guru': {'num_samples': 1024, 'number_of_characters': 308709, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 
'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'kas_Arab-san_Deva': {'num_samples': 1024, 'number_of_characters': 319925, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'kas_Arab-sat_Olck': {'num_samples': 1024, 'number_of_characters': 328578, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'kas_Arab-snd_Deva': {'num_samples': 1024, 'number_of_characters': 322267, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'kas_Arab-tam_Taml': {'num_samples': 1024, 'number_of_characters': 350192, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'kas_Arab-tel_Telu': {'num_samples': 1024, 'number_of_characters': 320891, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'kas_Arab-urd_Arab': {'num_samples': 1024, 'number_of_characters': 316980, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 
'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mai_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 308682, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mai_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 298386, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mai_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 311373, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'mai_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 306784, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mai_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 307862, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mai_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 300358, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 
'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mai_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 297204, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mai_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 307870, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mai_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 319828, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'mai_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 310528, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mai_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 331400, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'mai_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 309548, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 
'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'mai_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 300898, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'mai_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 301183, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mai_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 321990, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mai_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 294627, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mai_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 305843, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mai_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 314496, 'unique_pairs': 1024, 'min_sentence1_length': 14, 
'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mai_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 308185, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mai_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 336110, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mai_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 306809, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mai_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 302898, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mal_Mlym-asm_Beng': {'num_samples': 1024, 'number_of_characters': 343636, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mal_Mlym-ben_Beng': {'num_samples': 1024, 'number_of_characters': 333340, 'unique_pairs': 1024, 
'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mal_Mlym-brx_Deva': {'num_samples': 1024, 'number_of_characters': 346327, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'mal_Mlym-doi_Deva': {'num_samples': 1024, 'number_of_characters': 341738, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mal_Mlym-eng_Latn': {'num_samples': 1024, 'number_of_characters': 342816, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mal_Mlym-gom_Deva': {'num_samples': 1024, 'number_of_characters': 335312, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mal_Mlym-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 332158, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mal_Mlym-hin_Deva': {'num_samples': 1024, 'number_of_characters': 342824, 
'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mal_Mlym-kan_Knda': {'num_samples': 1024, 'number_of_characters': 354782, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'mal_Mlym-kas_Arab': {'num_samples': 1024, 'number_of_characters': 345482, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mal_Mlym-mai_Deva': {'num_samples': 1024, 'number_of_characters': 331400, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'mal_Mlym-mar_Deva': {'num_samples': 1024, 'number_of_characters': 344502, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'mal_Mlym-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 335852, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'mal_Mlym-npi_Deva': {'num_samples': 1024, 
'number_of_characters': 336137, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mal_Mlym-ory_Orya': {'num_samples': 1024, 'number_of_characters': 356944, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mal_Mlym-pan_Guru': {'num_samples': 1024, 'number_of_characters': 329581, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mal_Mlym-san_Deva': {'num_samples': 1024, 'number_of_characters': 340797, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mal_Mlym-sat_Olck': {'num_samples': 1024, 'number_of_characters': 349450, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mal_Mlym-snd_Deva': {'num_samples': 1024, 'number_of_characters': 343139, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mal_Mlym-tam_Taml': 
{'num_samples': 1024, 'number_of_characters': 371064, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mal_Mlym-tel_Telu': {'num_samples': 1024, 'number_of_characters': 341763, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mal_Mlym-urd_Arab': {'num_samples': 1024, 'number_of_characters': 337852, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mar_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 321784, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mar_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 311488, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mar_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 324475, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 
'mar_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 319886, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mar_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 320964, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mar_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 313460, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mar_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 310306, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mar_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 320972, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mar_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 332930, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 
'unique_sentence2': 1024}, 'mar_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 323630, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mar_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 309548, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'mar_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 344502, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'mar_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 314000, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'mar_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 314285, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mar_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 335092, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 
'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mar_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 307729, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mar_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 318945, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mar_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 327598, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mar_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 321287, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mar_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 349212, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mar_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 319911, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 
'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mar_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 316000, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mni_Mtei-asm_Beng': {'num_samples': 1024, 'number_of_characters': 313134, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mni_Mtei-ben_Beng': {'num_samples': 1024, 'number_of_characters': 302838, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mni_Mtei-brx_Deva': {'num_samples': 1024, 'number_of_characters': 315825, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'mni_Mtei-doi_Deva': {'num_samples': 1024, 'number_of_characters': 311236, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mni_Mtei-eng_Latn': {'num_samples': 1024, 'number_of_characters': 312314, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 
'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mni_Mtei-gom_Deva': {'num_samples': 1024, 'number_of_characters': 304810, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mni_Mtei-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 301656, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mni_Mtei-hin_Deva': {'num_samples': 1024, 'number_of_characters': 312322, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mni_Mtei-kan_Knda': {'num_samples': 1024, 'number_of_characters': 324280, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'mni_Mtei-kas_Arab': {'num_samples': 1024, 'number_of_characters': 314980, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mni_Mtei-mai_Deva': {'num_samples': 1024, 'number_of_characters': 300898, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 
'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'mni_Mtei-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 335852, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'mni_Mtei-mar_Deva': {'num_samples': 1024, 'number_of_characters': 314000, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'mni_Mtei-npi_Deva': {'num_samples': 1024, 'number_of_characters': 305635, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mni_Mtei-ory_Orya': {'num_samples': 1024, 'number_of_characters': 326442, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mni_Mtei-pan_Guru': {'num_samples': 1024, 'number_of_characters': 299079, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mni_Mtei-san_Deva': {'num_samples': 1024, 'number_of_characters': 310295, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 
'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mni_Mtei-sat_Olck': {'num_samples': 1024, 'number_of_characters': 318948, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mni_Mtei-snd_Deva': {'num_samples': 1024, 'number_of_characters': 312637, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mni_Mtei-tam_Taml': {'num_samples': 1024, 'number_of_characters': 340562, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mni_Mtei-tel_Telu': {'num_samples': 1024, 'number_of_characters': 311261, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mni_Mtei-urd_Arab': {'num_samples': 1024, 'number_of_characters': 307350, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'npi_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 313419, 'unique_pairs': 1024, 'min_sentence1_length': 10, 
'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'npi_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 303123, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'npi_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 316110, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'npi_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 311521, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'npi_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 312599, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'npi_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 305095, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'npi_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 301941, 'unique_pairs': 1024, 
'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'npi_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 312607, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'npi_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 324565, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'npi_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 315265, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'npi_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 301183, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'npi_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 336137, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'npi_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 314285, 
'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'npi_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 305635, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'npi_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 326727, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'npi_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 299364, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'npi_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 310580, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'npi_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 319233, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'npi_Deva-snd_Deva': {'num_samples': 1024, 
'number_of_characters': 312922, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'npi_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 340847, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'npi_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 311546, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'npi_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 307635, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'ory_Orya-asm_Beng': {'num_samples': 1024, 'number_of_characters': 334226, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'ory_Orya-ben_Beng': {'num_samples': 1024, 'number_of_characters': 323930, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'ory_Orya-brx_Deva': 
{'num_samples': 1024, 'number_of_characters': 336917, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'ory_Orya-doi_Deva': {'num_samples': 1024, 'number_of_characters': 332328, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'ory_Orya-eng_Latn': {'num_samples': 1024, 'number_of_characters': 333406, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'ory_Orya-gom_Deva': {'num_samples': 1024, 'number_of_characters': 325902, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'ory_Orya-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 322748, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'ory_Orya-hin_Deva': {'num_samples': 1024, 'number_of_characters': 333414, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 
'ory_Orya-kan_Knda': {'num_samples': 1024, 'number_of_characters': 345372, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'ory_Orya-kas_Arab': {'num_samples': 1024, 'number_of_characters': 336072, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'ory_Orya-mai_Deva': {'num_samples': 1024, 'number_of_characters': 321990, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'ory_Orya-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 356944, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'ory_Orya-mar_Deva': {'num_samples': 1024, 'number_of_characters': 335092, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'ory_Orya-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 326442, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 
'unique_sentence2': 1024}, 'ory_Orya-npi_Deva': {'num_samples': 1024, 'number_of_characters': 326727, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'ory_Orya-pan_Guru': {'num_samples': 1024, 'number_of_characters': 320171, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'ory_Orya-san_Deva': {'num_samples': 1024, 'number_of_characters': 331387, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'ory_Orya-sat_Olck': {'num_samples': 1024, 'number_of_characters': 340040, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'ory_Orya-snd_Deva': {'num_samples': 1024, 'number_of_characters': 333729, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'ory_Orya-tam_Taml': {'num_samples': 1024, 'number_of_characters': 361654, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 
'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'ory_Orya-tel_Telu': {'num_samples': 1024, 'number_of_characters': 332353, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'ory_Orya-urd_Arab': {'num_samples': 1024, 'number_of_characters': 328442, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'pan_Guru-asm_Beng': {'num_samples': 1024, 'number_of_characters': 306863, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'pan_Guru-ben_Beng': {'num_samples': 1024, 'number_of_characters': 296567, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'pan_Guru-brx_Deva': {'num_samples': 1024, 'number_of_characters': 309554, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'pan_Guru-doi_Deva': {'num_samples': 1024, 'number_of_characters': 304965, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 
'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'pan_Guru-eng_Latn': {'num_samples': 1024, 'number_of_characters': 306043, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'pan_Guru-gom_Deva': {'num_samples': 1024, 'number_of_characters': 298539, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'pan_Guru-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 295385, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'pan_Guru-hin_Deva': {'num_samples': 1024, 'number_of_characters': 306051, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'pan_Guru-kan_Knda': {'num_samples': 1024, 'number_of_characters': 318009, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'pan_Guru-kas_Arab': {'num_samples': 1024, 'number_of_characters': 308709, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 
'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'pan_Guru-mai_Deva': {'num_samples': 1024, 'number_of_characters': 294627, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'pan_Guru-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 329581, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'pan_Guru-mar_Deva': {'num_samples': 1024, 'number_of_characters': 307729, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'pan_Guru-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 299079, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'pan_Guru-npi_Deva': {'num_samples': 1024, 'number_of_characters': 299364, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'pan_Guru-ory_Orya': {'num_samples': 1024, 'number_of_characters': 320171, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 
'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'pan_Guru-san_Deva': {'num_samples': 1024, 'number_of_characters': 304024, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'pan_Guru-sat_Olck': {'num_samples': 1024, 'number_of_characters': 312677, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'pan_Guru-snd_Deva': {'num_samples': 1024, 'number_of_characters': 306366, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'pan_Guru-tam_Taml': {'num_samples': 1024, 'number_of_characters': 334291, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'pan_Guru-tel_Telu': {'num_samples': 1024, 'number_of_characters': 304990, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'pan_Guru-urd_Arab': {'num_samples': 1024, 'number_of_characters': 301079, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 
'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'san_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 318079, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'san_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 307783, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'san_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 320770, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'san_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 316181, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'san_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 317259, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'san_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 309755, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 
153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'san_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 306601, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'san_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 317267, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'san_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 329225, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'san_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 319925, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'san_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 305843, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'san_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 340797, 'unique_pairs': 1024, 'min_sentence1_length': 9, 
'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'san_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 318945, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'san_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 310295, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'san_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 310580, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'san_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 331387, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'san_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 304024, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'san_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 323893, 'unique_pairs': 1024, 
'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'san_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 317582, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'san_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 345507, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'san_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 316206, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'san_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 312295, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'sat_Olck-asm_Beng': {'num_samples': 1024, 'number_of_characters': 326732, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'sat_Olck-ben_Beng': {'num_samples': 1024, 'number_of_characters': 316436, 
'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'sat_Olck-brx_Deva': {'num_samples': 1024, 'number_of_characters': 329423, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'sat_Olck-doi_Deva': {'num_samples': 1024, 'number_of_characters': 324834, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'sat_Olck-eng_Latn': {'num_samples': 1024, 'number_of_characters': 325912, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'sat_Olck-gom_Deva': {'num_samples': 1024, 'number_of_characters': 318408, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'sat_Olck-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 315254, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'sat_Olck-hin_Deva': {'num_samples': 1024, 
'number_of_characters': 325920, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'sat_Olck-kan_Knda': {'num_samples': 1024, 'number_of_characters': 337878, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'sat_Olck-kas_Arab': {'num_samples': 1024, 'number_of_characters': 328578, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'sat_Olck-mai_Deva': {'num_samples': 1024, 'number_of_characters': 314496, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'sat_Olck-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 349450, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'sat_Olck-mar_Deva': {'num_samples': 1024, 'number_of_characters': 327598, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'sat_Olck-mni_Mtei': 
{'num_samples': 1024, 'number_of_characters': 318948, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'sat_Olck-npi_Deva': {'num_samples': 1024, 'number_of_characters': 319233, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'sat_Olck-ory_Orya': {'num_samples': 1024, 'number_of_characters': 340040, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'sat_Olck-pan_Guru': {'num_samples': 1024, 'number_of_characters': 312677, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'sat_Olck-san_Deva': {'num_samples': 1024, 'number_of_characters': 323893, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'sat_Olck-snd_Deva': {'num_samples': 1024, 'number_of_characters': 326235, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 
'sat_Olck-tam_Taml': {'num_samples': 1024, 'number_of_characters': 354160, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'sat_Olck-tel_Telu': {'num_samples': 1024, 'number_of_characters': 324859, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'sat_Olck-urd_Arab': {'num_samples': 1024, 'number_of_characters': 320948, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'snd_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 320421, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'snd_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 310125, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'snd_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 323112, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 
'unique_sentence2': 1024}, 'snd_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 318523, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'snd_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 319601, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'snd_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 312097, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'snd_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 308943, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'snd_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 319609, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'snd_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 331567, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 
'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'snd_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 322267, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'snd_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 308185, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'snd_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 343139, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'snd_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 321287, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'snd_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 312637, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'snd_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 312922, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 
'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'snd_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 333729, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'snd_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306366, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'snd_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 317582, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'snd_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 326235, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'snd_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 347849, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'snd_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 318548, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 
'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'snd_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 314637, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'tam_Taml-asm_Beng': {'num_samples': 1024, 'number_of_characters': 348346, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'tam_Taml-ben_Beng': {'num_samples': 1024, 'number_of_characters': 338050, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'tam_Taml-brx_Deva': {'num_samples': 1024, 'number_of_characters': 351037, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'tam_Taml-doi_Deva': {'num_samples': 1024, 'number_of_characters': 346448, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'tam_Taml-eng_Latn': {'num_samples': 1024, 'number_of_characters': 347526, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 
'unique_sentence1': 1023, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'tam_Taml-gom_Deva': {'num_samples': 1024, 'number_of_characters': 340022, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'tam_Taml-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 336868, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'tam_Taml-hin_Deva': {'num_samples': 1024, 'number_of_characters': 347534, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'tam_Taml-kan_Knda': {'num_samples': 1024, 'number_of_characters': 359492, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'tam_Taml-kas_Arab': {'num_samples': 1024, 'number_of_characters': 350192, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'tam_Taml-mai_Deva': {'num_samples': 1024, 'number_of_characters': 336110, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 
'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'tam_Taml-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 371064, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'tam_Taml-mar_Deva': {'num_samples': 1024, 'number_of_characters': 349212, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'tam_Taml-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 340562, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'tam_Taml-npi_Deva': {'num_samples': 1024, 'number_of_characters': 340847, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'tam_Taml-ory_Orya': {'num_samples': 1024, 'number_of_characters': 361654, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'tam_Taml-pan_Guru': {'num_samples': 1024, 'number_of_characters': 334291, 'unique_pairs': 1024, 'min_sentence1_length': 32, 
'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'tam_Taml-san_Deva': {'num_samples': 1024, 'number_of_characters': 345507, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'tam_Taml-sat_Olck': {'num_samples': 1024, 'number_of_characters': 354160, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'tam_Taml-snd_Deva': {'num_samples': 1024, 'number_of_characters': 347849, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'tam_Taml-tel_Telu': {'num_samples': 1024, 'number_of_characters': 346473, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'tam_Taml-urd_Arab': {'num_samples': 1024, 'number_of_characters': 342562, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'tel_Telu-asm_Beng': {'num_samples': 1024, 'number_of_characters': 319045, 'unique_pairs': 1024, 
'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'tel_Telu-ben_Beng': {'num_samples': 1024, 'number_of_characters': 308749, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'tel_Telu-brx_Deva': {'num_samples': 1024, 'number_of_characters': 321736, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'tel_Telu-doi_Deva': {'num_samples': 1024, 'number_of_characters': 317147, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'tel_Telu-eng_Latn': {'num_samples': 1024, 'number_of_characters': 318225, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'tel_Telu-gom_Deva': {'num_samples': 1024, 'number_of_characters': 310721, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'tel_Telu-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 307567, 
'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'tel_Telu-hin_Deva': {'num_samples': 1024, 'number_of_characters': 318233, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'tel_Telu-kan_Knda': {'num_samples': 1024, 'number_of_characters': 330191, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'tel_Telu-kas_Arab': {'num_samples': 1024, 'number_of_characters': 320891, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'tel_Telu-mai_Deva': {'num_samples': 1024, 'number_of_characters': 306809, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'tel_Telu-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 341763, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'tel_Telu-mar_Deva': {'num_samples': 1024, 
'number_of_characters': 319911, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'tel_Telu-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 311261, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'tel_Telu-npi_Deva': {'num_samples': 1024, 'number_of_characters': 311546, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'tel_Telu-ory_Orya': {'num_samples': 1024, 'number_of_characters': 332353, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'tel_Telu-pan_Guru': {'num_samples': 1024, 'number_of_characters': 304990, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'tel_Telu-san_Deva': {'num_samples': 1024, 'number_of_characters': 316206, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'tel_Telu-sat_Olck': 
{'num_samples': 1024, 'number_of_characters': 324859, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'tel_Telu-snd_Deva': {'num_samples': 1024, 'number_of_characters': 318548, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'tel_Telu-tam_Taml': {'num_samples': 1024, 'number_of_characters': 346473, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'tel_Telu-urd_Arab': {'num_samples': 1024, 'number_of_characters': 313261, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'urd_Arab-asm_Beng': {'num_samples': 1024, 'number_of_characters': 315134, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'urd_Arab-ben_Beng': {'num_samples': 1024, 'number_of_characters': 304838, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 
'urd_Arab-brx_Deva': {'num_samples': 1024, 'number_of_characters': 317825, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'urd_Arab-doi_Deva': {'num_samples': 1024, 'number_of_characters': 313236, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'urd_Arab-eng_Latn': {'num_samples': 1024, 'number_of_characters': 314314, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'urd_Arab-gom_Deva': {'num_samples': 1024, 'number_of_characters': 306810, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'urd_Arab-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 303656, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'urd_Arab-hin_Deva': {'num_samples': 1024, 'number_of_characters': 314322, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 
'unique_sentence2': 1024}, 'urd_Arab-kan_Knda': {'num_samples': 1024, 'number_of_characters': 326280, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'urd_Arab-kas_Arab': {'num_samples': 1024, 'number_of_characters': 316980, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'urd_Arab-mai_Deva': {'num_samples': 1024, 'number_of_characters': 302898, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'urd_Arab-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 337852, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'urd_Arab-mar_Deva': {'num_samples': 1024, 'number_of_characters': 316000, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'urd_Arab-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 307350, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 
'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'urd_Arab-npi_Deva': {'num_samples': 1024, 'number_of_characters': 307635, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'urd_Arab-ory_Orya': {'num_samples': 1024, 'number_of_characters': 328442, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'urd_Arab-pan_Guru': {'num_samples': 1024, 'number_of_characters': 301079, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'urd_Arab-san_Deva': {'num_samples': 1024, 'number_of_characters': 312295, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'urd_Arab-sat_Olck': {'num_samples': 1024, 'number_of_characters': 320948, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'urd_Arab-snd_Deva': {'num_samples': 1024, 'number_of_characters': 314637, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 
'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'urd_Arab-tam_Taml': {'num_samples': 1024, 'number_of_characters': 342562, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'urd_Arab-tel_Telu': {'num_samples': 1024, 'number_of_characters': 313261, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}}}} | +| [IWSLT2017BitextMining](https://aclanthology.org/2017.iwslt-1.1/) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'jpn', 'kor', 'nld', 'ron'] | BitextMining | s2s | [Non-fiction, Fiction, Written] | {'validation': 21938} | {'validation': {'num_samples': 21938, 'number_of_characters': 4256244, 'unique_pairs': 21840, 'min_sentence1_length': 2, 'average_sentence1_length': 97.01, 'max_sentence1_length': 521, 'unique_sentence1': 11563, 'min_sentence2_length': 2, 'average_sentence2_length': 97.01, 'max_sentence2_length': 521, 'unique_sentence2': 11563, 'hf_subset_descriptive_stats': {'ar-en': {'num_samples': 888, 'number_of_characters': 172499, 'unique_pairs': 887, 'min_sentence1_length': 4, 'average_sentence1_length': 85.49, 'max_sentence1_length': 369, 'unique_sentence1': 887, 'min_sentence2_length': 10, 'average_sentence2_length': 108.77, 'max_sentence2_length': 462, 'unique_sentence2': 881}, 'de-en': {'num_samples': 888, 'number_of_characters': 202336, 'unique_pairs': 883, 'min_sentence1_length': 6, 'average_sentence1_length': 119.03, 'max_sentence1_length': 521, 'unique_sentence1': 881, 'min_sentence2_length': 10, 'average_sentence2_length': 108.83, 'max_sentence2_length': 462, 'unique_sentence2': 881}, 'en-ar': 
{'num_samples': 888, 'number_of_characters': 172499, 'unique_pairs': 887, 'min_sentence1_length': 10, 'average_sentence1_length': 108.77, 'max_sentence1_length': 462, 'unique_sentence1': 881, 'min_sentence2_length': 4, 'average_sentence2_length': 85.49, 'max_sentence2_length': 369, 'unique_sentence2': 887}, 'en-de': {'num_samples': 888, 'number_of_characters': 202336, 'unique_pairs': 883, 'min_sentence1_length': 10, 'average_sentence1_length': 108.83, 'max_sentence1_length': 462, 'unique_sentence1': 881, 'min_sentence2_length': 6, 'average_sentence2_length': 119.03, 'max_sentence2_length': 521, 'unique_sentence2': 881}, 'en-fr': {'num_samples': 890, 'number_of_characters': 197619, 'unique_pairs': 883, 'min_sentence1_length': 10, 'average_sentence1_length': 108.41, 'max_sentence1_length': 462, 'unique_sentence1': 883, 'min_sentence2_length': 6, 'average_sentence2_length': 113.63, 'max_sentence2_length': 493, 'unique_sentence2': 881}, 'en-it': {'num_samples': 929, 'number_of_characters': 191803, 'unique_pairs': 924, 'min_sentence1_length': 10, 'average_sentence1_length': 103.0, 'max_sentence1_length': 433, 'unique_sentence1': 922, 'min_sentence2_length': 7, 'average_sentence2_length': 103.46, 'max_sentence2_length': 444, 'unique_sentence2': 918}, 'en-ja': {'num_samples': 871, 'number_of_characters': 132742, 'unique_pairs': 867, 'min_sentence1_length': 10, 'average_sentence1_length': 109.81, 'max_sentence1_length': 462, 'unique_sentence1': 864, 'min_sentence2_length': 5, 'average_sentence2_length': 42.59, 'max_sentence2_length': 225, 'unique_sentence2': 866}, 'en-ko': {'num_samples': 879, 'number_of_characters': 142659, 'unique_pairs': 874, 'min_sentence1_length': 10, 'average_sentence1_length': 107.74, 'max_sentence1_length': 462, 'unique_sentence1': 872, 'min_sentence2_length': 3, 'average_sentence2_length': 54.56, 'max_sentence2_length': 250, 'unique_sentence2': 872}, 'en-nl': {'num_samples': 1003, 'number_of_characters': 189637, 'unique_pairs': 1000, 
'min_sentence1_length': 10, 'average_sentence1_length': 95.27, 'max_sentence1_length': 433, 'unique_sentence1': 996, 'min_sentence2_length': 4, 'average_sentence2_length': 93.8, 'max_sentence2_length': 477, 'unique_sentence2': 1000}, 'en-ro': {'num_samples': 914, 'number_of_characters': 194128, 'unique_pairs': 910, 'min_sentence1_length': 10, 'average_sentence1_length': 104.72, 'max_sentence1_length': 433, 'unique_sentence1': 907, 'min_sentence2_length': 9, 'average_sentence2_length': 107.67, 'max_sentence2_length': 448, 'unique_sentence2': 910}, 'en-zh': {'num_samples': 879, 'number_of_characters': 131126, 'unique_pairs': 877, 'min_sentence1_length': 10, 'average_sentence1_length': 109.37, 'max_sentence1_length': 462, 'unique_sentence1': 872, 'min_sentence2_length': 2, 'average_sentence2_length': 39.81, 'max_sentence2_length': 230, 'unique_sentence2': 867}, 'fr-en': {'num_samples': 890, 'number_of_characters': 197619, 'unique_pairs': 883, 'min_sentence1_length': 6, 'average_sentence1_length': 113.63, 'max_sentence1_length': 493, 'unique_sentence1': 881, 'min_sentence2_length': 10, 'average_sentence2_length': 108.41, 'max_sentence2_length': 462, 'unique_sentence2': 883}, 'it-en': {'num_samples': 929, 'number_of_characters': 191803, 'unique_pairs': 924, 'min_sentence1_length': 7, 'average_sentence1_length': 103.46, 'max_sentence1_length': 444, 'unique_sentence1': 918, 'min_sentence2_length': 10, 'average_sentence2_length': 103.0, 'max_sentence2_length': 433, 'unique_sentence2': 922}, 'it-nl': {'num_samples': 1001, 'number_of_characters': 188858, 'unique_pairs': 998, 'min_sentence1_length': 7, 'average_sentence1_length': 94.64, 'max_sentence1_length': 459, 'unique_sentence1': 994, 'min_sentence2_length': 7, 'average_sentence2_length': 94.03, 'max_sentence2_length': 505, 'unique_sentence2': 998}, 'it-ro': {'num_samples': 914, 'number_of_characters': 193339, 'unique_pairs': 911, 'min_sentence1_length': 7, 'average_sentence1_length': 103.91, 'max_sentence1_length': 435, 
'unique_sentence1': 907, 'min_sentence2_length': 9, 'average_sentence2_length': 107.62, 'max_sentence2_length': 448, 'unique_sentence2': 910}, 'ja-en': {'num_samples': 871, 'number_of_characters': 132742, 'unique_pairs': 867, 'min_sentence1_length': 5, 'average_sentence1_length': 42.59, 'max_sentence1_length': 225, 'unique_sentence1': 866, 'min_sentence2_length': 10, 'average_sentence2_length': 109.81, 'max_sentence2_length': 462, 'unique_sentence2': 864}, 'ko-en': {'num_samples': 879, 'number_of_characters': 142659, 'unique_pairs': 874, 'min_sentence1_length': 3, 'average_sentence1_length': 54.56, 'max_sentence1_length': 250, 'unique_sentence1': 872, 'min_sentence2_length': 10, 'average_sentence2_length': 107.74, 'max_sentence2_length': 462, 'unique_sentence2': 872}, 'nl-en': {'num_samples': 1003, 'number_of_characters': 189637, 'unique_pairs': 1000, 'min_sentence1_length': 4, 'average_sentence1_length': 93.8, 'max_sentence1_length': 477, 'unique_sentence1': 1000, 'min_sentence2_length': 10, 'average_sentence2_length': 95.27, 'max_sentence2_length': 433, 'unique_sentence2': 996}, 'nl-it': {'num_samples': 1001, 'number_of_characters': 188858, 'unique_pairs': 998, 'min_sentence1_length': 7, 'average_sentence1_length': 94.03, 'max_sentence1_length': 505, 'unique_sentence1': 998, 'min_sentence2_length': 7, 'average_sentence2_length': 94.64, 'max_sentence2_length': 459, 'unique_sentence2': 994}, 'nl-ro': {'num_samples': 913, 'number_of_characters': 191376, 'unique_pairs': 911, 'min_sentence1_length': 7, 'average_sentence1_length': 102.02, 'max_sentence1_length': 478, 'unique_sentence1': 909, 'min_sentence2_length': 9, 'average_sentence2_length': 107.59, 'max_sentence2_length': 515, 'unique_sentence2': 909}, 'ro-en': {'num_samples': 914, 'number_of_characters': 194128, 'unique_pairs': 910, 'min_sentence1_length': 9, 'average_sentence1_length': 107.67, 'max_sentence1_length': 448, 'unique_sentence1': 910, 'min_sentence2_length': 10, 'average_sentence2_length': 104.72, 
'max_sentence2_length': 433, 'unique_sentence2': 907}, 'ro-it': {'num_samples': 914, 'number_of_characters': 193339, 'unique_pairs': 911, 'min_sentence1_length': 9, 'average_sentence1_length': 107.62, 'max_sentence1_length': 448, 'unique_sentence1': 910, 'min_sentence2_length': 7, 'average_sentence2_length': 103.91, 'max_sentence2_length': 435, 'unique_sentence2': 907}, 'ro-nl': {'num_samples': 913, 'number_of_characters': 191376, 'unique_pairs': 911, 'min_sentence1_length': 9, 'average_sentence1_length': 107.59, 'max_sentence1_length': 515, 'unique_sentence1': 909, 'min_sentence2_length': 7, 'average_sentence2_length': 102.02, 'max_sentence2_length': 478, 'unique_sentence2': 909}, 'zh-en': {'num_samples': 879, 'number_of_characters': 131126, 'unique_pairs': 877, 'min_sentence1_length': 2, 'average_sentence1_length': 39.81, 'max_sentence1_length': 230, 'unique_sentence1': 867, 'min_sentence2_length': 10, 'average_sentence2_length': 109.37, 'max_sentence2_length': 462, 'unique_sentence2': 872}}}} | | [ImdbClassification](http://www.aclweb.org/anthology/P11-1015) | ['eng'] | Classification | p2p | [Reviews, Written] | None | None | | [InappropriatenessClassification](https://aclanthology.org/2021.bsnlp-1.4) | ['rus'] | Classification | s2s | [Web, Social, Written] | None | None | | [IndicCrosslingualSTS](https://huggingface.co/datasets/jaygala24/indic_sts) (Ramesh et al., 2022) | ['asm', 'ben', 'eng', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] | STS | s2s | [News, Non-fiction, Web, Spoken, Government, Written, Spoken] | None | None | -| [IndicGenBenchFloresBitextMining](https://github.com/google-research-datasets/indic-gen-bench/) (Harman Singh, 2024) | ['asm', 'awa', 'ben', 'bgc', 'bho', 'bod', 'boy', 'eng', 'gbm', 'gom', 'guj', 'hin', 'hne', 'kan', 'mai', 'mal', 'mar', 'mni', 'mup', 'mwr', 'nep', 'ory', 'pan', 'pus', 'raj', 'san', 'sat', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Web, News, Written] | None | None | +| 
[IndicGenBenchFloresBitextMining](https://github.com/google-research-datasets/indic-gen-bench/) (Harman Singh, 2024) | ['asm', 'awa', 'ben', 'bgc', 'bho', 'bod', 'boy', 'eng', 'gbm', 'gom', 'guj', 'hin', 'hne', 'kan', 'mai', 'mal', 'mar', 'mni', 'mup', 'mwr', 'nep', 'ory', 'pan', 'pus', 'raj', 'san', 'sat', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Web, News, Written] | {'validation': 57826, 'test': 58696} | {'validation': {'num_samples': 57826, 'number_of_characters': 14600950, 'unique_pairs': 57826, 'min_sentence1_length': 24, 'average_sentence1_length': 126.25, 'max_sentence1_length': 368, 'unique_sentence1': 29903, 'min_sentence2_length': 24, 'average_sentence2_length': 126.24, 'max_sentence2_length': 368, 'unique_sentence2': 29903, 'hf_subset_descriptive_stats': {'ben-eng': {'num_samples': 997, 'number_of_characters': 248469, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 123.65, 'max_sentence1_length': 320, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-ben': {'num_samples': 997, 'number_of_characters': 248469, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 123.65, 'max_sentence2_length': 320, 'unique_sentence2': 997}, 'guj-eng': {'num_samples': 997, 'number_of_characters': 245477, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 120.64, 'max_sentence1_length': 368, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-guj': {'num_samples': 997, 'number_of_characters': 245477, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 
'average_sentence2_length': 120.64, 'max_sentence2_length': 368, 'unique_sentence2': 997}, 'hin-eng': {'num_samples': 997, 'number_of_characters': 250573, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 125.76, 'max_sentence1_length': 355, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-hin': {'num_samples': 997, 'number_of_characters': 250564, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 125.75, 'max_sentence2_length': 355, 'unique_sentence2': 997}, 'kan-eng': {'num_samples': 997, 'number_of_characters': 257131, 'unique_pairs': 997, 'min_sentence1_length': 34, 'average_sentence1_length': 132.33, 'max_sentence1_length': 331, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-kan': {'num_samples': 997, 'number_of_characters': 256986, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 34, 'average_sentence2_length': 132.19, 'max_sentence2_length': 331, 'unique_sentence2': 997}, 'mal-eng': {'num_samples': 997, 'number_of_characters': 267295, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 142.53, 'max_sentence1_length': 360, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mal': {'num_samples': 997, 'number_of_characters': 267296, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 142.53, 'max_sentence2_length': 360, 
'unique_sentence2': 997}, 'mar-eng': {'num_samples': 997, 'number_of_characters': 251107, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 126.29, 'max_sentence1_length': 321, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mar': {'num_samples': 997, 'number_of_characters': 250897, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 126.08, 'max_sentence2_length': 321, 'unique_sentence2': 997}, 'tam-eng': {'num_samples': 997, 'number_of_characters': 271322, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 146.57, 'max_sentence1_length': 358, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-tam': {'num_samples': 997, 'number_of_characters': 271322, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 146.57, 'max_sentence2_length': 358, 'unique_sentence2': 997}, 'tel-eng': {'num_samples': 997, 'number_of_characters': 252385, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 127.57, 'max_sentence1_length': 317, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-tel': {'num_samples': 997, 'number_of_characters': 252380, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 127.57, 'max_sentence2_length': 317, 'unique_sentence2': 997}, 'urd-eng': {'num_samples': 997, 
'number_of_characters': 249824, 'unique_pairs': 997, 'min_sentence1_length': 37, 'average_sentence1_length': 125.01, 'max_sentence1_length': 295, 'unique_sentence1': 996, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-urd': {'num_samples': 997, 'number_of_characters': 249824, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 37, 'average_sentence2_length': 125.01, 'max_sentence2_length': 295, 'unique_sentence2': 996}, 'asm-eng': {'num_samples': 997, 'number_of_characters': 246220, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 121.39, 'max_sentence1_length': 314, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-asm': {'num_samples': 997, 'number_of_characters': 246224, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 121.39, 'max_sentence2_length': 314, 'unique_sentence2': 997}, 'bho-eng': {'num_samples': 997, 'number_of_characters': 246895, 'unique_pairs': 997, 'min_sentence1_length': 25, 'average_sentence1_length': 122.07, 'max_sentence1_length': 326, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-bho': {'num_samples': 997, 'number_of_characters': 246919, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 25, 'average_sentence2_length': 122.09, 'max_sentence2_length': 326, 'unique_sentence2': 997}, 'nep-eng': {'num_samples': 997, 'number_of_characters': 245984, 'unique_pairs': 997, 
'min_sentence1_length': 24, 'average_sentence1_length': 121.15, 'max_sentence1_length': 307, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-nep': {'num_samples': 997, 'number_of_characters': 245984, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 24, 'average_sentence2_length': 121.15, 'max_sentence2_length': 307, 'unique_sentence2': 997}, 'ory-eng': {'num_samples': 997, 'number_of_characters': 254206, 'unique_pairs': 997, 'min_sentence1_length': 34, 'average_sentence1_length': 129.4, 'max_sentence1_length': 308, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-ory': {'num_samples': 997, 'number_of_characters': 254206, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 34, 'average_sentence2_length': 129.4, 'max_sentence2_length': 308, 'unique_sentence2': 997}, 'pan-eng': {'num_samples': 997, 'number_of_characters': 251598, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 126.78, 'max_sentence1_length': 309, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-pan': {'num_samples': 997, 'number_of_characters': 251597, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 126.78, 'max_sentence2_length': 309, 'unique_sentence2': 997}, 'pus-eng': {'num_samples': 997, 'number_of_characters': 247450, 'unique_pairs': 997, 'min_sentence1_length': 32, 'average_sentence1_length': 122.62, 
'max_sentence1_length': 300, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-pus': {'num_samples': 997, 'number_of_characters': 247450, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 32, 'average_sentence2_length': 122.62, 'max_sentence2_length': 300, 'unique_sentence2': 997}, 'san-eng': {'num_samples': 997, 'number_of_characters': 249042, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 124.22, 'max_sentence1_length': 311, 'unique_sentence1': 994, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-san': {'num_samples': 997, 'number_of_characters': 248877, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 124.06, 'max_sentence2_length': 311, 'unique_sentence2': 994}, 'awa-eng': {'num_samples': 997, 'number_of_characters': 247944, 'unique_pairs': 997, 'min_sentence1_length': 34, 'average_sentence1_length': 123.12, 'max_sentence1_length': 329, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-awa': {'num_samples': 997, 'number_of_characters': 247884, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 34, 'average_sentence2_length': 123.06, 'max_sentence2_length': 329, 'unique_sentence2': 997}, 'bgc-eng': {'num_samples': 997, 'number_of_characters': 245935, 'unique_pairs': 997, 'min_sentence1_length': 27, 'average_sentence1_length': 121.1, 'max_sentence1_length': 303, 'unique_sentence1': 997, 
'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-bgc': {'num_samples': 997, 'number_of_characters': 245935, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 27, 'average_sentence2_length': 121.1, 'max_sentence2_length': 303, 'unique_sentence2': 997}, 'bod-eng': {'num_samples': 997, 'number_of_characters': 266515, 'unique_pairs': 997, 'min_sentence1_length': 26, 'average_sentence1_length': 141.75, 'max_sentence1_length': 355, 'unique_sentence1': 996, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-bod': {'num_samples': 997, 'number_of_characters': 266495, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 26, 'average_sentence2_length': 141.73, 'max_sentence2_length': 355, 'unique_sentence2': 996}, 'boy-eng': {'num_samples': 997, 'number_of_characters': 260174, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 135.39, 'max_sentence1_length': 312, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-boy': {'num_samples': 997, 'number_of_characters': 260174, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 135.39, 'max_sentence2_length': 312, 'unique_sentence2': 997}, 'gbm-eng': {'num_samples': 997, 'number_of_characters': 247009, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 122.18, 'max_sentence1_length': 344, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 
'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-gbm': {'num_samples': 997, 'number_of_characters': 247009, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 122.18, 'max_sentence2_length': 344, 'unique_sentence2': 997}, 'gom-eng': {'num_samples': 997, 'number_of_characters': 244553, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 119.72, 'max_sentence1_length': 306, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-gom': {'num_samples': 997, 'number_of_characters': 244553, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 119.72, 'max_sentence2_length': 306, 'unique_sentence2': 997}, 'hne-eng': {'num_samples': 997, 'number_of_characters': 246416, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 121.59, 'max_sentence1_length': 321, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-hne': {'num_samples': 997, 'number_of_characters': 246405, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 121.58, 'max_sentence2_length': 321, 'unique_sentence2': 997}, 'raj-eng': {'num_samples': 997, 'number_of_characters': 249541, 'unique_pairs': 997, 'min_sentence1_length': 32, 'average_sentence1_length': 124.72, 'max_sentence1_length': 313, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-raj': 
{'num_samples': 997, 'number_of_characters': 249541, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 32, 'average_sentence2_length': 124.72, 'max_sentence2_length': 313, 'unique_sentence2': 997}, 'mai-eng': {'num_samples': 997, 'number_of_characters': 247991, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 123.17, 'max_sentence1_length': 312, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mai': {'num_samples': 997, 'number_of_characters': 247994, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 123.17, 'max_sentence2_length': 312, 'unique_sentence2': 997}, 'mni-eng': {'num_samples': 997, 'number_of_characters': 254308, 'unique_pairs': 997, 'min_sentence1_length': 39, 'average_sentence1_length': 129.5, 'max_sentence1_length': 310, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mni': {'num_samples': 997, 'number_of_characters': 254312, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 39, 'average_sentence2_length': 129.51, 'max_sentence2_length': 310, 'unique_sentence2': 997}, 'mup-eng': {'num_samples': 997, 'number_of_characters': 248486, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 123.66, 'max_sentence1_length': 312, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mup': {'num_samples': 997, 'number_of_characters': 248486, 'unique_pairs': 
997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 123.66, 'max_sentence2_length': 312, 'unique_sentence2': 997}, 'mwr-eng': {'num_samples': 997, 'number_of_characters': 248641, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 123.82, 'max_sentence1_length': 324, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mwr': {'num_samples': 997, 'number_of_characters': 248641, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 123.82, 'max_sentence2_length': 324, 'unique_sentence2': 997}, 'sat-eng': {'num_samples': 997, 'number_of_characters': 258279, 'unique_pairs': 997, 'min_sentence1_length': 37, 'average_sentence1_length': 133.49, 'max_sentence1_length': 333, 'unique_sentence1': 995, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-sat': {'num_samples': 997, 'number_of_characters': 258279, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 37, 'average_sentence2_length': 133.49, 'max_sentence2_length': 333, 'unique_sentence2': 995}}}, 'test': {'num_samples': 58696, 'number_of_characters': 15359416, 'unique_pairs': 58690, 'min_sentence1_length': 33, 'average_sentence1_length': 130.84, 'max_sentence1_length': 431, 'unique_sentence1': 30351, 'min_sentence2_length': 33, 'average_sentence2_length': 130.83, 'max_sentence2_length': 431, 'unique_sentence2': 30351, 'hf_subset_descriptive_stats': {'ben-eng': {'num_samples': 1012, 'number_of_characters': 261008, 'unique_pairs': 1012, 
'min_sentence1_length': 38, 'average_sentence1_length': 127.51, 'max_sentence1_length': 333, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-ben': {'num_samples': 1012, 'number_of_characters': 261008, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 127.51, 'max_sentence2_length': 333, 'unique_sentence2': 1012}, 'guj-eng': {'num_samples': 1012, 'number_of_characters': 258394, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 124.93, 'max_sentence1_length': 349, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-guj': {'num_samples': 1012, 'number_of_characters': 258394, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 124.93, 'max_sentence2_length': 349, 'unique_sentence2': 1012}, 'hin-eng': {'num_samples': 1012, 'number_of_characters': 263040, 'unique_pairs': 1012, 'min_sentence1_length': 41, 'average_sentence1_length': 129.52, 'max_sentence1_length': 381, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-hin': {'num_samples': 1012, 'number_of_characters': 263029, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 41, 'average_sentence2_length': 129.51, 'max_sentence2_length': 381, 'unique_sentence2': 1012}, 'kan-eng': {'num_samples': 1012, 'number_of_characters': 270091, 'unique_pairs': 1012, 'min_sentence1_length': 43, 
'average_sentence1_length': 136.49, 'max_sentence1_length': 388, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-kan': {'num_samples': 1012, 'number_of_characters': 270021, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 43, 'average_sentence2_length': 136.42, 'max_sentence2_length': 388, 'unique_sentence2': 1012}, 'mal-eng': {'num_samples': 1012, 'number_of_characters': 281302, 'unique_pairs': 1012, 'min_sentence1_length': 48, 'average_sentence1_length': 147.57, 'max_sentence1_length': 376, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mal': {'num_samples': 1012, 'number_of_characters': 281302, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 48, 'average_sentence2_length': 147.57, 'max_sentence2_length': 376, 'unique_sentence2': 1012}, 'mar-eng': {'num_samples': 1012, 'number_of_characters': 265212, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 131.67, 'max_sentence1_length': 356, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mar': {'num_samples': 1012, 'number_of_characters': 265023, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 131.48, 'max_sentence2_length': 355, 'unique_sentence2': 1012}, 'tam-eng': {'num_samples': 1012, 'number_of_characters': 286099, 'unique_pairs': 1012, 'min_sentence1_length': 48, 'average_sentence1_length': 152.31, 
'max_sentence1_length': 404, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-tam': {'num_samples': 1012, 'number_of_characters': 286099, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 48, 'average_sentence2_length': 152.31, 'max_sentence2_length': 404, 'unique_sentence2': 1012}, 'tel-eng': {'num_samples': 1012, 'number_of_characters': 264460, 'unique_pairs': 1012, 'min_sentence1_length': 39, 'average_sentence1_length': 130.92, 'max_sentence1_length': 359, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-tel': {'num_samples': 1012, 'number_of_characters': 264447, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 39, 'average_sentence2_length': 130.91, 'max_sentence2_length': 359, 'unique_sentence2': 1012}, 'urd-eng': {'num_samples': 1012, 'number_of_characters': 261886, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 128.38, 'max_sentence1_length': 348, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-urd': {'num_samples': 1012, 'number_of_characters': 261885, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 128.38, 'max_sentence2_length': 348, 'unique_sentence2': 1012}, 'asm-eng': {'num_samples': 1012, 'number_of_characters': 257902, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 124.44, 'max_sentence1_length': 329, 'unique_sentence1': 
1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-asm': {'num_samples': 1012, 'number_of_characters': 257909, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 124.45, 'max_sentence2_length': 329, 'unique_sentence2': 1012}, 'bho-eng': {'num_samples': 1012, 'number_of_characters': 260578, 'unique_pairs': 1012, 'min_sentence1_length': 36, 'average_sentence1_length': 127.09, 'max_sentence1_length': 367, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-bho': {'num_samples': 1012, 'number_of_characters': 260601, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 36, 'average_sentence2_length': 127.11, 'max_sentence2_length': 367, 'unique_sentence2': 1012}, 'nep-eng': {'num_samples': 1012, 'number_of_characters': 258869, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 125.4, 'max_sentence1_length': 362, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-nep': {'num_samples': 1012, 'number_of_characters': 258869, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 125.4, 'max_sentence2_length': 362, 'unique_sentence2': 1012}, 'ory-eng': {'num_samples': 1012, 'number_of_characters': 266805, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 133.24, 'max_sentence1_length': 354, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 
'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-ory': {'num_samples': 1012, 'number_of_characters': 266805, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 133.24, 'max_sentence2_length': 354, 'unique_sentence2': 1012}, 'pan-eng': {'num_samples': 1012, 'number_of_characters': 265391, 'unique_pairs': 1012, 'min_sentence1_length': 37, 'average_sentence1_length': 131.84, 'max_sentence1_length': 380, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-pan': {'num_samples': 1012, 'number_of_characters': 265391, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 37, 'average_sentence2_length': 131.84, 'max_sentence2_length': 380, 'unique_sentence2': 1012}, 'pus-eng': {'num_samples': 1012, 'number_of_characters': 254422, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 121.0, 'max_sentence1_length': 325, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-pus': {'num_samples': 1012, 'number_of_characters': 254421, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 121.0, 'max_sentence2_length': 325, 'unique_sentence2': 1012}, 'san-eng': {'num_samples': 1012, 'number_of_characters': 260339, 'unique_pairs': 1012, 'min_sentence1_length': 33, 'average_sentence1_length': 126.85, 'max_sentence1_length': 358, 'unique_sentence1': 1011, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 
'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-san': {'num_samples': 1012, 'number_of_characters': 260224, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 33, 'average_sentence2_length': 126.74, 'max_sentence2_length': 358, 'unique_sentence2': 1011}, 'awa-eng': {'num_samples': 1012, 'number_of_characters': 260179, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 126.69, 'max_sentence1_length': 378, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-awa': {'num_samples': 1012, 'number_of_characters': 260137, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 126.65, 'max_sentence2_length': 378, 'unique_sentence2': 1012}, 'bgc-eng': {'num_samples': 1012, 'number_of_characters': 257450, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 124.0, 'max_sentence1_length': 332, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-bgc': {'num_samples': 1012, 'number_of_characters': 257450, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 124.0, 'max_sentence2_length': 332, 'unique_sentence2': 1012}, 'bod-eng': {'num_samples': 1012, 'number_of_characters': 280188, 'unique_pairs': 1012, 'min_sentence1_length': 42, 'average_sentence1_length': 146.46, 'max_sentence1_length': 431, 'unique_sentence1': 1009, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 
1012}, 'eng-bod': {'num_samples': 1012, 'number_of_characters': 280126, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 42, 'average_sentence2_length': 146.4, 'max_sentence2_length': 431, 'unique_sentence2': 1009}, 'boy-eng': {'num_samples': 1012, 'number_of_characters': 277538, 'unique_pairs': 1012, 'min_sentence1_length': 36, 'average_sentence1_length': 143.85, 'max_sentence1_length': 396, 'unique_sentence1': 1011, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-boy': {'num_samples': 1012, 'number_of_characters': 277538, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 36, 'average_sentence2_length': 143.85, 'max_sentence2_length': 396, 'unique_sentence2': 1011}, 'gbm-eng': {'num_samples': 1012, 'number_of_characters': 261027, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 127.53, 'max_sentence1_length': 333, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-gbm': {'num_samples': 1012, 'number_of_characters': 261027, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 127.53, 'max_sentence2_length': 333, 'unique_sentence2': 1012}, 'gom-eng': {'num_samples': 1012, 'number_of_characters': 259182, 'unique_pairs': 1012, 'min_sentence1_length': 37, 'average_sentence1_length': 125.71, 'max_sentence1_length': 335, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-gom': {'num_samples': 1012, 
'number_of_characters': 259182, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 37, 'average_sentence2_length': 125.71, 'max_sentence2_length': 335, 'unique_sentence2': 1012}, 'hne-eng': {'num_samples': 1012, 'number_of_characters': 258911, 'unique_pairs': 1012, 'min_sentence1_length': 42, 'average_sentence1_length': 125.44, 'max_sentence1_length': 327, 'unique_sentence1': 1011, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-hne': {'num_samples': 1012, 'number_of_characters': 258915, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 42, 'average_sentence2_length': 125.44, 'max_sentence2_length': 326, 'unique_sentence2': 1011}, 'raj-eng': {'num_samples': 1012, 'number_of_characters': 261987, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 128.48, 'max_sentence1_length': 338, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-raj': {'num_samples': 1012, 'number_of_characters': 261987, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 128.48, 'max_sentence2_length': 338, 'unique_sentence2': 1012}, 'mai-eng': {'num_samples': 1012, 'number_of_characters': 261374, 'unique_pairs': 1012, 'min_sentence1_length': 36, 'average_sentence1_length': 127.87, 'max_sentence1_length': 350, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mai': {'num_samples': 1012, 'number_of_characters': 261377, 'unique_pairs': 
1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 36, 'average_sentence2_length': 127.88, 'max_sentence2_length': 350, 'unique_sentence2': 1012}, 'mni-eng': {'num_samples': 1012, 'number_of_characters': 268767, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 135.18, 'max_sentence1_length': 353, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mni': {'num_samples': 1012, 'number_of_characters': 268768, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 135.18, 'max_sentence2_length': 354, 'unique_sentence2': 1012}, 'mup-eng': {'num_samples': 1012, 'number_of_characters': 262034, 'unique_pairs': 1012, 'min_sentence1_length': 40, 'average_sentence1_length': 128.53, 'max_sentence1_length': 340, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mup': {'num_samples': 1012, 'number_of_characters': 262034, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 40, 'average_sentence2_length': 128.53, 'max_sentence2_length': 340, 'unique_sentence2': 1012}, 'mwr-eng': {'num_samples': 1012, 'number_of_characters': 263749, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.22, 'max_sentence1_length': 345, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mwr': {'num_samples': 1012, 'number_of_characters': 263749, 'unique_pairs': 1012, 'min_sentence1_length': 35, 
'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.22, 'max_sentence2_length': 345, 'unique_sentence2': 1012}, 'sat-eng': {'num_samples': 1012, 'number_of_characters': 271757, 'unique_pairs': 1012, 'min_sentence1_length': 43, 'average_sentence1_length': 138.13, 'max_sentence1_length': 366, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-sat': {'num_samples': 1012, 'number_of_characters': 271757, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 43, 'average_sentence2_length': 138.13, 'max_sentence2_length': 366, 'unique_sentence2': 1012}}}} | | [IndicLangClassification](https://arxiv.org/abs/2305.15814) | ['asm', 'ben', 'brx', 'doi', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] | Classification | s2s | [Web, Non-fiction, Written] | None | None | | [IndicNLPNewsClassification](https://github.com/AI4Bharat/indicnlp_corpus#indicnlp-news-article-classification-dataset) (Anoop Kunchukuttan, 2020) | ['guj', 'kan', 'mal', 'mar', 'ori', 'pan', 'tam', 'tel'] | Classification | s2s | [News, Written] | None | None | | [IndicQARetrieval](https://arxiv.org/abs/2212.05409) (Sumanth Doddapaneni, 2022) | ['asm', 'ben', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel'] | Retrieval | s2p | [Web, Written] | None | None | @@ -256,7 +256,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [JSTS](https://aclanthology.org/2022.lrec-1.317.pdf#page=2.00) | ['jpn'] | STS | s2s | [Web, Written] | None | None | | [JaGovFaqsRetrieval](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Retrieval | s2s | [Web, Written] | None | None | | [JaQuADRetrieval](https://arxiv.org/abs/2202.01764) (ByungHoon So, 2022) | ['jpn'] | Retrieval | p2p | [Encyclopaedic, Non-fiction, Written] | None | None | -| [JaqketRetrieval](https://github.com/kumapo/JAQKET-dataset) | ['jpn'] | Retrieval | s2p | [Encyclopaedic, Non-fiction, Written] | {'test': 115226} | {'test': {'number_of_characters': 3799.7, 'num_samples': 115226, 'num_queries': 997, 'num_documents': 114229, 'average_document_length': 0.03, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}} | +| [JaqketRetrieval](https://github.com/kumapo/JAQKET-dataset) | ['jpn'] | Retrieval | s2p | [Encyclopaedic, Non-fiction, Written] | {'test': 115226} | {'test': {'number_of_characters': 428294530, 'num_samples': 115226, 'num_queries': 997, 'num_documents': 114229, 'min_document_length': 16, 'average_document_length': 0.44, 'max_document_length': 98, 'unique_documents': 114229, 'min_query_length': 8, 'average_query_length': 429532.57, 'max_query_length': 188424, 'unique_queries': 997, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 989}} | | [JavaneseIMDBClassification](https://github.com/w11wo/nlp-datasets#javanese-imdb) (Wongso et al., 2021) | ['jav'] | Classification | s2s | [Reviews, Written] | None | None | | [KLUE-NLI](https://arxiv.org/abs/2105.09680) (Sungjoon Park, 2021) | ['kor'] | PairClassification | s2s | [News, Encyclopaedic, Written] | None | None | | [KLUE-STS](https://arxiv.org/abs/2105.09680) (Sungjoon Park, 2021) | ['kor'] | STS | s2s | [Reviews, News, Spoken, Written, Spoken] | None | None | @@ -277,7 +277,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [LEMBQMSumRetrieval](https://huggingface.co/datasets/dwzhu/LongEmbed) | ['eng'] | Retrieval | s2p | [Spoken, Written] | None | None | | [LEMBSummScreenFDRetrieval](https://huggingface.co/datasets/dwzhu/LongEmbed) | ['eng'] | Retrieval | s2p | [Spoken, Written] | None | None | | [LEMBWikimQARetrieval](https://huggingface.co/datasets/dwzhu/LongEmbed) (Ho et al., 2020) | ['eng'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | -| [LanguageClassification](https://huggingface.co/datasets/papluca/language-identification) (Conneau et al., 2018) | ['ara', 'bul', 'cmn', 'deu', 'ell', 'eng', 'fra', 'hin', 'ita', 'jpn', 'nld', 'pol', 'por', 'rus', 'spa', 'swa', 'tha', 'tur', 'urd', 'vie'] | Classification | s2s | [Reviews, Web, Non-fiction, Fiction, Government, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 224352, 'average_text_length': 109.55, 'unique_labels': 20, 'labels': {'17': {'count': 102}, '0': {'count': 102}, '11': {'count': 102}, '4': {'count': 103}, '3': {'count': 102}, '1': {'count': 102}, '10': {'count': 102}, '2': {'count': 103}, '16': {'count': 103}, '9': {'count': 103}, '5': {'count': 102}, '7': {'count': 102}, '13': {'count': 102}, '14': {'count': 103}, '12': {'count': 102}, '15': {'count': 103}, '19': {'count': 102}, '18': {'count': 102}, '6': {'count': 103}, '8': {'count': 103}}}} | +| [LanguageClassification](https://huggingface.co/datasets/papluca/language-identification) (Conneau et al., 2018) | ['ara', 'bul', 'cmn', 'deu', 'ell', 'eng', 'fra', 'hin', 'ita', 'jpn', 'nld', 'pol', 'por', 'rus', 'spa', 'swa', 'tha', 'tur', 'urd', 'vie'] | Classification | s2s | [Reviews, Web, Non-fiction, Fiction, Government, Written] | {'test': 2048, 'train': 70000} | {'test': {'num_samples': 2048, 'number_of_characters': 224352, 'num_texts_in_train': 31, 'min_text_length': 14, 'average_text_length': 109.55, 'max_text_length': 1270, 'unique_text': 2025, 'unique_labels': 20, 'labels': {'17': {'count': 102}, '0': {'count': 
102}, '11': {'count': 102}, '4': {'count': 103}, '3': {'count': 102}, '1': {'count': 102}, '10': {'count': 102}, '2': {'count': 103}, '16': {'count': 103}, '9': {'count': 103}, '5': {'count': 102}, '7': {'count': 102}, '13': {'count': 102}, '14': {'count': 103}, '12': {'count': 102}, '15': {'count': 103}, '19': {'count': 102}, '18': {'count': 102}, '6': {'count': 103}, '8': {'count': 103}}}, 'train': {'num_samples': 70000, 'number_of_characters': 7760299, 'num_texts_in_train': None, 'min_text_length': 2, 'average_text_length': 110.86, 'max_text_length': 2422, 'unique_text': 68978, 'unique_labels': 20, 'labels': {'12': {'count': 3500}, '1': {'count': 3500}, '19': {'count': 3500}, '15': {'count': 3500}, '13': {'count': 3500}, '11': {'count': 3500}, '17': {'count': 3500}, '14': {'count': 3500}, '16': {'count': 3500}, '5': {'count': 3500}, '0': {'count': 3500}, '8': {'count': 3500}, '7': {'count': 3500}, '2': {'count': 3500}, '3': {'count': 3500}, '10': {'count': 3500}, '6': {'count': 3500}, '18': {'count': 3500}, '4': {'count': 3500}, '9': {'count': 3500}}}} | | [LccSentimentClassification](https://github.com/fnielsen/lcc-sentiment) | ['dan'] | Classification | s2s | [News, Web, Written] | None | None | | [LeCaRDv2](https://github.com/THUIR/LeCaRDv2) (Haitao Li, 2023) | ['zho'] | Retrieval | p2p | [Legal, Written] | None | None | | [LearnedHandsBenefitsLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | @@ -333,21 +333,21 @@ The following tables give you an overview of the tasks in MTEB. 
| [MassiveScenarioClassification](https://arxiv.org/abs/2204.08582) (Jack FitzGerald, 2022) | ['afr', 'amh', 'ara', 'aze', 'ben', 'cmo', 'cym', 'dan', 'deu', 'ell', 'eng', 'fas', 'fin', 'fra', 'heb', 'hin', 'hun', 'hye', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kan', 'kat', 'khm', 'kor', 'lav', 'mal', 'mon', 'msa', 'mya', 'nld', 'nob', 'pol', 'por', 'ron', 'rus', 'slv', 'spa', 'sqi', 'swa', 'swe', 'tam', 'tel', 'tgl', 'tha', 'tur', 'urd', 'vie'] | Classification | s2s | [Spoken] | None | None | | [MedicalQARetrieval](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-3119-4) (Asma et al., 2019) | ['eng'] | Retrieval | s2s | [Medical, Written] | None | None | | [MedicalRetrieval](https://arxiv.org/abs/2203.03367) | ['cmn'] | Retrieval | s2p | | None | None | -| [MedrxivClusteringP2P.v2](https://api.medrxiv.org/) | ['eng'] | Clustering | p2p | [Academic, Medical, Written] | None | None | -| [MedrxivClusteringS2S.v2](https://api.medrxiv.org/) | ['eng'] | Clustering | s2s | [Academic, Medical, Written] | None | None | +| [MedrxivClusteringP2P.v2](https://api.medrxiv.org/) | ['eng'] | Clustering | p2p | [Academic, Medical, Written] | {'test': 37500} | {'test': {'num_samples': 37500, 'number_of_characters': 74294927, 'min_text_length': 148, 'average_text_length': 1981.2, 'max_text_length': 38759, 'min_labels_per_text': 6, 'average_labels_per_text': 1.0, 'max_labels_per_text': 8830, 'unique_labels': 51, 'labels': {'epidemiology': {'count': 6656}, 'public and global health': {'count': 3595}, 'oncology': {'count': 845}, 'allergy and immunology': {'count': 464}, 'orthopedics': {'count': 104}, 'health informatics': {'count': 1107}, 'occupational and environmental health': {'count': 415}, 'infectious diseases': {'count': 8830}, 'genetic and genomic medicine': {'count': 1918}, 'health policy': {'count': 527}, 'gastroenterology': {'count': 343}, 'radiology and imaging': {'count': 541}, 'pain medicine': {'count': 121}, 'neurology': {'count': 1773}, 'primary care 
research': {'count': 232}, 'rheumatology': {'count': 189}, 'endocrinology': {'count': 419}, 'hematology': {'count': 202}, 'addiction medicine': {'count': 178}, 'pediatrics': {'count': 589}, 'cardiovascular medicine': {'count': 855}, 'obstetrics and gynecology': {'count': 373}, 'health systems and quality improvement': {'count': 491}, 'nephrology': {'count': 241}, 'respiratory medicine': {'count': 482}, 'geriatric medicine': {'count': 169}, 'dentistry and oral medicine': {'count': 159}, 'psychiatry and clinical psychology': {'count': 1781}, 'nutrition': {'count': 240}, 'intensive care and critical care medicine': {'count': 368}, 'rehabilitation medicine and physical therapy': {'count': 322}, 'otolaryngology': {'count': 166}, 'nursing': {'count': 93}, 'transplantation': {'count': 118}, 'health economics': {'count': 327}, 'sports medicine': {'count': 180}, 'hiv aids': {'count': 363}, 'dermatology': {'count': 98}, 'pathology': {'count': 223}, 'emergency medicine': {'count': 191}, 'pharmacology and therapeutics': {'count': 221}, 'ophthalmology': {'count': 220}, 'medical ethics': {'count': 46}, 'palliative medicine': {'count': 45}, 'sexual and reproductive health': {'count': 156}, 'medical education': {'count': 203}, 'surgery': {'count': 162}, 'urology': {'count': 65}, 'anesthesia': {'count': 72}, 'toxicology': {'count': 16}, 'forensic medicine': {'count': 6}}}} | +| [MedrxivClusteringS2S.v2](https://api.medrxiv.org/) | ['eng'] | Clustering | s2s | [Academic, Medical, Written] | {'test': 37500} | {'test': {'num_samples': 37500, 'number_of_characters': 4301276, 'min_text_length': 18, 'average_text_length': 114.7, 'max_text_length': 339, 'min_labels_per_text': 6, 'average_labels_per_text': 1.0, 'max_labels_per_text': 8830, 'unique_labels': 51, 'labels': {'epidemiology': {'count': 6656}, 'public and global health': {'count': 3595}, 'oncology': {'count': 845}, 'allergy and immunology': {'count': 464}, 'orthopedics': {'count': 104}, 'health informatics': {'count': 1107}, 
'occupational and environmental health': {'count': 415}, 'infectious diseases': {'count': 8830}, 'genetic and genomic medicine': {'count': 1918}, 'health policy': {'count': 527}, 'gastroenterology': {'count': 343}, 'radiology and imaging': {'count': 541}, 'pain medicine': {'count': 121}, 'neurology': {'count': 1773}, 'primary care research': {'count': 232}, 'rheumatology': {'count': 189}, 'endocrinology': {'count': 419}, 'hematology': {'count': 202}, 'addiction medicine': {'count': 178}, 'pediatrics': {'count': 589}, 'cardiovascular medicine': {'count': 855}, 'obstetrics and gynecology': {'count': 373}, 'health systems and quality improvement': {'count': 491}, 'nephrology': {'count': 241}, 'respiratory medicine': {'count': 482}, 'geriatric medicine': {'count': 169}, 'dentistry and oral medicine': {'count': 159}, 'psychiatry and clinical psychology': {'count': 1781}, 'nutrition': {'count': 240}, 'intensive care and critical care medicine': {'count': 368}, 'rehabilitation medicine and physical therapy': {'count': 322}, 'otolaryngology': {'count': 166}, 'nursing': {'count': 93}, 'transplantation': {'count': 118}, 'health economics': {'count': 327}, 'sports medicine': {'count': 180}, 'hiv aids': {'count': 363}, 'dermatology': {'count': 98}, 'pathology': {'count': 223}, 'emergency medicine': {'count': 191}, 'pharmacology and therapeutics': {'count': 221}, 'ophthalmology': {'count': 220}, 'medical ethics': {'count': 46}, 'palliative medicine': {'count': 45}, 'sexual and reproductive health': {'count': 156}, 'medical education': {'count': 203}, 'surgery': {'count': 162}, 'urology': {'count': 65}, 'anesthesia': {'count': 72}, 'toxicology': {'count': 16}, 'forensic medicine': {'count': 6}}}} | | [MewsC16JaClustering](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Clustering | s2s | [News, Written] | None | None | | [MindSmallReranking](https://msnews.github.io/assets/doc/ACL2020_MIND.pdf) | ['eng'] | Reranking | s2s | [News, Written] | None | None | | MintakaRetrieval | 
['ara', 'deu', 'fra', 'hin', 'ita', 'jpn', 'por', 'spa'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | | [Moroco](https://huggingface.co/datasets/moroco) (Andrei M. Butnaru, 2019) | ['ron'] | Classification | s2s | [News, Written] | None | None | | [MovieReviewSentimentClassification](https://github.com/TheophileBlard/french-sentiment-analysis-with-bert) (Théophile Blard, 2020) | ['fra'] | Classification | s2s | [Reviews, Written] | None | None | | [MrTidyRetrieval](https://huggingface.co/datasets/castorini/mr-tydi) (Xinyu Zhang, 2021) | ['ara', 'ben', 'eng', 'fin', 'ind', 'jpn', 'kor', 'rus', 'swa', 'tel', 'tha'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | -| [MultiEURLEXMultilabelClassification](https://huggingface.co/datasets/coastalcph/multi_eurlex) (Chalkidis et al., 2021) | ['bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'est', 'fin', 'fra', 'hrv', 'hun', 'ita', 'lav', 'lit', 'mlt', 'nld', 'pol', 'por', 'ron', 'slk', 'slv', 'spa', 'swe'] | MultilabelClassification | p2p | [Legal, Government, Written] | {'test': 115000} | {'test': {'average_text_length': 12014.41, 'number_of_characters': 1381657027, 'average_label_per_text': 3.59, 'num_samples': 115000, 'unique_labels': 21, 'labels': {'18': {'count': 50784}, '15': {'count': 30981}, '5': {'count': 24978}, '6': {'count': 45080}, '3': {'count': 63687}, '17': {'count': 37743}, '1': {'count': 15019}, '20': {'count': 14030}, '0': {'count': 17802}, '2': {'count': 22402}, '19': {'count': 10212}, '9': {'count': 3772}, '4': {'count': 9062}, '10': {'count': 7705}, '11': {'count': 12213}, '7': {'count': 14306}, '12': {'count': 11799}, '8': {'count': 13800}, '13': {'count': 2346}, '14': {'count': 4255}, '16': {'count': 1311}}, 'hf_subset_descriptive_stats': {'en': {'average_text_length': 11720.29, 'number_of_characters': 58601463, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 
1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'de': {'average_text_length': 12865.42, 'number_of_characters': 64327081, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'fr': {'average_text_length': 13081.11, 'number_of_characters': 65405549, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'it': {'average_text_length': 12763.48, 'number_of_characters': 63817393, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': 
{'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'es': {'average_text_length': 13080.29, 'number_of_characters': 65401450, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'pl': {'average_text_length': 12282.59, 'number_of_characters': 61412963, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'ro': {'average_text_length': 12836.93, 'number_of_characters': 64184661, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, 
'8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'nl': {'average_text_length': 12857.97, 'number_of_characters': 64289871, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'el': {'average_text_length': 12998.14, 'number_of_characters': 64990715, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'hu': {'average_text_length': 12424.64, 'number_of_characters': 62123205, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'pt': {'average_text_length': 12482.46, 
'number_of_characters': 62412308, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'cs': {'average_text_length': 10783.47, 'number_of_characters': 53917338, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sv': {'average_text_length': 11612.48, 'number_of_characters': 58062387, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'bg': {'average_text_length': 12235.43, 'number_of_characters': 61177134, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, 
'15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'da': {'average_text_length': 11773.96, 'number_of_characters': 58869790, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'fi': {'average_text_length': 12087.69, 'number_of_characters': 60438431, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sk': {'average_text_length': 11130.81, 'number_of_characters': 55654070, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': 
{'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'lt': {'average_text_length': 11245.36, 'number_of_characters': 56226783, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'hr': {'average_text_length': 11022.14, 'number_of_characters': 55110710, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sl': {'average_text_length': 10620.06, 'number_of_characters': 53100297, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, 
'11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'et': {'average_text_length': 10898.43, 'number_of_characters': 54492156, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'lv': {'average_text_length': 10938.51, 'number_of_characters': 54692551, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'mt': {'average_text_length': 12589.74, 'number_of_characters': 62948721, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 
57}}}}}} | +| [MultiEURLEXMultilabelClassification](https://huggingface.co/datasets/coastalcph/multi_eurlex) (Chalkidis et al., 2021) | ['bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'est', 'fin', 'fra', 'hrv', 'hun', 'ita', 'lav', 'lit', 'mlt', 'nld', 'pol', 'por', 'ron', 'slk', 'slv', 'spa', 'swe'] | MultilabelClassification | p2p | [Legal, Government, Written] | None | None | | [MultiHateClassification](https://aclanthology.org/2022.woah-1.15/) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'nld', 'pol', 'por', 'spa'] | Classification | s2s | [Constructed, Written] | None | None | | [MultiLongDocRetrieval](https://arxiv.org/abs/2402.03216) (Jianlv Chen, 2024) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'jpn', 'kor', 'por', 'rus', 'spa', 'tha'] | Retrieval | s2p | [Encyclopaedic, Written, Web, Non-fiction, Fiction] | None | None | | [MultilingualSentiment](https://github.com/tyqiangz/multilingual-sentiment-datasets) | ['cmn'] | Classification | s2s | | None | None | | [MultilingualSentimentClassification](https://huggingface.co/datasets/mteb/multilingual-sentiment-classification) | ['ara', 'bam', 'bul', 'cmn', 'cym', 'deu', 'dza', 'ell', 'eng', 'eus', 'fas', 'fin', 'heb', 'hrv', 'ind', 'jpn', 'kor', 'mlt', 'nor', 'pol', 'rus', 'slk', 'spa', 'tha', 'tur', 'uig', 'urd', 'vie', 'zho'] | Classification | s2s | [Reviews, Written] | None | None | | [MyanmarNews](https://huggingface.co/datasets/myanmar_news) (A. H. 
Khine, 2017) | ['mya'] | Classification | p2p | [News, Written] | None | None | -| [NFCorpus](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | | None | None | +| [NFCorpus](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | | {'test': 3956} | {'test': {'number_of_characters': 1612.55, 'num_samples': 3956, 'num_queries': 323, 'num_documents': 3633, 'average_document_length': 0.44, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 38.19}} | | [NFCorpus-PL](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | | [NLPJournalAbsIntroRetrieval](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Retrieval | s2s | [Academic, Written] | None | None | | [NLPJournalTitleAbsRetrieval](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Retrieval | s2s | [Academic, Written] | None | None | @@ -356,7 +356,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [NQ-PL](https://ai.google.com/research/NaturalQuestions/) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | | [NQ-PLHardNegatives](https://ai.google.com/research/NaturalQuestions/) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | | [NQHardNegatives](https://ai.google.com/research/NaturalQuestions/) (Tom Kwiatkowski, 2019) | ['eng'] | Retrieval | s2p | | None | None | -| [NTREXBitextMining](https://huggingface.co/datasets/davidstap/NTREX) | ['afr', 'amh', 'arb', 'aze', 'bak', 'bel', 'bem', 'ben', 'bod', 'bos', 'bul', 'cat', 'ces', 'ckb', 'cym', 'dan', 'deu', 'div', 'dzo', 'ell', 'eng', 'eus', 'ewe', 'fao', 'fas', 'fij', 'fil', 'fin', 'fra', 'fuc', 'gle', 'glg', 'guj', 'hau', 'heb', 'hin', 'hmn', 'hrv', 'hun', 'hye', 'ibo', 'ind', 'isl', 'ita', 'jpn', 'kan', 'kat', 'kaz', 'khm', 'kin', 'kir', 'kmr', 'kor', 'lao', 'lav', 'lit', 'ltz', 'mal', 'mar', 'mey', 'mkd', 'mlg', 'mlt', 'mon', 'mri', 'msa', 'mya', 'nde', 'nep', 'nld', 'nno', 'nob', 'nso', 'nya', 'orm', 'pan', 'pol', 'por', 'prs', 'pus', 'ron', 'rus', 'shi', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'spa', 'sqi', 'srp', 'ssw', 'swa', 'swe', 'tah', 'tam', 'tat', 'tel', 'tgk', 'tha', 'tir', 'ton', 'tsn', 'tuk', 'tur', 'uig', 'ukr', 'urd', 'uzb', 'ven', 'vie', 'wol', 'xho', 'yor', 'yue', 'zho', 'zul'] | BitextMining | s2s | [News, Written] | None | None | +| [NTREXBitextMining](https://huggingface.co/datasets/davidstap/NTREX) | ['afr', 'amh', 'arb', 'aze', 'bak', 'bel', 'bem', 'ben', 'bod', 'bos', 'bul', 'cat', 'ces', 'ckb', 'cym', 'dan', 'deu', 'div', 'dzo', 'ell', 'eng', 'eus', 'ewe', 'fao', 'fas', 'fij', 'fil', 'fin', 'fra', 'fuc', 'gle', 'glg', 'guj', 'hau', 'heb', 'hin', 'hmn', 'hrv', 'hun', 'hye', 'ibo', 'ind', 'isl', 'ita', 'jpn', 'kan', 'kat', 'kaz', 'khm', 'kin', 'kir', 'kmr', 'kor', 'lao', 'lav', 'lit', 'ltz', 'mal', 'mar', 'mey', 'mkd', 'mlg', 'mlt', 'mon', 'mri', 'msa', 'mya', 'nde', 'nep', 'nld', 'nno', 'nob', 'nso', 'nya', 'orm', 'pan', 'pol', 'por', 
'prs', 'pus', 'ron', 'rus', 'shi', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'spa', 'sqi', 'srp', 'ssw', 'swa', 'swe', 'tah', 'tam', 'tat', 'tel', 'tgk', 'tha', 'tir', 'ton', 'tsn', 'tuk', 'tur', 'uig', 'ukr', 'urd', 'uzb', 'ven', 'vie', 'wol', 'xho', 'yor', 'yue', 'zho', 'zul'] | BitextMining | s2s | [News, Written] | {'test': 3826252} | {'test': {'num_samples': 3826252, 'number_of_characters': 988355274, 'unique_pairs': 3820263, 'min_sentence1_length': 1, 'average_sentence1_length': 129.15, 'max_sentence1_length': 773, 'unique_sentence1': 241259, 'min_sentence2_length': 1, 'average_sentence2_length': 129.15, 'max_sentence2_length': 773, 'unique_sentence2': 241259, 'hf_subset_descriptive_stats': {'afr_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 520490, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'afr_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 564002, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'afr_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 516072, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'afr_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 526155, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 
'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'afr_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 530560, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'afr_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 549109, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'afr_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 560267, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'afr_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 516709, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'afr_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 519796, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'afr_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 520179, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 
126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'amh_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 415227, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'amh_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 437473, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'amh_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 413608, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'amh_Ethi-nso_Latn': {'num_samples': 1997, 'number_of_characters': 459006, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'amh_Ethi-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 404938, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'amh_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 458799, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 
145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'amh_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 455649, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'amh_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 440016, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'amh_Ethi-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 332745, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'amh_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 501790, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'amh_Ethi-wol_Latn': {'num_samples': 1997, 'number_of_characters': 407310, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'amh_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 435597, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 
134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'amh_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 483595, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'amh_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 425239, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'arb_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 474983, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'arb_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 483548, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'arb_Arab-deu_Latn': {'num_samples': 1997, 'number_of_characters': 526831, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'arb_Arab-ell_Grek': {'num_samples': 1997, 'number_of_characters': 530308, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 
'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'arb_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 478901, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'arb_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 474520, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'arb_Arab-fin_Latn': {'num_samples': 1997, 'number_of_characters': 500981, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'arb_Arab-fra_Latn': {'num_samples': 1997, 'number_of_characters': 524289, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'arb_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 431477, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'arb_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 492756, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 
5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'arb_Arab-hun_Latn': {'num_samples': 1997, 'number_of_characters': 509557, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'arb_Arab-ind_Latn': {'num_samples': 1997, 'number_of_characters': 518153, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'arb_Arab-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 342807, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'arb_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 477127, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'arb_Arab-kor_Hang': {'num_samples': 1997, 'number_of_characters': 364586, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'arb_Arab-lit_Latn': {'num_samples': 1997, 'number_of_characters': 490578, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 
9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'arb_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 445016, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'arb_Arab-nld_Latn': {'num_samples': 1997, 'number_of_characters': 523096, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'arb_Arab-pol_Latn': {'num_samples': 1997, 'number_of_characters': 509047, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'arb_Arab-por_Latn': {'num_samples': 1997, 'number_of_characters': 508396, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'arb_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 473717, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'arb_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 473814, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 
'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'arb_Arab-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 506074, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'arb_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 446094, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'arb_Arab-spa_Latn': {'num_samples': 1997, 'number_of_characters': 519381, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'arb_Arab-swa_Latn': {'num_samples': 1997, 'number_of_characters': 503690, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'arb_Arab-swe_Latn': {'num_samples': 1997, 'number_of_characters': 483008, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'arb_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 541142, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 
1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'arb_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 505328, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'arb_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 496794, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'arb_Arab-vie_Latn': {'num_samples': 1997, 'number_of_characters': 502302, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'arb_Arab-zho_Hant': {'num_samples': 1997, 'number_of_characters': 322659, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'arb_Arab-zul_Latn': {'num_samples': 1997, 'number_of_characters': 488913, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'aze_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 515960, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 
'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'aze_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517354, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'aze_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 529910, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'aze_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 520498, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'aze_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 515560, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'aze_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 554908, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'aze_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 535247, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 
398, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'aze_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 580656, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'aze_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 563329, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'bak_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 515960, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'bak_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 494046, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bak_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 506602, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'bak_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 497190, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 
'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'bak_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 492252, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'bak_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 531600, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'bak_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 511939, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'bak_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 557348, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'bak_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 540021, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'bel_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 511000, 'unique_pairs': 1996, 'min_sentence1_length': 7, 
'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'bel_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 525979, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'bel_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497408, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bel_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bel_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 512015, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bel_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 523981, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bel_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 533956, 'unique_pairs': 1996, 
'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'bel_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 530983, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bel_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 509059, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bel_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 508986, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bel_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508393, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bel_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 512231, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bel_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518873, 'unique_pairs': 
1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'bem_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 546212, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bem_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 537470, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'bem_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526972, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'bem_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 602279, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'bem_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 596231, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'bem_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582774, 
'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'bem_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 596822, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'bem_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 598248, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'ben_Beng-arb_Arab': {'num_samples': 1997, 'number_of_characters': 474983, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ben_Beng-deu_Latn': {'num_samples': 1997, 'number_of_characters': 539452, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ben_Beng-div_Thaa': {'num_samples': 1997, 'number_of_characters': 547650, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'ben_Beng-ell_Grek': {'num_samples': 1997, 'number_of_characters': 
542929, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'ben_Beng-eng_Latn': {'num_samples': 1997, 'number_of_characters': 491522, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ben_Beng-eus_Latn': {'num_samples': 1997, 'number_of_characters': 519005, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'ben_Beng-fas_Arab': {'num_samples': 1997, 'number_of_characters': 487141, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ben_Beng-fin_Latn': {'num_samples': 1997, 'number_of_characters': 513602, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ben_Beng-fra_Latn': {'num_samples': 1997, 'number_of_characters': 536910, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ben_Beng-guj_Gujr': {'num_samples': 1997, 
'number_of_characters': 488733, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'ben_Beng-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 444098, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ben_Beng-hin_Deva': {'num_samples': 1997, 'number_of_characters': 505377, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ben_Beng-hun_Latn': {'num_samples': 1997, 'number_of_characters': 522178, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ben_Beng-ind_Latn': {'num_samples': 1997, 'number_of_characters': 530774, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ben_Beng-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 355428, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ben_Beng-kan_Knda': {'num_samples': 1997, 
'number_of_characters': 509338, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'ben_Beng-kor_Hang': {'num_samples': 1997, 'number_of_characters': 377207, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ben_Beng-lit_Latn': {'num_samples': 1997, 'number_of_characters': 503199, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ben_Beng-mar_Deva': {'num_samples': 1997, 'number_of_characters': 504689, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'ben_Beng-nep_Deva': {'num_samples': 1997, 'number_of_characters': 492025, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'ben_Beng-nld_Latn': {'num_samples': 1997, 'number_of_characters': 535717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ben_Beng-pan_Guru': {'num_samples': 
1997, 'number_of_characters': 494224, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'ben_Beng-pol_Latn': {'num_samples': 1997, 'number_of_characters': 521668, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ben_Beng-por_Latn': {'num_samples': 1997, 'number_of_characters': 521017, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ben_Beng-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 518695, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ben_Beng-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 502543, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'ben_Beng-snd_Arab': {'num_samples': 1997, 'number_of_characters': 464129, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'ben_Beng-spa_Latn': 
{'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ben_Beng-swa_Latn': {'num_samples': 1997, 'number_of_characters': 516311, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ben_Beng-swe_Latn': {'num_samples': 1997, 'number_of_characters': 495629, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ben_Beng-tam_Taml': {'num_samples': 1997, 'number_of_characters': 553763, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ben_Beng-tel_Telu': {'num_samples': 1997, 'number_of_characters': 491329, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'ben_Beng-tur_Latn': {'num_samples': 1997, 'number_of_characters': 509415, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 
'ben_Beng-urd_Arab': {'num_samples': 1997, 'number_of_characters': 491800, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'ben_Beng-vie_Latn': {'num_samples': 1997, 'number_of_characters': 514923, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ben_Beng-zho_Hant': {'num_samples': 1997, 'number_of_characters': 335280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ben_Beng-zul_Latn': {'num_samples': 1997, 'number_of_characters': 501534, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'bod_Tibt-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 543850, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'bod_Tibt-eng_Latn': {'num_samples': 1997, 'number_of_characters': 548349, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 
1997}, 'bod_Tibt-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 589120, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'bod_Tibt-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 567609, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'bod_Tibt-mon_Mong': {'num_samples': 1997, 'number_of_characters': 559677, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'bod_Tibt-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 612483, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'bod_Tibt-tha_Thai': {'num_samples': 1997, 'number_of_characters': 538097, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'bos_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 511000, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 
'unique_sentence2': 1996}, 'bos_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 524799, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'bos_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 496228, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bos_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 502630, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bos_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 510835, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bos_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 522801, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bos_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 532776, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 
468, 'unique_sentence2': 1996}, 'bos_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 529803, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bos_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 507879, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bos_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 507806, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bos_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 507213, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bos_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 511051, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bos_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 517693, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 
'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'bul_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 525979, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'bul_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 524799, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'bul_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 511207, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bul_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517609, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bul_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 525814, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bul_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 537780, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 
134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bul_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 547755, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'bul_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 544782, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bul_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 522858, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bul_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 522785, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bul_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 522192, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bul_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 526030, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 
'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bul_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 532672, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'cat_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 530680, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'cat_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 576068, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'cat_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 554946, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'cat_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 572177, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'cat_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 560435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 
8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'cat_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 560175, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'cat_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 575445, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'cat_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 571160, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ces_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 497408, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'ces_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 496228, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'ces_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 511207, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'ces_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 489038, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ces_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 497243, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ces_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 509209, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'ces_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 519184, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ces_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 516211, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ces_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 494287, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 
1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'ces_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 494214, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ces_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 493621, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'ces_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 497459, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'ces_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 504101, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'ckb_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 483548, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ckb_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500087, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 
'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ckb_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 495706, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ckb_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 452663, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ckb_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 498313, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'ckb_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 466202, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'ckb_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 494903, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'ckb_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 495000, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 
399, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'ckb_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 467280, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'ckb_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 526514, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'cym_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514225, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.45, 'max_sentence1_length': 444, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'cym_Latn-gle_Latn': {'num_samples': 1997, 'number_of_characters': 561314, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.45, 'max_sentence1_length': 444, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 147.63, 'max_sentence2_length': 461, 'unique_sentence2': 1997}, 'dan_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 520490, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'dan_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547788, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 
'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'dan_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499858, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'dan_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509941, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'dan_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 514346, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'dan_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532895, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'dan_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 544053, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'dan_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 500495, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 
126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'dan_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 503582, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'dan_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 503965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'deu_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 564002, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'deu_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 526831, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'deu_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 539452, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'deu_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 547788, 'unique_pairs': 1996, 'min_sentence1_length': 9, 
'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'deu_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 594777, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'deu_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 543370, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'deu_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 553453, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'deu_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 538989, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'deu_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 565450, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'deu_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 588758, 'unique_pairs': 1996, 'min_sentence1_length': 
9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'deu_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 495946, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'deu_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 557225, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'deu_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 574026, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'deu_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 582622, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'deu_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 557858, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'deu_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 407276, 'unique_pairs': 1996, 'min_sentence1_length': 
9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'deu_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 429055, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'deu_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 555047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'deu_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 576407, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'deu_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 587565, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'deu_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 544007, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'deu_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 547094, 'unique_pairs': 1996, 'min_sentence1_length': 
9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'deu_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 573516, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'deu_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 572865, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'deu_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 570543, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'deu_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 583850, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'deu_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 568159, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'deu_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 547477, 'unique_pairs': 1996, 
'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'deu_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 605611, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'deu_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 561263, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'deu_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 566771, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'deu_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 387128, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'deu_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 553382, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'div_Thaa-ben_Beng': {'num_samples': 1997, 'number_of_characters': 547650, 'unique_pairs': 
1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'div_Thaa-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551568, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'div_Thaa-eus_Latn': {'num_samples': 1997, 'number_of_characters': 579051, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'div_Thaa-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 548779, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'div_Thaa-hin_Deva': {'num_samples': 1997, 'number_of_characters': 565423, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'div_Thaa-kan_Knda': {'num_samples': 1997, 'number_of_characters': 569384, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'div_Thaa-mar_Deva': {'num_samples': 1997, 'number_of_characters': 564735, 
'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'div_Thaa-nep_Deva': {'num_samples': 1997, 'number_of_characters': 552071, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'div_Thaa-pan_Guru': {'num_samples': 1997, 'number_of_characters': 554270, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'div_Thaa-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 562589, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'div_Thaa-snd_Arab': {'num_samples': 1997, 'number_of_characters': 524175, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'div_Thaa-tam_Taml': {'num_samples': 1997, 'number_of_characters': 613809, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'div_Thaa-tel_Telu': {'num_samples': 1997, 'number_of_characters': 
551375, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'div_Thaa-urd_Arab': {'num_samples': 1997, 'number_of_characters': 551846, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'dzo_Tibt-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 543850, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'dzo_Tibt-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490941, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'dzo_Tibt-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 531712, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'dzo_Tibt-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 510201, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'dzo_Tibt-mon_Mong': {'num_samples': 1997, 
'number_of_characters': 502269, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'dzo_Tibt-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 555075, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'dzo_Tibt-tha_Thai': {'num_samples': 1997, 'number_of_characters': 480689, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'ell_Grek-arb_Arab': {'num_samples': 1997, 'number_of_characters': 530308, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ell_Grek-ben_Beng': {'num_samples': 1997, 'number_of_characters': 542929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'ell_Grek-deu_Latn': {'num_samples': 1997, 'number_of_characters': 594777, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ell_Grek-eng_Latn': {'num_samples': 
1997, 'number_of_characters': 546847, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ell_Grek-fas_Arab': {'num_samples': 1997, 'number_of_characters': 542466, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ell_Grek-fin_Latn': {'num_samples': 1997, 'number_of_characters': 568927, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ell_Grek-fra_Latn': {'num_samples': 1997, 'number_of_characters': 592235, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ell_Grek-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 499423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ell_Grek-hin_Deva': {'num_samples': 1997, 'number_of_characters': 560702, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ell_Grek-hun_Latn': 
{'num_samples': 1997, 'number_of_characters': 577503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ell_Grek-hye_Armn': {'num_samples': 1997, 'number_of_characters': 563842, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'ell_Grek-ind_Latn': {'num_samples': 1997, 'number_of_characters': 586099, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ell_Grek-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 410753, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ell_Grek-kat_Geor': {'num_samples': 1997, 'number_of_characters': 565719, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'ell_Grek-kor_Hang': {'num_samples': 1997, 'number_of_characters': 432532, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ell_Grek-lit_Latn': 
{'num_samples': 1997, 'number_of_characters': 558524, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ell_Grek-nld_Latn': {'num_samples': 1997, 'number_of_characters': 591042, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ell_Grek-pol_Latn': {'num_samples': 1997, 'number_of_characters': 576993, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ell_Grek-por_Latn': {'num_samples': 1997, 'number_of_characters': 576342, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ell_Grek-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 574020, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ell_Grek-spa_Latn': {'num_samples': 1997, 'number_of_characters': 587327, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 
'ell_Grek-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 582734, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'ell_Grek-swa_Latn': {'num_samples': 1997, 'number_of_characters': 571636, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ell_Grek-swe_Latn': {'num_samples': 1997, 'number_of_characters': 550954, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ell_Grek-tam_Taml': {'num_samples': 1997, 'number_of_characters': 609088, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ell_Grek-tur_Latn': {'num_samples': 1997, 'number_of_characters': 564740, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'ell_Grek-vie_Latn': {'num_samples': 1997, 'number_of_characters': 570248, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 
1996}, 'ell_Grek-zho_Hant': {'num_samples': 1997, 'number_of_characters': 390605, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ell_Grek-zul_Latn': {'num_samples': 1997, 'number_of_characters': 556859, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'eng_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 516072, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'eng_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 415227, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'eng_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 478901, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'eng_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 517354, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 
'unique_sentence2': 1997}, 'eng_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 494046, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'eng_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 503810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'eng_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 546212, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'eng_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491522, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'eng_Latn-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 548349, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'eng_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 502630, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 
434, 'unique_sentence2': 1996}, 'eng_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 517609, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'eng_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 530680, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'eng_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 489038, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'eng_Latn-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 500087, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'eng_Latn-cym_Latn': {'num_samples': 1997, 'number_of_characters': 514225, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.45, 'max_sentence2_length': 444, 'unique_sentence2': 1997}, 'eng_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 499858, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 
'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'eng_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 543370, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'eng_Latn-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551568, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'eng_Latn-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 490941, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'eng_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 546847, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'eng_Latn-eus_Latn': {'num_samples': 1997, 'number_of_characters': 522923, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'eng_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 486698, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 
119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'eng_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 505523, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'eng_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 491059, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'eng_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 548225, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'eng_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 541140, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'eng_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 517520, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'eng_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 540828, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 
'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'eng_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 476200, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'eng_Latn-gle_Latn': {'num_samples': 1997, 'number_of_characters': 542529, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 147.63, 'max_sentence2_length': 461, 'unique_sentence2': 1997}, 'eng_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 519706, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'eng_Latn-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492651, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'eng_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 517686, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'eng_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 448016, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 
'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'eng_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509295, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'eng_Latn-hmn_Latn': {'num_samples': 1997, 'number_of_characters': 578510, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 165.64, 'max_sentence2_length': 643, 'unique_sentence2': 1997}, 'eng_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 503645, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'eng_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 526096, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'eng_Latn-hye_Armn': {'num_samples': 1997, 'number_of_characters': 512435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eng_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 493821, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 
1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'eng_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 534692, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'eng_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 509928, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'eng_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 536937, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'eng_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 359346, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'eng_Latn-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513256, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'eng_Latn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 514312, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 
1997, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'eng_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 507996, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'eng_Latn-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 536211, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'eng_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 551507, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'eng_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 498584, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'eng_Latn-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 493666, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'eng_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 381125, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 
'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'eng_Latn-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 514700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'eng_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 515908, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'eng_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 507117, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'eng_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 528477, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'eng_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 551872, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'eng_Latn-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 
'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'eng_Latn-mey_Arab': {'num_samples': 1997, 'number_of_characters': 461555, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'eng_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 515611, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'eng_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 568028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'eng_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 525195, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'eng_Latn-mon_Mong': {'num_samples': 1997, 'number_of_characters': 506768, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'eng_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 521844, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 
437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'eng_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 524903, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'eng_Latn-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 559574, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'eng_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 545459, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'eng_Latn-nep_Deva': {'num_samples': 1997, 'number_of_characters': 495943, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eng_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 539635, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'eng_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 496077, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 
'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'eng_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 499164, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'eng_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 539219, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'eng_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'eng_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 485151, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'eng_Latn-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498142, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'eng_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 525586, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 
124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'eng_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 524935, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'eng_Latn-prs_Arab': {'num_samples': 1997, 'number_of_characters': 490256, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'eng_Latn-pus_Arab': {'num_samples': 1997, 'number_of_characters': 490353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'eng_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 540205, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'eng_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 522613, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'eng_Latn-shi_Arab': {'num_samples': 1997, 'number_of_characters': 462633, 'unique_pairs': 1997, 'min_sentence1_length': 7, 
'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'eng_Latn-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506461, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eng_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 500689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'eng_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 500616, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'eng_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 525575, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'eng_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 546050, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'eng_Latn-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468047, 'unique_pairs': 1997, 
'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'eng_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 539012, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'eng_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 535920, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'eng_Latn-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 531327, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'eng_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 500023, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'eng_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 503861, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'eng_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 535862, 'unique_pairs': 
1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'eng_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 520229, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'eng_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 499547, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'eng_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 557343, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'eng_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557681, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'eng_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 493646, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'eng_Latn-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495247, 
'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eng_Latn-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 521867, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'eng_Latn-tha_Thai': {'num_samples': 1997, 'number_of_characters': 485188, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'eng_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 412958, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'eng_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 561360, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'eng_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 582003, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'eng_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 
532994, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'eng_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 513333, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'eng_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 558742, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'eng_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 510503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'eng_Latn-urd_Arab': {'num_samples': 1997, 'number_of_characters': 495718, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'eng_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 541415, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'eng_Latn-ven_Latn': {'num_samples': 1997, 
'number_of_characters': 547476, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'eng_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 518841, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'eng_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 487523, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'eng_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 515810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'eng_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 563808, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'eng_Latn-yue_Hant': {'num_samples': 1997, 'number_of_characters': 326607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'eng_Latn-zho_Hans': {'num_samples': 
1997, 'number_of_characters': 332681, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'eng_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 339198, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'eng_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 505452, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'eus_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 519005, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'eus_Latn-div_Thaa': {'num_samples': 1997, 'number_of_characters': 579051, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'eus_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 522923, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'eus_Latn-guj_Gujr': 
{'num_samples': 1997, 'number_of_characters': 520134, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'eus_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 536778, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'eus_Latn-kan_Knda': {'num_samples': 1997, 'number_of_characters': 540739, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'eus_Latn-mar_Deva': {'num_samples': 1997, 'number_of_characters': 536090, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'eus_Latn-nep_Deva': {'num_samples': 1997, 'number_of_characters': 523426, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eus_Latn-pan_Guru': {'num_samples': 1997, 'number_of_characters': 525625, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 
'eus_Latn-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 533944, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eus_Latn-snd_Arab': {'num_samples': 1997, 'number_of_characters': 495530, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'eus_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 585164, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'eus_Latn-tel_Telu': {'num_samples': 1997, 'number_of_characters': 522730, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eus_Latn-urd_Arab': {'num_samples': 1997, 'number_of_characters': 523201, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'ewe_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 537470, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 
1997}, 'ewe_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 486698, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ewe_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 467458, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'ewe_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 542765, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'ewe_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 536717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'ewe_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 523260, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'ewe_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 537308, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 
'unique_sentence2': 1995}, 'ewe_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 538734, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'fao_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 526155, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fao_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 509941, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'fao_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 553453, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fao_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 505523, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fao_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 520011, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 
'unique_sentence2': 1996}, 'fao_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 538560, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'fao_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 549718, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fao_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 506160, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'fao_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 509247, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'fao_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 509630, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fas_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 474520, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 
'unique_sentence2': 1995}, 'fas_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 487141, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fas_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 495706, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'fas_Arab-deu_Latn': {'num_samples': 1997, 'number_of_characters': 538989, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fas_Arab-ell_Grek': {'num_samples': 1997, 'number_of_characters': 542466, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fas_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 491059, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fas_Arab-fin_Latn': {'num_samples': 1997, 'number_of_characters': 513139, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 
463, 'unique_sentence2': 1996}, 'fas_Arab-fra_Latn': {'num_samples': 1997, 'number_of_characters': 536447, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'fas_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 443635, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fas_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 504914, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fas_Arab-hun_Latn': {'num_samples': 1997, 'number_of_characters': 521715, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fas_Arab-ind_Latn': {'num_samples': 1997, 'number_of_characters': 530311, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fas_Arab-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 354965, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 
189, 'unique_sentence2': 1994}, 'fas_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 489285, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'fas_Arab-kor_Hang': {'num_samples': 1997, 'number_of_characters': 376744, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fas_Arab-lit_Latn': {'num_samples': 1997, 'number_of_characters': 502736, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fas_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 457174, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'fas_Arab-nld_Latn': {'num_samples': 1997, 'number_of_characters': 535254, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fas_Arab-pol_Latn': {'num_samples': 1997, 'number_of_characters': 521205, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 
'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fas_Arab-por_Latn': {'num_samples': 1997, 'number_of_characters': 520554, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fas_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 485875, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'fas_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 485972, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'fas_Arab-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 518232, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fas_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 458252, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'fas_Arab-spa_Latn': {'num_samples': 1997, 'number_of_characters': 531539, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 
144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fas_Arab-swa_Latn': {'num_samples': 1997, 'number_of_characters': 515848, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fas_Arab-swe_Latn': {'num_samples': 1997, 'number_of_characters': 495166, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fas_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 553300, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fas_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 517486, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'fas_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 508952, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fas_Arab-vie_Latn': {'num_samples': 1997, 'number_of_characters': 514460, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 
'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fas_Arab-zho_Hant': {'num_samples': 1997, 'number_of_characters': 334817, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fas_Arab-zul_Latn': {'num_samples': 1997, 'number_of_characters': 501071, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fij_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 548225, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fij_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 593925, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'fij_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 587477, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fij_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 604657, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 
'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'fij_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 620813, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'fij_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 574629, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'fij_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 577688, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'fij_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 578360, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'fij_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 610128, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'fij_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 614145, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 
'unique_sentence1': 1988, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'fil_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 541140, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fil_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 593925, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'fil_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 580392, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fil_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 597572, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'fil_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 613728, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'fil_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 567544, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 
'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'fil_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 570603, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'fil_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 571275, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'fil_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 603043, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'fil_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 607060, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'fin_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 500981, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'fin_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 513602, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 
135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fin_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 565450, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fin_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 568927, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fin_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517520, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fin_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 513139, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'fin_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 562908, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'fin_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 470096, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 
135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fin_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 531375, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fin_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 548176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fin_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 556772, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fin_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 381426, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'fin_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 403205, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fin_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 537988, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 
'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'fin_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 529197, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fin_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 561715, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fin_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 547666, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fin_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 547015, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fin_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 544693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fin_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 558000, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 
'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fin_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 542309, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fin_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 521627, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fin_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 579761, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fin_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 535413, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fin_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 540921, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fin_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 361278, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 
'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fin_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 527532, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fra_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 524289, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'fra_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 536910, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fra_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 576068, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'fra_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 588758, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fra_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 592235, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 
146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fra_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 540828, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fra_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 536447, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'fra_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 562908, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'fra_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 565094, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fra_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 493404, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fra_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 554683, 'unique_pairs': 1996, 'min_sentence1_length': 8, 
'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fra_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 571484, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fra_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 580080, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fra_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 582325, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'fra_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 404734, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'fra_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 426513, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fra_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 552505, 'unique_pairs': 1996, 'min_sentence1_length': 8, 
'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fra_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 570583, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'fra_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 585023, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fra_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 570974, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fra_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 570323, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fra_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 585593, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'fra_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 568001, 'unique_pairs': 1996, 
'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fra_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 581308, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fra_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 565617, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fra_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 544935, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fra_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 603069, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fra_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 558721, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fra_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 564229, 'unique_pairs': 
1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fra_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 384586, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fra_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 550840, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fuc_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 526972, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'fuc_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 476200, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fuc_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 467458, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'fuc_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 532267, 
'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'fuc_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 526219, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'fuc_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 512762, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'fuc_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 526810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'fuc_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 528236, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'gle_Latn-cym_Latn': {'num_samples': 1997, 'number_of_characters': 561314, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 147.63, 'max_sentence1_length': 461, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.45, 'max_sentence2_length': 444, 'unique_sentence2': 1997}, 'gle_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 
542529, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 147.63, 'max_sentence1_length': 461, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'glg_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 554946, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'glg_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 519706, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'glg_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 565094, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'glg_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 561203, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'glg_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 549461, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'glg_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 
549201, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'glg_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 564471, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'glg_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 560186, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'guj_Gujr-ben_Beng': {'num_samples': 1997, 'number_of_characters': 488733, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'guj_Gujr-div_Thaa': {'num_samples': 1997, 'number_of_characters': 548779, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'guj_Gujr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 492651, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'guj_Gujr-eus_Latn': {'num_samples': 1997, 
'number_of_characters': 520134, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'guj_Gujr-hin_Deva': {'num_samples': 1997, 'number_of_characters': 506506, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'guj_Gujr-kan_Knda': {'num_samples': 1997, 'number_of_characters': 510467, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'guj_Gujr-mar_Deva': {'num_samples': 1997, 'number_of_characters': 505818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'guj_Gujr-nep_Deva': {'num_samples': 1997, 'number_of_characters': 493154, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'guj_Gujr-pan_Guru': {'num_samples': 1997, 'number_of_characters': 495353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'guj_Gujr-sin_Sinh': {'num_samples': 
1997, 'number_of_characters': 503672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'guj_Gujr-snd_Arab': {'num_samples': 1997, 'number_of_characters': 465258, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'guj_Gujr-tam_Taml': {'num_samples': 1997, 'number_of_characters': 554892, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'guj_Gujr-tel_Telu': {'num_samples': 1997, 'number_of_characters': 492458, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'guj_Gujr-urd_Arab': {'num_samples': 1997, 'number_of_characters': 492929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'hau_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 437473, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'hau_Latn-eng_Latn': 
{'num_samples': 1997, 'number_of_characters': 517686, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hau_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 516067, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'hau_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 561465, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'hau_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 507397, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'hau_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 561258, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'hau_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 558108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 
'hau_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 542475, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hau_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 435204, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'hau_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 604249, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'hau_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 509769, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'hau_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 538056, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'hau_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 586054, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 
1996}, 'hau_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 527698, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'heb_Hebr-arb_Arab': {'num_samples': 1997, 'number_of_characters': 431477, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'heb_Hebr-ben_Beng': {'num_samples': 1997, 'number_of_characters': 444098, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'heb_Hebr-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 452663, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'heb_Hebr-deu_Latn': {'num_samples': 1997, 'number_of_characters': 495946, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'heb_Hebr-ell_Grek': {'num_samples': 1997, 'number_of_characters': 499423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 
1996}, 'heb_Hebr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 448016, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'heb_Hebr-fas_Arab': {'num_samples': 1997, 'number_of_characters': 443635, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'heb_Hebr-fin_Latn': {'num_samples': 1997, 'number_of_characters': 470096, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'heb_Hebr-fra_Latn': {'num_samples': 1997, 'number_of_characters': 493404, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'heb_Hebr-hin_Deva': {'num_samples': 1997, 'number_of_characters': 461871, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'heb_Hebr-hun_Latn': {'num_samples': 1997, 'number_of_characters': 478672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 
1997}, 'heb_Hebr-ind_Latn': {'num_samples': 1997, 'number_of_characters': 487268, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'heb_Hebr-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 311922, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'heb_Hebr-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 446242, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'heb_Hebr-kor_Hang': {'num_samples': 1997, 'number_of_characters': 333701, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'heb_Hebr-lit_Latn': {'num_samples': 1997, 'number_of_characters': 459693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'heb_Hebr-mey_Arab': {'num_samples': 1997, 'number_of_characters': 414131, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 
'heb_Hebr-nld_Latn': {'num_samples': 1997, 'number_of_characters': 492211, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'heb_Hebr-pol_Latn': {'num_samples': 1997, 'number_of_characters': 478162, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'heb_Hebr-por_Latn': {'num_samples': 1997, 'number_of_characters': 477511, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'heb_Hebr-prs_Arab': {'num_samples': 1997, 'number_of_characters': 442832, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'heb_Hebr-pus_Arab': {'num_samples': 1997, 'number_of_characters': 442929, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'heb_Hebr-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 475189, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 
'heb_Hebr-shi_Arab': {'num_samples': 1997, 'number_of_characters': 415209, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'heb_Hebr-spa_Latn': {'num_samples': 1997, 'number_of_characters': 488496, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'heb_Hebr-swa_Latn': {'num_samples': 1997, 'number_of_characters': 472805, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'heb_Hebr-swe_Latn': {'num_samples': 1997, 'number_of_characters': 452123, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'heb_Hebr-tam_Taml': {'num_samples': 1997, 'number_of_characters': 510257, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'heb_Hebr-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 474443, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 
'heb_Hebr-tur_Latn': {'num_samples': 1997, 'number_of_characters': 465909, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'heb_Hebr-vie_Latn': {'num_samples': 1997, 'number_of_characters': 471417, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'heb_Hebr-zho_Hant': {'num_samples': 1997, 'number_of_characters': 291774, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'heb_Hebr-zul_Latn': {'num_samples': 1997, 'number_of_characters': 458028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hin_Deva-arb_Arab': {'num_samples': 1997, 'number_of_characters': 492756, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'hin_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 505377, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 
'hin_Deva-deu_Latn': {'num_samples': 1997, 'number_of_characters': 557225, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'hin_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 565423, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'hin_Deva-ell_Grek': {'num_samples': 1997, 'number_of_characters': 560702, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hin_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 509295, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hin_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 536778, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'hin_Deva-fas_Arab': {'num_samples': 1997, 'number_of_characters': 504914, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 
1995}, 'hin_Deva-fin_Latn': {'num_samples': 1997, 'number_of_characters': 531375, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hin_Deva-fra_Latn': {'num_samples': 1997, 'number_of_characters': 554683, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'hin_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 506506, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'hin_Deva-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 461871, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'hin_Deva-hun_Latn': {'num_samples': 1997, 'number_of_characters': 539951, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'hin_Deva-ind_Latn': {'num_samples': 1997, 'number_of_characters': 548547, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 
1997}, 'hin_Deva-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 373201, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'hin_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 527111, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'hin_Deva-kor_Hang': {'num_samples': 1997, 'number_of_characters': 394980, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'hin_Deva-lit_Latn': {'num_samples': 1997, 'number_of_characters': 520972, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'hin_Deva-mar_Deva': {'num_samples': 1997, 'number_of_characters': 522462, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'hin_Deva-nep_Deva': {'num_samples': 1997, 'number_of_characters': 509798, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 
1996}, 'hin_Deva-nld_Latn': {'num_samples': 1997, 'number_of_characters': 553490, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'hin_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 511997, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'hin_Deva-pol_Latn': {'num_samples': 1997, 'number_of_characters': 539441, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hin_Deva-por_Latn': {'num_samples': 1997, 'number_of_characters': 538790, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'hin_Deva-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 536468, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hin_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 520316, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 
'unique_sentence2': 1996}, 'hin_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 481902, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'hin_Deva-spa_Latn': {'num_samples': 1997, 'number_of_characters': 549775, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'hin_Deva-swa_Latn': {'num_samples': 1997, 'number_of_characters': 534084, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hin_Deva-swe_Latn': {'num_samples': 1997, 'number_of_characters': 513402, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'hin_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 571536, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'hin_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 509102, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 
'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'hin_Deva-tur_Latn': {'num_samples': 1997, 'number_of_characters': 527188, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'hin_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 509573, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'hin_Deva-vie_Latn': {'num_samples': 1997, 'number_of_characters': 532696, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'hin_Deva-zho_Hant': {'num_samples': 1997, 'number_of_characters': 353053, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'hin_Deva-zul_Latn': {'num_samples': 1997, 'number_of_characters': 519307, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hmn_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 578510, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 165.64, 'max_sentence1_length': 643, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 
124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hrv_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 512015, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'hrv_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 510835, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'hrv_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 525814, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'hrv_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497243, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'hrv_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503645, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hrv_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 523816, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 
'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'hrv_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 533791, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hrv_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 530818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hrv_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 508894, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'hrv_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 508821, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hrv_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508228, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'hrv_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 512066, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'hrv_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518708, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'hun_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 509557, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'hun_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 522178, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'hun_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 574026, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'hun_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 577503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hun_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 526096, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 
'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hun_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 521715, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'hun_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 548176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hun_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 571484, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'hun_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 478672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'hun_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 539951, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'hun_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 565348, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'hun_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 390002, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'hun_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 411781, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'hun_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 546564, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'hun_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 537773, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'hun_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 570291, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'hun_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 556242, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 
'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hun_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 555591, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'hun_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 553269, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hun_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 566576, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'hun_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 550885, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hun_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 530203, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'hun_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 588337, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 
'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'hun_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 543989, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'hun_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 549497, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'hun_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 369854, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'hun_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 536108, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hye_Armn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 563842, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hye_Armn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 512435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 
'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hye_Armn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 531307, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'hye_Armn-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 548322, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'ibo_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 413608, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'ibo_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493821, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ibo_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 516067, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'ibo_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 537600, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 
1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'ibo_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 483532, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'ibo_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 537393, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'ibo_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 534243, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'ibo_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 518610, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ibo_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 411339, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'ibo_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 580384, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 
'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'ibo_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 485904, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'ibo_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 514191, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'ibo_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 562189, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ibo_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 503833, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ind_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 518153, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ind_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 530774, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 
486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'ind_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 582622, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ind_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 586099, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'ind_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 534692, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ind_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 530311, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ind_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 587477, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'ind_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 580392, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 
486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'ind_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 556772, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ind_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 580080, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ind_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 487268, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ind_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 548547, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ind_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 565348, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ind_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 398598, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 
486, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ind_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 420377, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ind_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 546369, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ind_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 591124, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'ind_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 607280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'ind_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 561096, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'ind_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 564155, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 
'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'ind_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 578887, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ind_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 564838, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ind_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 564187, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ind_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 561865, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ind_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 564827, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'ind_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 575172, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 
'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ind_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 559481, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ind_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 538799, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ind_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 596595, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'ind_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 596933, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ind_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 600612, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'ind_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 552585, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 
'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'ind_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 558093, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ind_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 378450, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ind_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 544704, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'isl_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 530560, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'isl_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 514346, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'isl_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 557858, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 
'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'isl_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 509928, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'isl_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 520011, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'isl_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 542965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'isl_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 554123, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'isl_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 510565, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'isl_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 513652, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 
'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'isl_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 514035, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ita_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 572177, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'ita_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 536937, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ita_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 582325, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ita_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 561203, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ita_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 566692, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 
623, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ita_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 566432, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ita_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 581702, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'ita_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 577417, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'jpn_Jpan-arb_Arab': {'num_samples': 1997, 'number_of_characters': 342807, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'jpn_Jpan-ben_Beng': {'num_samples': 1997, 'number_of_characters': 355428, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'jpn_Jpan-deu_Latn': {'num_samples': 1997, 'number_of_characters': 407276, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 
189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'jpn_Jpan-ell_Grek': {'num_samples': 1997, 'number_of_characters': 410753, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'jpn_Jpan-eng_Latn': {'num_samples': 1997, 'number_of_characters': 359346, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'jpn_Jpan-fas_Arab': {'num_samples': 1997, 'number_of_characters': 354965, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'jpn_Jpan-fin_Latn': {'num_samples': 1997, 'number_of_characters': 381426, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'jpn_Jpan-fra_Latn': {'num_samples': 1997, 'number_of_characters': 404734, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'jpn_Jpan-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 311922, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 
'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'jpn_Jpan-hin_Deva': {'num_samples': 1997, 'number_of_characters': 373201, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'jpn_Jpan-hun_Latn': {'num_samples': 1997, 'number_of_characters': 390002, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'jpn_Jpan-ind_Latn': {'num_samples': 1997, 'number_of_characters': 398598, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'jpn_Jpan-kor_Hang': {'num_samples': 1997, 'number_of_characters': 245031, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'jpn_Jpan-lit_Latn': {'num_samples': 1997, 'number_of_characters': 371023, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'jpn_Jpan-nld_Latn': {'num_samples': 1997, 'number_of_characters': 403541, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 
'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'jpn_Jpan-pol_Latn': {'num_samples': 1997, 'number_of_characters': 389492, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'jpn_Jpan-por_Latn': {'num_samples': 1997, 'number_of_characters': 388841, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'jpn_Jpan-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 386519, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'jpn_Jpan-spa_Latn': {'num_samples': 1997, 'number_of_characters': 399826, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'jpn_Jpan-swa_Latn': {'num_samples': 1997, 'number_of_characters': 384135, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'jpn_Jpan-swe_Latn': {'num_samples': 1997, 'number_of_characters': 363453, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 
'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'jpn_Jpan-tam_Taml': {'num_samples': 1997, 'number_of_characters': 421587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'jpn_Jpan-tur_Latn': {'num_samples': 1997, 'number_of_characters': 377239, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'jpn_Jpan-vie_Latn': {'num_samples': 1997, 'number_of_characters': 382747, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'jpn_Jpan-yue_Hant': {'num_samples': 1997, 'number_of_characters': 190513, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'jpn_Jpan-zho_Hans': {'num_samples': 1997, 'number_of_characters': 196587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'jpn_Jpan-zho_Hant': {'num_samples': 1997, 'number_of_characters': 203104, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 
'unique_sentence1': 1994, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'jpn_Jpan-zul_Latn': {'num_samples': 1997, 'number_of_characters': 369358, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'kan_Knda-ben_Beng': {'num_samples': 1997, 'number_of_characters': 509338, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'kan_Knda-div_Thaa': {'num_samples': 1997, 'number_of_characters': 569384, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'kan_Knda-eng_Latn': {'num_samples': 1997, 'number_of_characters': 513256, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kan_Knda-eus_Latn': {'num_samples': 1997, 'number_of_characters': 540739, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'kan_Knda-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 510467, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 
449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'kan_Knda-hin_Deva': {'num_samples': 1997, 'number_of_characters': 527111, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'kan_Knda-mar_Deva': {'num_samples': 1997, 'number_of_characters': 526423, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'kan_Knda-nep_Deva': {'num_samples': 1997, 'number_of_characters': 513759, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'kan_Knda-pan_Guru': {'num_samples': 1997, 'number_of_characters': 515958, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'kan_Knda-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 524277, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'kan_Knda-snd_Arab': {'num_samples': 1997, 'number_of_characters': 485863, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 
'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'kan_Knda-tam_Taml': {'num_samples': 1997, 'number_of_characters': 575497, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'kan_Knda-tel_Telu': {'num_samples': 1997, 'number_of_characters': 513063, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'kan_Knda-urd_Arab': {'num_samples': 1997, 'number_of_characters': 513534, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'kat_Geor-ell_Grek': {'num_samples': 1997, 'number_of_characters': 565719, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'kat_Geor-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514312, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kat_Geor-hye_Armn': {'num_samples': 1997, 'number_of_characters': 531307, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 
133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'kat_Geor-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 550199, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'kaz_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 529910, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'kaz_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 506602, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'kaz_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 507996, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kaz_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 511140, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'kaz_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 506202, 'unique_pairs': 1996, 'min_sentence1_length': 8, 
'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kaz_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 545550, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'kaz_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 525889, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kaz_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 571298, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'kaz_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 553971, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'khm_Khmr-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 589120, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'khm_Khmr-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 531712, 'unique_pairs': 1997, 
'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'khm_Khmr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 536211, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'khm_Khmr-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 555471, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'khm_Khmr-mon_Mong': {'num_samples': 1997, 'number_of_characters': 547539, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'khm_Khmr-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 600345, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'khm_Khmr-tha_Thai': {'num_samples': 1997, 'number_of_characters': 525959, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'kin_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 602279, 'unique_pairs': 
1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'kin_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551507, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kin_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 542765, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'kin_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 532267, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'kin_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 601526, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'kin_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 588069, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'kin_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 602117, 
'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'kin_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 603543, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'kir_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 520498, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'kir_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 497190, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'kir_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 498584, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kir_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 511140, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'kir_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 
496790, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kir_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 536138, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'kir_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 516477, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kir_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 561886, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'kir_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 544559, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'kmr_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 477127, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'kmr_Latn-ckb_Arab': {'num_samples': 1997, 
'number_of_characters': 498313, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'kmr_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493666, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kmr_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 489285, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'kmr_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 446242, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'kmr_Latn-mey_Arab': {'num_samples': 1997, 'number_of_characters': 459781, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'kmr_Latn-prs_Arab': {'num_samples': 1997, 'number_of_characters': 488482, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'kmr_Latn-pus_Arab': {'num_samples': 
1997, 'number_of_characters': 488579, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'kmr_Latn-shi_Arab': {'num_samples': 1997, 'number_of_characters': 460859, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'kmr_Latn-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 520093, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'kor_Hang-arb_Arab': {'num_samples': 1997, 'number_of_characters': 364586, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'kor_Hang-ben_Beng': {'num_samples': 1997, 'number_of_characters': 377207, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'kor_Hang-deu_Latn': {'num_samples': 1997, 'number_of_characters': 429055, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'kor_Hang-ell_Grek': {'num_samples': 
1997, 'number_of_characters': 432532, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'kor_Hang-eng_Latn': {'num_samples': 1997, 'number_of_characters': 381125, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kor_Hang-fas_Arab': {'num_samples': 1997, 'number_of_characters': 376744, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'kor_Hang-fin_Latn': {'num_samples': 1997, 'number_of_characters': 403205, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'kor_Hang-fra_Latn': {'num_samples': 1997, 'number_of_characters': 426513, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'kor_Hang-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 333701, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'kor_Hang-hin_Deva': {'num_samples': 1997, 
'number_of_characters': 394980, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'kor_Hang-hun_Latn': {'num_samples': 1997, 'number_of_characters': 411781, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'kor_Hang-ind_Latn': {'num_samples': 1997, 'number_of_characters': 420377, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'kor_Hang-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 245031, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'kor_Hang-lit_Latn': {'num_samples': 1997, 'number_of_characters': 392802, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'kor_Hang-nld_Latn': {'num_samples': 1997, 'number_of_characters': 425320, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kor_Hang-pol_Latn': {'num_samples': 1997, 
'number_of_characters': 411271, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'kor_Hang-por_Latn': {'num_samples': 1997, 'number_of_characters': 410620, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'kor_Hang-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 408298, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'kor_Hang-spa_Latn': {'num_samples': 1997, 'number_of_characters': 421605, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'kor_Hang-swa_Latn': {'num_samples': 1997, 'number_of_characters': 405914, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'kor_Hang-swe_Latn': {'num_samples': 1997, 'number_of_characters': 385232, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'kor_Hang-tam_Taml': {'num_samples': 1997, 
'number_of_characters': 443366, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'kor_Hang-tur_Latn': {'num_samples': 1997, 'number_of_characters': 399018, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kor_Hang-vie_Latn': {'num_samples': 1997, 'number_of_characters': 404526, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'kor_Hang-yue_Hant': {'num_samples': 1997, 'number_of_characters': 212292, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'kor_Hang-zho_Hans': {'num_samples': 1997, 'number_of_characters': 218366, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'kor_Hang-zho_Hant': {'num_samples': 1997, 'number_of_characters': 224883, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'kor_Hang-zul_Latn': {'num_samples': 1997, 
'number_of_characters': 391137, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'lao_Laoo-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 567609, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'lao_Laoo-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 510201, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'lao_Laoo-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lao_Laoo-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 555471, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'lao_Laoo-mon_Mong': {'num_samples': 1997, 'number_of_characters': 526028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'lao_Laoo-mya_Mymr': {'num_samples': 
1997, 'number_of_characters': 578834, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'lao_Laoo-tha_Thai': {'num_samples': 1997, 'number_of_characters': 504448, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'lav_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515908, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lav_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 537988, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'lav_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 546564, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'lav_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 527585, 'unique_pairs': 1995, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'lit_Latn-arb_Arab': {'num_samples': 
1997, 'number_of_characters': 490578, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'lit_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 503199, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'lit_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 555047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'lit_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 558524, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'lit_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 507117, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lit_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 502736, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'lit_Latn-fin_Latn': 
{'num_samples': 1997, 'number_of_characters': 529197, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'lit_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 552505, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'lit_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 459693, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'lit_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 520972, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'lit_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 537773, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'lit_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 546369, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'lit_Latn-jpn_Jpan': 
{'num_samples': 1997, 'number_of_characters': 371023, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'lit_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 392802, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'lit_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 527585, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'lit_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 551312, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'lit_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 537263, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'lit_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 536612, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'lit_Latn-rus_Cyrl': 
{'num_samples': 1997, 'number_of_characters': 534290, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'lit_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 547597, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'lit_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 531906, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'lit_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 511224, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'lit_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 569358, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'lit_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 525010, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 
'lit_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 530518, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'lit_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 350875, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'lit_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 517129, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ltz_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 549109, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ltz_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 532895, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'ltz_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 576407, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 
1996}, 'ltz_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 528477, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ltz_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 538560, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'ltz_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 542965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'ltz_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 572672, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ltz_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 529114, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'ltz_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 532201, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 
'unique_sentence2': 1996}, 'ltz_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 532584, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'mal_Mlym-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551872, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mal_Mlym-fij_Latn': {'num_samples': 1997, 'number_of_characters': 604657, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mal_Mlym-fil_Latn': {'num_samples': 1997, 'number_of_characters': 597572, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mal_Mlym-ind_Latn': {'num_samples': 1997, 'number_of_characters': 591124, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mal_Mlym-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 624460, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 
'unique_sentence2': 1994}, 'mal_Mlym-mri_Latn': {'num_samples': 1997, 'number_of_characters': 578276, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'mal_Mlym-msa_Latn': {'num_samples': 1997, 'number_of_characters': 581335, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mal_Mlym-smo_Latn': {'num_samples': 1997, 'number_of_characters': 582007, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mal_Mlym-tah_Latn': {'num_samples': 1997, 'number_of_characters': 613775, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mal_Mlym-ton_Latn': {'num_samples': 1997, 'number_of_characters': 617792, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mar_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 504689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 
'unique_sentence2': 1997}, 'mar_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 564735, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'mar_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 508607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mar_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 536090, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'mar_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 505818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'mar_Deva-hin_Deva': {'num_samples': 1997, 'number_of_characters': 522462, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'mar_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 526423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 
449, 'unique_sentence2': 1996}, 'mar_Deva-nep_Deva': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'mar_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 511309, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'mar_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 519628, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'mar_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 481214, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'mar_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 570848, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'mar_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 508414, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 
'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'mar_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 508885, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'mey_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 445016, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'mey_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 466202, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'mey_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 461555, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mey_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 457174, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'mey_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 414131, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 
100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'mey_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 459781, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'mey_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 456371, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'mey_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 456468, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'mey_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 428748, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'mey_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 487982, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'mkd_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 523981, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 
'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'mkd_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 522801, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'mkd_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 537780, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'mkd_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 509209, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'mkd_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515611, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mkd_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 523816, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'mkd_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 545757, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 
'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'mkd_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 542784, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'mkd_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 520860, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'mkd_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 520787, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'mkd_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 520194, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'mkd_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 524032, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'mkd_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 530674, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 
1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'mlg_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 568028, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mlg_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 620813, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mlg_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 613728, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mlg_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 607280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mlg_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 624460, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'mlg_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 594432, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 
'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'mlg_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 597491, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mlg_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 598163, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mlg_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 629931, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mlg_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 633948, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mlt_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 560435, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'mlt_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525195, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 
582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mlt_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570583, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'mlt_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 549461, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'mlt_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 566692, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'mlt_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 554690, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'mlt_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 569960, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'mlt_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 565675, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 
'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'mon_Mong-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 559677, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'mon_Mong-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 502269, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'mon_Mong-eng_Latn': {'num_samples': 1997, 'number_of_characters': 506768, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mon_Mong-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 547539, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'mon_Mong-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 526028, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'mon_Mong-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 570902, 'unique_pairs': 1997, 'min_sentence1_length': 11, 
'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'mon_Mong-tha_Thai': {'num_samples': 1997, 'number_of_characters': 496516, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'mri_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 521844, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mri_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 574629, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mri_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 567544, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mri_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 561096, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mri_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 578276, 'unique_pairs': 1997, 
'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'mri_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 594432, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'mri_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 551307, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mri_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 551979, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mri_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 583747, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mri_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 587764, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'msa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 524903, 'unique_pairs': 
1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'msa_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 577688, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'msa_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 570603, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'msa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564155, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'msa_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 581335, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'msa_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 597491, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'msa_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 551307, 'unique_pairs': 
1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'msa_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 555038, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'msa_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 586806, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'msa_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 590823, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mya_Mymr-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 612483, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'mya_Mymr-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 555075, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'mya_Mymr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 559574, 'unique_pairs': 
1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mya_Mymr-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 600345, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'mya_Mymr-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 578834, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'mya_Mymr-mon_Mong': {'num_samples': 1997, 'number_of_characters': 570902, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'mya_Mymr-tha_Thai': {'num_samples': 1997, 'number_of_characters': 549322, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'nde_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 596231, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'nde_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 545459, 
'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nde_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 536717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'nde_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526219, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'nde_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 601526, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'nde_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582021, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'nde_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 596069, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'nde_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 
597495, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'nep_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 492025, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'nep_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 552071, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'nep_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495943, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nep_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 523426, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'nep_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 493154, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'nep_Deva-hin_Deva': {'num_samples': 1997, 'number_of_characters': 
509798, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'nep_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513759, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'nep_Deva-mar_Deva': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'nep_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498645, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'nep_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506964, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'nep_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468550, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'nep_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 
558184, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'nep_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495750, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'nep_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 496221, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'nld_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 560267, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nld_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 523096, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'nld_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 535717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'nld_Latn-dan_Latn': {'num_samples': 1997, 
'number_of_characters': 544053, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'nld_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 587565, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nld_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 591042, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'nld_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 539635, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nld_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 549718, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nld_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 535254, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'nld_Latn-fin_Latn': {'num_samples': 
1997, 'number_of_characters': 561715, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'nld_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 585023, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'nld_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 492211, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'nld_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 553490, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'nld_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 570291, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'nld_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 578887, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'nld_Latn-isl_Latn': {'num_samples': 
1997, 'number_of_characters': 554123, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'nld_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 403541, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'nld_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 425320, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'nld_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 551312, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'nld_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 572672, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nld_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 540272, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'nld_Latn-nob_Latn': {'num_samples': 
1997, 'number_of_characters': 543359, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'nld_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 569781, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'nld_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 569130, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'nld_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 566808, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'nld_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 580115, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'nld_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 564424, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'nld_Latn-swe_Latn': 
{'num_samples': 1997, 'number_of_characters': 543742, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nld_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 601876, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'nld_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 557528, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'nld_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 563036, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nld_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 383393, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'nld_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549647, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 
'nno_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 516709, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nno_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 500495, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'nno_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 544007, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nno_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 496077, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nno_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 506160, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nno_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 510565, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 
1996}, 'nno_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 529114, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nno_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 540272, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'nno_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 499801, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'nno_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 500184, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nob_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 519796, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nob_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 503582, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 
'unique_sentence2': 1995}, 'nob_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547094, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nob_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499164, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nob_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509247, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nob_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 513652, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'nob_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532201, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nob_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 543359, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 
539, 'unique_sentence2': 1996}, 'nob_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 499801, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'nob_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 503271, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nso_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 459006, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'nso_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 539219, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nso_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 561465, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'nso_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 537600, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 
'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'nso_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 528930, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'nso_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 582791, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'nso_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 579641, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'nso_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 564008, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'nso_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 456737, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'nso_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 625782, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 
167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'nso_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 531302, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'nso_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 559589, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'nso_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 607587, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'nso_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549231, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'nya_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 582774, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'nya_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 
'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nya_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 523260, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'nya_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 512762, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'nya_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 588069, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'nya_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 582021, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'nya_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 582612, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'nya_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 584038, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 
'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'orm_Ethi-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 404938, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'orm_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 485151, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'orm_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 507397, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'orm_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 483532, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'orm_Ethi-nso_Latn': {'num_samples': 1997, 'number_of_characters': 528930, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'orm_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 528723, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 
1984, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'orm_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 525573, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'orm_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 509940, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'orm_Ethi-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 402669, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'orm_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 571714, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'orm_Ethi-wol_Latn': {'num_samples': 1997, 'number_of_characters': 477234, 'unique_pairs': 1992, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'orm_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 505521, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 
'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'orm_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 553519, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'orm_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 495163, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'pan_Guru-ben_Beng': {'num_samples': 1997, 'number_of_characters': 494224, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'pan_Guru-div_Thaa': {'num_samples': 1997, 'number_of_characters': 554270, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'pan_Guru-eng_Latn': {'num_samples': 1997, 'number_of_characters': 498142, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pan_Guru-eus_Latn': {'num_samples': 1997, 'number_of_characters': 525625, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 
'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'pan_Guru-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 495353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'pan_Guru-hin_Deva': {'num_samples': 1997, 'number_of_characters': 511997, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'pan_Guru-kan_Knda': {'num_samples': 1997, 'number_of_characters': 515958, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'pan_Guru-mar_Deva': {'num_samples': 1997, 'number_of_characters': 511309, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'pan_Guru-nep_Deva': {'num_samples': 1997, 'number_of_characters': 498645, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'pan_Guru-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 509163, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 
'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'pan_Guru-snd_Arab': {'num_samples': 1997, 'number_of_characters': 470749, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'pan_Guru-tam_Taml': {'num_samples': 1997, 'number_of_characters': 560383, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'pan_Guru-tel_Telu': {'num_samples': 1997, 'number_of_characters': 497949, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'pan_Guru-urd_Arab': {'num_samples': 1997, 'number_of_characters': 498420, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'pol_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 509047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'pol_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 533956, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 
468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'pol_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 521668, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'pol_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 532776, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'pol_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 547755, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'pol_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 519184, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'pol_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 573516, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'pol_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 576993, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 
'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'pol_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525586, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pol_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 521205, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'pol_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 547666, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'pol_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570974, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'pol_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 478162, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'pol_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 539441, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 
139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'pol_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 533791, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'pol_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 556242, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'pol_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564838, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'pol_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 389492, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'pol_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 411271, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'pol_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 537263, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 
139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'pol_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 545757, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'pol_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 569781, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'pol_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 555081, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'pol_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 552759, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'pol_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 530835, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'pol_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 530762, 'unique_pairs': 1996, 'min_sentence1_length': 9, 
'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'pol_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 566066, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'pol_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 530169, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'pol_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 534007, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'pol_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 550375, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'pol_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 529693, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'pol_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 587827, 'unique_pairs': 1997, 
'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'pol_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 543479, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'pol_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 540649, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'pol_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 548987, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'pol_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 369344, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'pol_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 535598, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'por_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 508396, 'unique_pairs': 
1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'por_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 521017, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'por_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 560175, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'por_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 572865, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'por_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 576342, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'por_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 524935, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'por_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 520554, 
'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'por_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 547015, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'por_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570323, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'por_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 549201, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'por_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 477511, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'por_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 538790, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'por_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 
555591, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'por_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564187, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'por_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 566432, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'por_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 388841, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'por_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 410620, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'por_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 536612, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'por_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 
554690, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'por_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 569130, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'por_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 555081, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'por_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 569700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'por_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 552108, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'por_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 565415, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'por_Latn-swa_Latn': {'num_samples': 1997, 
'number_of_characters': 549724, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'por_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 529042, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'por_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 587176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'por_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 542828, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'por_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 548336, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'por_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 368693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'por_Latn-zul_Latn': {'num_samples': 
1997, 'number_of_characters': 534947, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'prs_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 473717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'prs_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 494903, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'prs_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490256, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'prs_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 485875, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'prs_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 442832, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'prs_Arab-kmr_Latn': 
{'num_samples': 1997, 'number_of_characters': 488482, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'prs_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 456371, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'prs_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 485169, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'prs_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 457449, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'prs_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 516683, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'pus_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 473814, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 
'pus_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 495000, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'pus_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490353, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pus_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 485972, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'pus_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 442929, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'pus_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 488579, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'pus_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 456468, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 
'pus_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 485169, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'pus_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 457546, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'pus_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 516780, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'ron_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 575445, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'ron_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 540205, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ron_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 585593, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 
1996}, 'ron_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 564471, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ron_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 581702, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'ron_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 569960, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ron_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 569700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ron_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 580685, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'rus_Cyrl-arb_Arab': {'num_samples': 1997, 'number_of_characters': 506074, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 
'unique_sentence2': 1995}, 'rus_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 530983, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'rus_Cyrl-ben_Beng': {'num_samples': 1997, 'number_of_characters': 518695, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'rus_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 529803, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'rus_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 544782, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'rus_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 516211, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'rus_Cyrl-deu_Latn': {'num_samples': 1997, 'number_of_characters': 570543, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 
508, 'unique_sentence2': 1996}, 'rus_Cyrl-ell_Grek': {'num_samples': 1997, 'number_of_characters': 574020, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'rus_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 522613, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'rus_Cyrl-fas_Arab': {'num_samples': 1997, 'number_of_characters': 518232, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'rus_Cyrl-fin_Latn': {'num_samples': 1997, 'number_of_characters': 544693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'rus_Cyrl-fra_Latn': {'num_samples': 1997, 'number_of_characters': 568001, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'rus_Cyrl-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 475189, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 
'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'rus_Cyrl-hin_Deva': {'num_samples': 1997, 'number_of_characters': 536468, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'rus_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 530818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'rus_Cyrl-hun_Latn': {'num_samples': 1997, 'number_of_characters': 553269, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'rus_Cyrl-ind_Latn': {'num_samples': 1997, 'number_of_characters': 561865, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'rus_Cyrl-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 386519, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'rus_Cyrl-kor_Hang': {'num_samples': 1997, 'number_of_characters': 408298, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 
'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'rus_Cyrl-lit_Latn': {'num_samples': 1997, 'number_of_characters': 534290, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'rus_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 542784, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'rus_Cyrl-nld_Latn': {'num_samples': 1997, 'number_of_characters': 566808, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'rus_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 552759, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'rus_Cyrl-por_Latn': {'num_samples': 1997, 'number_of_characters': 552108, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'rus_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 527862, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 
126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'rus_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 527789, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'rus_Cyrl-spa_Latn': {'num_samples': 1997, 'number_of_characters': 563093, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'rus_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 527196, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'rus_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 531034, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'rus_Cyrl-swa_Latn': {'num_samples': 1997, 'number_of_characters': 547402, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'rus_Cyrl-swe_Latn': {'num_samples': 1997, 'number_of_characters': 526720, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 
'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'rus_Cyrl-tam_Taml': {'num_samples': 1997, 'number_of_characters': 584854, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'rus_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 540506, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'rus_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 537676, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'rus_Cyrl-vie_Latn': {'num_samples': 1997, 'number_of_characters': 546014, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'rus_Cyrl-zho_Hant': {'num_samples': 1997, 'number_of_characters': 366371, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'rus_Cyrl-zul_Latn': {'num_samples': 1997, 'number_of_characters': 532625, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 
8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'shi_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 446094, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'shi_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 467280, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'shi_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 462633, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'shi_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 458252, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'shi_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 415209, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'shi_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 460859, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 
'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'shi_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 428748, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'shi_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 457449, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'shi_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 457546, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'shi_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 489060, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'sin_Sinh-ben_Beng': {'num_samples': 1997, 'number_of_characters': 502543, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'sin_Sinh-div_Thaa': {'num_samples': 1997, 'number_of_characters': 562589, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 
1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'sin_Sinh-eng_Latn': {'num_samples': 1997, 'number_of_characters': 506461, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sin_Sinh-eus_Latn': {'num_samples': 1997, 'number_of_characters': 533944, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'sin_Sinh-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 503672, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'sin_Sinh-hin_Deva': {'num_samples': 1997, 'number_of_characters': 520316, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'sin_Sinh-kan_Knda': {'num_samples': 1997, 'number_of_characters': 524277, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'sin_Sinh-mar_Deva': {'num_samples': 1997, 'number_of_characters': 519628, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 
'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'sin_Sinh-nep_Deva': {'num_samples': 1997, 'number_of_characters': 506964, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'sin_Sinh-pan_Guru': {'num_samples': 1997, 'number_of_characters': 509163, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'sin_Sinh-snd_Arab': {'num_samples': 1997, 'number_of_characters': 479068, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'sin_Sinh-tam_Taml': {'num_samples': 1997, 'number_of_characters': 568702, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'sin_Sinh-tel_Telu': {'num_samples': 1997, 'number_of_characters': 506268, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'sin_Sinh-urd_Arab': {'num_samples': 1997, 'number_of_characters': 506739, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 
441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'slk_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 509059, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'slk_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507879, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'slk_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522858, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'slk_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 494287, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'slk_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500689, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'slk_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508894, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 
'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'slk_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520860, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'slk_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530835, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'slk_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527862, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'slk_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 505865, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'slk_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 505272, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'slk_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 
126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'slk_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515752, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'slv_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 508986, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'slv_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507806, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'slv_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522785, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'slv_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 494214, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'slv_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500616, 'unique_pairs': 1997, 'min_sentence1_length': 6, 
'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'slv_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508821, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'slv_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520787, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'slv_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530762, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'slv_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527789, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'slv_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 505865, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'slv_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 505199, 'unique_pairs': 1996, 
'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'slv_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 509037, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'slv_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515679, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'smo_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525575, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'smo_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 578360, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'smo_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 571275, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'smo_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564827, 'unique_pairs': 
1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'smo_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 582007, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'smo_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 598163, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'smo_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 551979, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'smo_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 555038, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'smo_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 587478, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'smo_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 591495, 
'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'sna_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 596822, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'sna_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 546050, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sna_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 537308, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'sna_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'sna_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 602117, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'sna_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 
596069, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'sna_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582612, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'sna_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 598086, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'snd_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 464129, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'snd_Arab-div_Thaa': {'num_samples': 1997, 'number_of_characters': 524175, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'snd_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 468047, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'snd_Arab-eus_Latn': {'num_samples': 1997, 
'number_of_characters': 495530, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'snd_Arab-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 465258, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'snd_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 481902, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'snd_Arab-kan_Knda': {'num_samples': 1997, 'number_of_characters': 485863, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'snd_Arab-mar_Deva': {'num_samples': 1997, 'number_of_characters': 481214, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'snd_Arab-nep_Deva': {'num_samples': 1997, 'number_of_characters': 468550, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'snd_Arab-pan_Guru': {'num_samples': 
1997, 'number_of_characters': 470749, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'snd_Arab-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 479068, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'snd_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 530288, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'snd_Arab-tel_Telu': {'num_samples': 1997, 'number_of_characters': 467854, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'snd_Arab-urd_Arab': {'num_samples': 1997, 'number_of_characters': 468325, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'som_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 458799, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'som_Latn-eng_Latn': 
{'num_samples': 1997, 'number_of_characters': 539012, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'som_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 561258, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'som_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 537393, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'som_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 582791, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'som_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 528723, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'som_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 579434, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 
'som_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 563801, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'som_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 456530, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'som_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 625575, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'som_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 531095, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'som_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 559382, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'som_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 607380, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 
1996}, 'som_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549024, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'spa_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 519381, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'spa_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'spa_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 571160, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'spa_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 583850, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'spa_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 587327, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 
'unique_sentence2': 1996}, 'spa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 535920, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'spa_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 531539, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'spa_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 558000, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'spa_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 581308, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'spa_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 560186, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'spa_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 488496, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 
375, 'unique_sentence2': 1996}, 'spa_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 549775, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'spa_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 566576, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'spa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 575172, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'spa_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 577417, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'spa_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 399826, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'spa_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 421605, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 
217, 'unique_sentence2': 1995}, 'spa_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 547597, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'spa_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 565675, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'spa_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 580115, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'spa_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 566066, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'spa_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 565415, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'spa_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 580685, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 
'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'spa_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 563093, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'spa_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 560709, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'spa_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 540027, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'spa_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 598161, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'spa_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 553813, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'spa_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 559321, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 
135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'spa_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 379678, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'spa_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 545932, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'sqi_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 582734, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'sqi_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 531327, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sqi_Latn-hye_Armn': {'num_samples': 1997, 'number_of_characters': 548322, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'sqi_Latn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 550199, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 
'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'srp_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 508393, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'srp_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507213, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'srp_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522192, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'srp_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 493621, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'srp_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500023, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'srp_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508228, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 
7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'srp_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520194, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'srp_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530169, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'srp_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527196, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'srp_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 505272, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'srp_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 505199, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'srp_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 508444, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 
'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'srp_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515086, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'srp_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 512231, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'srp_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 511051, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'srp_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 526030, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'srp_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497459, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'srp_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503861, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 
1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'srp_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 512066, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'srp_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 524032, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'srp_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 534007, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'srp_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 531034, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'srp_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'srp_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 509037, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 
'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'srp_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508444, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'srp_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518924, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'ssw_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 455649, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'ssw_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 535862, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ssw_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 558108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'ssw_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 534243, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 
510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'ssw_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 579641, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'ssw_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 525573, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'ssw_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 579434, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'ssw_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 560651, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ssw_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 453380, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'ssw_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 622425, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 
'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'ssw_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 527945, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'ssw_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 556232, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'ssw_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 604230, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ssw_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 545874, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'swa_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 440016, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'swa_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 503690, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 
136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'swa_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 516311, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'swa_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 568159, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'swa_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 571636, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'swa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 520229, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'swa_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 515848, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'swa_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 542309, 'unique_pairs': 1997, 'min_sentence1_length': 10, 
'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'swa_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 565617, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'swa_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 542475, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'swa_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 472805, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'swa_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 534084, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'swa_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 550885, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'swa_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 518610, 'unique_pairs': 1997, 
'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'swa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 559481, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'swa_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 384135, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'swa_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 405914, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'swa_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 531906, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'swa_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 564424, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'swa_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 564008, 'unique_pairs': 
1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'swa_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 509940, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'swa_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 550375, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'swa_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 549724, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'swa_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 547402, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'swa_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 563801, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'swa_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 560709, 
'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'swa_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 560651, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'swa_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 524336, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'swa_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 582470, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'swa_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 437747, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'swa_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 606792, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'swa_Latn-tur_Latn': {'num_samples': 1997, 
'number_of_characters': 538122, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'swa_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 543630, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swa_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 512312, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'swa_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 540599, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'swa_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 588597, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'swa_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 363987, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'swa_Latn-zul_Latn': 
{'num_samples': 1997, 'number_of_characters': 530241, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'swe_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 520179, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swe_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 483008, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'swe_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 495629, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'swe_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 503965, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'swe_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547477, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'swe_Latn-ell_Grek': 
{'num_samples': 1997, 'number_of_characters': 550954, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'swe_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499547, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'swe_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509630, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'swe_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 495166, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'swe_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 521627, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'swe_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 544935, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'swe_Latn-heb_Hebr': 
{'num_samples': 1997, 'number_of_characters': 452123, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'swe_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 513402, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'swe_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 530203, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'swe_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 538799, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'swe_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 514035, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'swe_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 363453, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'swe_Latn-kor_Hang': 
{'num_samples': 1997, 'number_of_characters': 385232, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'swe_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 511224, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'swe_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532584, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'swe_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 543742, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'swe_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 500184, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'swe_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 503271, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'swe_Latn-pol_Latn': 
{'num_samples': 1997, 'number_of_characters': 529693, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'swe_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 529042, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'swe_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 526720, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'swe_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 540027, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'swe_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 524336, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'swe_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 561788, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'swe_Latn-tur_Latn': 
{'num_samples': 1997, 'number_of_characters': 517440, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'swe_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 522948, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swe_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 343305, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'swe_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 509559, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tah_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 557343, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tah_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 610128, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'tah_Latn-fil_Latn': 
{'num_samples': 1997, 'number_of_characters': 603043, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'tah_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 596595, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tah_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 613775, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'tah_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 629931, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'tah_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 583747, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'tah_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 586806, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 
'tah_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 587478, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'tah_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 623263, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'tam_Taml-arb_Arab': {'num_samples': 1997, 'number_of_characters': 541142, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tam_Taml-ben_Beng': {'num_samples': 1997, 'number_of_characters': 553763, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tam_Taml-deu_Latn': {'num_samples': 1997, 'number_of_characters': 605611, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'tam_Taml-div_Thaa': {'num_samples': 1997, 'number_of_characters': 613809, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 
'unique_sentence2': 1996}, 'tam_Taml-ell_Grek': {'num_samples': 1997, 'number_of_characters': 609088, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'tam_Taml-eng_Latn': {'num_samples': 1997, 'number_of_characters': 557681, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tam_Taml-eus_Latn': {'num_samples': 1997, 'number_of_characters': 585164, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'tam_Taml-fas_Arab': {'num_samples': 1997, 'number_of_characters': 553300, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tam_Taml-fin_Latn': {'num_samples': 1997, 'number_of_characters': 579761, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'tam_Taml-fra_Latn': {'num_samples': 1997, 'number_of_characters': 603069, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 
'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'tam_Taml-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 554892, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'tam_Taml-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 510257, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tam_Taml-hin_Deva': {'num_samples': 1997, 'number_of_characters': 571536, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tam_Taml-hun_Latn': {'num_samples': 1997, 'number_of_characters': 588337, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'tam_Taml-ind_Latn': {'num_samples': 1997, 'number_of_characters': 596933, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tam_Taml-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 421587, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 
55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'tam_Taml-kan_Knda': {'num_samples': 1997, 'number_of_characters': 575497, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'tam_Taml-kor_Hang': {'num_samples': 1997, 'number_of_characters': 443366, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'tam_Taml-lit_Latn': {'num_samples': 1997, 'number_of_characters': 569358, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'tam_Taml-mar_Deva': {'num_samples': 1997, 'number_of_characters': 570848, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'tam_Taml-nep_Deva': {'num_samples': 1997, 'number_of_characters': 558184, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tam_Taml-nld_Latn': {'num_samples': 1997, 'number_of_characters': 601876, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 
'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tam_Taml-pan_Guru': {'num_samples': 1997, 'number_of_characters': 560383, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'tam_Taml-pol_Latn': {'num_samples': 1997, 'number_of_characters': 587827, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'tam_Taml-por_Latn': {'num_samples': 1997, 'number_of_characters': 587176, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'tam_Taml-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 584854, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'tam_Taml-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 568702, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'tam_Taml-snd_Arab': {'num_samples': 1997, 'number_of_characters': 530288, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'tam_Taml-spa_Latn': {'num_samples': 1997, 'number_of_characters': 598161, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'tam_Taml-swa_Latn': {'num_samples': 1997, 'number_of_characters': 582470, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tam_Taml-swe_Latn': {'num_samples': 1997, 'number_of_characters': 561788, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'tam_Taml-tel_Telu': {'num_samples': 1997, 'number_of_characters': 557488, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tam_Taml-tur_Latn': {'num_samples': 1997, 'number_of_characters': 575574, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tam_Taml-urd_Arab': {'num_samples': 1997, 'number_of_characters': 557959, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 
'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'tam_Taml-vie_Latn': {'num_samples': 1997, 'number_of_characters': 581082, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'tam_Taml-zho_Hant': {'num_samples': 1997, 'number_of_characters': 401439, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'tam_Taml-zul_Latn': {'num_samples': 1997, 'number_of_characters': 567693, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tat_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 515560, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tat_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 492252, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'tat_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493646, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 
'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tat_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 506202, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tat_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 496790, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tat_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 531200, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'tat_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 511539, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tat_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 556948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'tat_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 539621, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 
123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tel_Telu-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491329, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tel_Telu-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551375, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'tel_Telu-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495247, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tel_Telu-eus_Latn': {'num_samples': 1997, 'number_of_characters': 522730, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'tel_Telu-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492458, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'tel_Telu-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509102, 'unique_pairs': 1996, 'min_sentence1_length': 10, 
'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tel_Telu-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513063, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'tel_Telu-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508414, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'tel_Telu-nep_Deva': {'num_samples': 1997, 'number_of_characters': 495750, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tel_Telu-pan_Guru': {'num_samples': 1997, 'number_of_characters': 497949, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'tel_Telu-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506268, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'tel_Telu-snd_Arab': {'num_samples': 1997, 'number_of_characters': 467854, 'unique_pairs': 1996, 
'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'tel_Telu-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557488, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'tel_Telu-urd_Arab': {'num_samples': 1997, 'number_of_characters': 495525, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'tgk_Cyrl-arb_Arab': {'num_samples': 1997, 'number_of_characters': 505328, 'unique_pairs': 1995, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tgk_Cyrl-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 526514, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'tgk_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 521867, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tgk_Cyrl-fas_Arab': {'num_samples': 1997, 'number_of_characters': 517486, 
'unique_pairs': 1995, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tgk_Cyrl-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 474443, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tgk_Cyrl-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 520093, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'tgk_Cyrl-mey_Arab': {'num_samples': 1997, 'number_of_characters': 487982, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'tgk_Cyrl-prs_Arab': {'num_samples': 1997, 'number_of_characters': 516683, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'tgk_Cyrl-pus_Arab': {'num_samples': 1997, 'number_of_characters': 516780, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'tgk_Cyrl-shi_Arab': {'num_samples': 1997, 'number_of_characters': 
489060, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'tha_Thai-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 538097, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'tha_Thai-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 480689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'tha_Thai-eng_Latn': {'num_samples': 1997, 'number_of_characters': 485188, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tha_Thai-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 525959, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'tha_Thai-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 504448, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'tha_Thai-mon_Mong': {'num_samples': 1997, 
'number_of_characters': 496516, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'tha_Thai-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 549322, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'tir_Ethi-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 332745, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'tir_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 412958, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tir_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 435204, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'tir_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 411339, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'tir_Ethi-nso_Latn': {'num_samples': 1997, 
'number_of_characters': 456737, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'tir_Ethi-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 402669, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'tir_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 456530, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'tir_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 453380, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'tir_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 437747, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tir_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 499521, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'tir_Ethi-wol_Latn': {'num_samples': 1997, 
'number_of_characters': 405041, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'tir_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 433328, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'tir_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 481326, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'tir_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 422970, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ton_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 561360, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ton_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 614145, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'ton_Latn-fil_Latn': {'num_samples': 
1997, 'number_of_characters': 607060, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'ton_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 600612, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ton_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 617792, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'ton_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 633948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'ton_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 587764, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'ton_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 590823, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'ton_Latn-smo_Latn': 
{'num_samples': 1997, 'number_of_characters': 591495, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'ton_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 623263, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'tsn_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 501790, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'tsn_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 582003, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tsn_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 604249, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'tsn_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 580384, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 
'tsn_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 625782, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'tsn_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 571714, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'tsn_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 625575, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'tsn_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 622425, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'tsn_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 606792, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tsn_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 499521, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 
1996}, 'tsn_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 574086, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'tsn_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 602373, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'tsn_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 650371, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'tsn_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 592015, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tuk_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 554908, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tuk_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 531600, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 
'unique_sentence2': 1995}, 'tuk_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 532994, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tuk_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 545550, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tuk_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 536138, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tuk_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 531200, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tuk_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 550887, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tuk_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 596296, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 
592, 'unique_sentence2': 1996}, 'tuk_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 578969, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tur_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 496794, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tur_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 535247, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tur_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 511939, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'tur_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 509415, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tur_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 561263, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 
'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'tur_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 564740, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'tur_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 513333, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tur_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 508952, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tur_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 535413, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'tur_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 558721, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'tur_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 465909, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 
100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tur_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 527188, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tur_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 543989, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'tur_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 552585, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tur_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 377239, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'tur_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 525889, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tur_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 516477, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 
125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tur_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 399018, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'tur_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 525010, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'tur_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 557528, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tur_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 543479, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'tur_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 542828, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'tur_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 540506, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 
'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'tur_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 553813, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'tur_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 538122, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tur_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 517440, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'tur_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 575574, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'tur_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 511539, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tur_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 550887, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 
'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'tur_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 576635, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'tur_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 559308, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tur_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 536734, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'tur_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 357091, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'tur_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 523345, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'uig_Arab-aze_Latn': {'num_samples': 1997, 'number_of_characters': 580656, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 
1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'uig_Arab-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 557348, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'uig_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 558742, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'uig_Arab-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 571298, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'uig_Arab-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 561886, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'uig_Arab-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 556948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'uig_Arab-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 596296, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 
'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'uig_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 576635, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'uig_Arab-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 604717, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'ukr_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 518873, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'ukr_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 517693, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'ukr_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 532672, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'ukr_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 504101, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 
440, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'ukr_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 510503, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ukr_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 518708, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ukr_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 530674, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'ukr_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 540649, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ukr_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 537676, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ukr_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 515752, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 
'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'ukr_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 515679, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ukr_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 515086, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'ukr_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 518924, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'urd_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491800, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'urd_Arab-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551846, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'urd_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495718, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 
124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'urd_Arab-eus_Latn': {'num_samples': 1997, 'number_of_characters': 523201, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'urd_Arab-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'urd_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509573, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'urd_Arab-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513534, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'urd_Arab-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508885, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'urd_Arab-nep_Deva': {'num_samples': 1997, 'number_of_characters': 496221, 'unique_pairs': 1996, 'min_sentence1_length': 7, 
'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'urd_Arab-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498420, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'urd_Arab-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506739, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'urd_Arab-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468325, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'urd_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557959, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'urd_Arab-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495525, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'uzb_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 563329, 'unique_pairs': 1997, 
'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'uzb_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 540021, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'uzb_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 541415, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'uzb_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 553971, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'uzb_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 544559, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'uzb_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 539621, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'uzb_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 578969, 'unique_pairs': 
1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'uzb_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 559308, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'uzb_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 604717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'ven_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 598248, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'ven_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 547476, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ven_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 538734, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'ven_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 528236, 
'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'ven_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 603543, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'ven_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 597495, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'ven_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 584038, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'ven_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 598086, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'vie_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 502302, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'vie_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 
514923, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'vie_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 566771, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'vie_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 570248, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'vie_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 518841, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'vie_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 514460, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'vie_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 540921, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'vie_Latn-fra_Latn': {'num_samples': 1997, 
'number_of_characters': 564229, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'vie_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 471417, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'vie_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 532696, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'vie_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 549497, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'vie_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 558093, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'vie_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 382747, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'vie_Latn-kor_Hang': {'num_samples': 1997, 
'number_of_characters': 404526, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'vie_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 530518, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'vie_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 563036, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'vie_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 548987, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'vie_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 548336, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'vie_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 546014, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'vie_Latn-spa_Latn': {'num_samples': 
1997, 'number_of_characters': 559321, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'vie_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 543630, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'vie_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 522948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'vie_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 581082, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'vie_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 536734, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'vie_Latn-yue_Hant': {'num_samples': 1997, 'number_of_characters': 350008, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'vie_Latn-zho_Hans': 
{'num_samples': 1997, 'number_of_characters': 356082, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'vie_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 362599, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'vie_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 528853, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'wol_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 407310, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'wol_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 487523, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'wol_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 509769, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 
'wol_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 485904, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'wol_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 531302, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'wol_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 477234, 'unique_pairs': 1992, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'wol_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 531095, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'wol_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 527945, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'wol_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 512312, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 
1997}, 'wol_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 405041, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'wol_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 574086, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'wol_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 507893, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'wol_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 555891, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'wol_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 497535, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'xho_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 435597, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 
'unique_sentence2': 1994}, 'xho_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'xho_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 538056, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'xho_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 514191, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'xho_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 559589, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'xho_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 505521, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'xho_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 559382, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 
455, 'unique_sentence2': 1997}, 'xho_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 556232, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'xho_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 540599, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'xho_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 433328, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'xho_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 602373, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'xho_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 507893, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'xho_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 584178, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 
'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'xho_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 525822, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'yor_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 483595, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'yor_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 563808, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'yor_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 586054, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'yor_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 562189, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'yor_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 607587, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 
145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'yor_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 553519, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'yor_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 607380, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'yor_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 604230, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'yor_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 588597, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'yor_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 481326, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'yor_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 650371, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 
'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'yor_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 555891, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'yor_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 584178, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'yor_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 573820, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'yue_Hant-eng_Latn': {'num_samples': 1997, 'number_of_characters': 326607, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'yue_Hant-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 190513, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'yue_Hant-kor_Hang': {'num_samples': 1997, 'number_of_characters': 212292, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 
'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'yue_Hant-vie_Latn': {'num_samples': 1997, 'number_of_characters': 350008, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'yue_Hant-zho_Hans': {'num_samples': 1997, 'number_of_characters': 163848, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'yue_Hant-zho_Hant': {'num_samples': 1997, 'number_of_characters': 170365, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'zho_Hans-eng_Latn': {'num_samples': 1997, 'number_of_characters': 332681, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zho_Hans-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 196587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zho_Hans-kor_Hang': {'num_samples': 1997, 'number_of_characters': 218366, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 
'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zho_Hans-vie_Latn': {'num_samples': 1997, 'number_of_characters': 356082, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zho_Hans-yue_Hant': {'num_samples': 1997, 'number_of_characters': 163848, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'zho_Hans-zho_Hant': {'num_samples': 1997, 'number_of_characters': 176439, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'zho_Hant-arb_Arab': {'num_samples': 1997, 'number_of_characters': 322659, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'zho_Hant-ben_Beng': {'num_samples': 1997, 'number_of_characters': 335280, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'zho_Hant-deu_Latn': {'num_samples': 1997, 'number_of_characters': 387128, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 
'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'zho_Hant-ell_Grek': {'num_samples': 1997, 'number_of_characters': 390605, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'zho_Hant-eng_Latn': {'num_samples': 1997, 'number_of_characters': 339198, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zho_Hant-fas_Arab': {'num_samples': 1997, 'number_of_characters': 334817, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'zho_Hant-fin_Latn': {'num_samples': 1997, 'number_of_characters': 361278, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'zho_Hant-fra_Latn': {'num_samples': 1997, 'number_of_characters': 384586, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'zho_Hant-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 291774, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 
'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'zho_Hant-hin_Deva': {'num_samples': 1997, 'number_of_characters': 353053, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'zho_Hant-hun_Latn': {'num_samples': 1997, 'number_of_characters': 369854, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'zho_Hant-ind_Latn': {'num_samples': 1997, 'number_of_characters': 378450, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'zho_Hant-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 203104, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zho_Hant-kor_Hang': {'num_samples': 1997, 'number_of_characters': 224883, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zho_Hant-lit_Latn': {'num_samples': 1997, 'number_of_characters': 350875, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 
'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'zho_Hant-nld_Latn': {'num_samples': 1997, 'number_of_characters': 383393, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'zho_Hant-pol_Latn': {'num_samples': 1997, 'number_of_characters': 369344, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'zho_Hant-por_Latn': {'num_samples': 1997, 'number_of_characters': 368693, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'zho_Hant-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 366371, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'zho_Hant-spa_Latn': {'num_samples': 1997, 'number_of_characters': 379678, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'zho_Hant-swa_Latn': {'num_samples': 1997, 'number_of_characters': 363987, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 
'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'zho_Hant-swe_Latn': {'num_samples': 1997, 'number_of_characters': 343305, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'zho_Hant-tam_Taml': {'num_samples': 1997, 'number_of_characters': 401439, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'zho_Hant-tur_Latn': {'num_samples': 1997, 'number_of_characters': 357091, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'zho_Hant-vie_Latn': {'num_samples': 1997, 'number_of_characters': 362599, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zho_Hant-yue_Hant': {'num_samples': 1997, 'number_of_characters': 170365, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'zho_Hant-zho_Hans': {'num_samples': 1997, 'number_of_characters': 176439, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 
'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'zho_Hant-zul_Latn': {'num_samples': 1997, 'number_of_characters': 349210, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'zul_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 425239, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'zul_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 488913, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'zul_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 501534, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'zul_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 553382, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'zul_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 556859, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 
7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'zul_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 505452, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zul_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 501071, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'zul_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 527532, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'zul_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 550840, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'zul_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 527698, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'zul_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 458028, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 
'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'zul_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 519307, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'zul_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 536108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'zul_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 503833, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'zul_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 544704, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'zul_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 369358, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zul_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 391137, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 
'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zul_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 517129, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'zul_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 549647, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'zul_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 549231, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'zul_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 495163, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'zul_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 535598, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'zul_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 534947, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 
1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'zul_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 532625, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'zul_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 549024, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'zul_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 545932, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'zul_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 545874, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'zul_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 530241, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'zul_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 509559, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 
'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'zul_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 567693, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'zul_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 422970, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'zul_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 592015, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'zul_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 523345, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'zul_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 528853, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zul_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 497535, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 
494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'zul_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 525822, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'zul_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 573820, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'zul_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 349210, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}}}} | | [NYSJudicialEthicsLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [NaijaSenti](https://github.com/hausanlp/NaijaSenti) | ['hau', 'ibo', 'pcm', 'yor'] | Classification | s2s | [Social, Written] | None | None | | [NarrativeQARetrieval](https://metatext.io/datasets/narrativeqa) (Tomáš Kočiský, 2017) | ['eng'] | Retrieval | s2p | | None | None | @@ -368,14 +368,14 @@ The following tables give you an overview of the tasks in MTEB. 
| [News21InstructionRetrieval](https://arxiv.org/abs/2403.15246) (Orion Weller, 2024) | ['eng'] | InstructionRetrieval | s2p | [News, Written] | None | None | | [NewsClassification](https://arxiv.org/abs/1509.01626) (Zhang et al., 2015) | ['eng'] | Classification | s2s | [News, Written] | None | None | | [NoRecClassification](https://aclanthology.org/L18-1661/) | ['nob'] | Classification | s2s | [Written, Reviews] | None | None | -| [NollySentiBitextMining](https://github.com/IyanuSh/NollySenti) (Shode et al., 2023) | ['eng', 'hau', 'ibo', 'pcm', 'yor'] | BitextMining | s2s | [Social, Reviews, Written] | None | None | +| [NollySentiBitextMining](https://github.com/IyanuSh/NollySenti) (Shode et al., 2023) | ['eng', 'hau', 'ibo', 'pcm', 'yor'] | BitextMining | s2s | [Social, Reviews, Written] | {'train': 1640} | {'train': {'num_samples': 1640, 'number_of_characters': 445805, 'unique_pairs': 1632, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 3, 'average_sentence2_length': 135.52, 'max_sentence2_length': 1728, 'unique_sentence2': 1631, 'hf_subset_descriptive_stats': {'en-ha': {'num_samples': 410, 'number_of_characters': 115348, 'unique_pairs': 407, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 4, 'average_sentence2_length': 145.02, 'max_sentence2_length': 1728, 'unique_sentence2': 407}, 'en-ig': {'num_samples': 410, 'number_of_characters': 107173, 'unique_pairs': 409, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 5, 'average_sentence2_length': 125.08, 'max_sentence2_length': 1137, 'unique_sentence2': 408}, 'en-pcm': {'num_samples': 410, 'number_of_characters': 109955, 'unique_pairs': 408, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 
'unique_sentence1': 405, 'min_sentence2_length': 3, 'average_sentence2_length': 131.87, 'max_sentence2_length': 1552, 'unique_sentence2': 408}, 'en-yo': {'num_samples': 410, 'number_of_characters': 113329, 'unique_pairs': 409, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 6, 'average_sentence2_length': 140.1, 'max_sentence2_length': 1338, 'unique_sentence2': 409}}}} | | [NorQuadRetrieval](https://aclanthology.org/2023.nodalida-1.17/) | ['nob'] | Retrieval | p2p | [Encyclopaedic, Non-fiction, Written] | None | None | | [NordicLangClassification](https://aclanthology.org/2021.vardial-1.8/) | ['dan', 'fao', 'isl', 'nno', 'nob', 'swe'] | Classification | s2s | [Encyclopaedic] | None | None | -| [NorwegianCourtsBitextMining](https://opus.nlpl.eu/index.php) (Tiedemann et al., 2020) | ['nno', 'nob'] | BitextMining | s2s | [Legal, Written] | None | None | +| [NorwegianCourtsBitextMining](https://opus.nlpl.eu/index.php) (Tiedemann et al., 2020) | ['nno', 'nob'] | BitextMining | s2s | [Legal, Written] | {'test': 228} | {'test': {'num_samples': 228, 'number_of_characters': 37441, 'unique_pairs': 228, 'min_sentence1_length': 13, 'average_sentence1_length': 82.2, 'max_sentence1_length': 272, 'unique_sentence1': 227, 'min_sentence2_length': 10, 'average_sentence2_length': 82.02, 'max_sentence2_length': 269, 'unique_sentence2': 226}} | | [NorwegianParliamentClassification](https://huggingface.co/datasets/NbAiLab/norwegian_parliament) | ['nob'] | Classification | s2s | [Government, Spoken] | None | None | | [NusaParagraphEmotionClassification](https://github.com/IndoNLP/nusa-writes) | ['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | Classification | s2s | [Non-fiction, Fiction, Written] | None | None | | [NusaParagraphTopicClassification](https://github.com/IndoNLP/nusa-writes) | ['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | 
Classification | s2s | [Non-fiction, Fiction, Written] | None | None | -| [NusaTranslationBitextMining](https://huggingface.co/datasets/indonlp/nusatranslation_mt) (Cahyawijaya et al., 2023) | ['abs', 'bbc', 'bew', 'bhp', 'ind', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | BitextMining | s2s | [Social, Written] | {'train': 50200} | {'train': {'average_sentence1_length': 145.46, 'average_sentence2_length': 148.57, 'num_samples': 50200, 'number_of_characters': 14759870, 'hf_subset_descriptive_stats': {'ind-abs': {'average_sentence1_length': 148.37, 'average_sentence2_length': 147.31, 'num_samples': 1000, 'number_of_characters': 295680}, 'ind-btk': {'average_sentence1_length': 145.37, 'average_sentence2_length': 146.74, 'num_samples': 6600, 'number_of_characters': 1927907}, 'ind-bew': {'average_sentence1_length': 145.43, 'average_sentence2_length': 148.41, 'num_samples': 6600, 'number_of_characters': 1939300}, 'ind-bhp': {'average_sentence1_length': 133.53, 'average_sentence2_length': 128.14, 'num_samples': 1000, 'number_of_characters': 261666}, 'ind-jav': {'average_sentence1_length': 145.43, 'average_sentence2_length': 145.81, 'num_samples': 6600, 'number_of_characters': 1922162}, 'ind-mad': {'average_sentence1_length': 145.36, 'average_sentence2_length': 153.62, 'num_samples': 6600, 'number_of_characters': 1973257}, 'ind-mak': {'average_sentence1_length': 145.43, 'average_sentence2_length': 150.61, 'num_samples': 6600, 'number_of_characters': 1953868}, 'ind-min': {'average_sentence1_length': 145.43, 'average_sentence2_length': 148.06, 'num_samples': 6600, 'number_of_characters': 1937033}, 'ind-mui': {'average_sentence1_length': 150.45, 'average_sentence2_length': 150.99, 'num_samples': 1000, 'number_of_characters': 301448}, 'ind-rej': {'average_sentence1_length': 151.62, 'average_sentence2_length': 139.58, 'num_samples': 1000, 'number_of_characters': 291205}, 'ind-sun': {'average_sentence1_length': 145.43, 'average_sentence2_length': 150.99, 'num_samples': 
6600, 'number_of_characters': 1956344}}}} | +| [NusaTranslationBitextMining](https://huggingface.co/datasets/indonlp/nusatranslation_mt) (Cahyawijaya et al., 2023) | ['abs', 'bbc', 'bew', 'bhp', 'ind', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | BitextMining | s2s | [Social, Written] | {'train': 50200} | {'train': {'num_samples': 50200, 'number_of_characters': 14759870, 'unique_pairs': 50140, 'min_sentence1_length': 5, 'average_sentence1_length': 145.46, 'max_sentence1_length': 873, 'unique_sentence1': 8258, 'min_sentence2_length': 5, 'average_sentence2_length': 148.57, 'max_sentence2_length': 980, 'unique_sentence2': 50102, 'hf_subset_descriptive_stats': {'ind-abs': {'num_samples': 1000, 'number_of_characters': 295680, 'unique_pairs': 999, 'min_sentence1_length': 5, 'average_sentence1_length': 148.37, 'max_sentence1_length': 727, 'unique_sentence1': 998, 'min_sentence2_length': 6, 'average_sentence2_length': 147.31, 'max_sentence2_length': 629, 'unique_sentence2': 998}, 'ind-btk': {'num_samples': 6600, 'number_of_characters': 1927907, 'unique_pairs': 6597, 'min_sentence1_length': 5, 'average_sentence1_length': 145.37, 'max_sentence1_length': 873, 'unique_sentence1': 6521, 'min_sentence2_length': 5, 'average_sentence2_length': 146.74, 'max_sentence2_length': 980, 'unique_sentence2': 6596}, 'ind-bew': {'num_samples': 6600, 'number_of_characters': 1939300, 'unique_pairs': 6595, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 6, 'average_sentence2_length': 148.41, 'max_sentence2_length': 840, 'unique_sentence2': 6590}, 'ind-bhp': {'num_samples': 1000, 'number_of_characters': 261666, 'unique_pairs': 1000, 'min_sentence1_length': 11, 'average_sentence1_length': 133.53, 'max_sentence1_length': 468, 'unique_sentence1': 999, 'min_sentence2_length': 10, 'average_sentence2_length': 128.14, 'max_sentence2_length': 459, 'unique_sentence2': 999}, 'ind-jav': {'num_samples': 6600, 
'number_of_characters': 1922162, 'unique_pairs': 6594, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 5, 'average_sentence2_length': 145.81, 'max_sentence2_length': 854, 'unique_sentence2': 6585}, 'ind-mad': {'num_samples': 6600, 'number_of_characters': 1973257, 'unique_pairs': 6598, 'min_sentence1_length': 5, 'average_sentence1_length': 145.36, 'max_sentence1_length': 873, 'unique_sentence1': 6521, 'min_sentence2_length': 5, 'average_sentence2_length': 153.62, 'max_sentence2_length': 827, 'unique_sentence2': 6592}, 'ind-mak': {'num_samples': 6600, 'number_of_characters': 1953868, 'unique_pairs': 6594, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 6, 'average_sentence2_length': 150.61, 'max_sentence2_length': 888, 'unique_sentence2': 6586}, 'ind-min': {'num_samples': 6600, 'number_of_characters': 1937033, 'unique_pairs': 6595, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 6, 'average_sentence2_length': 148.06, 'max_sentence2_length': 837, 'unique_sentence2': 6591}, 'ind-mui': {'num_samples': 1000, 'number_of_characters': 301448, 'unique_pairs': 1000, 'min_sentence1_length': 11, 'average_sentence1_length': 150.45, 'max_sentence1_length': 451, 'unique_sentence1': 997, 'min_sentence2_length': 11, 'average_sentence2_length': 150.99, 'max_sentence2_length': 450, 'unique_sentence2': 1000}, 'ind-rej': {'num_samples': 1000, 'number_of_characters': 291205, 'unique_pairs': 1000, 'min_sentence1_length': 9, 'average_sentence1_length': 151.62, 'max_sentence1_length': 873, 'unique_sentence1': 998, 'min_sentence2_length': 8, 'average_sentence2_length': 139.58, 'max_sentence2_length': 784, 'unique_sentence2': 1000}, 'ind-sun': {'num_samples': 6600, 'number_of_characters': 1956344, 'unique_pairs': 
6591, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 5, 'average_sentence2_length': 150.99, 'max_sentence2_length': 881, 'unique_sentence2': 6588}}}} | | [NusaX-senti](https://arxiv.org/abs/2205.15960) (Winata et al., 2022) | ['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] | Classification | s2s | [Reviews, Web, Social, Constructed, Written] | None | None | | [NusaXBitextMining](https://huggingface.co/datasets/indonlp/NusaX-senti/) (Winata et al., 2023) | ['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] | BitextMining | s2s | [Reviews, Written] | None | None | | [OPP115DataRetentionLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | @@ -400,10 +400,10 @@ The following tables give you an overview of the tasks in MTEB. 
| [PROALegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [PSC](http://www.lrec-conf.org/proceedings/lrec2014/pdf/1211_Paper.pdf) | ['pol'] | PairClassification | s2s | [News, Written] | None | None | | [PatentClassification](https://aclanthology.org/P19-1212.pdf) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | -| [PawsXPairClassification](https://arxiv.org/abs/1908.11828) (Yinfei Yang, 2019) | ['cmn', 'deu', 'eng', 'fra', 'jpn', 'kor', 'spa'] | PairClassification | s2s | [Web, Encyclopaedic, Written] | {'test': 14000, 'validation': 14000} | {'test': {'num_samples': 14000, 'number_of_characters': 2551922, 'avg_sentence1_len': 91.18, 'avg_sentence2_len': 91.1, 'unique_labels': 2, 'labels': {'1': {'count': 6285}, '0': {'count': 7715}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 478034, 'avg_sentence1_len': 119.78, 'avg_sentence2_len': 119.24, 'unique_labels': 2, 'labels': {'1': {'count': 895}, '0': {'count': 1105}}}, 'en': {'num_samples': 2000, 'number_of_characters': 454362, 'avg_sentence1_len': 113.76, 'avg_sentence2_len': 113.42, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'es': {'num_samples': 2000, 'number_of_characters': 471226, 'avg_sentence1_len': 117.81, 'avg_sentence2_len': 117.8, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 480033, 'avg_sentence1_len': 120.03, 'avg_sentence2_len': 119.99, 'unique_labels': 2, 'labels': {'1': {'count': 903}, '0': {'count': 1097}}}, 'ja': {'num_samples': 2000, 'number_of_characters': 235106, 'avg_sentence1_len': 58.68, 'avg_sentence2_len': 58.88, 'unique_labels': 2, 'labels': {'1': {'count': 883}, '0': {'count': 1117}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 260149, 'avg_sentence1_len': 64.96, 'avg_sentence2_len': 65.11, 
'unique_labels': 2, 'labels': {'1': {'count': 896}, '0': {'count': 1104}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 173012, 'avg_sentence1_len': 43.23, 'avg_sentence2_len': 43.27, 'unique_labels': 2, 'labels': {'1': {'count': 894}, '0': {'count': 1106}}}}}, 'validation': {'num_samples': 14000, 'number_of_characters': 2524625, 'avg_sentence1_len': 90.13, 'avg_sentence2_len': 90.2, 'unique_labels': 2, 'labels': {'1': {'count': 5948}, '0': {'count': 8052}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 467643, 'avg_sentence1_len': 116.82, 'avg_sentence2_len': 117.0, 'unique_labels': 2, 'labels': {'1': {'count': 831}, '0': {'count': 1169}}}, 'en': {'num_samples': 2000, 'number_of_characters': 451931, 'avg_sentence1_len': 113.11, 'avg_sentence2_len': 112.86, 'unique_labels': 2, 'labels': {'1': {'count': 863}, '0': {'count': 1137}}}, 'es': {'num_samples': 2000, 'number_of_characters': 466112, 'avg_sentence1_len': 116.33, 'avg_sentence2_len': 116.73, 'unique_labels': 2, 'labels': {'1': {'count': 847}, '0': {'count': 1153}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 478510, 'avg_sentence1_len': 119.5, 'avg_sentence2_len': 119.75, 'unique_labels': 2, 'labels': {'1': {'count': 860}, '0': {'count': 1140}}}, 'ja': {'num_samples': 2000, 'number_of_characters': 229655, 'avg_sentence1_len': 57.51, 'avg_sentence2_len': 57.32, 'unique_labels': 2, 'labels': {'1': {'count': 854}, '0': {'count': 1146}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 261355, 'avg_sentence1_len': 65.16, 'avg_sentence2_len': 65.52, 'unique_labels': 2, 'labels': {'1': {'count': 840}, '0': {'count': 1160}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 169419, 'avg_sentence1_len': 42.45, 'avg_sentence2_len': 42.26, 'unique_labels': 2, 'labels': {'1': {'count': 853}, '0': {'count': 1147}}}}}} | +| [PawsXPairClassification](https://arxiv.org/abs/1908.11828) (Yinfei Yang, 2019) | ['cmn', 'deu', 'eng', 'fra', 'jpn', 'kor', 'spa'] | 
PairClassification | s2s | [Web, Encyclopaedic, Written] | {'test': 14000, 'validation': 14000} | {'test': {'num_samples': 14000, 'number_of_characters': 2551922, 'min_sentence1_length': 2, 'avg_sentence1_length': 91.18, 'max_sentence1_length': 268, 'unique_sentence1': 13404, 'min_sentence2_length': 2, 'avg_sentence2_length': 91.1, 'max_sentence2_length': 247, 'unique_sentence2': 13462, 'unique_labels': 2, 'labels': {'1': {'count': 6285}, '0': {'count': 7715}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 478034, 'min_sentence1_length': 2, 'avg_sentence1_length': 119.78, 'max_sentence1_length': 268, 'unique_sentence1': 1934, 'min_sentence2_length': 2, 'avg_sentence2_length': 119.24, 'max_sentence2_length': 235, 'unique_sentence2': 1938, 'unique_labels': 2, 'labels': {'1': {'count': 895}, '0': {'count': 1105}}}, 'en': {'num_samples': 2000, 'number_of_characters': 454362, 'min_sentence1_length': 25, 'avg_sentence1_length': 113.76, 'max_sentence1_length': 209, 'unique_sentence1': 1761, 'min_sentence2_length': 25, 'avg_sentence2_length': 113.42, 'max_sentence2_length': 209, 'unique_sentence2': 1800, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'es': {'num_samples': 2000, 'number_of_characters': 471226, 'min_sentence1_length': 2, 'avg_sentence1_length': 117.81, 'max_sentence1_length': 226, 'unique_sentence1': 1955, 'min_sentence2_length': 22, 'avg_sentence2_length': 117.8, 'max_sentence2_length': 233, 'unique_sentence2': 1959, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 480033, 'min_sentence1_length': 2, 'avg_sentence1_length': 120.03, 'max_sentence1_length': 238, 'unique_sentence1': 1954, 'min_sentence2_length': 2, 'avg_sentence2_length': 119.99, 'max_sentence2_length': 247, 'unique_sentence2': 1953, 'unique_labels': 2, 'labels': {'1': {'count': 903}, '0': {'count': 1097}}}, 'ja': {'num_samples': 2000, 
'number_of_characters': 235106, 'min_sentence1_length': 2, 'avg_sentence1_length': 58.68, 'max_sentence1_length': 192, 'unique_sentence1': 1944, 'min_sentence2_length': 2, 'avg_sentence2_length': 58.88, 'max_sentence2_length': 198, 'unique_sentence2': 1941, 'unique_labels': 2, 'labels': {'1': {'count': 883}, '0': {'count': 1117}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 260149, 'min_sentence1_length': 2, 'avg_sentence1_length': 64.96, 'max_sentence1_length': 153, 'unique_sentence1': 1954, 'min_sentence2_length': 2, 'avg_sentence2_length': 65.11, 'max_sentence2_length': 159, 'unique_sentence2': 1969, 'unique_labels': 2, 'labels': {'1': {'count': 896}, '0': {'count': 1104}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 173012, 'min_sentence1_length': 2, 'avg_sentence1_length': 43.23, 'max_sentence1_length': 120, 'unique_sentence1': 1909, 'min_sentence2_length': 2, 'avg_sentence2_length': 43.27, 'max_sentence2_length': 113, 'unique_sentence2': 1909, 'unique_labels': 2, 'labels': {'1': {'count': 894}, '0': {'count': 1106}}}}}, 'validation': {'num_samples': 14000, 'number_of_characters': 2524625, 'min_sentence1_length': 2, 'avg_sentence1_length': 90.13, 'max_sentence1_length': 248, 'unique_sentence1': 13357, 'min_sentence2_length': 2, 'avg_sentence2_length': 90.2, 'max_sentence2_length': 275, 'unique_sentence2': 13397, 'unique_labels': 2, 'labels': {'1': {'count': 5948}, '0': {'count': 8052}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 467643, 'min_sentence1_length': 2, 'avg_sentence1_length': 116.82, 'max_sentence1_length': 248, 'unique_sentence1': 1914, 'min_sentence2_length': 2, 'avg_sentence2_length': 117.0, 'max_sentence2_length': 275, 'unique_sentence2': 1920, 'unique_labels': 2, 'labels': {'1': {'count': 831}, '0': {'count': 1169}}}, 'en': {'num_samples': 2000, 'number_of_characters': 451931, 'min_sentence1_length': 25, 'avg_sentence1_length': 113.11, 'max_sentence1_length': 213, 'unique_sentence1': 
1758, 'min_sentence2_length': 25, 'avg_sentence2_length': 112.86, 'max_sentence2_length': 213, 'unique_sentence2': 1771, 'unique_labels': 2, 'labels': {'1': {'count': 863}, '0': {'count': 1137}}}, 'es': {'num_samples': 2000, 'number_of_characters': 466112, 'min_sentence1_length': 2, 'avg_sentence1_length': 116.33, 'max_sentence1_length': 240, 'unique_sentence1': 1938, 'min_sentence2_length': 2, 'avg_sentence2_length': 116.73, 'max_sentence2_length': 241, 'unique_sentence2': 1941, 'unique_labels': 2, 'labels': {'1': {'count': 847}, '0': {'count': 1153}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 478510, 'min_sentence1_length': 2, 'avg_sentence1_length': 119.5, 'max_sentence1_length': 233, 'unique_sentence1': 1933, 'min_sentence2_length': 2, 'avg_sentence2_length': 119.75, 'max_sentence2_length': 246, 'unique_sentence2': 1939, 'unique_labels': 2, 'labels': {'1': {'count': 860}, '0': {'count': 1140}}}, 'ja': {'num_samples': 2000, 'number_of_characters': 229655, 'min_sentence1_length': 2, 'avg_sentence1_length': 57.51, 'max_sentence1_length': 126, 'unique_sentence1': 1957, 'min_sentence2_length': 2, 'avg_sentence2_length': 57.32, 'max_sentence2_length': 121, 'unique_sentence2': 1969, 'unique_labels': 2, 'labels': {'1': {'count': 854}, '0': {'count': 1146}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 261355, 'min_sentence1_length': 2, 'avg_sentence1_length': 65.16, 'max_sentence1_length': 178, 'unique_sentence1': 1963, 'min_sentence2_length': 2, 'avg_sentence2_length': 65.52, 'max_sentence2_length': 174, 'unique_sentence2': 1968, 'unique_labels': 2, 'labels': {'1': {'count': 840}, '0': {'count': 1160}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 169419, 'min_sentence1_length': 2, 'avg_sentence1_length': 42.45, 'max_sentence1_length': 101, 'unique_sentence1': 1899, 'min_sentence2_length': 2, 'avg_sentence2_length': 42.26, 'max_sentence2_length': 120, 'unique_sentence2': 1895, 'unique_labels': 2, 'labels': {'1': {'count': 853}, '0': 
{'count': 1147}}}}}} | | [PersianFoodSentimentClassification](https://hooshvare.github.io/docs/datasets/sa) (Mehrdad Farahani et al., 2020) | ['fas'] | Classification | s2s | [Reviews, Written] | None | None | | [PersonalJurisdictionLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | -| [PhincBitextMining](https://huggingface.co/datasets/veezbo/phinc) (Srivastava et al., 2020) | ['eng', 'hin'] | BitextMining | s2s | [Social, Written] | None | None | +| [PhincBitextMining](https://huggingface.co/datasets/veezbo/phinc) (Srivastava et al., 2020) | ['eng', 'hin'] | BitextMining | s2s | [Social, Written] | {'train': 13738} | {'train': {'num_samples': 13738, 'number_of_characters': 2069457, 'unique_pairs': 13737, 'min_sentence1_length': 1, 'average_sentence1_length': 74.02, 'max_sentence1_length': 278, 'unique_sentence1': 13515, 'min_sentence2_length': 3, 'average_sentence2_length': 76.61, 'max_sentence2_length': 274, 'unique_sentence2': 13736, 'hf_subset_descriptive_stats': {'eng-eng_hin': {'num_samples': 13738, 'number_of_characters': 2069457, 'unique_pairs': 13737, 'min_sentence1_length': 1, 'average_sentence1_length': 74.02, 'max_sentence1_length': 278, 'unique_sentence1': 13515, 'min_sentence2_length': 3, 'average_sentence2_length': 76.61, 'max_sentence2_length': 274, 'unique_sentence2': 13736}}}} | | [PlscClusteringP2P.v2](https://huggingface.co/datasets/rafalposwiata/plsc) | ['pol'] | Clustering | s2s | [Academic, Written] | None | None | | [PlscClusteringS2S.v2](https://huggingface.co/datasets/rafalposwiata/plsc) | ['pol'] | Clustering | s2s | [Academic, Written] | None | None | | [PoemSentimentClassification](https://arxiv.org/abs/2011.02686) (Emily Sheng, 2020) | ['eng'] | Classification | s2s | [Reviews, Written] | None | None | @@ -423,7 +423,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [RTE3](https://aclanthology.org/W07-1401/) | ['deu', 'eng', 'fra', 'ita'] | PairClassification | s2s | [News, Web, Encyclopaedic, Written] | None | None | | [RUParaPhraserSTS](https://aclanthology.org/2020.ngt-1.6) (Pivovarova et al., 2017) | ['rus'] | STS | s2s | [News, Written] | None | None | | [RedditClustering.v2](https://arxiv.org/abs/2104.07081) (Gregor Geigle, 2021) | ['eng'] | Clustering | s2s | [Web, Social, Written] | None | None | -| [RedditClusteringP2P.v2](https://arxiv.org/abs/2104.07081) (Gregor Geigle, 2021) | ['eng'] | Clustering | p2p | [Web, Social, Written] | None | None | +| [RedditClusteringP2P.v2](https://arxiv.org/abs/2104.07081) (Gregor Geigle, 2021) | ['eng'] | Clustering | p2p | [Web, Social, Written] | {'test': 459389} | {'test': {'num_samples': 459389, 'number_of_characters': 334286895, 'min_text_length': 79, 'average_text_length': 727.68, 'max_text_length': 4359, 'min_labels_per_text': 2, 'average_labels_per_text': 1.0, 'max_labels_per_text': 77908, 'unique_labels': 440, 'labels': {'FortNiteBR': {'count': 436}, 'buildapc': {'count': 8484}, 'offmychest': {'count': 570}, 'nus': {'count': 45}, 'relationship_advice': {'count': 16651}, 'premed': {'count': 201}, 'dogecoin': {'count': 8108}, 'GamingLaptops': {'count': 183}, 'asktransgender': {'count': 326}, 'MachineLearning': {'count': 61}, 'puppy101': {'count': 1597}, 'GunAccessoriesForSale': {'count': 2619}, 'Random_Acts_Of_Amazon': {'count': 1115}, 'Catholicism': {'count': 183}, 'MonsterHunter': {'count': 218}, 'tipofmypenis': {'count': 87}, 'samsung': {'count': 69}, 'PersonalFinanceCanada': {'count': 341}, 'Dyson_Sphere_Program': {'count': 55}, 'bleach': {'count': 41}, 'AmItheAsshole': {'count': 3730}, 'WallStreetbetsELITE': {'count': 328}, 'GlobalPowers': {'count': 35}, 'ABraThatFits': {'count': 159}, 'PokemonGoFriends': {'count': 1165}, 'NoMansSkyTheGame': {'count': 259}, 'masseffect': {'count': 233}, 'dating_advice': {'count': 559}, 'yoga': {'count': 50}, 'depression': {'count': 
515}, 'COVID19positive': {'count': 180}, 'generationology': {'count': 37}, 'feedthebeast': {'count': 192}, 'EliteDangerous': {'count': 270}, 'alcoholicsanonymous': {'count': 93}, 'GoRVing': {'count': 35}, 'thedivision': {'count': 111}, 'breakingmom': {'count': 105}, 'AskAnAmerican': {'count': 80}, 'HypnoFair': {'count': 5}, 'JustUnsubbed': {'count': 13}, 'socialanxiety': {'count': 123}, 'dirtykikpals': {'count': 202}, 'askTO': {'count': 126}, 'AskCulinary': {'count': 108}, 'Bogleheads': {'count': 71}, 'dragonquest': {'count': 45}, 'NoContract': {'count': 30}, 'gorillaz': {'count': 14}, 'MondoGore': {'count': 8}, 'comicswap': {'count': 56}, 'VirtualYoutubers': {'count': 92}, 'Gta5Modding': {'count': 28}, 'obs': {'count': 61}, 'vcu': {'count': 9}, 'KingkillerChronicle': {'count': 17}, 'AmongUs': {'count': 41}, 'wireshark': {'count': 3}, 'Dodocodes': {'count': 46}, 'Aliexpress': {'count': 40}, 'LearnerDriverUK': {'count': 12}, 'PanicAttack': {'count': 23}, 'KassadinMains': {'count': 10}, 'islam': {'count': 93}, 'chronotrigger': {'count': 4}, 'skincareexchange': {'count': 13}, 'PokemonHome': {'count': 21}, 'survivinginfidelity': {'count': 71}, 'igcse': {'count': 21}, 'C25K': {'count': 21}, 'aorus': {'count': 2}, 'idleon': {'count': 19}, 'photography': {'count': 22}, 'cryptocoins': {'count': 7}, 'CanaryWharfBets': {'count': 7}, 'KillingEve': {'count': 7}, 'GameBuilderGarage': {'count': 16}, 'SauceSharingCommunity': {'count': 7}, 'turo': {'count': 9}, 'foodscience': {'count': 14}, 'HIMYM': {'count': 20}, 'HauntingOfHillHouse': {'count': 4}, 'GoodNotes': {'count': 8}, 'RedditWritesSeinfeld': {'count': 6}, 'AirReps': {'count': 2}, 'ADHD': {'count': 3811}, 'BuddyCrossing': {'count': 446}, 'libraryofruina': {'count': 98}, 'SluttyConfessions': {'count': 2787}, 'tipofmytongue': {'count': 7145}, 'fleshlight': {'count': 128}, 'amcstock': {'count': 13910}, 'teenagers': {'count': 77908}, 'suggestmeabook': {'count': 1540}, 'dirtypenpals': {'count': 5587}, 'MinecraftServer': 
{'count': 177}, 'CreditCards': {'count': 669}, 'Guitar': {'count': 10952}, 'rpg': {'count': 529}, 'NoFap': {'count': 14853}, 'lfg': {'count': 1093}, 'MarsWallStreet': {'count': 935}, 'SummonSign': {'count': 931}, 'AssassinsCreedValhala': {'count': 295}, 'hoi4': {'count': 432}, 'Coins4Sale': {'count': 260}, 'xbox': {'count': 459}, 'TooAfraidToAsk': {'count': 7404}, 'NBA2k': {'count': 553}, 'KGBTR': {'count': 943}, 'roblox': {'count': 220}, 'salesforce': {'count': 214}, 'TwoXChromosomes': {'count': 1736}, 'mechmarket': {'count': 4863}, 'Gaming_Headsets': {'count': 103}, 'pittsburgh': {'count': 189}, 'CryptoMars': {'count': 1606}, 'FridayNightFunkin': {'count': 378}, 'vaginismus': {'count': 122}, 'transpositive': {'count': 10}, 'comicbooks': {'count': 274}, 'BDSMcommunity': {'count': 185}, 'aliens': {'count': 201}, 'Scotch': {'count': 64}, 'KikRoleplay': {'count': 141}, 'Kayaking': {'count': 91}, '196': {'count': 47}, 'digimon': {'count': 140}, 'Evernote': {'count': 42}, 'logh': {'count': 22}, 'arlington': {'count': 15}, 'Adopted': {'count': 8}, 'DissonautUniverse': {'count': 4}, 'Midsommar': {'count': 12}, 'SofiawithanF': {'count': 83}, 'xmpp': {'count': 6}, 'ZombsRoyale': {'count': 16}, 'accesscontrol': {'count': 8}, 'WetlanderHumor': {'count': 2}, 'PoonamPandeyFanatics': {'count': 2}, 'screenplaychallenge': {'count': 2}, 'scatstories': {'count': 2}, 'techsupport': {'count': 290}, 'whatcarshouldIbuy': {'count': 79}, 'Stormlight_Archive': {'count': 15}, 'deadbydaylight': {'count': 126}, 'bicycling': {'count': 27}, 'oculus': {'count': 64}, 'Cartalk': {'count': 33}, 'Sims4': {'count': 43}, 'NoFeeAC': {'count': 95}, 'Crypto_com': {'count': 37}, 'ITCareerQuestions': {'count': 259}, 'aromantic': {'count': 18}, 'Revu': {'count': 3}, 'exalted': {'count': 2}, 'HilariaBaldwin': {'count': 20}, 'Testosterone': {'count': 35}, 'Screenwriting': {'count': 170}, 'LifeProTips': {'count': 49}, 'steinsgate': {'count': 13}, 'Baystreetbets': {'count': 10}, 'AskGirls': {'count': 7}, 
'idlechampions': {'count': 7}, 'facebook': {'count': 17}, 'tf2trade': {'count': 4}, 'mfdoom': {'count': 3}, 'FiddlesticksMains': {'count': 2}, 'HFY': {'count': 10}, 'FiestaST': {'count': 2}, 'whatsthatbook': {'count': 994}, 'GearsOfWar': {'count': 879}, 'KazuhaMains': {'count': 175}, 'RepTime': {'count': 211}, 'AstroGaming': {'count': 141}, 'metalgearsolid': {'count': 152}, 'qBittorrent': {'count': 39}, 'ELLIPAL_Official': {'count': 24}, 'raisedbynarcissists': {'count': 4895}, 'unpopularopinion': {'count': 14901}, 'ACTrade': {'count': 5679}, 'askcarsales': {'count': 1339}, 'AskVet': {'count': 1357}, 'whowouldwin': {'count': 4493}, 'playstation': {'count': 1362}, 'anime': {'count': 6531}, 'GME': {'count': 12577}, 'DotA2': {'count': 2004}, 'cryptostreetbets': {'count': 2241}, 'MonsterHunterWorld': {'count': 698}, 'Market76': {'count': 14274}, 'DnD': {'count': 5092}, 'leagueoflegends': {'count': 3683}, 'doordash_drivers': {'count': 1626}, 'theta_network': {'count': 489}, 'exmuslim': {'count': 1369}, 'gonewildaudio': {'count': 2998}, 'conspiracy': {'count': 3587}, 'heroesofthestorm': {'count': 535}, 'FanFiction': {'count': 2782}, 'Doom': {'count': 1251}, 'texas': {'count': 269}, 'Vent': {'count': 1738}, 'selfimprovement': {'count': 1284}, 'youtubers': {'count': 706}, 'askseddit': {'count': 237}, 'boardgames': {'count': 1237}, 'bravelydefault': {'count': 347}, 'ConquerorsBlade': {'count': 238}, 'ChronicPain': {'count': 527}, 'teenagersnew': {'count': 256}, 'brasil': {'count': 1092}, 'MatthiasSubmissions': {'count': 921}, 'MarylandUnemployment': {'count': 314}, 'SaltLakeCity': {'count': 411}, 'BokunoheroFanfiction': {'count': 155}, 'BenignExistence': {'count': 125}, 'GayYoungOldDating': {'count': 156}, 'Bible': {'count': 202}, 'haskell': {'count': 154}, 'seduction': {'count': 400}, 'fantasywriters': {'count': 262}, 'HiveOS': {'count': 100}, 'PerkByDaylight': {'count': 15}, 'Hedgehog': {'count': 73}, 'xmen': {'count': 263}, 'HyperRP': {'count': 122}, 'emotestories': 
{'count': 3}, 'tutanota': {'count': 135}, 'CultoftheFranklin': {'count': 46}, 'langrisser': {'count': 62}, 'CozyGrove': {'count': 61}, 'Sverigesforsvarsmakt': {'count': 12}, 'silverbugbets': {'count': 21}, 'WreckingBallMains': {'count': 5}, 'capitalism_in_decay': {'count': 8}, 'paintdotnet': {'count': 11}, 'u_mawadom118': {'count': 4}, 'xboxfindfriends': {'count': 2}, 'CPTSD': {'count': 540}, 'destiny2': {'count': 318}, 'Wallstreetsilver': {'count': 1013}, 'DestinyTheGame': {'count': 1107}, 'blackopscoldwar': {'count': 400}, 'InstacartShoppers': {'count': 202}, 'RocketLeagueExchange': {'count': 832}, 'apexlegends': {'count': 3265}, 'kansascity': {'count': 53}, 'namenerds': {'count': 235}, 'help': {'count': 152}, 'Kengan_Ashura': {'count': 132}, 'thetagang': {'count': 165}, 'GameSale': {'count': 262}, 'Reduction': {'count': 109}, 'sex': {'count': 906}, 'bostonr4r': {'count': 75}, 'LegendsOfRuneterra': {'count': 231}, 'overlord': {'count': 48}, 'madisonwi': {'count': 53}, 'steelseries': {'count': 79}, 'ClashOfClansRecruit': {'count': 214}, 'CharacterRant': {'count': 55}, 'AirForce': {'count': 94}, 'sexstories': {'count': 92}, 'NameThatSong': {'count': 162}, 'depressed': {'count': 74}, 'ibs': {'count': 150}, '40kLore': {'count': 269}, 'podcasts': {'count': 88}, 'miraculousladybug': {'count': 150}, 'ask': {'count': 224}, 'EverMerge': {'count': 31}, 'TMJ': {'count': 54}, 'BitLifeApp': {'count': 39}, 'FireEmblemHeroes': {'count': 100}, 'software': {'count': 62}, 'ShieldAndroidTV': {'count': 70}, 'GriefSupport': {'count': 125}, 'onewheel': {'count': 37}, 'MensRights': {'count': 80}, 'nhl': {'count': 22}, 'ClashOfClans': {'count': 107}, 'ps3homebrew': {'count': 33}, 'LightNovels': {'count': 77}, 'redsox': {'count': 34}, 'CryptoMarkets': {'count': 44}, 'ugly': {'count': 47}, 'GCXRep': {'count': 12}, 'cscareerquestionsEU': {'count': 65}, 'MindHunter': {'count': 6}, 'starcraft2coop': {'count': 15}, 'nanocurrency': {'count': 1421}, 'ModelCars': {'count': 8}, 'UKJobs': 
{'count': 30}, 'Netherlands': {'count': 44}, 'clonewars': {'count': 8}, 'Julia': {'count': 11}, 'Prolactinoma': {'count': 9}, 'sofi': {'count': 11}, 'royalfamily': {'count': 6}, 'ConnecticutR4R': {'count': 8}, 'weather': {'count': 5}, 'oneui': {'count': 7}, 'KTM': {'count': 5}, 'Aerials': {'count': 3}, 'seoul': {'count': 2}, 'exjw': {'count': 3281}, 'ModernMagic': {'count': 699}, 'Paladins': {'count': 1242}, 'kdramarecommends': {'count': 1611}, 'hitbtc': {'count': 330}, 'endocrinology': {'count': 75}, 'Bath': {'count': 43}, 'NassauCountyHookups': {'count': 5}, 'feminineboys': {'count': 1248}, 'dreamsmp': {'count': 2018}, 'SquaredCircle': {'count': 2255}, 'Minecraft': {'count': 8753}, 'spirituality': {'count': 1809}, 'Eldenring': {'count': 1471}, 'Sat': {'count': 1172}, 'bonnaroo': {'count': 194}, 'gardening': {'count': 1892}, 'Unemployment': {'count': 6185}, 'mac': {'count': 1847}, 'Bestbuy': {'count': 437}, 'quittingkratom': {'count': 1081}, 'lawschooladmissions': {'count': 3436}, 'NiceHash': {'count': 2135}, 'McMaster': {'count': 815}, 'covidlonghaulers': {'count': 1299}, 'stalker': {'count': 758}, 'MLBTheShow': {'count': 2721}, 'FortniteCompetitive': {'count': 998}, 'dpdr': {'count': 514}, 'appliancerepair': {'count': 720}, 'thomasthetankengine': {'count': 207}, 'delhi': {'count': 217}, 'Huel': {'count': 300}, 'leafs': {'count': 203}, 'HotWheels': {'count': 170}, '90dayfianceuncensored': {'count': 550}, 'Throwers': {'count': 142}, 'Wavyhair': {'count': 270}, 'CryptoHorde': {'count': 128}, 'ShuumatsuNoValkyrie': {'count': 453}, 'TeensMeetTeens': {'count': 432}, 'dbrand': {'count': 108}, 'SLFmeetups': {'count': 18}, '1200isplentyketo': {'count': 48}, 'passive_income': {'count': 211}, 'BroadCity': {'count': 16}, 'RevenantMain': {'count': 71}, 'extrarfl': {'count': 25}, 'AgonGame': {'count': 5}, 'FitnessDE': {'count': 3}, 'gaming': {'count': 1277}, 'livesound': {'count': 91}, 'IBO': {'count': 1896}, 'EscapefromTarkov': {'count': 1300}, 'amex': {'count': 145}, 
'DMAcademy': {'count': 1411}, 'VinylCollectors': {'count': 556}, 'cardano': {'count': 716}, 'brave_browser': {'count': 159}, 'dating': {'count': 952}, 'OculusQuest': {'count': 942}, 'Superstonk': {'count': 3089}, 'MtF': {'count': 957}, 'findaleague': {'count': 207}, 'Nioh': {'count': 398}, 'IRS': {'count': 715}, 'transgendercirclejerk': {'count': 353}, 'learnmath': {'count': 489}, 'piano': {'count': 263}, 'LeagueConnect': {'count': 216}, 'eu4': {'count': 561}, 'Wordpress': {'count': 345}, 'RoleplayingForReddit': {'count': 31}, 'LOONA': {'count': 89}, 'newtothenavy': {'count': 167}, 'HaircareScience': {'count': 118}, 'appletv': {'count': 167}, 'sissypersonals': {'count': 102}, 'raleigh': {'count': 168}, 'realonlyfansreviews': {'count': 21}, 'AskGames': {'count': 49}, 'PokemonTCG': {'count': 325}, 'controlgame': {'count': 109}, 'GoogleDataStudio': {'count': 16}, 'WhiteWolfRPG': {'count': 139}, 'MECoOp': {'count': 31}, 'snuffrp': {'count': 46}, 'lockpicking': {'count': 103}, 'wicked_edge': {'count': 105}, 'BMW': {'count': 99}, 'choiceofgames': {'count': 24}, 'hisdarkmaterials': {'count': 12}, 'SakuraGakuin': {'count': 24}, 'detrans': {'count': 55}, 'Smallville': {'count': 37}, 'kingofqueens': {'count': 7}, 'JamesHoffmann': {'count': 22}, 'stashinvest': {'count': 16}, 'ABA': {'count': 79}, 'ladybusiness': {'count': 10}, 'gamegrumps': {'count': 32}, 'GodEater': {'count': 21}, 'tomorrow': {'count': 39}, 'Tomorrowland': {'count': 9}, 'BlackCountryNewRoad': {'count': 5}, 'STAYC': {'count': 3}, 'SatoshiStreetBets': {'count': 3828}, 'AskLosAngeles': {'count': 1036}, 'buildapcforme': {'count': 1689}, 'ApplyingToCollege': {'count': 10675}, 'watercooling': {'count': 1209}, 'BreakUps': {'count': 4914}, 'FIFA': {'count': 3811}, 'emacs': {'count': 712}, 'trakstocks': {'count': 691}, 'Shittyaskflying': {'count': 147}, 'AmazonFC': {'count': 1178}, 'stocks': {'count': 4610}, 'BangaloreMains': {'count': 26}, 'pokemon': {'count': 3953}, 'religion': {'count': 684}, 'cuboulder': 
{'count': 269}, 'self': {'count': 1688}, 'tarot': {'count': 912}, 'turtles': {'count': 49}, 'TheMagnusArchives': {'count': 300}, 'Superhero_Ideas': {'count': 34}, 'NTU': {'count': 308}, 'touhou': {'count': 623}, 'JoJolion': {'count': 50}, 'lasers': {'count': 27}, 'popperpigs': {'count': 67}, 'aggretsuko': {'count': 20}, 'Library': {'count': 5}}}} | | [RestaurantReviewSentimentClassification](https://link.springer.com/chapter/10.1007/978-3-319-18117-2_2) (ElSahar et al., 2015) | ['ara'] | Classification | s2s | [Reviews, Written] | None | None | | [RiaNewsRetrieval](https://arxiv.org/abs/1901.07786) (Gavrilov et al., 2019) | ['rus'] | Retrieval | s2p | [News, Written] | None | None | | [RiaNewsRetrievalHardNegatives](https://arxiv.org/abs/1901.07786) (Gavrilov et al., 2019) | ['rus'] | Retrieval | s2p | [News, Written] | None | None | @@ -438,7 +438,7 @@ The following tables give you an overview of the tasks in MTEB. | [RuReviewsClassification](https://github.com/sismetanin/rureviews) (Sergey Smetanin, 2019) | ['rus'] | Classification | p2p | [Reviews, Written] | None | None | | [RuSTSBenchmarkSTS](https://github.com/PhilipMay/stsb-multi-mt/) (Philip May, 2021) | ['rus'] | STS | s2s | [News, Social, Web, Written] | None | None | | [RuSciBenchGRNTIClassification](https://github.com/mlsa-iai-msu-lab/ru_sci_bench/) | ['rus'] | Classification | p2p | [Academic, Written] | None | None | -| [RuSciBenchGRNTIClusteringP2P](https://github.com/mlsa-iai-msu-lab/ru_sci_bench/) | ['rus'] | Clustering | p2p | [Academic, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 1822339, 'average_text_length': 889.81, 'average_labels_per_text': 1.0, 'unique_labels': 28, 'labels': {'3': {'count': 73}, '4': {'count': 73}, '20': {'count': 73}, '9': {'count': 73}, '21': {'count': 73}, '15': {'count': 73}, '16': {'count': 74}, '2': {'count': 73}, '8': {'count': 73}, '23': {'count': 73}, '6': {'count': 73}, '24': {'count': 73}, '10': {'count': 73}, '1': {'count': 
73}, '17': {'count': 74}, '14': {'count': 74}, '18': {'count': 73}, '27': {'count': 73}, '19': {'count': 73}, '22': {'count': 73}, '12': {'count': 73}, '25': {'count': 73}, '5': {'count': 74}, '0': {'count': 73}, '26': {'count': 73}, '11': {'count': 73}, '13': {'count': 73}, '7': {'count': 73}}}} | +| [RuSciBenchGRNTIClusteringP2P](https://github.com/mlsa-iai-msu-lab/ru_sci_bench/) | ['rus'] | Clustering | p2p | [Academic, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 1822339, 'min_text_length': 84, 'average_text_length': 889.81, 'max_text_length': 3143, 'min_labels_per_text': 73, 'average_labels_per_text': 1.0, 'max_labels_per_text': 74, 'unique_labels': 28, 'labels': {'3': {'count': 73}, '4': {'count': 73}, '20': {'count': 73}, '9': {'count': 73}, '21': {'count': 73}, '15': {'count': 73}, '16': {'count': 74}, '2': {'count': 73}, '8': {'count': 73}, '23': {'count': 73}, '6': {'count': 73}, '24': {'count': 73}, '10': {'count': 73}, '1': {'count': 73}, '17': {'count': 74}, '14': {'count': 74}, '18': {'count': 73}, '27': {'count': 73}, '19': {'count': 73}, '22': {'count': 73}, '12': {'count': 73}, '25': {'count': 73}, '5': {'count': 74}, '0': {'count': 73}, '26': {'count': 73}, '11': {'count': 73}, '13': {'count': 73}, '7': {'count': 73}}}} | | [RuSciBenchOECDClassification](https://github.com/mlsa-iai-msu-lab/ru_sci_bench/) | ['rus'] | Classification | p2p | [Academic, Written] | None | None | | [RuSciBenchOECDClusteringP2P](https://github.com/mlsa-iai-msu-lab/ru_sci_bench/) | ['rus'] | Clustering | p2p | [Academic, Written] | None | None | | [SCDBPAccountabilityLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | @@ -467,12 +467,12 @@ The following tables give you an overview of the tasks in MTEB. 
| [SNLHierarchicalClusteringS2S](https://huggingface.co/datasets/navjordj/SNL_summarization) (Navjord et al., 2023) | ['nob'] | Clustering | s2s | [Encyclopaedic, Non-fiction, Written] | None | None | | [SNLRetrieval](https://huggingface.co/datasets/navjordj/SNL_summarization) (Navjord et al., 2023) | ['nob'] | Retrieval | p2p | [Encyclopaedic, Non-fiction, Written] | None | None | | [SRNCorpusBitextMining](https://arxiv.org/abs/2212.06383) (Zwennicker et al., 2022) | ['nld', 'srn'] | BitextMining | s2s | [Social, Web, Written] | None | None | -| [STS12](https://www.aclweb.org/anthology/S12-1051.pdf) (Agirre et al., 2012) | ['eng'] | STS | s2s | [Encyclopaedic, News, Written] | {'test': 3108} | {'test': {'num_samples': 3108, 'number_of_characters': 402118, 'average_sentence1_len': 63.79, 'average_sentence2_len': 65.59, 'avg_score': 3.51}} | +| [STS12](https://www.aclweb.org/anthology/S12-1051.pdf) (Agirre et al., 2012) | ['eng'] | STS | s2s | [Encyclopaedic, News, Written] | {'test': 3108} | {'test': {'num_samples': 3108, 'number_of_characters': 402118, 'min_sentence1_length': 3, 'average_sentence1_len': 63.79, 'max_sentence1_length': 220, 'unique_sentence1': 2236, 'min_sentence2_length': 7, 'average_sentence2_len': 65.59, 'max_sentence2_length': 204, 'unique_sentence2': 2797, 'min_score': 0.0, 'avg_score': 3.51, 'max_score': 5.0}} | | [STS13](https://www.aclweb.org/anthology/S13-1004/) (Eneko Agirre, 2013) | ['eng'] | STS | s2s | [Web, News, Non-fiction, Written] | None | None | | [STS14](https://www.aclweb.org/anthology/S14-1002) | ['eng'] | STS | s2s | [Blog, Web, Spoken] | None | None | | [STS15](https://www.aclweb.org/anthology/S15-2010) | ['eng'] | STS | s2s | [Blog, News, Web, Written, Spoken] | None | None | | [STS16](https://www.aclweb.org/anthology/S16-1001) | ['eng'] | STS | s2s | [Blog, Web, Spoken] | None | None | -| [STS17](https://alt.qcri.org/semeval2017/task1/) | ['ara', 'deu', 'eng', 'fra', 'ita', 'kor', 'nld', 'spa', 'tur'] | STS | s2s | [News, 
Web, Written] | {'test': 5346} | {'test': {'num_samples': 5346, 'number_of_characters': 400264, 'average_sentence1_len': 38.15, 'average_sentence2_len': 36.73, 'avg_score': 2.36, 'hf_subset_descriptive_stats': {'ko-ko': {'num_samples': 2846, 'number_of_characters': 183387, 'average_sentence1_len': 31.99, 'average_sentence2_len': 32.44, 'avg_score': 2.47}, 'ar-ar': {'num_samples': 250, 'number_of_characters': 16247, 'average_sentence1_len': 32.21, 'average_sentence2_len': 32.78, 'avg_score': 2.22}, 'en-ar': {'num_samples': 250, 'number_of_characters': 18764, 'average_sentence1_len': 42.36, 'average_sentence2_len': 32.7, 'avg_score': 2.14}, 'en-de': {'num_samples': 250, 'number_of_characters': 22177, 'average_sentence1_len': 43.95, 'average_sentence2_len': 44.76, 'avg_score': 2.28}, 'en-en': {'num_samples': 250, 'number_of_characters': 21669, 'average_sentence1_len': 43.95, 'average_sentence2_len': 42.72, 'avg_score': 2.28}, 'en-tr': {'num_samples': 250, 'number_of_characters': 20879, 'average_sentence1_len': 41.92, 'average_sentence2_len': 41.6, 'avg_score': 2.13}, 'es-en': {'num_samples': 250, 'number_of_characters': 23216, 'average_sentence1_len': 50.84, 'average_sentence2_len': 42.02, 'avg_score': 2.15}, 'es-es': {'num_samples': 250, 'number_of_characters': 25265, 'average_sentence1_len': 49.84, 'average_sentence2_len': 51.22, 'avg_score': 2.23}, 'fr-en': {'num_samples': 250, 'number_of_characters': 23087, 'average_sentence1_len': 49.62, 'average_sentence2_len': 42.72, 'avg_score': 2.28}, 'it-en': {'num_samples': 250, 'number_of_characters': 23188, 'average_sentence1_len': 50.03, 'average_sentence2_len': 42.72, 'avg_score': 2.28}, 'nl-en': {'num_samples': 250, 'number_of_characters': 22385, 'average_sentence1_len': 46.82, 'average_sentence2_len': 42.72, 'avg_score': 2.28}}}} | +| [STS17](https://alt.qcri.org/semeval2017/task1/) | ['ara', 'deu', 'eng', 'fra', 'ita', 'kor', 'nld', 'spa', 'tur'] | STS | s2s | [News, Web, Written] | {'test': 5346} | {'test': 
{'num_samples': 5346, 'number_of_characters': 400264, 'min_sentence1_length': 6, 'average_sentence1_len': 38.15, 'max_sentence1_length': 976, 'unique_sentence1': 4900, 'min_sentence2_length': 6, 'average_sentence2_len': 36.73, 'max_sentence2_length': 1007, 'unique_sentence2': 4470, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0, 'hf_subset_descriptive_stats': {'ko-ko': {'num_samples': 2846, 'number_of_characters': 183387, 'min_sentence1_length': 6, 'average_sentence1_len': 31.99, 'max_sentence1_length': 976, 'unique_sentence1': 2650, 'min_sentence2_length': 6, 'average_sentence2_len': 32.44, 'max_sentence2_length': 1007, 'unique_sentence2': 2720, 'min_score': 0.0, 'avg_score': 2.47, 'max_score': 5.0}, 'ar-ar': {'num_samples': 250, 'number_of_characters': 16247, 'min_sentence1_length': 11, 'average_sentence1_len': 32.21, 'max_sentence1_length': 99, 'unique_sentence1': 250, 'min_sentence2_length': 9, 'average_sentence2_len': 32.78, 'max_sentence2_length': 83, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.22, 'max_score': 5.0}, 'en-ar': {'num_samples': 250, 'number_of_characters': 18764, 'min_sentence1_length': 13, 'average_sentence1_len': 42.36, 'max_sentence1_length': 105, 'unique_sentence1': 250, 'min_sentence2_length': 10, 'average_sentence2_len': 32.7, 'max_sentence2_length': 104, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.14, 'max_score': 5.0}, 'en-de': {'num_samples': 250, 'number_of_characters': 22177, 'min_sentence1_length': 12, 'average_sentence1_len': 43.95, 'max_sentence1_length': 94, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 44.76, 'max_sentence2_length': 104, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'en-en': {'num_samples': 250, 'number_of_characters': 21669, 'min_sentence1_length': 12, 'average_sentence1_len': 43.95, 'max_sentence1_length': 94, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 
'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'en-tr': {'num_samples': 250, 'number_of_characters': 20879, 'min_sentence1_length': 15, 'average_sentence1_len': 41.92, 'max_sentence1_length': 101, 'unique_sentence1': 250, 'min_sentence2_length': 10, 'average_sentence2_len': 41.6, 'max_sentence2_length': 107, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.13, 'max_score': 5.0}, 'es-en': {'num_samples': 250, 'number_of_characters': 23216, 'min_sentence1_length': 12, 'average_sentence1_len': 50.84, 'max_sentence1_length': 160, 'unique_sentence1': 250, 'min_sentence2_length': 14, 'average_sentence2_len': 42.02, 'max_sentence2_length': 117, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.15, 'max_score': 5.0}, 'es-es': {'num_samples': 250, 'number_of_characters': 25265, 'min_sentence1_length': 18, 'average_sentence1_len': 49.84, 'max_sentence1_length': 136, 'unique_sentence1': 250, 'min_sentence2_length': 13, 'average_sentence2_len': 51.22, 'max_sentence2_length': 129, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.23, 'max_score': 5.0}, 'fr-en': {'num_samples': 250, 'number_of_characters': 23087, 'min_sentence1_length': 19, 'average_sentence1_len': 49.62, 'max_sentence1_length': 115, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'it-en': {'num_samples': 250, 'number_of_characters': 23188, 'min_sentence1_length': 15, 'average_sentence1_len': 50.03, 'max_sentence1_length': 113, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'nl-en': {'num_samples': 250, 'number_of_characters': 22385, 'min_sentence1_length': 14, 'average_sentence1_len': 46.82, 'max_sentence1_length': 123, 
'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}}}} | | [STS22.v2](https://competitions.codalab.org/competitions/33835) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'pol', 'rus', 'spa', 'tur'] | STS | p2p | [News, Written] | None | None | | [STSB](https://aclanthology.org/2021.emnlp-main.357) (Shitao Xiao, 2024) | ['cmn'] | STS | s2s | | None | None | | [STSBenchmark](https://github.com/PhilipMay/stsb-multi-mt/) (Philip May, 2021) | ['eng'] | STS | s2s | | None | None | @@ -490,7 +490,7 @@ The following tables give you an overview of the tasks in MTEB. | [SinhalaNewsClassification](https://huggingface.co/datasets/NLPC-UOM/Sinhala-News-Category-classification) (Nisansa de Silva, 2015) | ['sin'] | Classification | s2s | [News, Written] | None | None | | [SinhalaNewsSourceClassification](https://huggingface.co/datasets/NLPC-UOM/Sinhala-News-Source-classification) (Dhananjaya et al., 2022) | ['sin'] | Classification | s2s | [News, Written] | None | None | | [SiswatiNewsClassification](https://huggingface.co/datasets/dsfsi/za-isizulu-siswati-news) (Madodonga et al., 2023) | ['ssw'] | Classification | s2s | [News, Written] | None | None | -| [SlovakHateSpeechClassification](https://huggingface.co/datasets/TUKE-KEMT/hate_speech_slovak) | ['slk'] | Classification | s2s | [Social, Written] | {'test': 1319} | {'test': {'num_samples': 1319, 'number_of_characters': 122279, 'average_text_length': 92.71, 'unique_labels': 2, 'labels': {'1': {'count': 360}, '0': {'count': 959}}}} | +| [SlovakHateSpeechClassification](https://huggingface.co/datasets/TUKE-KEMT/hate_speech_slovak) | ['slk'] | Classification | s2s | [Social, Written] | {'test': 1319, 'train': 11870} | {'test': {'num_samples': 1319, 'number_of_characters': 122279, 'num_texts_in_train': 46, 'min_text_length': 8, 'average_text_length': 92.71, 'max_text_length': 1584, 
'unique_text': 1315, 'unique_labels': 2, 'labels': {'1': {'count': 360}, '0': {'count': 959}}}, 'train': {'num_samples': 11870, 'number_of_characters': 1130860, 'num_texts_in_train': None, 'min_text_length': 7, 'average_text_length': 95.27, 'max_text_length': 2112, 'unique_text': 11655, 'unique_labels': 2, 'labels': {'1': {'count': 3245}, '0': {'count': 8625}}}} | | [SlovakMovieReviewSentimentClassification](https://arxiv.org/pdf/2304.01922) ({ {S, 2023) | ['svk'] | Classification | s2s | [Reviews, Written] | None | None | | [SlovakSumRetrieval](https://huggingface.co/datasets/NaiveNeuron/slovaksum) | ['slk'] | Retrieval | s2s | [News, Social, Web, Written] | None | None | | [SouthAfricanLangClassification](https://www.kaggle.com/competitions/south-african-language-identification/) (ExploreAI Academy et al., 2022) | ['afr', 'eng', 'nbl', 'nso', 'sot', 'ssw', 'tsn', 'tso', 'ven', 'xho', 'zul'] | Classification | s2s | [Web, Non-fiction, Written] | None | None | @@ -504,7 +504,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [StackExchangeClustering.v2](https://arxiv.org/abs/2104.07081) (Gregor Geigle, 2021) | ['eng'] | Clustering | s2s | [Web, Written] | None | None | | [StackExchangeClusteringP2P.v2](https://arxiv.org/abs/2104.07081) (Gregor Geigle, 2021) | ['eng'] | Clustering | p2p | [Web, Written] | None | None | | [StackOverflowDupQuestions](https://www.microsoft.com/en-us/research/uploads/prod/2019/03/nl4se18LinkSO.pdf) (Xueqing Liu, 2018) | ['eng'] | Reranking | s2s | | None | None | -| [StackOverflowQA](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 21925} | {'test': {'number_of_characters': 2506.11, 'num_samples': 21925, 'num_queries': 1994, 'num_documents': 19931, 'average_document_length': 0.06, 'average_query_length': 0.65, 'average_relevant_docs_per_query': 1.0}} | +| [StackOverflowQA](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 21925} | {'test': {'number_of_characters': 26584028, 'num_samples': 21925, 'num_queries': 1994, 'num_documents': 19931, 'min_document_length': 61, 'average_document_length': 130.32, 'max_document_length': 22234, 'unique_documents': 19931, 'min_query_length': 5, 'average_query_length': 12029.38, 'max_query_length': 46028, 'unique_queries': 1994, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1994}} | | [StatcanDialogueDatasetRetrieval](https://mcgill-nlp.github.io/statcan-dialogue-dataset/) | ['eng', 'fra'] | Retrieval | s2p | [Government, Web, Written] | None | None | | [SummEvalFrSummarization.v2](https://github.com/Yale-LILY/SummEval) (Fabbri et al., 2020) | ['fra'] | Summarization | p2p | [News, Written] | None | None | | [SummEvalSummarization.v2](https://github.com/Yale-LILY/SummEval) (Fabbri et al., 2020) | ['eng'] | Summarization | p2p | [News, Written] | None | None | @@ -518,7 +518,7 @@ The following tables 
give you an overview of the tasks in MTEB. | [SwissJudgementClassification](https://aclanthology.org/2021.nllp-1.3/) (Joel Niklaus, 2022) | ['deu', 'fra', 'ita'] | Classification | s2s | [Legal, Written] | None | None | | [SyntecReranking](https://huggingface.co/datasets/lyon-nlp/mteb-fr-reranking-syntec-s2p) (Mathieu Ciancone, 2024) | ['fra'] | Reranking | s2p | [Legal, Written] | None | None | | [SyntecRetrieval](https://huggingface.co/datasets/lyon-nlp/mteb-fr-retrieval-syntec-s2p) (Mathieu Ciancone, 2024) | ['fra'] | Retrieval | s2p | [Legal, Written] | None | None | -| [SyntheticText2SQL](https://huggingface.co/datasets/gretelai/synthetic_text_to_sql) (Meyer et al., 2024) | ['eng', 'sql'] | Retrieval | p2p | [Programming, Written] | {'test': 111702} | {'test': {'number_of_characters': 210.98, 'num_samples': 111702, 'num_queries': 5851, 'num_documents': 105851, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0}} | +| [SyntheticText2SQL](https://huggingface.co/datasets/gretelai/synthetic_text_to_sql) (Meyer et al., 2024) | ['eng', 'sql'] | Retrieval | p2p | [Programming, Written] | {'test': 111702} | {'test': {'number_of_characters': 14041553, 'num_samples': 111702, 'num_queries': 5851, 'num_documents': 105851, 'min_document_length': 13, 'average_document_length': 4.58, 'max_document_length': 281, 'unique_documents': 105851, 'min_query_length': 17, 'average_query_length': 2316.95, 'max_query_length': 762, 'unique_queries': 5851, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 5851}} | | [T2Reranking](https://arxiv.org/abs/2304.03679) (Xiaohui Xie, 2023) | ['cmn'] | Reranking | s2s | | None | None | | [T2Retrieval](https://arxiv.org/abs/2304.03679) (Xiaohui Xie, 2023) | ['cmn'] | Retrieval | s2p | | None | None | | [TERRa](https://arxiv.org/pdf/2010.15925) (Shavrina et al., 2020) | ['rus'] | PairClassification | s2s | [News, Web, 
Written] | None | None | @@ -528,7 +528,7 @@ The following tables give you an overview of the tasks in MTEB. | [TV2Nordretrieval](https://huggingface.co/datasets/alexandrainst/nordjylland-news-summarization) | ['dan'] | Retrieval | p2p | [News, Non-fiction, Written] | None | None | | [TamilNewsClassification](https://github.com/vanangamudi/tamil-news-classification) (Anoop Kunchukuttan, 2020) | ['tam'] | Classification | s2s | [News, Written] | None | None | | [Tatoeba](https://github.com/facebookresearch/LASER/tree/main/data/tatoeba/v1) (Tatoeba community, 2021) | ['afr', 'amh', 'ang', 'ara', 'arq', 'arz', 'ast', 'awa', 'aze', 'bel', 'ben', 'ber', 'bos', 'bre', 'bul', 'cat', 'cbk', 'ceb', 'ces', 'cha', 'cmn', 'cor', 'csb', 'cym', 'dan', 'deu', 'dsb', 'dtp', 'ell', 'eng', 'epo', 'est', 'eus', 'fao', 'fin', 'fra', 'fry', 'gla', 'gle', 'glg', 'gsw', 'heb', 'hin', 'hrv', 'hsb', 'hun', 'hye', 'ido', 'ile', 'ina', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kat', 'kaz', 'khm', 'kor', 'kur', 'kzj', 'lat', 'lfn', 'lit', 'lvs', 'mal', 'mar', 'max', 'mhr', 'mkd', 'mon', 'nds', 'nld', 'nno', 'nob', 'nov', 'oci', 'orv', 'pam', 'pes', 'pms', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'swg', 'swh', 'tam', 'tat', 'tel', 'tgl', 'tha', 'tuk', 'tur', 'tzl', 'uig', 'ukr', 'urd', 'uzb', 'vie', 'war', 'wuu', 'xho', 'yid', 'yue', 'zsm'] | BitextMining | s2s | [Written] | None | None | -| [TbilisiCityHallBitextMining](https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles) | ['eng', 'kat'] | BitextMining | s2s | [News, Written] | None | None | +| [TbilisiCityHallBitextMining](https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles) | ['eng', 'kat'] | BitextMining | s2s | [News, Written] | {'test': 3640} | {'test': {'num_samples': 3640, 'number_of_characters': 572146, 'unique_pairs': 3640, 'min_sentence1_length': 13, 'average_sentence1_length': 78.59, 'max_sentence1_length': 203, 'unique_sentence1': 3636, 'min_sentence2_length': 13, 
'average_sentence2_length': 78.59, 'max_sentence2_length': 203, 'unique_sentence2': 3636, 'hf_subset_descriptive_stats': {'kat_Geor-eng_Latn': {'num_samples': 1820, 'number_of_characters': 286073, 'unique_pairs': 1820, 'min_sentence1_length': 30, 'average_sentence1_length': 76.07, 'max_sentence1_length': 189, 'unique_sentence1': 1820, 'min_sentence2_length': 13, 'average_sentence2_length': 81.12, 'max_sentence2_length': 203, 'unique_sentence2': 1816}, 'eng_Latn-kat_Geor': {'num_samples': 1820, 'number_of_characters': 286073, 'unique_pairs': 1820, 'min_sentence1_length': 13, 'average_sentence1_length': 81.12, 'max_sentence1_length': 203, 'unique_sentence1': 1816, 'min_sentence2_length': 30, 'average_sentence2_length': 76.07, 'max_sentence2_length': 189, 'unique_sentence2': 1820}}}} | | [TelemarketingSalesRuleLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [TeluguAndhraJyotiNewsClassification](https://github.com/AnushaMotamarri/Telugu-Newspaper-Article-Dataset) | ['tel'] | Classification | s2s | [News, Written] | None | None | | [TempReasonL1](https://github.com/DAMO-NLP-SG/TempReason) (Xiao et al., 2024) | ['eng'] | Retrieval | s2s | [Encyclopaedic, Written] | None | None | @@ -547,7 +547,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [ThuNewsClusteringS2S.v2](http://thuctc.thunlp.org/) (Sun et al., 2016) | ['cmn'] | Clustering | s2s | [News, Written] | None | None | | [TopiOCQA](https://mcgill-nlp.github.io/topiocqa) (Vaibhav Adlakha, 2022) | ['eng'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | | [TopiOCQAHardNegatives](https://mcgill-nlp.github.io/topiocqa) (Vaibhav Adlakha, 2022) | ['eng'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | -| [Touche2020Retrieval.v3](https://github.com/castorini/touche-error-analysis) | ['eng'] | Retrieval | s2p | [Academic] | {'test': 303781} | {'test': {'number_of_characters': 2140.82, 'num_samples': 303781, 'num_queries': 49, 'num_documents': 303732, 'average_document_length': 0.01, 'average_query_length': 0.89, 'average_relevant_docs_per_query': 34.94}} | +| [Touche2020Retrieval.v3](https://github.com/castorini/touche-error-analysis) | ['eng'] | Retrieval | s2p | [Academic] | {'test': 303781} | {'test': {'number_of_characters': 637047138, 'num_samples': 303781, 'num_queries': 49, 'num_documents': 303732, 'min_document_length': 16, 'average_document_length': 0.01, 'max_document_length': 83, 'unique_documents': 303732, 'min_query_length': 41, 'average_query_length': 13000918.57, 'max_query_length': 105983, 'unique_queries': 49, 'min_relevant_docs_per_query': 40, 'average_relevant_docs_per_query': 58.14, 'max_relevant_docs_per_query': 87, 'unique_relevant_docs': 2732}} | | [ToxicChatClassification](https://aclanthology.org/2023.findings-emnlp.311/) (Zi Lin, 2023) | ['eng'] | Classification | s2s | [Constructed, Written] | None | None | | [ToxicConversationsClassification](https://www.kaggle.com/competitions/jigsaw-unintended-bias-in-toxicity-classification/overview) (cjadams, 2019) | ['eng'] | Classification | s2s | [Social, Written] | None | None | | [TswanaNewsClassification](https://link.springer.com/chapter/10.1007/978-3-031-49002-6_17) (Vukosi Marivate, 2023) | ['tsn'] | Classification | s2s | [News, Written] | None | None | 
@@ -560,10 +560,10 @@ The following tables give you an overview of the tasks in MTEB. | [TweetSentimentClassification](https://aclanthology.org/2022.lrec-1.27) | ['ara', 'deu', 'eng', 'fra', 'hin', 'ita', 'por', 'spa'] | Classification | s2s | [Social, Written] | None | None | | [TweetSentimentExtractionClassification](https://www.kaggle.com/competitions/tweet-sentiment-extraction/overview) (Maggie et al., 2020) | ['eng'] | Classification | s2s | [Social, Written] | None | None | | [TweetTopicSingleClassification](https://arxiv.org/abs/2209.09824) | ['eng'] | Classification | s2s | [Social, News, Written] | None | None | -| [TwentyNewsgroupsClustering.v2](https://scikit-learn.org/0.19/datasets/twenty_newsgroups.html) (Ken Lang, 1995) | ['eng'] | Clustering | s2s | [News, Written] | None | None | +| [TwentyNewsgroupsClustering.v2](https://scikit-learn.org/0.19/datasets/twenty_newsgroups.html) (Ken Lang, 1995) | ['eng'] | Clustering | s2s | [News, Written] | {'test': 59545} | {'test': {'num_samples': 59545, 'number_of_characters': 1907719, 'min_text_length': 11, 'average_text_length': 32.04, 'max_text_length': 120, 'min_labels_per_text': 2082, 'average_labels_per_text': 1.0, 'max_labels_per_text': 3236, 'unique_labels': 20, 'labels': {'12': {'count': 3137}, '6': {'count': 3070}, '0': {'count': 2613}, '2': {'count': 3155}, '10': {'count': 3220}, '17': {'count': 2986}, '14': {'count': 3106}, '13': {'count': 3055}, '1': {'count': 3056}, '16': {'count': 2911}, '9': {'count': 2984}, '3': {'count': 3070}, '15': {'count': 3090}, '7': {'count': 3036}, '5': {'count': 3124}, '11': {'count': 3236}, '18': {'count': 2483}, '8': {'count': 3090}, '19': {'count': 2082}, '4': {'count': 3041}}}} | | [TwitterHjerneRetrieval](https://huggingface.co/datasets/sorenmulli/da-hashtag-twitterhjerne) (Holm et al., 2024) | ['dan'] | Retrieval | p2p | [Social, Written] | None | None | | [TwitterSemEval2015](https://alt.qcri.org/semeval2015/task1/) | ['eng'] | PairClassification | s2s | | None | 
None | -| [TwitterURLCorpus](https://languagenet.github.io/) | ['eng'] | PairClassification | s2s | | {'test': 51534} | {'test': {'num_samples': 51534, 'number_of_characters': 8659940, 'avg_sentence1_len': 79.49, 'avg_sentence2_len': 88.55, 'unique_labels': 2, 'labels': {'0': {'count': 38546}, '1': {'count': 12988}}}} | +| [TwitterURLCorpus](https://languagenet.github.io/) | ['eng'] | PairClassification | s2s | | {'test': 51534} | {'test': {'num_samples': 51534, 'number_of_characters': 8659940, 'min_sentence1_length': 24, 'avg_sentence1_length': 79.49, 'max_sentence1_length': 126, 'unique_sentence1': 4329, 'min_sentence2_length': 6, 'avg_sentence2_length': 88.55, 'max_sentence2_length': 608, 'unique_sentence2': 41304, 'unique_labels': 2, 'labels': {'0': {'count': 38546}, '1': {'count': 12988}}}} | | [UCCVCommonLawLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [UkrFormalityClassification](https://huggingface.co/datasets/ukr-detect/ukr-formality-dataset-translated-gyafc) | ['ukr'] | Classification | s2s | [News, Written] | None | None | | [UnfairTOSLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | @@ -571,7 +571,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [VGHierarchicalClusteringP2P](https://huggingface.co/datasets/navjordj/VG_summarization) (Navjord et al., 2023) | ['nob'] | Clustering | p2p | [News, Non-fiction, Written] | None | None | | [VGHierarchicalClusteringS2S](https://huggingface.co/datasets/navjordj/VG_summarization) (Navjord et al., 2023) | ['nob'] | Clustering | p2p | [News, Non-fiction, Written] | None | None | | [VideoRetrieval](https://arxiv.org/abs/2203.03367) | ['cmn'] | Retrieval | s2p | | None | None | -| [VieMedEVBitextMining](https://aclanthology.org/2015.iwslt-evaluation.11/) (Nhu Vo, 2024) | ['eng', 'vie'] | BitextMining | s2s | [Medical, Written] | None | None | +| [VieMedEVBitextMining](https://aclanthology.org/2015.iwslt-evaluation.11/) (Nhu Vo, 2024) | ['eng', 'vie'] | BitextMining | s2s | [Medical, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 575910, 'unique_pairs': 2048, 'min_sentence1_length': 11, 'average_sentence1_length': 139.23, 'max_sentence1_length': 1291, 'unique_sentence1': 2048, 'min_sentence2_length': 11, 'average_sentence2_length': 141.98, 'max_sentence2_length': 1217, 'unique_sentence2': 2047}} | | [VieQuADRetrieval](https://aclanthology.org/2020.coling-main.233.pdf) | ['vie'] | Retrieval | s2p | [Encyclopaedic, Non-fiction, Written] | None | None | | [VieStudentFeedbackClassification](https://ieeexplore.ieee.org/document/8573337) (Nguyen et al., 2018) | ['vie'] | Classification | s2s | [Reviews, Written] | None | None | | [VoyageMMarcoReranking](https://arxiv.org/abs/2312.16144) (Benjamin Clavié, 2023) | ['jpn'] | Reranking | s2s | [Academic, Non-fiction, Written] | None | None | @@ -580,12 +580,12 @@ The following tables give you an overview of the tasks in MTEB. 
| [WebLINXCandidatesReranking](https://mcgill-nlp.github.io/weblinx) (Xing Han Lù, 2024) | ['eng'] | Reranking | p2p | [Academic, Web, Written] | None | None | | [WikiCitiesClustering](https://huggingface.co/datasets/wikipedia) | ['eng'] | Clustering | p2p | [Encyclopaedic, Written] | None | None | | [WikiClusteringP2P.v2](https://github.com/Rysias/wiki-clustering) | ['bos', 'cat', 'ces', 'dan', 'eus', 'glv', 'ilo', 'kur', 'lav', 'min', 'mlt', 'sco', 'sqi', 'wln'] | Clustering | p2p | [Encyclopaedic, Written] | None | None | -| [WikipediaRerankingMultilingual](https://huggingface.co/datasets/ellamind/wikipedia-2023-11-reranking-multilingual) | ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] | Reranking | s2p | [Encyclopaedic, Written] | {'test': 24000} | {'test': {'num_samples': 24000, 'number_of_characters': 83866932, 'num_positive': 24000, 'num_negative': 192000, 'avg_query_len': 59.09, 'avg_positive_len': 385.45, 'avg_negative_len': 381.24, 'hf_subset_descriptive_stats': {'bg': {'num_samples': 1500, 'number_of_characters': 5145316, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 60.83, 'avg_positive_len': 375.89, 'avg_negative_len': 374.19}, 'bn': {'num_samples': 1500, 'number_of_characters': 5390581, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 47.27, 'avg_positive_len': 394.59, 'avg_negative_len': 393.98}, 'cs': {'num_samples': 1500, 'number_of_characters': 5079180, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 56.27, 'avg_positive_len': 383.84, 'avg_negative_len': 368.25}, 'da': {'num_samples': 1500, 'number_of_characters': 4746132, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 56.75, 'avg_positive_len': 351.68, 'avg_negative_len': 344.46}, 'de': {'num_samples': 1500, 'number_of_characters': 5483592, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 70.0, 'avg_positive_len': 391.54, 'avg_negative_len': 399.27}, 'en': 
{'num_samples': 1500, 'number_of_characters': 6217884, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 68.37, 'avg_positive_len': 451.73, 'avg_negative_len': 453.14}, 'fa': {'num_samples': 1500, 'number_of_characters': 4732619, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 48.67, 'avg_positive_len': 347.7, 'avg_negative_len': 344.84}, 'fi': {'num_samples': 1500, 'number_of_characters': 5209132, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 55.34, 'avg_positive_len': 394.71, 'avg_negative_len': 377.84}, 'hi': {'num_samples': 1500, 'number_of_characters': 5620959, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 50.78, 'avg_positive_len': 420.38, 'avg_negative_len': 409.52}, 'it': {'num_samples': 1500, 'number_of_characters': 5420496, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 70.05, 'avg_positive_len': 396.97, 'avg_negative_len': 393.33}, 'nl': {'num_samples': 1500, 'number_of_characters': 5169556, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 65.34, 'avg_positive_len': 380.79, 'avg_negative_len': 375.03}, 'pt': {'num_samples': 1500, 'number_of_characters': 5474356, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 65.12, 'avg_positive_len': 404.02, 'avg_negative_len': 397.55}, 'ro': {'num_samples': 1500, 'number_of_characters': 4796113, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 61.97, 'avg_positive_len': 346.71, 'avg_negative_len': 348.59}, 'sr': {'num_samples': 1500, 'number_of_characters': 5271732, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 55.67, 'avg_positive_len': 386.35, 'avg_negative_len': 384.06}, 'no': {'num_samples': 1500, 'number_of_characters': 5036586, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 55.29, 'avg_positive_len': 367.72, 'avg_negative_len': 366.84}, 'sv': {'num_samples': 1500, 'number_of_characters': 5072698, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 57.73, 
'avg_positive_len': 372.59, 'avg_negative_len': 368.94}}}} | +| [WikipediaRerankingMultilingual](https://huggingface.co/datasets/ellamind/wikipedia-2023-11-reranking-multilingual) | ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] | Reranking | s2p | [Encyclopaedic, Written] | {'test': 24000} | {'test': {'num_samples': 24000, 'number_of_characters': 83866932, 'num_positive': 24000, 'num_negative': 192000, 'min_query_length': 7, 'avg_query_length': 59.09, 'max_query_length': 180, 'unique_query': 23997, 'min_positive_length': 100, 'avg_positive_length': 385.45, 'max_positive_length': 3515, 'unique_positive': 23993, 'min_negative_length': 100, 'avg_negative_length': 381.24, 'max_negative_length': 9461, 'unique_negative': 191783, 'hf_subset_descriptive_stats': {'bg': {'num_samples': 1500, 'number_of_characters': 5145316, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 60.83, 'max_query_length': 166, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 375.89, 'max_positive_length': 2241, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 374.19, 'max_negative_length': 4869, 'unique_negative': 11996}, 'bn': {'num_samples': 1500, 'number_of_characters': 5390581, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 7, 'avg_query_length': 47.27, 'max_query_length': 123, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 394.59, 'max_positive_length': 2338, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 393.98, 'max_negative_length': 5104, 'unique_negative': 11996}, 'cs': {'num_samples': 1500, 'number_of_characters': 5079180, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 56.27, 'max_query_length': 137, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 383.84, 'max_positive_length': 2300, 
'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 368.25, 'max_negative_length': 3487, 'unique_negative': 11982}, 'da': {'num_samples': 1500, 'number_of_characters': 4746132, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 56.75, 'max_query_length': 137, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 351.68, 'max_positive_length': 2159, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 344.46, 'max_negative_length': 2563, 'unique_negative': 11972}, 'de': {'num_samples': 1500, 'number_of_characters': 5483592, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 20, 'avg_query_length': 70.0, 'max_query_length': 180, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 391.54, 'max_positive_length': 2674, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 399.27, 'max_negative_length': 3083, 'unique_negative': 12000}, 'en': {'num_samples': 1500, 'number_of_characters': 6217884, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 68.37, 'max_query_length': 162, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 451.73, 'max_positive_length': 3515, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 453.14, 'max_negative_length': 3662, 'unique_negative': 12000}, 'fa': {'num_samples': 1500, 'number_of_characters': 4732619, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 12, 'avg_query_length': 48.67, 'max_query_length': 119, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 347.7, 'max_positive_length': 2571, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 344.84, 'max_negative_length': 4707, 'unique_negative': 11978}, 'fi': {'num_samples': 1500, 'number_of_characters': 5209132, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 
'avg_query_length': 55.34, 'max_query_length': 132, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 394.71, 'max_positive_length': 2129, 'unique_positive': 1498, 'min_negative_length': 100, 'avg_negative_length': 377.84, 'max_negative_length': 2574, 'unique_negative': 11972}, 'hi': {'num_samples': 1500, 'number_of_characters': 5620959, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 13, 'avg_query_length': 50.78, 'max_query_length': 125, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 420.38, 'max_positive_length': 2361, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 409.52, 'max_negative_length': 5912, 'unique_negative': 11996}, 'it': {'num_samples': 1500, 'number_of_characters': 5420496, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 23, 'avg_query_length': 70.05, 'max_query_length': 156, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 396.97, 'max_positive_length': 2082, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 393.33, 'max_negative_length': 9461, 'unique_negative': 11993}, 'nl': {'num_samples': 1500, 'number_of_characters': 5169556, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 65.34, 'max_query_length': 136, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 380.79, 'max_positive_length': 1864, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 375.03, 'max_negative_length': 3641, 'unique_negative': 11985}, 'pt': {'num_samples': 1500, 'number_of_characters': 5474356, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 65.12, 'max_query_length': 176, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 404.02, 'max_positive_length': 3057, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 397.55, 'max_negative_length': 
2877, 'unique_negative': 11991}, 'ro': {'num_samples': 1500, 'number_of_characters': 4796113, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 61.97, 'max_query_length': 169, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 346.71, 'max_positive_length': 1917, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 348.59, 'max_negative_length': 4213, 'unique_negative': 11971}, 'sr': {'num_samples': 1500, 'number_of_characters': 5271732, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 15, 'avg_query_length': 55.67, 'max_query_length': 146, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 386.35, 'max_positive_length': 2421, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 384.06, 'max_negative_length': 3668, 'unique_negative': 11974}, 'no': {'num_samples': 1500, 'number_of_characters': 5036586, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 55.29, 'max_query_length': 129, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 367.72, 'max_positive_length': 1450, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 366.84, 'max_negative_length': 2841, 'unique_negative': 11996}, 'sv': {'num_samples': 1500, 'number_of_characters': 5072698, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 57.73, 'max_query_length': 133, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 372.59, 'max_positive_length': 2493, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 368.94, 'max_negative_length': 3680, 'unique_negative': 11999}}}} | | [WikipediaRetrievalMultilingual](https://huggingface.co/datasets/ellamind/wikipedia-2023-11-retrieval-multilingual-queries) | ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 
'swe'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | | [WinoGrande](https://winogrande.allenai.org/) (Xiao et al., 2024) | ['eng'] | Retrieval | s2s | [Encyclopaedic, Written] | None | None | | [WisesightSentimentClassification](https://github.com/PyThaiNLP/wisesight-sentiment) | ['tha'] | Classification | s2s | [Social, News, Written] | None | None | | XMarket (Bonab et al., 2021) | ['deu', 'eng', 'spa'] | Retrieval | s2p | | None | None | -| [XNLI](https://aclanthology.org/D18-1269/) (Conneau et al., 2018) | ['ara', 'bul', 'deu', 'ell', 'eng', 'fra', 'hin', 'rus', 'spa', 'swa', 'tha', 'tur', 'vie', 'zho'] | PairClassification | s2s | [Non-fiction, Fiction, Government, Written] | {'test': 19110, 'validation': 19110} | {'test': {'num_samples': 19110, 'number_of_characters': 2907145, 'avg_sentence1_len': 103.24, 'avg_sentence2_len': 48.89, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 179591, 'avg_sentence1_len': 89.57, 'avg_sentence2_len': 41.99, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 220646, 'avg_sentence1_len': 110.02, 'avg_sentence2_len': 51.63, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241224, 'avg_sentence1_len': 119.93, 'avg_sentence2_len': 56.79, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 240222, 'avg_sentence1_len': 119.05, 'avg_sentence2_len': 56.93, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212223, 'avg_sentence1_len': 105.67, 'avg_sentence2_len': 49.8, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232207, 
'avg_sentence1_len': 115.43, 'avg_sentence2_len': 54.68, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 245259, 'avg_sentence1_len': 121.1, 'avg_sentence2_len': 58.58, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 211312, 'avg_sentence1_len': 104.63, 'avg_sentence2_len': 50.17, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 222797, 'avg_sentence1_len': 110.77, 'avg_sentence2_len': 52.45, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210103, 'avg_sentence1_len': 104.44, 'avg_sentence2_len': 49.48, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192788, 'avg_sentence1_len': 96.69, 'avg_sentence2_len': 44.54, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208658, 'avg_sentence1_len': 103.68, 'avg_sentence2_len': 49.19, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 223549, 'avg_sentence1_len': 111.31, 'avg_sentence2_len': 52.46, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 66566, 'avg_sentence1_len': 33.04, 'avg_sentence2_len': 15.73, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}, 'validation': {'num_samples': 19110, 'number_of_characters': 2909058, 'avg_sentence1_len': 103.21, 'avg_sentence2_len': 49.02, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 177355, 'avg_sentence1_len': 88.32, 
'avg_sentence2_len': 41.61, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 219988, 'avg_sentence1_len': 109.2, 'avg_sentence2_len': 51.97, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241852, 'avg_sentence1_len': 119.81, 'avg_sentence2_len': 57.37, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 241275, 'avg_sentence1_len': 119.88, 'avg_sentence2_len': 56.88, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212384, 'avg_sentence1_len': 105.72, 'avg_sentence2_len': 49.88, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232451, 'avg_sentence1_len': 115.17, 'avg_sentence2_len': 55.12, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 246857, 'avg_sentence1_len': 121.76, 'avg_sentence2_len': 59.09, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 212269, 'avg_sentence1_len': 105.06, 'avg_sentence2_len': 50.44, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 221152, 'avg_sentence1_len': 109.75, 'avg_sentence2_len': 52.27, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210482, 'avg_sentence1_len': 104.32, 'avg_sentence2_len': 49.88, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192640, 'avg_sentence1_len': 97.28, 'avg_sentence2_len': 43.84, 'unique_labels': 2, 'labels': {'0': {'count': 683}, 
'1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208305, 'avg_sentence1_len': 102.97, 'avg_sentence2_len': 49.64, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 224811, 'avg_sentence1_len': 112.26, 'avg_sentence2_len': 52.43, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 67237, 'avg_sentence1_len': 33.41, 'avg_sentence2_len': 15.85, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}} | +| [XNLI](https://aclanthology.org/D18-1269/) (Conneau et al., 2018) | ['ara', 'bul', 'deu', 'ell', 'eng', 'fra', 'hin', 'rus', 'spa', 'swa', 'tha', 'tur', 'vie', 'zho'] | PairClassification | s2s | [Non-fiction, Fiction, Government, Written] | {'test': 19110, 'validation': 19110} | {'test': {'num_samples': 19110, 'number_of_characters': 2907145, 'min_sentence1_length': 3, 'avg_sentence1_length': 103.24, 'max_sentence1_length': 401, 'unique_sentence1': 15328, 'min_sentence2_length': 2, 'avg_sentence2_length': 48.89, 'max_sentence2_length': 187, 'unique_sentence2': 19104, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 179591, 'min_sentence1_length': 11, 'avg_sentence1_length': 89.57, 'max_sentence1_length': 242, 'unique_sentence1': 1095, 'min_sentence2_length': 8, 'avg_sentence2_length': 41.99, 'max_sentence2_length': 115, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 220646, 'min_sentence1_length': 14, 'avg_sentence1_length': 110.02, 'max_sentence1_length': 303, 'unique_sentence1': 1095, 'min_sentence2_length': 8, 'avg_sentence2_length': 51.63, 'max_sentence2_length': 150, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': 
{'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241224, 'min_sentence1_length': 3, 'avg_sentence1_length': 119.93, 'max_sentence1_length': 301, 'unique_sentence1': 1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 56.79, 'max_sentence2_length': 187, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 240222, 'min_sentence1_length': 13, 'avg_sentence1_length': 119.05, 'max_sentence1_length': 344, 'unique_sentence1': 1095, 'min_sentence2_length': 13, 'avg_sentence2_length': 56.93, 'max_sentence2_length': 172, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212223, 'min_sentence1_length': 19, 'avg_sentence1_length': 105.67, 'max_sentence1_length': 268, 'unique_sentence1': 1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 49.8, 'max_sentence2_length': 137, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232207, 'min_sentence1_length': 11, 'avg_sentence1_length': 115.43, 'max_sentence1_length': 385, 'unique_sentence1': 1094, 'min_sentence2_length': 8, 'avg_sentence2_length': 54.68, 'max_sentence2_length': 163, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 245259, 'min_sentence1_length': 9, 'avg_sentence1_length': 121.1, 'max_sentence1_length': 327, 'unique_sentence1': 1095, 'min_sentence2_length': 10, 'avg_sentence2_length': 58.58, 'max_sentence2_length': 169, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 211312, 'min_sentence1_length': 16, 'avg_sentence1_length': 104.63, 'max_sentence1_length': 401, 'unique_sentence1': 
1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 50.17, 'max_sentence2_length': 162, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 222797, 'min_sentence1_length': 11, 'avg_sentence1_length': 110.77, 'max_sentence1_length': 306, 'unique_sentence1': 1095, 'min_sentence2_length': 8, 'avg_sentence2_length': 52.45, 'max_sentence2_length': 167, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210103, 'min_sentence1_length': 10, 'avg_sentence1_length': 104.44, 'max_sentence1_length': 266, 'unique_sentence1': 1094, 'min_sentence2_length': 2, 'avg_sentence2_length': 49.48, 'max_sentence2_length': 146, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192788, 'min_sentence1_length': 12, 'avg_sentence1_length': 96.69, 'max_sentence1_length': 262, 'unique_sentence1': 1095, 'min_sentence2_length': 6, 'avg_sentence2_length': 44.54, 'max_sentence2_length': 129, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208658, 'min_sentence1_length': 15, 'avg_sentence1_length': 103.68, 'max_sentence1_length': 255, 'unique_sentence1': 1095, 'min_sentence2_length': 6, 'avg_sentence2_length': 49.19, 'max_sentence2_length': 140, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 223549, 'min_sentence1_length': 14, 'avg_sentence1_length': 111.31, 'max_sentence1_length': 265, 'unique_sentence1': 1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 52.46, 'max_sentence2_length': 143, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 
682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 66566, 'min_sentence1_length': 4, 'avg_sentence1_length': 33.04, 'max_sentence1_length': 112, 'unique_sentence1': 1095, 'min_sentence2_length': 3, 'avg_sentence2_length': 15.73, 'max_sentence2_length': 59, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}, 'validation': {'num_samples': 19110, 'number_of_characters': 2909058, 'min_sentence1_length': 5, 'avg_sentence1_length': 103.21, 'max_sentence1_length': 323, 'unique_sentence1': 11171, 'min_sentence2_length': 3, 'avg_sentence2_length': 49.02, 'max_sentence2_length': 172, 'unique_sentence2': 19101, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 177355, 'min_sentence1_length': 13, 'avg_sentence1_length': 88.32, 'max_sentence1_length': 214, 'unique_sentence1': 798, 'min_sentence2_length': 6, 'avg_sentence2_length': 41.61, 'max_sentence2_length': 137, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 219988, 'min_sentence1_length': 16, 'avg_sentence1_length': 109.2, 'max_sentence1_length': 316, 'unique_sentence1': 798, 'min_sentence2_length': 10, 'avg_sentence2_length': 51.97, 'max_sentence2_length': 151, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241852, 'min_sentence1_length': 20, 'avg_sentence1_length': 119.81, 'max_sentence1_length': 298, 'unique_sentence1': 798, 'min_sentence2_length': 12, 'avg_sentence2_length': 57.37, 'max_sentence2_length': 162, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 241275, 'min_sentence1_length': 16, 'avg_sentence1_length': 119.88, 
'max_sentence1_length': 302, 'unique_sentence1': 798, 'min_sentence2_length': 6, 'avg_sentence2_length': 56.88, 'max_sentence2_length': 171, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212384, 'min_sentence1_length': 20, 'avg_sentence1_length': 105.72, 'max_sentence1_length': 271, 'unique_sentence1': 798, 'min_sentence2_length': 8, 'avg_sentence2_length': 49.88, 'max_sentence2_length': 139, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232451, 'min_sentence1_length': 14, 'avg_sentence1_length': 115.17, 'max_sentence1_length': 265, 'unique_sentence1': 798, 'min_sentence2_length': 7, 'avg_sentence2_length': 55.12, 'max_sentence2_length': 148, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 246857, 'min_sentence1_length': 19, 'avg_sentence1_length': 121.76, 'max_sentence1_length': 323, 'unique_sentence1': 798, 'min_sentence2_length': 11, 'avg_sentence2_length': 59.09, 'max_sentence2_length': 172, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 212269, 'min_sentence1_length': 18, 'avg_sentence1_length': 105.06, 'max_sentence1_length': 277, 'unique_sentence1': 798, 'min_sentence2_length': 7, 'avg_sentence2_length': 50.44, 'max_sentence2_length': 152, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 221152, 'min_sentence1_length': 15, 'avg_sentence1_length': 109.75, 'max_sentence1_length': 310, 'unique_sentence1': 798, 'min_sentence2_length': 8, 'avg_sentence2_length': 52.27, 'max_sentence2_length': 140, 'unique_sentence2': 1365, 'unique_labels': 2, 
'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210482, 'min_sentence1_length': 13, 'avg_sentence1_length': 104.32, 'max_sentence1_length': 264, 'unique_sentence1': 798, 'min_sentence2_length': 8, 'avg_sentence2_length': 49.88, 'max_sentence2_length': 153, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192640, 'min_sentence1_length': 7, 'avg_sentence1_length': 97.28, 'max_sentence1_length': 255, 'unique_sentence1': 798, 'min_sentence2_length': 3, 'avg_sentence2_length': 43.84, 'max_sentence2_length': 140, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208305, 'min_sentence1_length': 15, 'avg_sentence1_length': 102.97, 'max_sentence1_length': 269, 'unique_sentence1': 798, 'min_sentence2_length': 10, 'avg_sentence2_length': 49.64, 'max_sentence2_length': 139, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 224811, 'min_sentence1_length': 18, 'avg_sentence1_length': 112.26, 'max_sentence1_length': 323, 'unique_sentence1': 798, 'min_sentence2_length': 9, 'avg_sentence2_length': 52.43, 'max_sentence2_length': 159, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 67237, 'min_sentence1_length': 5, 'avg_sentence1_length': 33.41, 'max_sentence1_length': 135, 'unique_sentence1': 798, 'min_sentence2_length': 3, 'avg_sentence2_length': 15.85, 'max_sentence2_length': 66, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}} | | [XNLIV2](https://arxiv.org/pdf/2301.06527) (Upadhyay et al., 2023) | ['asm', 'ben', 'bho', 'ell', 'guj', 'kan', 'mar', 'ory', 'pan', 
'rus', 'san', 'tam', 'tur'] | PairClassification | s2s | [Non-fiction, Fiction, Government, Written] | None | None | | [XPQARetrieval](https://arxiv.org/abs/2305.09249) (Shen et al., 2023) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'jpn', 'kor', 'pol', 'por', 'spa', 'tam'] | Retrieval | s2p | [Reviews, Written] | None | None | | [XQuADRetrieval](https://huggingface.co/datasets/xquad) (Mikel Artetxe, 2019) | ['arb', 'deu', 'ell', 'eng', 'hin', 'ron', 'rus', 'spa', 'tha', 'tur', 'vie', 'zho'] | Retrieval | s2p | [Web, Written] | None | None | @@ -594,8 +594,8 @@ The following tables give you an overview of the tasks in MTEB. | [YelpReviewFullClassification](https://arxiv.org/abs/1509.01626) (Zhang et al., 2015) | ['eng'] | Classification | s2s | [Reviews, Written] | None | None | | [YueOpenriceReviewClassification](https://github.com/Christainx/Dataset_Cantonese_Openrice) (Xiang et al., 2019) | ['yue'] | Classification | s2s | [Reviews, Spoken] | None | None | | [indonli](https://link.springer.com/chapter/10.1007/978-3-030-41505-1_39) | ['ind'] | PairClassification | s2s | [Encyclopaedic, Web, News, Written] | None | None | -| [mFollowIRCrossLingualInstructionRetrieval](https://neuclir.github.io/) (Weller et al., 2024) | ['eng', 'fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | {'test': 121758} | {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283654099, 'average_document_length': 2331.08, 'average_query_length': 81.88, 'average_instruction_length': 389.95, 'average_changed_instruction_length': 450.55, 'average_relevant_docs_per_query': 10.43, 'average_top_ranked_per_query': 1000.0, 'hf_subset_descriptive_stats': {'eng-fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129597567, 'average_document_length': 3145.5, 'average_query_length': 80.08, 'average_instruction_length': 396.88, 'average_changed_instruction_length': 463.18, 'average_relevant_docs_per_query': 
10.85, 'average_top_ranked_per_query': 1000.0}, 'eng-rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109522175, 'average_document_length': 2784.08, 'average_query_length': 81.88, 'average_instruction_length': 371.12, 'average_changed_instruction_length': 431.8, 'average_relevant_docs_per_query': 9.78, 'average_top_ranked_per_query': 1000.0}, 'eng-zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44534357, 'average_document_length': 1082.05, 'average_query_length': 83.56, 'average_instruction_length': 401.02, 'average_changed_instruction_length': 456.26, 'average_relevant_docs_per_query': 10.65, 'average_top_ranked_per_query': 1000.0}}}} | -| [mFollowIRInstructionRetrieval](https://neuclir.github.io/) (Weller et al., 2024) | ['fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | {'test': 121758} | {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283622456, 'average_document_length': 2331.08, 'average_query_length': 57.11, 'average_instruction_length': 281.07, 'average_changed_instruction_length': 326.94, 'average_relevant_docs_per_query': 10.43, 'average_top_ranked_per_query': 1000.0, 'hf_subset_descriptive_stats': {'fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129593838, 'average_document_length': 3145.5, 'average_query_length': 72.65, 'average_instruction_length': 358.93, 'average_changed_instruction_length': 415.32, 'average_relevant_docs_per_query': 10.85, 'average_top_ranked_per_query': 1000.0}, 'rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109523683, 'average_document_length': 2784.08, 'average_query_length': 77.5, 'average_instruction_length': 387.0, 'average_changed_instruction_length': 458.0, 'average_relevant_docs_per_query': 9.78, 'average_top_ranked_per_query': 1000.0}, 'zho': {'num_samples': 41163, 'num_docs': 41120, 
'num_queries': 43, 'number_of_characters': 44504935, 'average_document_length': 1082.05, 'average_query_length': 23.7, 'average_instruction_length': 110.09, 'average_changed_instruction_length': 122.81, 'average_relevant_docs_per_query': 10.65, 'average_top_ranked_per_query': 1000.0}}}} | +| [mFollowIRCrossLingualInstructionRetrieval](https://neuclir.github.io/) (Weller et al., 2024) | ['eng', 'fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | {'test': 121758} | {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283654099, 'min_document_length': 74, 'average_document_length': 2331.08, 'max_document_length': 24179, 'unique_docs': 121635, 'min_query_length': 32, 'average_query_length': 81.88, 'max_query_length': 173, 'unique_queries': 75, 'min_instruction_length': 93, 'average_instruction_length': 389.95, 'max_instruction_length': 887, 'unique_instructions': 75, 'min_changed_instruction_length': 180, 'average_changed_instruction_length': 450.55, 'max_changed_instruction_length': 974, 'unique_changed_instructions': 123, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 10.43, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000, 'hf_subset_descriptive_stats': {'eng-fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129597567, 'min_document_length': 99, 'average_document_length': 3145.5, 'max_document_length': 24179, 'unique_docs': 41189, 'min_query_length': 34, 'average_query_length': 80.08, 'max_query_length': 124, 'unique_queries': 40, 'min_instruction_length': 150, 'average_instruction_length': 396.88, 'max_instruction_length': 887, 'unique_instructions': 40, 'min_changed_instruction_length': 205, 'average_changed_instruction_length': 463.18, 'max_changed_instruction_length': 974, 'unique_changed_instructions': 40, 
'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.85, 'max_average_relevant_docs_per_query': 22, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'eng-rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109522175, 'min_document_length': 75, 'average_document_length': 2784.08, 'max_document_length': 24061, 'unique_docs': 39326, 'min_query_length': 32, 'average_query_length': 81.88, 'max_query_length': 173, 'unique_queries': 40, 'min_instruction_length': 93, 'average_instruction_length': 371.12, 'max_instruction_length': 887, 'unique_instructions': 40, 'min_changed_instruction_length': 180, 'average_changed_instruction_length': 431.8, 'max_changed_instruction_length': 957, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 9.78, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'eng-zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44534357, 'min_document_length': 74, 'average_document_length': 1082.05, 'max_document_length': 23840, 'unique_docs': 41120, 'min_query_length': 32, 'average_query_length': 83.56, 'max_query_length': 159, 'unique_queries': 43, 'min_instruction_length': 157, 'average_instruction_length': 401.02, 'max_instruction_length': 731, 'unique_instructions': 43, 'min_changed_instruction_length': 209, 'average_changed_instruction_length': 456.26, 'max_changed_instruction_length': 822, 'unique_changed_instructions': 43, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.65, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}}}} | +| 
[mFollowIRInstructionRetrieval](https://neuclir.github.io/) (Weller et al., 2024) | ['fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | {'test': 121758} | {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283622456, 'min_document_length': 74, 'average_document_length': 2331.08, 'max_document_length': 24179, 'unique_docs': 121635, 'min_query_length': 10, 'average_query_length': 57.11, 'max_query_length': 136, 'unique_queries': 123, 'min_instruction_length': 37, 'average_instruction_length': 281.07, 'max_instruction_length': 1009, 'unique_instructions': 123, 'min_changed_instruction_length': 44, 'average_changed_instruction_length': 326.94, 'max_changed_instruction_length': 1083, 'unique_changed_instructions': 123, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 10.43, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000, 'hf_subset_descriptive_stats': {'fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129593838, 'min_document_length': 99, 'average_document_length': 3145.5, 'max_document_length': 24179, 'unique_docs': 41189, 'min_query_length': 34, 'average_query_length': 72.65, 'max_query_length': 124, 'unique_queries': 40, 'min_instruction_length': 121, 'average_instruction_length': 358.93, 'max_instruction_length': 759, 'unique_instructions': 40, 'min_changed_instruction_length': 163, 'average_changed_instruction_length': 415.32, 'max_changed_instruction_length': 842, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.85, 'max_average_relevant_docs_per_query': 22, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 
'number_of_characters': 109523683, 'min_document_length': 75, 'average_document_length': 2784.08, 'max_document_length': 24061, 'unique_docs': 39326, 'min_query_length': 26, 'average_query_length': 77.5, 'max_query_length': 136, 'unique_queries': 40, 'min_instruction_length': 78, 'average_instruction_length': 387.0, 'max_instruction_length': 1009, 'unique_instructions': 40, 'min_changed_instruction_length': 187, 'average_changed_instruction_length': 458.0, 'max_changed_instruction_length': 1083, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 9.78, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44504935, 'min_document_length': 74, 'average_document_length': 1082.05, 'max_document_length': 23840, 'unique_docs': 41120, 'min_query_length': 10, 'average_query_length': 23.7, 'max_query_length': 44, 'unique_queries': 43, 'min_instruction_length': 37, 'average_instruction_length': 110.09, 'max_instruction_length': 209, 'unique_instructions': 43, 'min_changed_instruction_length': 44, 'average_changed_instruction_length': 122.81, 'max_changed_instruction_length': 229, 'unique_changed_instructions': 43, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.65, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}}}} | diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index 0bdbdeaf84..8b9edfd52c 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -200,7 +200,11 @@ def calculate_metadata_metrics( descriptive_stats = {} hf_subset_stat = "hf_subset_descriptive_stats" - pbar_split = tqdm.tqdm(self.metadata.eval_splits, desc="Processing 
Splits...") + eval_splits = self.metadata.eval_splits + if self.metadata.type in ["Classification", "MultilabelClassification"]: + eval_splits += ["train"] + + pbar_split = tqdm.tqdm(eval_splits, desc="Processing Splits...") for split in pbar_split: pbar_split.set_postfix_str(f"Split: {split}") logger.info(f"Processing metadata for split {split}") @@ -215,12 +219,8 @@ def calculate_metadata_metrics( if isinstance(self.metadata.eval_langs, dict) else self.metadata.eval_langs ) - if self.metadata.type == "Classification": - eval_langs += ["train"] - pbar_subsets = tqdm.tqdm( - self.metadata.eval_langs, desc="Processing Languages..." - ) + pbar_subsets = tqdm.tqdm(eval_langs, desc="Processing Languages...") for hf_subset in pbar_subsets: pbar_subsets.set_postfix_str(f"Language: {hf_subset}") logger.info(f"Processing metadata for language {hf_subset}") diff --git a/mteb/abstasks/AbsTaskBitextMining.py b/mteb/abstasks/AbsTaskBitextMining.py index 00a9160b9b..59d64039fd 100644 --- a/mteb/abstasks/AbsTaskBitextMining.py +++ b/mteb/abstasks/AbsTaskBitextMining.py @@ -21,14 +21,31 @@ class BitextDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. 
+ unique_pairs: Number of unique pairs + + min_sentence1_length: Minimum length of sentence1 average_sentence1_length: Average length of sentence1 + max_sentence1_length: Maximum length of sentence1 + unique_sentence1: Number of unique sentences in sentence1 + + min_sentence2_length: Minimum length of sentence2 average_sentence2_length: Average length of sentence2 + max_sentence2_length: Maximum length of sentence2 """ num_samples: int number_of_characters: int + unique_pairs: int + + min_sentence1_length: int average_sentence1_length: float + max_sentence1_length: int + unique_sentence1: int + + min_sentence2_length: int average_sentence2_length: float + max_sentence2_length: int + unique_sentence2: int class AbsTaskBitextMining(AbsTask): @@ -153,12 +170,24 @@ def _calculate_metrics_from_split( sent_1, sent_2 = pairs_cols[0] sentence1 = self.dataset[split][sent_1] sentence2 = self.dataset[split][sent_2] - total_s1_len = sum([len(s1) for s1 in sentence1]) - total_s2_len = sum([len(s2) for s2 in sentence2]) - + s1_len = [len(s1) for s1 in sentence1] + s2_len = [len(s2) for s2 in sentence2] + total_s1_len = sum(s1_len) + total_s2_len = sum(s2_len) + + unique_pairs = len(set(zip(sentence1, sentence2))) + unique_sentence1 = len(set(sentence1)) + unique_sentence2 = len(set(sentence2)) return BitextDescriptiveStatistics( - average_sentence1_length=total_s1_len / len(sentence1), - average_sentence2_length=total_s2_len / len(sentence2), num_samples=len(sentence1), number_of_characters=total_s1_len + total_s2_len, + unique_pairs=unique_pairs, + min_sentence1_length=min(s1_len), + average_sentence1_length=sum(s1_len) / len(sentence1), + max_sentence1_length=max(s1_len), + unique_sentence1=unique_sentence1, + min_sentence2_length=min(s2_len), + average_sentence2_length=total_s2_len / len(sentence2), + max_sentence2_length=max(s2_len), + unique_sentence2=unique_sentence2, ) diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py index 
683d42b336..55766190fe 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskClassification.py @@ -26,14 +26,26 @@ class ClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + number_texts_intersect_with_train: Number of unique texts in this split that also appear in the train split (None for the train split) + + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text + unique_text: Number of unique texts + unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + number_texts_intersect_with_train: int | None + + min_text_length: int average_text_length: float + max_text_length: int + unique_text: int + unique_labels: int labels: dict[str, dict[str, int]] @@ -206,25 +218,40 @@ def _undersample_data(self, X, y, samples_per_label: int, idxs=None): def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> ClassificationDescriptiveStatistics: + train_text = [] if hf_subset: text = self.dataset[hf_subset][split]["text"] label = self.dataset[hf_subset][split]["label"] + if split != "train": + train_text = self.dataset[hf_subset]["train"]["text"] elif compute_overall: text = [] label = [] for hf_subset in self.metadata.eval_langs: text.extend(self.dataset[hf_subset][split]["text"]) label.extend(self.dataset[hf_subset][split]["label"]) + if split != "train": + train_text.extend(self.dataset[hf_subset]["train"]["text"]) else: text = self.dataset[split]["text"] label = self.dataset[split]["label"] + if split != "train": + train_text = self.dataset["train"]["text"] - total_text_len = sum([len(t) for t in text]) + text_len = [len(t) for t in text] + total_text_len = sum(text_len) label_count = Counter(label) + num_texts_in_train = ( + len(set(text) & set(train_text)) if split != "train" else None + ) 
return ClassificationDescriptiveStatistics( num_samples=len(text), number_of_characters=total_text_len, + number_texts_intersect_with_train=num_texts_in_train, + min_text_length=min(text_len), average_text_length=total_text_len / len(text), + max_text_length=max(text_len), + unique_text=len(set(text)), unique_labels=len(label_count), labels={ str(label): {"count": count} for label, count in label_count.items() diff --git a/mteb/abstasks/AbsTaskClustering.py b/mteb/abstasks/AbsTaskClustering.py index 7f2c94e144..3b5d0f492d 100644 --- a/mteb/abstasks/AbsTaskClustering.py +++ b/mteb/abstasks/AbsTaskClustering.py @@ -24,16 +24,31 @@ class ClusteringDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text + unique_texts: Number of unique texts + + min_labels_per_text: Minimum number of labels per text average_labels_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + + min_text_length: int average_text_length: float + max_text_length: int + unique_texts: int + + min_labels_per_text: int average_labels_per_text: float + max_labels_per_text: int + unique_labels: int labels: dict[str, dict[str, int]] @@ -96,7 +111,11 @@ def _calculate_metrics_from_split( sentences = self.dataset[split]["sentences"] labels = self.dataset[split]["labels"] - total_text_len = sum([len(t) for t in sentences]) + text_len = [len(t) for t in sentences] + all_sentences = [] + for s in sentences: + all_sentences.extend(s) + total_text_len = sum(text_len) total_labels = [] for label in labels: if isinstance(label, list): @@ -107,8 +126,13 @@ def _calculate_metrics_from_split( return 
ClusteringDescriptiveStatistics( num_samples=len(sentences), number_of_characters=total_text_len, + min_text_length=min(text_len), average_text_length=total_text_len / len(sentences), + max_text_length=max(text_len), + unique_texts=len(set(all_sentences)), + min_labels_per_text=min(label_counter.values()), average_labels_per_text=len(total_labels) / len(sentences), + max_labels_per_text=max(label_counter.values()), unique_labels=len(label_counter), labels={ str(label): { diff --git a/mteb/abstasks/AbsTaskClusteringFast.py b/mteb/abstasks/AbsTaskClusteringFast.py index fedf392f71..61c82e9535 100644 --- a/mteb/abstasks/AbsTaskClusteringFast.py +++ b/mteb/abstasks/AbsTaskClusteringFast.py @@ -85,16 +85,30 @@ class ClusteringFastDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text + unique_texts: Number of unique texts + + min_labels_per_text: Minimum number of labels per text average_labels_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + + min_text_length: int average_text_length: float + max_text_length: int + unique_texts: int + + min_labels_per_text: int average_labels_per_text: float + max_labels_per_text: int unique_labels: int labels: dict[str, dict[str, int]] @@ -226,7 +240,8 @@ def _calculate_metrics_from_split( sentences = self.dataset[split]["sentences"] labels = self.dataset[split]["labels"] - total_text_len = sum([len(t) for t in sentences]) + text_len = [len(t) for t in sentences] + total_text_len = sum(text_len) total_labels = [] for label in labels: if isinstance(label, list): @@ -237,8 +252,13 @@ def _calculate_metrics_from_split( return 
ClusteringFastDescriptiveStatistics( num_samples=len(sentences), number_of_characters=total_text_len, + min_text_length=min(text_len), average_text_length=total_text_len / len(sentences), + max_text_length=max(text_len), + unique_texts=len(set(text_len)), + min_labels_per_text=min(label_counter.values()), average_labels_per_text=len(total_labels) / len(sentences), + max_labels_per_text=max(label_counter.values()), unique_labels=len(label_counter), labels={ str(label): { diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py index 6fd3acf905..16c7ac86ff 100644 --- a/mteb/abstasks/AbsTaskMultilabelClassification.py +++ b/mteb/abstasks/AbsTaskMultilabelClassification.py @@ -47,16 +47,32 @@ class MultilabelClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + number_texts_intersect_with_train: Number of texts in the train split + + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text + unique_texts: Number of unique texts + + min_labels_per_text: Minimum number of labels per text average_label_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + number_texts_intersect_with_train: int | None + + min_text_length: int average_text_length: float + max_text_length: int + unique_texts: int + + min_labels_per_text: int average_label_per_text: float + max_labels_per_text: int unique_labels: int labels: dict[str, dict[str, int]] @@ -231,30 +247,48 @@ def _undersample_data_indices(self, y, samples_per_label, idxs=None): def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> 
MultilabelClassificationDescriptiveStatistics: + train_text = [] if hf_subset: text = self.dataset[hf_subset][split]["text"] label = self.dataset[hf_subset][split]["label"] + if split != "train": + train_text = self.dataset[hf_subset]["train"]["text"] elif compute_overall: text = [] label = [] for hf_subset in self.metadata.eval_langs: text.extend(self.dataset[hf_subset][split]["text"]) label.extend(self.dataset[hf_subset][split]["label"]) + if split != "train": + train_text.extend(self.dataset[hf_subset]["train"]["text"]) else: text = self.dataset[split]["text"] label = self.dataset[split]["label"] + if split != "train": + train_text = self.dataset["train"]["text"] - total_text_len = sum(len(t) for t in text) - total_label_len = sum(len(l) for l in label) + text_len = [len(t) for t in text] + total_text_len = sum(text_len) + label_len = [len(l) for l in label] + total_label_len = sum(label_len) total_labels = [] for l in label: total_labels.extend(l if len(l) > 0 else [None]) label_count = Counter(total_labels) + num_texts_in_train = ( + len(set(text) & set(train_text)) if split != "train" else None + ) return MultilabelClassificationDescriptiveStatistics( - average_text_length=total_text_len / len(text), + num_samples=len(text), number_of_characters=total_text_len, + number_texts_intersect_with_train=num_texts_in_train, + min_text_length=min(text_len), + average_text_length=total_text_len / len(text), + max_text_length=max(text_len), + unique_texts=len(set(text)), + min_labels_per_text=min(label_len), average_label_per_text=total_label_len / len(label), - num_samples=len(text), + max_labels_per_text=max(label_len), unique_labels=len(label_count), labels={ str(label): { diff --git a/mteb/abstasks/AbsTaskPairClassification.py b/mteb/abstasks/AbsTaskPairClassification.py index 0cbdafda8b..d11f96b938 100644 --- a/mteb/abstasks/AbsTaskPairClassification.py +++ b/mteb/abstasks/AbsTaskPairClassification.py @@ -20,16 +20,36 @@ class 
PairClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. - avg_sentence1_len: Average length of sentence1 - avg_sentence2_len: Average length of sentence2 + unique_pairs: Number of unique pairs + + min_sentence1_length: Minimum length of sentence1 + avg_sentence1_length: Average length of sentence1 + max_sentence1_length: Maximum length of sentence1 + unique_sentence1: Number of unique sentence + + min_sentence2_length: Minimum length of sentence2 + avg_sentence2_length: Average length of sentence2 + max_sentence2_length: Maximum length of sentence2 + unique_sentence2: Number of unique sentence + unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int - avg_sentence1_len: float - avg_sentence2_len: float + unique_pairs: int + + min_sentence1_length: int + avg_sentence1_length: float + max_sentence1_length: int + unique_sentence1: int + + min_sentence2_length: int + avg_sentence2_length: float + max_sentence2_length: int + unique_sentence2: int + unique_labels: int labels: dict[str, dict[str, int]] @@ -109,14 +129,23 @@ def _calculate_metrics_from_split( dataset["labels"][0] if len(dataset["labels"]) == 1 else dataset["labels"] ) - total_sentence1_len = sum([len(sentence) for sentence in sentence1]) - total_sentence2_len = sum([len(sentence) for sentence in sentence2]) + sentence1_len = [len(sentence) for sentence in sentence1] + total_sentence1_len = sum(sentence1_len) + sentence2_len = [len(sentence) for sentence in sentence2] + total_sentence2_len = sum(sentence2_len) label_count = Counter(labels) return PairClassificationDescriptiveStatistics( num_samples=len(sentence1), number_of_characters=total_sentence1_len + total_sentence2_len, - avg_sentence1_len=total_sentence1_len / len(sentence1), - avg_sentence2_len=total_sentence2_len / len(sentence2), + 
unique_pairs=len(set(zip(sentence1, sentence2))), + min_sentence1_length=min(sentence1_len), + avg_sentence1_length=total_sentence1_len / len(sentence1), + max_sentence1_length=max(sentence1_len), + unique_sentence1=len(set(sentence1)), + min_sentence2_length=min(sentence2_len), + avg_sentence2_length=total_sentence2_len / len(sentence2), + max_sentence2_length=max(sentence2_len), + unique_sentence2=len(set(sentence2)), unique_labels=len(set(labels)), labels={ str(label): {"count": count} for label, count in label_count.items() diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index c980adad45..839966f7ac 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -27,26 +27,65 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): num_queries: number of queries in the dataset num_documents: Number of documents number_of_characters: Total number of symbols in the dataset + + min_document_length: Minimum length of documents average_document_length: Average length of documents + max_document_length: Maximum length of documents + unique_documents: Number of unique documents + + min_query_length: Minimum length of queries average_query_length: Average length of queries + max_query_length: Maximum length of queries + unique_queries: Number of unique queries + + min_relevant_docs_per_query: Minimum number of relevant documents per query average_relevant_docs_per_query: Average number of relevant documents per query - average_instruction_length: Average length of instructions + max_relevant_docs_per_query: Maximum number of relevant documents per query + unique_relevant_docs: Number of unique relevant documents + + num_instructions: Number of instructions + min_instruction_length: Minimum length of instructions + average_instruction_length: Average length of instructions + max_instruction_length: Maximum length of instructions + unique_instructions: Number of unique instructions + + 
min_top_ranked_per_query: Minimum number of top ranked documents per query average_top_ranked_per_query: Average number of top ranked documents per query + max_top_ranked_per_query: Maximum number of relevant documents per query """ num_samples: int num_queries: int num_documents: int number_of_characters: int + + min_document_length: int average_document_length: float + max_document_length: int + unique_documents: int + + min_query_length: int average_query_length: float + max_query_length: int + unique_queries: int + + min_relevant_docs_per_query: int average_relevant_docs_per_query: float + max_relevant_docs_per_query: float + unique_relevant_docs: int + # these are for datasets with instructions - average_instruction_length: float - num_instructions: int + num_instructions: int | None + min_instruction_length: int | None + average_instruction_length: float | None + max_instruction_length: float | None + unique_instructions: int | None + # this is for datasets that do reranking - average_top_ranked_per_query: float + min_top_ranked_per_query: int | None + average_top_ranked_per_query: float | None + max_top_ranked_per_query: int | None class AbsTaskRetrieval(AbsTask): @@ -279,6 +318,8 @@ def _add_main_score(self, scores: ScoresDict) -> None: def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> RetrievalDescriptiveStatistics: + top_ranked = None + instructions = None if hf_subset and hf_subset in self.queries: queries = self.queries[hf_subset][split] corpus = self.corpus[hf_subset][split] @@ -318,6 +359,13 @@ def _calculate_metrics_from_split( num_documents = len(corpus) num_queries = len(queries) + # create a list of number of relevant docs per query + qrels_lengths = [ + len(relevant_docs[qid]) for qid in relevant_docs if qid in queries + ] + num_qrels = sum(qrels_lengths) + qrels_per_doc = num_qrels / len(relevant_docs) if num_queries else 0 + unique_qrels = len({doc for qid in relevant_docs for 
doc in relevant_docs[qid]}) # number of qrels that are not 0 num_qrels_non_zero = sum( sum(1 for doc_id in docs if docs[doc_id] != 0) @@ -326,42 +374,64 @@ def _calculate_metrics_from_split( qrels_per_doc = num_qrels_non_zero / len(relevant_docs) if num_queries else 0 if self.instructions is not None: - total_instructions_len = sum( - [len(instruction) for instruction in instructions.values()] - ) + instructions_len = [ + len(instruction) for instruction in instructions.values() + ] num_instructions = len(instructions) + average_instruction_length = sum(instructions_len) + min_instruction_length = min(instructions_len) + max_instruction_length = max(instructions_len) + unique_instructions = len(set(instructions)) else: - total_instructions_len = 0 - num_instructions = 0 + num_instructions = None + average_instruction_length = None + min_instruction_length = None + max_instruction_length = None + unique_instructions = None if self.top_ranked is not None: top_ranked_per_query = ( - sum(len(docs) for docs in top_ranked.values()) / num_queries - if num_queries - else 0 + [len(docs) for docs in top_ranked.values()] if num_queries else None ) + min_top_ranked_per_query = min(top_ranked_per_query) + average_top_ranked_per_query = sum(top_ranked_per_query) / num_queries + max_top_ranked_per_query = max(top_ranked_per_query) else: - top_ranked_per_query = 0 + min_top_ranked_per_query = None + average_top_ranked_per_query = None + max_top_ranked_per_query = None return RetrievalDescriptiveStatistics( - number_of_characters=query_len + doc_len, + number_of_characters=sum(query_len) + sum(doc_len), num_samples=num_documents + num_queries, num_queries=num_queries, num_documents=num_documents, - average_document_length=doc_len / num_documents, - average_query_length=query_len / num_queries, + min_document_length=min(doc_len), + average_document_length=sum(doc_len) / num_documents, + max_document_length=max(doc_len), + unique_documents=len(set(corpus)), + 
min_query_length=min(query_len), + average_query_length=sum(query_len) / num_queries, + max_query_length=max(query_len), + unique_queries=len(set(queries)), + min_relevant_docs_per_query=min(qrels_lengths), average_relevant_docs_per_query=qrels_per_doc, - average_instruction_length=total_instructions_len / num_instructions - if num_instructions - else 0, + max_relevant_docs_per_query=max(qrels_lengths), + unique_relevant_docs=unique_qrels, num_instructions=num_instructions, - average_top_ranked_per_query=top_ranked_per_query, + min_instruction_length=min_instruction_length, + average_instruction_length=average_instruction_length, + max_instruction_length=max_instruction_length, + unique_instructions=unique_instructions, + min_top_ranked_per_query=min_top_ranked_per_query, + average_top_ranked_per_query=average_top_ranked_per_query, + max_top_ranked_per_query=max_top_ranked_per_query, ) def calculate_length( queries: dict[str, str], corpus: dict[str, str] -) -> tuple[int, int]: +) -> tuple[list[int], list[int]]: queries_lens = [] doc_lens = [] for query in queries.values(): @@ -376,9 +446,7 @@ def calculate_length( else: doc_lens.append(len(doc)) - doc_len = sum(doc_lens) / len(doc_lens) if doc_lens else 0 - query_len = sum(queries_lens) / len(queries_lens) if queries_lens else 0 - return query_len, doc_len + return doc_lens, queries_lens def process_docs( diff --git a/mteb/abstasks/AbsTaskSTS.py b/mteb/abstasks/AbsTaskSTS.py index c9fa896b69..0a7cb820ea 100644 --- a/mteb/abstasks/AbsTaskSTS.py +++ b/mteb/abstasks/AbsTaskSTS.py @@ -17,16 +17,38 @@ class STSDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. 
+ unique_pairs: Number of unique pairs + + min_sentence1_length: Minimum length of sentence1 average_sentence1_len: Average length of sentence1 + max_sentence1_length: Maximum length of sentence1 + + min_sentence2_length: Minimum length of sentence2 average_sentence2_len: Average length of sentence2 + max_sentence2_length: Maximum length of sentence2 + + min_score: Minimum score avg_score: Average score + max_score: Maximum score """ num_samples: int number_of_characters: int + unique_pairs: int + + min_sentence1_length: int average_sentence1_len: float + max_sentence1_length: int + unique_sentence1: int + + min_sentence2_length: int average_sentence2_len: float + max_sentence2_length: int + unique_sentence2: int + + min_score: float avg_score: float + max_score: float class AbsTaskSTS(AbsTask): @@ -93,13 +115,24 @@ def _calculate_metrics_from_split( sentence2 = self.dataset[split]["sentence2"] score = self.dataset[split]["score"] - total_sentence1_len = sum([len(s) for s in sentence1]) - total_sentence2_len = sum([len(s) for s in sentence2]) + sentence1_len = [len(s) for s in sentence1] + sentence2_len = [len(s) for s in sentence2] + total_sentence1_len = sum(sentence1_len) + total_sentence2_len = sum(sentence2_len) avg_score = sum(score) / len(score) return STSDescriptiveStatistics( num_samples=len(sentence1), number_of_characters=total_sentence1_len + total_sentence2_len, + unique_pairs=len(set(zip(sentence1, sentence2))), + min_sentence1_length=min(sentence1_len), average_sentence1_len=total_sentence1_len / len(sentence1), + max_sentence1_length=max(sentence1_len), + unique_sentence1=len(set(sentence1)), + min_sentence2_length=min(sentence2_len), average_sentence2_len=total_sentence2_len / len(sentence2), + max_sentence2_length=max(sentence2_len), + unique_sentence2=len(set(sentence2)), + min_score=min(score), avg_score=avg_score, + max_score=max(score), ) diff --git a/mteb/abstasks/AbsTaskSummarization.py b/mteb/abstasks/AbsTaskSummarization.py index 
6d792c3199..07fd420571 100644 --- a/mteb/abstasks/AbsTaskSummarization.py +++ b/mteb/abstasks/AbsTaskSummarization.py @@ -21,18 +21,48 @@ class SummarizationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. - avg_text_len: Average length of text - avg_human_summaries_len: Average length of human summaries - avg_machine_summaries_len: Average length of machine summaries + + min_text_length: Minimum length of text + avg_text_length: Average length of text + max_text_length: Maximum length of text + unique_texts: Number of unique texts + + min_human_summaries_length: Minimum length of human summaries + avg_human_summaries_length: Average length of human summaries + max_human_summaries_length: Maximum length of human summaries + unique_human_summaries: Number of unique human summaries + + min_machine_summaries_length: Minimum length of machine summaries + avg_machine_summaries_length: Average length of machine summaries + max_machine_summaries_length: Maximum length of machine summaries + unique_machine_summaries: Number of unique machine summaries + + min_relevance: Minimum relevance score avg_relevance: Average relevance score + max_relevance: Maximum relevance score """ num_samples: int number_of_characters: int - avg_text_len: float - avg_human_summaries_len: float - avg_machine_summaries_len: float + + min_text_length: int + avg_text_length: float + max_text_length: int + unique_texts: int + + min_human_summaries_length: int + avg_human_summaries_length: float + max_human_summaries_length: int + unique_human_summaries: int + + min_machine_summaries_length: int + avg_machine_summaries_length: float + max_machine_summaries_length: int + unique_machine_summaries: int + + min_relevance: float avg_relevance: float + max_relevance: float class AbsTaskSummarization(AbsTask): @@ -112,17 +142,39 @@ def _calculate_metrics_from_split( machine_summaries = 
self.dataset[split]["machine_summaries"] relevance = self.dataset[split]["relevance"] - total_text_len = sum(len(x) for x in text) - total_human_summaries_len = sum(len(x) for x in human_summaries) - total_machine_summaries_len = sum(len(x) for x in machine_summaries) + all_human_summaries = [] + for s in human_summaries: + all_human_summaries.extend(s) + + all_machine_summaries = [] + for s in machine_summaries: + all_machine_summaries.extend(s) + + text_len = [len(t) for t in text] + total_text_len = sum(text_len) + human_summaries_len = [len(s) for s in human_summaries] + total_human_summaries_len = sum(human_summaries_len) + machine_summaries_len = [len(s) for s in machine_summaries] + total_machine_summaries_len = sum(machine_summaries_len) total_relevance = sum(sum(x) / len(x) for x in relevance) return SummarizationDescriptiveStatistics( num_samples=len(text), number_of_characters=total_text_len + total_human_summaries_len + total_machine_summaries_len, - avg_text_len=total_text_len / len(text), - avg_human_summaries_len=total_human_summaries_len / len(text), - avg_machine_summaries_len=total_machine_summaries_len / len(text), + min_text_length=min(text_len), + avg_text_length=total_text_len / len(text), + max_text_length=max(text_len), + unique_texts=len(set(text)), + min_human_summaries_length=min(human_summaries_len), + avg_human_summaries_length=total_human_summaries_len / len(text), + max_human_summaries_length=max(human_summaries_len), + unique_human_summaries=len(set(all_human_summaries)), + min_machine_summaries_length=min(machine_summaries_len), + avg_machine_summaries_length=total_machine_summaries_len / len(text), + max_machine_summaries_length=max(machine_summaries_len), + unique_machine_summaries=len(set(all_machine_summaries)), + min_relevance=min(relevance), avg_relevance=total_relevance / len(relevance), + max_relevance=max(relevance), ) diff --git a/mteb/descriptive_stats/BitextMining/BUCC.v2.json 
b/mteb/descriptive_stats/BitextMining/BUCC.v2.json new file mode 100644 index 0000000000..75ef75ced5 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/BUCC.v2.json @@ -0,0 +1,69 @@ +{ + "test": { + "num_samples": 35000, + "number_of_characters": 6640032, + "unique_pairs": 34978, + "min_sentence1_length": 16, + "average_sentence1_length": 99.10931428571429, + "max_sentence1_length": 204, + "unique_sentence1": 34978, + "min_sentence2_length": 42, + "average_sentence2_length": 90.60588571428572, + "max_sentence2_length": 159, + "unique_sentence2": 25306, + "hf_subset_descriptive_stats": { + "de-en": { + "num_samples": 9580, + "number_of_characters": 1919197, + "unique_pairs": 9573, + "min_sentence1_length": 50, + "average_sentence1_length": 109.07974947807934, + "max_sentence1_length": 204, + "unique_sentence1": 9573, + "min_sentence2_length": 46, + "average_sentence2_length": 91.25396659707724, + "max_sentence2_length": 155, + "unique_sentence2": 9570 + }, + "fr-en": { + "num_samples": 9086, + "number_of_characters": 1677545, + "unique_pairs": 9081, + "min_sentence1_length": 43, + "average_sentence1_length": 99.31785163988553, + "max_sentence1_length": 174, + "unique_sentence1": 9081, + "min_sentence2_length": 42, + "average_sentence2_length": 85.3117983711204, + "max_sentence2_length": 159, + "unique_sentence2": 9076 + }, + "ru-en": { + "num_samples": 14435, + "number_of_characters": 2808206, + "unique_pairs": 14425, + "min_sentence1_length": 40, + "average_sentence1_length": 101.6593003117423, + "max_sentence1_length": 186, + "unique_sentence1": 14425, + "min_sentence2_length": 45, + "average_sentence2_length": 92.88216141323173, + "max_sentence2_length": 159, + "unique_sentence2": 14424 + }, + "zh-en": { + "num_samples": 1899, + "number_of_characters": 235084, + "unique_pairs": 1899, + "min_sentence1_length": 16, + "average_sentence1_length": 28.429699842022117, + "max_sentence1_length": 40, + "unique_sentence1": 1899, + "min_sentence2_length": 48, + 
"average_sentence2_length": 95.3638757240653, + "max_sentence2_length": 159, + "unique_sentence2": 1899 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json b/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json index 131c9966ac..0675e5e0ef 100644 --- a/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json @@ -1,8 +1,15 @@ { "test": { + "num_samples": 500, + "number_of_characters": 44361, + "unique_pairs": 500, + "min_sentence1_length": 1, "average_sentence1_length": 49.834, + "max_sentence1_length": 555, + "unique_sentence1": 497, + "min_sentence2_length": 5, "average_sentence2_length": 38.888, - "num_samples": 500, - "number_of_characters": 44361 + "max_sentence2_length": 453, + "unique_sentence2": 491 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json b/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json index 507d9ad7bf..effafd237b 100644 --- a/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json @@ -1,3045 +1,6594 @@ { "test": { - "average_sentence1_length": 54.32948595562498, - "average_sentence2_length": 54.32948595562498, "num_samples": 760518, "number_of_characters": 82637104, + "unique_pairs": 759283, + "min_sentence1_length": 3, + "average_sentence1_length": 54.32948595562498, + "max_sentence1_length": 239, + "unique_sentence1": 34430, + "min_sentence2_length": 3, + "average_sentence2_length": 54.32948595562498, + "max_sentence2_length": 239, + "unique_sentence2": 34430, "hf_subset_descriptive_stats": { "asm_Beng-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155988, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, 
"average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 155988 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "asm_Beng-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 162044, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 162044 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "asm_Beng-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 167032, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 167032 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "asm_Beng-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 160716, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 160716 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "asm_Beng-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 156282, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 156282 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "asm_Beng-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 158269, + "unique_pairs": 1501, 
+ "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 158269 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "asm_Beng-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159964, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 159964 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "asm_Beng-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 165177, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 165177 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "asm_Beng-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 164681, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 164681 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "asm_Beng-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 162408, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 162408 
+ "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "asm_Beng-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 172838, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 172838 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "asm_Beng-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 162747, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 162747 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "asm_Beng-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 157316, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 157316 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "asm_Beng-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 160906, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 160906 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "asm_Beng-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 164223, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 
208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 164223 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "asm_Beng-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 160201, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 160201 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "asm_Beng-san_Deva": { + "num_samples": 1503, + "number_of_characters": 158093, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 158093 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "asm_Beng-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 169379, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 169379 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "asm_Beng-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 162623, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 162623 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "asm_Beng-tam_Taml": { + "num_samples": 
1503, + "number_of_characters": 174866, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 174866 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "asm_Beng-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 157690, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 157690 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "asm_Beng-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 161305, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 161305 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "ben_Beng-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 155988, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 155988 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "ben_Beng-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 
54.05988023952096, - "num_samples": 1503, - "number_of_characters": 156448 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "ben_Beng-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 161436, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 161436 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "ben_Beng-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 155120, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 155120 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "ben_Beng-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 150686, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 150686 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "ben_Beng-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 152673, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 152673 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "ben_Beng-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 154368, + "unique_pairs": 1500, + "min_sentence1_length": 4, 
"average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 154368 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "ben_Beng-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 159581, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 159581 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "ben_Beng-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 159085, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 159085 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "ben_Beng-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 156812, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 156812 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "ben_Beng-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 167242, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 167242 + "max_sentence2_length": 219, + 
"unique_sentence2": 1495 }, "ben_Beng-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 157151, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 157151 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "ben_Beng-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 151720, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 151720 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "ben_Beng-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 155310, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 155310 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "ben_Beng-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 158627, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 158627 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "ben_Beng-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 154605, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + 
"min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 154605 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "ben_Beng-san_Deva": { + "num_samples": 1503, + "number_of_characters": 152497, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 152497 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "ben_Beng-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 163783, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 163783 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "ben_Beng-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 157027, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 157027 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "ben_Beng-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 169270, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 169270 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "ben_Beng-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 152094, 
+ "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 152094 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "ben_Beng-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 155709, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 155709 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "brx_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162044, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 162044 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "brx_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 156448 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "brx_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 167492, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - 
"number_of_characters": 167492 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "brx_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161176, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 161176 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "brx_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 156742, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 156742 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "brx_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 158729 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "brx_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 160424, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 160424 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "brx_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 165637, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, 
+ "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 165637 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "brx_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165141, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 165141 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "brx_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 162868, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 162868 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "brx_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 173298, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 173298 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "brx_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 163207, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 163207 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "brx_Deva-mni_Mtei": 
{ + "num_samples": 1503, + "number_of_characters": 157776, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 157776 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "brx_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 161366 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "brx_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 164683, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 164683 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "brx_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 160661, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 160661 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "brx_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 158553, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 3, 
"average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 158553 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "brx_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 169839, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 169839 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "brx_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 163083, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 163083 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "brx_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 175326, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 175326 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "brx_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158150, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 158150 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "brx_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 161765, + "unique_pairs": 1502, + 
"min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 161765 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "doi_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 167032, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 167032 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "doi_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 161436, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 161436 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "doi_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 167492, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 167492 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "doi_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 166164, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 166164 + 
"max_sentence2_length": 201, + "unique_sentence2": 1497 }, "doi_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 161730 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "doi_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 163717, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 163717 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "doi_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 165412, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 165412 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "doi_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 170625, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 170625 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "doi_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 170129, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + 
"unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 170129 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "doi_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 167856, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 167856 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "doi_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 178286, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 178286 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "doi_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 168195, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 168195 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "doi_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 162764, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 162764 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "doi_Deva-npi_Deva": { + "num_samples": 1503, + 
"number_of_characters": 166354, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 166354 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "doi_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 169671, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 169671 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "doi_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 165649, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 165649 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "doi_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 163541, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 163541 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "doi_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 174827, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - 
"num_samples": 1503, - "number_of_characters": 174827 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "doi_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 168071, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 168071 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "doi_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 180314, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 180314 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "doi_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 163138, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 163138 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "doi_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 166753, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 166753 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "eng_Latn-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 160716, + "unique_pairs": 1499, + "min_sentence1_length": 4, 
"average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 160716 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "eng_Latn-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155120, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 155120 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "eng_Latn-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 161176, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 161176 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "eng_Latn-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 166164, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 166164 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "eng_Latn-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 155414, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 155414 + "max_sentence2_length": 203, + 
"unique_sentence2": 1500 }, "eng_Latn-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 157401, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 157401 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "eng_Latn-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159096, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 159096 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "eng_Latn-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 164309, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 164309 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "eng_Latn-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 163813, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 163813 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "eng_Latn-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 161540, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + 
"min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 161540 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "eng_Latn-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 171970, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 171970 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "eng_Latn-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 161879, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 161879 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "eng_Latn-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 156448 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "eng_Latn-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 160038, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 160038 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "eng_Latn-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 163355, + 
"unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 163355 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "eng_Latn-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 159333, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 159333 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "eng_Latn-san_Deva": { + "num_samples": 1503, + "number_of_characters": 157225, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 157225 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "eng_Latn-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 168511, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 168511 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "eng_Latn-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161755, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - 
"number_of_characters": 161755 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "eng_Latn-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 173998, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 173998 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "eng_Latn-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 156822, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 156822 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "eng_Latn-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 160437, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 160437 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "gom_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 156282, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 156282 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "gom_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 150686, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 
50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 150686 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "gom_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 156742, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 156742 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "gom_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 161730 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "gom_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 155414, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 155414 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "gom_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 152967, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 152967 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, 
"gom_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 154662, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 154662 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "gom_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 159875, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 159875 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "gom_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 159379, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 159379 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "gom_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 157106, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 157106 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "gom_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 167536, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 5, 
"average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 167536 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "gom_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 157445, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 157445 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "gom_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 152014, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 152014 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "gom_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 155604, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 155604 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "gom_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 158921, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 158921 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "gom_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 154899, + "unique_pairs": 1501, + 
"min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 154899 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "gom_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 152791, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 152791 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "gom_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 164077, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 164077 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "gom_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 157321, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 157321 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "gom_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 169564, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 169564 + 
"max_sentence2_length": 224, + "unique_sentence2": 1492 }, "gom_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 152388, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 152388 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "gom_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 156003, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 156003 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "guj_Gujr-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 158269, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 158269 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "guj_Gujr-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 152673, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 152673 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "guj_Gujr-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, 
+ "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 158729 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "guj_Gujr-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 163717, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 163717 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "guj_Gujr-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 157401, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 157401 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "guj_Gujr-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152967, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 152967 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "guj_Gujr-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 156649, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 156649 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "guj_Gujr-kan_Knda": { + "num_samples": 1503, + 
"number_of_characters": 161862, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 161862 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "guj_Gujr-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 161366 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "guj_Gujr-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 159093, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 159093 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "guj_Gujr-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 169523, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 169523 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "guj_Gujr-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 159432, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - 
"num_samples": 1503, - "number_of_characters": 159432 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "guj_Gujr-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 154001, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 154001 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "guj_Gujr-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 157591, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 157591 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "guj_Gujr-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 160908, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 160908 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "guj_Gujr-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 156886, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 156886 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "guj_Gujr-san_Deva": { + "num_samples": 1503, + "number_of_characters": 154778, + "unique_pairs": 1502, + "min_sentence1_length": 4, 
"average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 154778 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "guj_Gujr-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 166064, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 166064 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "guj_Gujr-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 159308, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 159308 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "guj_Gujr-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 171551, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 171551 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "guj_Gujr-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 154375, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 154375 + "max_sentence2_length": 182, + 
"unique_sentence2": 1495 }, "guj_Gujr-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157990, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 157990 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "hin_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 159964, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 159964 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "hin_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 154368, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 154368 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "hin_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 160424, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 160424 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "hin_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 165412, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + 
"min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 165412 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "hin_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 159096, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 159096 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "hin_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 154662, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 154662 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "hin_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 156649, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 156649 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "hin_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 163557, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 163557 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "hin_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 163061, 
+ "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 163061 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "hin_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 160788, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 160788 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "hin_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 171218, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 171218 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "hin_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 161127, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 161127 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "hin_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 155696, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - 
"number_of_characters": 155696 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "hin_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 159286, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 159286 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "hin_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 162603, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 162603 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "hin_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 158581, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 158581 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "hin_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 156473, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 156473 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "hin_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 167759, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, 
+ "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 167759 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "hin_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161003, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 161003 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "hin_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 173246, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 173246 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "hin_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 156070, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 156070 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "hin_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 159685, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 159685 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, 
"kan_Knda-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 165177, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 165177 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "kan_Knda-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 159581, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 159581 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "kan_Knda-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 165637, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 165637 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "kan_Knda-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 170625, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 170625 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "kan_Knda-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 164309, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, 
"average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 164309 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "kan_Knda-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 159875, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 159875 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "kan_Knda-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 161862, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 161862 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "kan_Knda-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 163557, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 163557 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "kan_Knda-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 168274, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 168274 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "kan_Knda-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 166001, + "unique_pairs": 1503, + 
"min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 166001 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "kan_Knda-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 176431, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 176431 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "kan_Knda-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 166340, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 166340 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "kan_Knda-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 160909, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 160909 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "kan_Knda-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 164499, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 164499 + 
"max_sentence2_length": 223, + "unique_sentence2": 1497 }, "kan_Knda-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 167816, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 167816 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "kan_Knda-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 163794, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 163794 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "kan_Knda-san_Deva": { + "num_samples": 1503, + "number_of_characters": 161686, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 161686 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "kan_Knda-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 172972, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 172972 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "kan_Knda-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 166216, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + 
"unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 166216 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "kan_Knda-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 178459, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 178459 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "kan_Knda-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 161283, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 161283 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "kan_Knda-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 164898, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 164898 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "kas_Arab-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 164681, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 164681 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "kas_Arab-ben_Beng": { + "num_samples": 1503, + 
"number_of_characters": 159085, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 159085 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "kas_Arab-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 165141, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 165141 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "kas_Arab-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 170129, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 170129 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "kas_Arab-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 163813, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 163813 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "kas_Arab-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 159379, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - 
"num_samples": 1503, - "number_of_characters": 159379 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "kas_Arab-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 161366 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "kas_Arab-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 163061, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 163061 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "kas_Arab-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 168274, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 168274 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "kas_Arab-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 165505, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 165505 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "kas_Arab-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 175935, + "unique_pairs": 1502, + "min_sentence1_length": 5, 
"average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 175935 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "kas_Arab-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 165844, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 165844 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "kas_Arab-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 160413, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 160413 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "kas_Arab-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 164003, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 164003 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "kas_Arab-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 167320, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 167320 + "max_sentence2_length": 195, + 
"unique_sentence2": 1500 }, "kas_Arab-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 163298, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 163298 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "kas_Arab-san_Deva": { + "num_samples": 1503, + "number_of_characters": 161190, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 161190 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "kas_Arab-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 172476, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 172476 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "kas_Arab-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 165720, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 165720 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "kas_Arab-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 177963, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + 
"min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 177963 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "kas_Arab-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 160787, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 160787 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "kas_Arab-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 164402, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 164402 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "mai_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162408, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 162408 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "mai_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 156812, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 156812 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "mai_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 162868, 
+ "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 162868 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "mai_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 167856, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 167856 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "mai_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161540, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 161540 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "mai_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 157106, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 157106 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "mai_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 159093, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - 
"number_of_characters": 159093 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "mai_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 160788, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 160788 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "mai_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 166001, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 166001 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "mai_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165505, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 165505 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "mai_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 173662, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 173662 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "mai_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 163571, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + 
"max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 163571 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "mai_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 158140, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 158140 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "mai_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 161730 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "mai_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 165047, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 165047 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "mai_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 161025, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 161025 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "mai_Deva-san_Deva": { + 
"num_samples": 1503, + "number_of_characters": 158917, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 158917 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "mai_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 170203, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 170203 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "mai_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 163447, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 163447 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "mai_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 175690, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 175690 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "mai_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158514, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 6, "average_sentence2_length": 
51.16300731869594, - "num_samples": 1503, - "number_of_characters": 158514 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "mai_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 162129, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 162129 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "mal_Mlym-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 172838, + "unique_pairs": 1498, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 172838 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "mal_Mlym-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 167242, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 167242 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "mal_Mlym-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 173298, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 173298 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "mal_Mlym-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 178286, + "unique_pairs": 1500, + "min_sentence1_length": 5, 
"average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 178286 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "mal_Mlym-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 171970, + "unique_pairs": 1499, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 171970 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "mal_Mlym-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 167536, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 167536 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "mal_Mlym-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 169523, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 169523 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "mal_Mlym-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 171218, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 171218 + "max_sentence2_length": 192, + 
"unique_sentence2": 1497 }, "mal_Mlym-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 176431, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 176431 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "mal_Mlym-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 175935, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 175935 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "mal_Mlym-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 173662, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 173662 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "mal_Mlym-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 174001, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 174001 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "mal_Mlym-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 168570, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + 
"min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 168570 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "mal_Mlym-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 172160, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 172160 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "mal_Mlym-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 175477, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 175477 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "mal_Mlym-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 171455, + "unique_pairs": 1498, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 171455 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "mal_Mlym-san_Deva": { + "num_samples": 1503, + "number_of_characters": 169347, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 169347 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "mal_Mlym-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 180633, 
+ "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 180633 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "mal_Mlym-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 173877, + "unique_pairs": 1499, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 173877 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "mal_Mlym-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 186120, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 186120 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "mal_Mlym-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 168944, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 168944 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "mal_Mlym-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 172559, + "unique_pairs": 1499, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - 
"number_of_characters": 172559 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "mar_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162747, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 162747 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "mar_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 157151, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 157151 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "mar_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 163207, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 163207 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "mar_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 168195, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 168195 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "mar_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161879, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 
54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 161879 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "mar_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 157445, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 157445 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "mar_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 159432, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 159432 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "mar_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 161127, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 161127 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "mar_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 166340, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 166340 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, 
"mar_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165844, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 165844 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "mar_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 163571, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 163571 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "mar_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 174001, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 174001 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "mar_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 158479, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 158479 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "mar_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 162069, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, 
"average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 162069 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "mar_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 165386, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 165386 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "mar_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 161364, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 161364 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "mar_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 159256, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 159256 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "mar_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 170542, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 170542 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "mar_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 163786, + "unique_pairs": 1502, + 
"min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 163786 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "mar_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 176029, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 176029 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "mar_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158853, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 158853 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "mar_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 162468, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 162468 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "mni_Mtei-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 157316, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 157316 + 
"max_sentence2_length": 208, + "unique_sentence2": 1497 }, "mni_Mtei-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 151720, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 151720 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "mni_Mtei-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 157776, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 157776 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "mni_Mtei-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 162764, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 162764 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "mni_Mtei-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 156448 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "mni_Mtei-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152014, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + 
"unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 152014 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "mni_Mtei-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 154001, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 154001 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "mni_Mtei-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 155696, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 155696 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "mni_Mtei-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 160909, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 160909 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "mni_Mtei-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 160413, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 160413 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "mni_Mtei-mai_Deva": { + "num_samples": 1503, + 
"number_of_characters": 158140, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 158140 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "mni_Mtei-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 168570, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 168570 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "mni_Mtei-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 158479, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 158479 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "mni_Mtei-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 156638, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 156638 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "mni_Mtei-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 159955, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - 
"num_samples": 1503, - "number_of_characters": 159955 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "mni_Mtei-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 155933, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 155933 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "mni_Mtei-san_Deva": { + "num_samples": 1503, + "number_of_characters": 153825, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 153825 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "mni_Mtei-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 165111, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 165111 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "mni_Mtei-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 158355, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 158355 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "mni_Mtei-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 170598, + "unique_pairs": 1503, + "min_sentence1_length": 4, 
"average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 170598 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "mni_Mtei-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 153422, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 153422 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "mni_Mtei-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157037, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 157037 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "npi_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 160906, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 160906 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "npi_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155310, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 155310 + "max_sentence2_length": 178, 
+ "unique_sentence2": 1497 }, "npi_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 161366 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "npi_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 166354, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 166354 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "npi_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 160038, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 160038 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "npi_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 155604, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 155604 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "npi_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 157591, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + 
"min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 157591 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "npi_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159286, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 159286 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "npi_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 164499, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 164499 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "npi_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 164003, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 164003 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "npi_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 161730 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "npi_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 172160, + 
"unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 172160 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "npi_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 162069, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 162069 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "npi_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 156638, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 156638 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "npi_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 163545, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 163545 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "npi_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 159523, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - 
"number_of_characters": 159523 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "npi_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 157415, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 157415 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "npi_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 168701, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 168701 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "npi_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161945, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 161945 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "npi_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 174188, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 174188 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "npi_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 157012, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 
53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 157012 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "npi_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 160627, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 160627 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "ory_Orya-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 164223, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 164223 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "ory_Orya-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 158627, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 158627 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "ory_Orya-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 164683, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 164683 + "max_sentence2_length": 210, + "unique_sentence2": 
1498 }, "ory_Orya-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 169671, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 169671 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "ory_Orya-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 163355, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 163355 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "ory_Orya-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 158921, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 158921 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "ory_Orya-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 160908, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 160908 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "ory_Orya-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 162603, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + 
"min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 162603 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "ory_Orya-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 167816, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 167816 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "ory_Orya-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 167320, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 167320 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "ory_Orya-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 165047, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 165047 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "ory_Orya-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 175477, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 175477 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "ory_Orya-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 
165386, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 165386 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "ory_Orya-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 159955, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 159955 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "ory_Orya-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 163545, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 163545 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "ory_Orya-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 162840, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 162840 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "ory_Orya-san_Deva": { + "num_samples": 1503, + "number_of_characters": 160732, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - 
"number_of_characters": 160732 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "ory_Orya-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 172018, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 172018 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "ory_Orya-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 165262, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 165262 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "ory_Orya-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 177505, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 177505 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "ory_Orya-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 160329, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 160329 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "ory_Orya-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 163944, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 
55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 163944 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "pan_Guru-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 160201, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 160201 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "pan_Guru-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 154605, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 154605 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "pan_Guru-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 160661, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 160661 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "pan_Guru-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 165649, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 165649 + "max_sentence2_length": 209, + "unique_sentence2": 1499 
}, "pan_Guru-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 159333, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 159333 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "pan_Guru-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 154899, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 154899 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "pan_Guru-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 156886, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 156886 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "pan_Guru-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 158581, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 158581 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "pan_Guru-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 163794, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, 
"average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 163794 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "pan_Guru-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 163298, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 163298 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "pan_Guru-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 161025, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 161025 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "pan_Guru-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 171455, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 171455 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "pan_Guru-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 161364, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 161364 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "pan_Guru-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 155933, + "unique_pairs": 1499, + 
"min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 155933 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "pan_Guru-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 159523, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 159523 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "pan_Guru-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 162840, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 162840 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "pan_Guru-san_Deva": { + "num_samples": 1503, + "number_of_characters": 156710, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 156710 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "pan_Guru-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 167996, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 167996 + 
"max_sentence2_length": 225, + "unique_sentence2": 1500 }, "pan_Guru-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161240, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 161240 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "pan_Guru-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 173483, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 173483 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "pan_Guru-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 156307, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 156307 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "pan_Guru-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 159922, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 159922 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "san_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 158093, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, 
+ "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 158093 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "san_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 152497, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 152497 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "san_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 158553, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 158553 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "san_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 163541, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 163541 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "san_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 157225, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 157225 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "san_Deva-gom_Deva": { + "num_samples": 1503, + 
"number_of_characters": 152791, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 152791 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "san_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 154778, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 154778 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "san_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 156473, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 156473 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "san_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 161686, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 161686 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "san_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 161190, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - 
"num_samples": 1503, - "number_of_characters": 161190 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "san_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 158917, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 158917 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "san_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 169347, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 169347 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "san_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 159256, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 159256 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "san_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 153825, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 153825 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "san_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 157415, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 
51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 157415 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "san_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 160732, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 160732 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "san_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 156710, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 156710 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "san_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 165888, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 165888 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "san_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 159132, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 159132 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, 
"san_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 171375, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 171375 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "san_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 154199, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 154199 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "san_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157814, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 157814 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "sat_Olck-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 169379, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 169379 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "sat_Olck-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 163783, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, 
"average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 163783 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "sat_Olck-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 169839, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 169839 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "sat_Olck-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 174827, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 174827 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "sat_Olck-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 168511, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 168511 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "sat_Olck-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 164077, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 164077 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "sat_Olck-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 166064, + "unique_pairs": 1503, + 
"min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 166064 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "sat_Olck-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 167759, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 167759 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "sat_Olck-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 172972, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 172972 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "sat_Olck-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 172476, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 172476 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "sat_Olck-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 170203, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 170203 + 
"max_sentence2_length": 230, + "unique_sentence2": 1499 }, "sat_Olck-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 180633, + "unique_pairs": 1501, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 180633 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "sat_Olck-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 170542, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 170542 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "sat_Olck-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 165111, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 165111 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "sat_Olck-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 168701, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 168701 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "sat_Olck-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 172018, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + 
"unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 172018 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "sat_Olck-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 167996, + "unique_pairs": 1501, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 167996 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "sat_Olck-san_Deva": { + "num_samples": 1503, + "number_of_characters": 165888, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 165888 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "sat_Olck-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 170418, + "unique_pairs": 1501, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 170418 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "sat_Olck-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 182661, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 182661 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "sat_Olck-tel_Telu": { + "num_samples": 1503, + 
"number_of_characters": 165485, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 165485 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "sat_Olck-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 169100, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 169100 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "snd_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162623, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 162623 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "snd_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 157027, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 157027 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "snd_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 163083, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, 
- "num_samples": 1503, - "number_of_characters": 163083 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "snd_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 168071, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 168071 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "snd_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161755, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 161755 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "snd_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 157321, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 157321 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "snd_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 159308, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 159308 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "snd_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 161003, + "unique_pairs": 1502, + "min_sentence1_length": 4, 
"average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 161003 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "snd_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 166216, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 166216 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "snd_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165720, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 165720 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "snd_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 163447, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 163447 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "snd_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 173877, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 173877 + "max_sentence2_length": 
219, + "unique_sentence2": 1495 }, "snd_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 163786, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 163786 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "snd_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 158355, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 158355 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "snd_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 161945, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 161945 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "snd_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 165262, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 165262 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "snd_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 161240, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + 
"unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 161240 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "snd_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 159132, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 159132 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "snd_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 170418, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 170418 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "snd_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 175905, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 175905 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "snd_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 158729 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "snd_Deva-urd_Arab": { + "num_samples": 1503, + 
"number_of_characters": 162344, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 162344 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "tam_Taml-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 174866, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 174866 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "tam_Taml-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 169270, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 169270 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "tam_Taml-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 175326, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 175326 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "tam_Taml-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 180314, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 6, "average_sentence2_length": 
57.37857618097139, - "num_samples": 1503, - "number_of_characters": 180314 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "tam_Taml-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 173998, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 173998 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "tam_Taml-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 169564, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 169564 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "tam_Taml-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 171551, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 171551 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "tam_Taml-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 173246, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 173246 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "tam_Taml-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 178459, + "unique_pairs": 1502, + "min_sentence1_length": 
3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 178459 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "tam_Taml-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 177963, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 177963 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "tam_Taml-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 175690, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 175690 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "tam_Taml-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 186120, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 186120 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "tam_Taml-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 176029, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 176029 + "max_sentence2_length": 
221, + "unique_sentence2": 1501 }, "tam_Taml-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 170598, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 170598 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "tam_Taml-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 174188, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 174188 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "tam_Taml-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 177505, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 177505 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "tam_Taml-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 173483, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 173483 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "tam_Taml-san_Deva": { + "num_samples": 1503, + "number_of_characters": 171375, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + 
"unique_sentence1": 1492, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 171375 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "tam_Taml-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 182661, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 182661 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "tam_Taml-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 175905, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 175905 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "tam_Taml-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 170972, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 170972 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "tam_Taml-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 174587, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 174587 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "tel_Telu-asm_Beng": { + "num_samples": 1503, + 
"number_of_characters": 157690, + "unique_pairs": 1499, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 157690 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "tel_Telu-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 152094, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 152094 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "tel_Telu-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 158150, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 158150 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "tel_Telu-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 163138, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 163138 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "tel_Telu-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 156822, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - 
"num_samples": 1503, - "number_of_characters": 156822 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "tel_Telu-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152388, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 152388 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "tel_Telu-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 154375, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 154375 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "tel_Telu-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 156070, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 156070 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "tel_Telu-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 161283, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 161283 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "tel_Telu-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 160787, + "unique_pairs": 1503, + "min_sentence1_length": 6, 
"average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 160787 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "tel_Telu-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 158514, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 158514 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "tel_Telu-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 168944, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 168944 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "tel_Telu-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 158853, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 158853 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "tel_Telu-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 153422, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 153422 + "max_sentence2_length": 239, + 
"unique_sentence2": 1498 }, "tel_Telu-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 157012, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 157012 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "tel_Telu-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 160329, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 160329 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "tel_Telu-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 156307, + "unique_pairs": 1499, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 156307 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "tel_Telu-san_Deva": { + "num_samples": 1503, + "number_of_characters": 154199, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 154199 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "tel_Telu-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 165485, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + 
"min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 165485 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "tel_Telu-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1499, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 158729 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "tel_Telu-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 170972, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 170972 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "tel_Telu-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157411, + "unique_pairs": 1499, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 157411 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "urd_Arab-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 161305, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 161305 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "urd_Arab-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 
155709, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 155709 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "urd_Arab-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 161765, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 161765 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "urd_Arab-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 166753, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 166753 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "urd_Arab-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 160437, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 160437 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "urd_Arab-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 156003, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, 
- "number_of_characters": 156003 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "urd_Arab-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 157990, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 157990 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "urd_Arab-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159685, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 159685 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "urd_Arab-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 164898, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 164898 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "urd_Arab-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 164402, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 164402 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "urd_Arab-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 162129, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 
53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 162129 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "urd_Arab-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 172559, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 172559 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "urd_Arab-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 162468, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 162468 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "urd_Arab-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 157037, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 157037 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "urd_Arab-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 160627, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 160627 + "max_sentence2_length": 223, + "unique_sentence2": 1497 
}, "urd_Arab-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 163944, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 163944 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "urd_Arab-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 159922, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 159922 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "urd_Arab-san_Deva": { + "num_samples": 1503, + "number_of_characters": 157814, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 157814 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "urd_Arab-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 169100, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 169100 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "urd_Arab-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 162344, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + 
"min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 162344 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "urd_Arab-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 174587, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 174587 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "urd_Arab-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 157411, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 157411 + "max_sentence2_length": 182, + "unique_sentence2": 1495 } } } diff --git a/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json b/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json new file mode 100644 index 0000000000..c53818c9ca --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json @@ -0,0 +1,6595 @@ +{ + "test": { + "num_samples": 518144, + "number_of_characters": 162367876, + "unique_pairs": 518101, + "min_sentence1_length": 9, + "average_sentence1_length": 156.6821925951087, + "max_sentence1_length": 692, + "unique_sentence1": 23550, + "min_sentence2_length": 9, + "average_sentence2_length": 156.6821925951087, + "max_sentence2_length": 692, + "unique_sentence2": 23550, + "hf_subset_descriptive_stats": { + "asm_Beng-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 310622, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 
1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "asm_Beng-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 323609, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "asm_Beng-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 319020, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "asm_Beng-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 320098, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "asm_Beng-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 312594, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "asm_Beng-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 309440, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + 
"max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "asm_Beng-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 320106, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "asm_Beng-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 332064, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "asm_Beng-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 322764, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "asm_Beng-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 308682, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "asm_Beng-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 343636, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "asm_Beng-mar_Deva": { + 
"num_samples": 1024, + "number_of_characters": 321784, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "asm_Beng-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 313134, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "asm_Beng-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 313419, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "asm_Beng-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 334226, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "asm_Beng-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306863, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "asm_Beng-san_Deva": { + "num_samples": 1024, + "number_of_characters": 318079, + "unique_pairs": 1024, + "min_sentence1_length": 13, 
+ "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "asm_Beng-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 326732, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "asm_Beng-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 320421, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "asm_Beng-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 348346, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "asm_Beng-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 319045, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "asm_Beng-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 315134, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, 
+ "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "ben_Beng-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 310622, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "ben_Beng-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 313313, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "ben_Beng-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 308724, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "ben_Beng-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 309802, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "ben_Beng-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 302298, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 
537, + "unique_sentence2": 1024 + }, + "ben_Beng-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 299144, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "ben_Beng-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 309810, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "ben_Beng-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 321768, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "ben_Beng-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 312468, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "ben_Beng-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 298386, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "ben_Beng-mal_Mlym": { + "num_samples": 1024, + 
"number_of_characters": 333340, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "ben_Beng-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 311488, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "ben_Beng-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 302838, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "ben_Beng-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 303123, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "ben_Beng-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 323930, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "ben_Beng-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 296567, + "unique_pairs": 1024, + "min_sentence1_length": 9, + 
"average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "ben_Beng-san_Deva": { + "num_samples": 1024, + "number_of_characters": 307783, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "ben_Beng-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 316436, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "ben_Beng-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 310125, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "ben_Beng-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 338050, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "ben_Beng-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 308749, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + 
"min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "ben_Beng-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 304838, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "brx_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 323609, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "brx_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 313313, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "brx_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 321711, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "brx_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 322789, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + 
"unique_sentence2": 1024 + }, + "brx_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 315285, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "brx_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 312131, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "brx_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 322797, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "brx_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 334755, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "brx_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 325455, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "brx_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 
311373, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "brx_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 346327, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "brx_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 324475, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "brx_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 315825, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "brx_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 316110, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "brx_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 336917, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + 
"max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "brx_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 309554, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "brx_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 320770, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "brx_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 329423, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "brx_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 323112, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "brx_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 351037, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + 
"average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "brx_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 321736, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "brx_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 317825, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "doi_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 319020, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "doi_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 308724, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "doi_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 321711, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + 
}, + "doi_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 318200, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "doi_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 310696, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "doi_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 307542, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "doi_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 318208, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "doi_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 330166, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "doi_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 320866, + 
"unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "doi_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 306784, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "doi_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 341738, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "doi_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 319886, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "doi_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 311236, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "doi_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 311521, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + 
"max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "doi_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 332328, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "doi_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 304965, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "doi_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 316181, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "doi_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 324834, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "doi_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 318523, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + 
"average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "doi_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 346448, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "doi_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 317147, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "doi_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 313236, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "eng_Latn-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 320098, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "eng_Latn-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 309802, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 
+ }, + "eng_Latn-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 322789, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "eng_Latn-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 318200, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "eng_Latn-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 311774, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "eng_Latn-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 308620, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "eng_Latn-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 319286, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "eng_Latn-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 331244, + 
"unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "eng_Latn-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 321944, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "eng_Latn-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 307862, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "eng_Latn-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 342816, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "eng_Latn-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 320964, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "eng_Latn-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 312314, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + 
"max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "eng_Latn-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 312599, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "eng_Latn-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 333406, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "eng_Latn-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306043, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "eng_Latn-san_Deva": { + "num_samples": 1024, + "number_of_characters": 317259, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "eng_Latn-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 325912, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + 
"average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "eng_Latn-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 319601, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "eng_Latn-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 347526, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "eng_Latn-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 318225, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "eng_Latn-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 314314, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "gom_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 312594, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 
+ }, + "gom_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 302298, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "gom_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 315285, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "gom_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 310696, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "gom_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 311774, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "gom_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 301116, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "gom_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 311782, + 
"unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "gom_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 323740, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "gom_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 314440, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "gom_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 300358, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "gom_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 335312, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "gom_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 313460, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + 
"max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "gom_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 304810, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "gom_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 305095, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "gom_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 325902, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "gom_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 298539, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "gom_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 309755, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + 
"average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "gom_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 318408, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "gom_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 312097, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "gom_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 340022, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "gom_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 310721, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "gom_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 306810, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + 
}, + "guj_Gujr-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 309440, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "guj_Gujr-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 299144, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "guj_Gujr-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 312131, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "guj_Gujr-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 307542, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "guj_Gujr-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 308620, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "guj_Gujr-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 301116, + 
"unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "guj_Gujr-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 308628, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "guj_Gujr-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 320586, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "guj_Gujr-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 311286, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "guj_Gujr-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 297204, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "guj_Gujr-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 332158, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + 
"max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "guj_Gujr-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 310306, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "guj_Gujr-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 301656, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "guj_Gujr-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 301941, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "guj_Gujr-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 322748, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "guj_Gujr-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 295385, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + 
"average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "guj_Gujr-san_Deva": { + "num_samples": 1024, + "number_of_characters": 306601, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "guj_Gujr-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 315254, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "guj_Gujr-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 308943, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "guj_Gujr-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 336868, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "guj_Gujr-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 307567, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, 
+ "guj_Gujr-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 303656, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "hin_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 320106, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "hin_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 309810, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "hin_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 322797, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "hin_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 318208, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "hin_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 319286, + "unique_pairs": 
1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "hin_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 311782, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "hin_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 308628, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "hin_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 331252, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "hin_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 321952, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "hin_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 307870, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + 
"max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "hin_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 342824, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "hin_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 320972, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "hin_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 312322, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "hin_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 312607, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "hin_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 333414, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + 
"average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "hin_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306051, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "hin_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 317267, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "hin_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 325920, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "hin_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 319609, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "hin_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 347534, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + 
}, + "hin_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 318233, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "hin_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 314322, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "kan_Knda-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 332064, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "kan_Knda-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 321768, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "kan_Knda-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 334755, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "kan_Knda-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 330166, + "unique_pairs": 
1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "kan_Knda-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 331244, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "kan_Knda-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 323740, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "kan_Knda-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 320586, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "kan_Knda-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 331252, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "kan_Knda-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 333910, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + 
"max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "kan_Knda-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 319828, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "kan_Knda-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 354782, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "kan_Knda-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 332930, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "kan_Knda-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 324280, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "kan_Knda-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 324565, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + 
"average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "kan_Knda-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 345372, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "kan_Knda-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 318009, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "kan_Knda-san_Deva": { + "num_samples": 1024, + "number_of_characters": 329225, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "kan_Knda-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 337878, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "kan_Knda-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 331567, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + 
"kan_Knda-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 359492, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "kan_Knda-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 330191, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "kan_Knda-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 326280, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "kas_Arab-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 322764, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "kas_Arab-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 312468, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "kas_Arab-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 325455, + "unique_pairs": 1024, 
+ "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "kas_Arab-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 320866, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "kas_Arab-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 321944, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "kas_Arab-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 314440, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "kas_Arab-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 311286, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "kas_Arab-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 321952, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + 
"max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "kas_Arab-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 333910, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "kas_Arab-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 310528, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "kas_Arab-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 345482, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "kas_Arab-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 323630, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "kas_Arab-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 314980, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + 
"average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "kas_Arab-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 315265, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "kas_Arab-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 336072, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "kas_Arab-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 308709, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "kas_Arab-san_Deva": { + "num_samples": 1024, + "number_of_characters": 319925, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "kas_Arab-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 328578, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + 
"kas_Arab-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 322267, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "kas_Arab-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 350192, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "kas_Arab-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 320891, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "kas_Arab-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 316980, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "mai_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 308682, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "mai_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 298386, + "unique_pairs": 
1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "mai_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 311373, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "mai_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 306784, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "mai_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 307862, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "mai_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 300358, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "mai_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 297204, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + 
"max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "mai_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 307870, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "mai_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 319828, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "mai_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 310528, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "mai_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 331400, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "mai_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 309548, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + 
"average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "mai_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 300898, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "mai_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 301183, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "mai_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 321990, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "mai_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 294627, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "mai_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 305843, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + 
"mai_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 314496, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "mai_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 308185, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "mai_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 336110, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "mai_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 306809, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "mai_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 302898, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "mal_Mlym-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 343636, + "unique_pairs": 
1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "mal_Mlym-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 333340, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "mal_Mlym-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 346327, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "mal_Mlym-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 341738, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "mal_Mlym-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 342816, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "mal_Mlym-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 335312, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + 
"max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "mal_Mlym-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 332158, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "mal_Mlym-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 342824, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "mal_Mlym-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 354782, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "mal_Mlym-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 345482, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "mal_Mlym-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 331400, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + 
"average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "mal_Mlym-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 344502, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "mal_Mlym-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 335852, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "mal_Mlym-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 336137, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "mal_Mlym-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 356944, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "mal_Mlym-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 329581, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + 
"mal_Mlym-san_Deva": { + "num_samples": 1024, + "number_of_characters": 340797, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "mal_Mlym-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 349450, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "mal_Mlym-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 343139, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "mal_Mlym-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 371064, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "mal_Mlym-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 341763, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "mal_Mlym-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 337852, + "unique_pairs": 1024, 
+ "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "mar_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 321784, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "mar_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 311488, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "mar_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 324475, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "mar_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 319886, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "mar_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 320964, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 
555, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "mar_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 313460, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "mar_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 310306, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "mar_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 320972, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "mar_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 332930, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "mar_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 323630, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 
158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "mar_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 309548, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "mar_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 344502, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "mar_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 314000, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "mar_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 314285, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "mar_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 335092, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "mar_Deva-pan_Guru": { + 
"num_samples": 1024, + "number_of_characters": 307729, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "mar_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 318945, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "mar_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 327598, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "mar_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 321287, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "mar_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 349212, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "mar_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 319911, + "unique_pairs": 1024, + "min_sentence1_length": 
16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "mar_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 316000, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "mni_Mtei-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 313134, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "mni_Mtei-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 302838, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "mni_Mtei-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 315825, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "mni_Mtei-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 311236, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 
1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "mni_Mtei-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 312314, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "mni_Mtei-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 304810, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "mni_Mtei-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 301656, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "mni_Mtei-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 312322, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "mni_Mtei-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 324280, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + 
"max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "mni_Mtei-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 314980, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "mni_Mtei-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 300898, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "mni_Mtei-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 335852, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "mni_Mtei-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 314000, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "mni_Mtei-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 305635, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "mni_Mtei-ory_Orya": { + "num_samples": 
1024, + "number_of_characters": 326442, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "mni_Mtei-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 299079, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "mni_Mtei-san_Deva": { + "num_samples": 1024, + "number_of_characters": 310295, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "mni_Mtei-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 318948, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "mni_Mtei-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 312637, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "mni_Mtei-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 340562, + "unique_pairs": 1024, + "min_sentence1_length": 16, + 
"average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "mni_Mtei-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 311261, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "mni_Mtei-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 307350, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "npi_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 313419, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "npi_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 303123, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "npi_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 316110, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + 
"min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "npi_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 311521, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "npi_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 312599, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "npi_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 305095, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "npi_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 301941, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "npi_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 312607, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + 
}, + "npi_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 324565, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "npi_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 315265, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "npi_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 301183, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "npi_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 336137, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "npi_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 314285, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "npi_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 305635, + "unique_pairs": 1024, + "min_sentence1_length": 
10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "npi_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 326727, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "npi_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 299364, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "npi_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 310580, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "npi_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 319233, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "npi_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 312922, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + 
"average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "npi_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 340847, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "npi_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 311546, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "npi_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 307635, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "ory_Orya-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 334226, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "ory_Orya-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 323930, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + 
"ory_Orya-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 336917, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "ory_Orya-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 332328, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "ory_Orya-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 333406, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "ory_Orya-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 325902, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "ory_Orya-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 322748, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "ory_Orya-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 333414, + "unique_pairs": 
1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "ory_Orya-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 345372, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "ory_Orya-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 336072, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "ory_Orya-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 321990, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "ory_Orya-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 356944, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "ory_Orya-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 335092, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + 
"max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "ory_Orya-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 326442, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "ory_Orya-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 326727, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "ory_Orya-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 320171, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "ory_Orya-san_Deva": { + "num_samples": 1024, + "number_of_characters": 331387, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "ory_Orya-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 340040, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + 
"average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "ory_Orya-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 333729, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "ory_Orya-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 361654, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "ory_Orya-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 332353, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "ory_Orya-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 328442, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "pan_Guru-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 306863, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + 
}, + "pan_Guru-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 296567, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "pan_Guru-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 309554, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "pan_Guru-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 304965, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "pan_Guru-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 306043, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "pan_Guru-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 298539, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "pan_Guru-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 295385, + "unique_pairs": 1024, + 
"min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "pan_Guru-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 306051, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "pan_Guru-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 318009, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "pan_Guru-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 308709, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "pan_Guru-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 294627, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "pan_Guru-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 329581, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + 
"unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "pan_Guru-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 307729, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "pan_Guru-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 299079, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "pan_Guru-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 299364, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "pan_Guru-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 320171, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "pan_Guru-san_Deva": { + "num_samples": 1024, + "number_of_characters": 304024, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + 
"max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "pan_Guru-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 312677, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "pan_Guru-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 306366, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "pan_Guru-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 334291, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "pan_Guru-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 304990, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "pan_Guru-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 301079, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "san_Deva-asm_Beng": { + "num_samples": 1024, + 
"number_of_characters": 318079, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "san_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 307783, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "san_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 320770, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "san_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 316181, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "san_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 317259, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "san_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 309755, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 
153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "san_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 306601, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "san_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 317267, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "san_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 329225, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "san_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 319925, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "san_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 305843, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + 
"average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "san_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 340797, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "san_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 318945, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "san_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 310295, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "san_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 310580, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "san_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 331387, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + 
"san_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 304024, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "san_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 323893, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "san_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 317582, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "san_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 345507, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "san_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 316206, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "san_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 312295, + "unique_pairs": 1024, + 
"min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "sat_Olck-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 326732, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "sat_Olck-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 316436, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "sat_Olck-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 329423, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "sat_Olck-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 324834, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "sat_Olck-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 325912, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, 
+ "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "sat_Olck-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 318408, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "sat_Olck-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 315254, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "sat_Olck-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 325920, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "sat_Olck-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 337878, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "sat_Olck-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 328578, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 
158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "sat_Olck-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 314496, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "sat_Olck-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 349450, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "sat_Olck-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 327598, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "sat_Olck-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 318948, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "sat_Olck-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 319233, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "sat_Olck-ory_Orya": { + 
"num_samples": 1024, + "number_of_characters": 340040, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "sat_Olck-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 312677, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "sat_Olck-san_Deva": { + "num_samples": 1024, + "number_of_characters": 323893, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "sat_Olck-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 326235, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "sat_Olck-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 354160, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "sat_Olck-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 324859, + "unique_pairs": 1024, + "min_sentence1_length": 
11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "sat_Olck-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 320948, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "snd_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 320421, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "snd_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 310125, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "snd_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 323112, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "snd_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 318523, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 
1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "snd_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 319601, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "snd_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 312097, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "snd_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 308943, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "snd_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 319609, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "snd_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 331567, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + 
"max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "snd_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 322267, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "snd_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 308185, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "snd_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 343139, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "snd_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 321287, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "snd_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 312637, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "snd_Deva-npi_Deva": { + "num_samples": 
1024, + "number_of_characters": 312922, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "snd_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 333729, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "snd_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306366, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "snd_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 317582, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "snd_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 326235, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "snd_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 347849, + "unique_pairs": 1024, + "min_sentence1_length": 18, + 
"average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "snd_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 318548, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "snd_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 314637, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "tam_Taml-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 348346, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "tam_Taml-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 338050, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "tam_Taml-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 351037, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + 
"min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "tam_Taml-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 346448, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "tam_Taml-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 347526, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "tam_Taml-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 340022, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "tam_Taml-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 336868, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "tam_Taml-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 347534, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 
531, + "unique_sentence2": 1024 + }, + "tam_Taml-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 359492, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "tam_Taml-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 350192, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "tam_Taml-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 336110, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "tam_Taml-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 371064, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "tam_Taml-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 349212, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "tam_Taml-mni_Mtei": { + "num_samples": 1024, + 
"number_of_characters": 340562, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "tam_Taml-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 340847, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "tam_Taml-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 361654, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "tam_Taml-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 334291, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "tam_Taml-san_Deva": { + "num_samples": 1024, + "number_of_characters": 345507, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "tam_Taml-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 354160, + "unique_pairs": 1024, + "min_sentence1_length": 32, + 
"average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "tam_Taml-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 347849, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "tam_Taml-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 346473, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "tam_Taml-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 342562, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "tel_Telu-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 319045, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "tel_Telu-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 308749, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, 
+ "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "tel_Telu-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 321736, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "tel_Telu-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 317147, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "tel_Telu-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 318225, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "tel_Telu-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 310721, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "tel_Telu-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 307567, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, 
+ "unique_sentence2": 1024 + }, + "tel_Telu-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 318233, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "tel_Telu-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 330191, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "tel_Telu-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 320891, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "tel_Telu-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 306809, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "tel_Telu-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 341763, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "tel_Telu-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 
319911, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "tel_Telu-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 311261, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "tel_Telu-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 311546, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "tel_Telu-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 332353, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "tel_Telu-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 304990, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "tel_Telu-san_Deva": { + "num_samples": 1024, + "number_of_characters": 316206, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + 
"max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "tel_Telu-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 324859, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "tel_Telu-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 318548, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "tel_Telu-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 346473, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "tel_Telu-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 313261, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "urd_Arab-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 315134, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + 
"average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "urd_Arab-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 304838, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "urd_Arab-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 317825, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "urd_Arab-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 313236, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "urd_Arab-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 314314, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "urd_Arab-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 306810, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 
+ }, + "urd_Arab-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 303656, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "urd_Arab-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 314322, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "urd_Arab-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 326280, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "urd_Arab-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 316980, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "urd_Arab-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 302898, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "urd_Arab-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 337852, + 
"unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "urd_Arab-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 316000, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "urd_Arab-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 307350, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "urd_Arab-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 307635, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "urd_Arab-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 328442, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "urd_Arab-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 301079, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + 
"max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "urd_Arab-san_Deva": { + "num_samples": 1024, + "number_of_characters": 312295, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "urd_Arab-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 320948, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "urd_Arab-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 314637, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "urd_Arab-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 342562, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "urd_Arab-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 313261, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + 
"average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json b/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json new file mode 100644 index 0000000000..504c3f1905 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json @@ -0,0 +1,329 @@ +{ + "validation": { + "num_samples": 21938, + "number_of_characters": 4256244, + "unique_pairs": 21840, + "min_sentence1_length": 2, + "average_sentence1_length": 97.0061992889051, + "max_sentence1_length": 521, + "unique_sentence1": 11563, + "min_sentence2_length": 2, + "average_sentence2_length": 97.0061992889051, + "max_sentence2_length": 521, + "unique_sentence2": 11563, + "hf_subset_descriptive_stats": { + "ar-en": { + "num_samples": 888, + "number_of_characters": 172499, + "unique_pairs": 887, + "min_sentence1_length": 4, + "average_sentence1_length": 85.48873873873873, + "max_sentence1_length": 369, + "unique_sentence1": 887, + "min_sentence2_length": 10, + "average_sentence2_length": 108.76689189189189, + "max_sentence2_length": 462, + "unique_sentence2": 881 + }, + "de-en": { + "num_samples": 888, + "number_of_characters": 202336, + "unique_pairs": 883, + "min_sentence1_length": 6, + "average_sentence1_length": 119.02702702702703, + "max_sentence1_length": 521, + "unique_sentence1": 881, + "min_sentence2_length": 10, + "average_sentence2_length": 108.82882882882883, + "max_sentence2_length": 462, + "unique_sentence2": 881 + }, + "en-ar": { + "num_samples": 888, + "number_of_characters": 172499, + "unique_pairs": 887, + "min_sentence1_length": 10, + "average_sentence1_length": 108.76689189189189, + "max_sentence1_length": 462, + "unique_sentence1": 881, + "min_sentence2_length": 4, + "average_sentence2_length": 85.48873873873873, + "max_sentence2_length": 369, + "unique_sentence2": 887 + }, + "en-de": { + "num_samples": 888, + 
"number_of_characters": 202336, + "unique_pairs": 883, + "min_sentence1_length": 10, + "average_sentence1_length": 108.82882882882883, + "max_sentence1_length": 462, + "unique_sentence1": 881, + "min_sentence2_length": 6, + "average_sentence2_length": 119.02702702702703, + "max_sentence2_length": 521, + "unique_sentence2": 881 + }, + "en-fr": { + "num_samples": 890, + "number_of_characters": 197619, + "unique_pairs": 883, + "min_sentence1_length": 10, + "average_sentence1_length": 108.4123595505618, + "max_sentence1_length": 462, + "unique_sentence1": 883, + "min_sentence2_length": 6, + "average_sentence2_length": 113.63146067415731, + "max_sentence2_length": 493, + "unique_sentence2": 881 + }, + "en-it": { + "num_samples": 929, + "number_of_characters": 191803, + "unique_pairs": 924, + "min_sentence1_length": 10, + "average_sentence1_length": 103.0010764262648, + "max_sentence1_length": 433, + "unique_sentence1": 922, + "min_sentence2_length": 7, + "average_sentence2_length": 103.46071044133477, + "max_sentence2_length": 444, + "unique_sentence2": 918 + }, + "en-ja": { + "num_samples": 871, + "number_of_characters": 132742, + "unique_pairs": 867, + "min_sentence1_length": 10, + "average_sentence1_length": 109.80826636050517, + "max_sentence1_length": 462, + "unique_sentence1": 864, + "min_sentence2_length": 5, + "average_sentence2_length": 42.59357060849598, + "max_sentence2_length": 225, + "unique_sentence2": 866 + }, + "en-ko": { + "num_samples": 879, + "number_of_characters": 142659, + "unique_pairs": 874, + "min_sentence1_length": 10, + "average_sentence1_length": 107.74175199089875, + "max_sentence1_length": 462, + "unique_sentence1": 872, + "min_sentence2_length": 3, + "average_sentence2_length": 54.5551763367463, + "max_sentence2_length": 250, + "unique_sentence2": 872 + }, + "en-nl": { + "num_samples": 1003, + "number_of_characters": 189637, + "unique_pairs": 1000, + "min_sentence1_length": 10, + "average_sentence1_length": 95.26819541375872, + 
"max_sentence1_length": 433, + "unique_sentence1": 996, + "min_sentence2_length": 4, + "average_sentence2_length": 93.80159521435692, + "max_sentence2_length": 477, + "unique_sentence2": 1000 + }, + "en-ro": { + "num_samples": 914, + "number_of_characters": 194128, + "unique_pairs": 910, + "min_sentence1_length": 10, + "average_sentence1_length": 104.72100656455142, + "max_sentence1_length": 433, + "unique_sentence1": 907, + "min_sentence2_length": 9, + "average_sentence2_length": 107.67286652078775, + "max_sentence2_length": 448, + "unique_sentence2": 910 + }, + "en-zh": { + "num_samples": 879, + "number_of_characters": 131126, + "unique_pairs": 877, + "min_sentence1_length": 10, + "average_sentence1_length": 109.36518771331058, + "max_sentence1_length": 462, + "unique_sentence1": 872, + "min_sentence2_length": 2, + "average_sentence2_length": 39.811149032992034, + "max_sentence2_length": 230, + "unique_sentence2": 867 + }, + "fr-en": { + "num_samples": 890, + "number_of_characters": 197619, + "unique_pairs": 883, + "min_sentence1_length": 6, + "average_sentence1_length": 113.63146067415731, + "max_sentence1_length": 493, + "unique_sentence1": 881, + "min_sentence2_length": 10, + "average_sentence2_length": 108.4123595505618, + "max_sentence2_length": 462, + "unique_sentence2": 883 + }, + "it-en": { + "num_samples": 929, + "number_of_characters": 191803, + "unique_pairs": 924, + "min_sentence1_length": 7, + "average_sentence1_length": 103.46071044133477, + "max_sentence1_length": 444, + "unique_sentence1": 918, + "min_sentence2_length": 10, + "average_sentence2_length": 103.0010764262648, + "max_sentence2_length": 433, + "unique_sentence2": 922 + }, + "it-nl": { + "num_samples": 1001, + "number_of_characters": 188858, + "unique_pairs": 998, + "min_sentence1_length": 7, + "average_sentence1_length": 94.64235764235764, + "max_sentence1_length": 459, + "unique_sentence1": 994, + "min_sentence2_length": 7, + "average_sentence2_length": 94.02697302697302, + 
"max_sentence2_length": 505, + "unique_sentence2": 998 + }, + "it-ro": { + "num_samples": 914, + "number_of_characters": 193339, + "unique_pairs": 911, + "min_sentence1_length": 7, + "average_sentence1_length": 103.90809628008753, + "max_sentence1_length": 435, + "unique_sentence1": 907, + "min_sentence2_length": 9, + "average_sentence2_length": 107.62253829321664, + "max_sentence2_length": 448, + "unique_sentence2": 910 + }, + "ja-en": { + "num_samples": 871, + "number_of_characters": 132742, + "unique_pairs": 867, + "min_sentence1_length": 5, + "average_sentence1_length": 42.59357060849598, + "max_sentence1_length": 225, + "unique_sentence1": 866, + "min_sentence2_length": 10, + "average_sentence2_length": 109.80826636050517, + "max_sentence2_length": 462, + "unique_sentence2": 864 + }, + "ko-en": { + "num_samples": 879, + "number_of_characters": 142659, + "unique_pairs": 874, + "min_sentence1_length": 3, + "average_sentence1_length": 54.5551763367463, + "max_sentence1_length": 250, + "unique_sentence1": 872, + "min_sentence2_length": 10, + "average_sentence2_length": 107.74175199089875, + "max_sentence2_length": 462, + "unique_sentence2": 872 + }, + "nl-en": { + "num_samples": 1003, + "number_of_characters": 189637, + "unique_pairs": 1000, + "min_sentence1_length": 4, + "average_sentence1_length": 93.80159521435692, + "max_sentence1_length": 477, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 95.26819541375872, + "max_sentence2_length": 433, + "unique_sentence2": 996 + }, + "nl-it": { + "num_samples": 1001, + "number_of_characters": 188858, + "unique_pairs": 998, + "min_sentence1_length": 7, + "average_sentence1_length": 94.02697302697302, + "max_sentence1_length": 505, + "unique_sentence1": 998, + "min_sentence2_length": 7, + "average_sentence2_length": 94.64235764235764, + "max_sentence2_length": 459, + "unique_sentence2": 994 + }, + "nl-ro": { + "num_samples": 913, + "number_of_characters": 191376, + "unique_pairs": 
911, + "min_sentence1_length": 7, + "average_sentence1_length": 102.01971522453451, + "max_sentence1_length": 478, + "unique_sentence1": 909, + "min_sentence2_length": 9, + "average_sentence2_length": 107.59255202628697, + "max_sentence2_length": 515, + "unique_sentence2": 909 + }, + "ro-en": { + "num_samples": 914, + "number_of_characters": 194128, + "unique_pairs": 910, + "min_sentence1_length": 9, + "average_sentence1_length": 107.67286652078775, + "max_sentence1_length": 448, + "unique_sentence1": 910, + "min_sentence2_length": 10, + "average_sentence2_length": 104.72100656455142, + "max_sentence2_length": 433, + "unique_sentence2": 907 + }, + "ro-it": { + "num_samples": 914, + "number_of_characters": 193339, + "unique_pairs": 911, + "min_sentence1_length": 9, + "average_sentence1_length": 107.62253829321664, + "max_sentence1_length": 448, + "unique_sentence1": 910, + "min_sentence2_length": 7, + "average_sentence2_length": 103.90809628008753, + "max_sentence2_length": 435, + "unique_sentence2": 907 + }, + "ro-nl": { + "num_samples": 913, + "number_of_characters": 191376, + "unique_pairs": 911, + "min_sentence1_length": 9, + "average_sentence1_length": 107.59255202628697, + "max_sentence1_length": 515, + "unique_sentence1": 909, + "min_sentence2_length": 7, + "average_sentence2_length": 102.01971522453451, + "max_sentence2_length": 478, + "unique_sentence2": 909 + }, + "zh-en": { + "num_samples": 879, + "number_of_characters": 131126, + "unique_pairs": 877, + "min_sentence1_length": 2, + "average_sentence1_length": 39.811149032992034, + "max_sentence1_length": 230, + "unique_sentence1": 867, + "min_sentence2_length": 10, + "average_sentence2_length": 109.36518771331058, + "max_sentence2_length": 462, + "unique_sentence2": 872 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json b/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json new file mode 100644 index 
0000000000..1aaed39454 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json @@ -0,0 +1,1540 @@ +{ + "validation": { + "num_samples": 57826, + "number_of_characters": 14600950, + "unique_pairs": 57826, + "min_sentence1_length": 24, + "average_sentence1_length": 126.2541071490333, + "max_sentence1_length": 368, + "unique_sentence1": 29903, + "min_sentence2_length": 24, + "average_sentence2_length": 126.24390412617161, + "max_sentence2_length": 368, + "unique_sentence2": 29903, + "hf_subset_descriptive_stats": { + "ben-eng": { + "num_samples": 997, + "number_of_characters": 248469, + "unique_pairs": 997, + "min_sentence1_length": 30, + "average_sentence1_length": 123.64593781344033, + "max_sentence1_length": 320, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-ben": { + "num_samples": 997, + "number_of_characters": 248469, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 30, + "average_sentence2_length": 123.64593781344033, + "max_sentence2_length": 320, + "unique_sentence2": 997 + }, + "guj-eng": { + "num_samples": 997, + "number_of_characters": 245477, + "unique_pairs": 997, + "min_sentence1_length": 30, + "average_sentence1_length": 120.64493480441324, + "max_sentence1_length": 368, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-guj": { + "num_samples": 997, + "number_of_characters": 245477, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 30, + "average_sentence2_length": 120.64493480441324, + 
"max_sentence2_length": 368, + "unique_sentence2": 997 + }, + "hin-eng": { + "num_samples": 997, + "number_of_characters": 250573, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 125.75626880641926, + "max_sentence1_length": 355, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-hin": { + "num_samples": 997, + "number_of_characters": 250564, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 125.74724172517553, + "max_sentence2_length": 355, + "unique_sentence2": 997 + }, + "kan-eng": { + "num_samples": 997, + "number_of_characters": 257131, + "unique_pairs": 997, + "min_sentence1_length": 34, + "average_sentence1_length": 132.33400200601807, + "max_sentence1_length": 331, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-kan": { + "num_samples": 997, + "number_of_characters": 256986, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 34, + "average_sentence2_length": 132.18856569709126, + "max_sentence2_length": 331, + "unique_sentence2": 997 + }, + "mal-eng": { + "num_samples": 997, + "number_of_characters": 267295, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 142.52858575727183, + "max_sentence1_length": 360, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mal": { + "num_samples": 997, + "number_of_characters": 
267296, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 142.5295887662989, + "max_sentence2_length": 360, + "unique_sentence2": 997 + }, + "mar-eng": { + "num_samples": 997, + "number_of_characters": 251107, + "unique_pairs": 997, + "min_sentence1_length": 29, + "average_sentence1_length": 126.29187562688064, + "max_sentence1_length": 321, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mar": { + "num_samples": 997, + "number_of_characters": 250897, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 29, + "average_sentence2_length": 126.08124373119358, + "max_sentence2_length": 321, + "unique_sentence2": 997 + }, + "tam-eng": { + "num_samples": 997, + "number_of_characters": 271322, + "unique_pairs": 997, + "min_sentence1_length": 30, + "average_sentence1_length": 146.567703109328, + "max_sentence1_length": 358, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-tam": { + "num_samples": 997, + "number_of_characters": 271322, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 30, + "average_sentence2_length": 146.567703109328, + "max_sentence2_length": 358, + "unique_sentence2": 997 + }, + "tel-eng": { + "num_samples": 997, + "number_of_characters": 252385, + "unique_pairs": 997, + "min_sentence1_length": 29, + "average_sentence1_length": 127.57372116349048, + 
"max_sentence1_length": 317, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-tel": { + "num_samples": 997, + "number_of_characters": 252380, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 29, + "average_sentence2_length": 127.56870611835507, + "max_sentence2_length": 317, + "unique_sentence2": 997 + }, + "urd-eng": { + "num_samples": 997, + "number_of_characters": 249824, + "unique_pairs": 997, + "min_sentence1_length": 37, + "average_sentence1_length": 125.00501504513541, + "max_sentence1_length": 295, + "unique_sentence1": 996, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-urd": { + "num_samples": 997, + "number_of_characters": 249824, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 37, + "average_sentence2_length": 125.00501504513541, + "max_sentence2_length": 295, + "unique_sentence2": 996 + }, + "asm-eng": { + "num_samples": 997, + "number_of_characters": 246220, + "unique_pairs": 997, + "min_sentence1_length": 30, + "average_sentence1_length": 121.3901705115346, + "max_sentence1_length": 314, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-asm": { + "num_samples": 997, + "number_of_characters": 246224, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 30, + "average_sentence2_length": 
121.39418254764293, + "max_sentence2_length": 314, + "unique_sentence2": 997 + }, + "bho-eng": { + "num_samples": 997, + "number_of_characters": 246895, + "unique_pairs": 997, + "min_sentence1_length": 25, + "average_sentence1_length": 122.06720160481444, + "max_sentence1_length": 326, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-bho": { + "num_samples": 997, + "number_of_characters": 246919, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 25, + "average_sentence2_length": 122.0912738214644, + "max_sentence2_length": 326, + "unique_sentence2": 997 + }, + "nep-eng": { + "num_samples": 997, + "number_of_characters": 245984, + "unique_pairs": 997, + "min_sentence1_length": 24, + "average_sentence1_length": 121.15346038114343, + "max_sentence1_length": 307, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-nep": { + "num_samples": 997, + "number_of_characters": 245984, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 24, + "average_sentence2_length": 121.15346038114343, + "max_sentence2_length": 307, + "unique_sentence2": 997 + }, + "ory-eng": { + "num_samples": 997, + "number_of_characters": 254206, + "unique_pairs": 997, + "min_sentence1_length": 34, + "average_sentence1_length": 129.4002006018054, + "max_sentence1_length": 308, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-ory": { + "num_samples": 997, + 
"number_of_characters": 254206, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 34, + "average_sentence2_length": 129.4002006018054, + "max_sentence2_length": 308, + "unique_sentence2": 997 + }, + "pan-eng": { + "num_samples": 997, + "number_of_characters": 251598, + "unique_pairs": 997, + "min_sentence1_length": 29, + "average_sentence1_length": 126.78435305917753, + "max_sentence1_length": 309, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-pan": { + "num_samples": 997, + "number_of_characters": 251597, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 29, + "average_sentence2_length": 126.78335005015045, + "max_sentence2_length": 309, + "unique_sentence2": 997 + }, + "pus-eng": { + "num_samples": 997, + "number_of_characters": 247450, + "unique_pairs": 997, + "min_sentence1_length": 32, + "average_sentence1_length": 122.62387161484453, + "max_sentence1_length": 300, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-pus": { + "num_samples": 997, + "number_of_characters": 247450, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 32, + "average_sentence2_length": 122.62387161484453, + "max_sentence2_length": 300, + "unique_sentence2": 997 + }, + "san-eng": { + "num_samples": 997, + "number_of_characters": 249042, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 
124.22066198595788, + "max_sentence1_length": 311, + "unique_sentence1": 994, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-san": { + "num_samples": 997, + "number_of_characters": 248877, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 124.05516549648947, + "max_sentence2_length": 311, + "unique_sentence2": 994 + }, + "awa-eng": { + "num_samples": 997, + "number_of_characters": 247944, + "unique_pairs": 997, + "min_sentence1_length": 34, + "average_sentence1_length": 123.11935807422267, + "max_sentence1_length": 329, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-awa": { + "num_samples": 997, + "number_of_characters": 247884, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 34, + "average_sentence2_length": 123.05917753259779, + "max_sentence2_length": 329, + "unique_sentence2": 997 + }, + "bgc-eng": { + "num_samples": 997, + "number_of_characters": 245935, + "unique_pairs": 997, + "min_sentence1_length": 27, + "average_sentence1_length": 121.10431293881645, + "max_sentence1_length": 303, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-bgc": { + "num_samples": 997, + "number_of_characters": 245935, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 27, + 
"average_sentence2_length": 121.10431293881645, + "max_sentence2_length": 303, + "unique_sentence2": 997 + }, + "bod-eng": { + "num_samples": 997, + "number_of_characters": 266515, + "unique_pairs": 997, + "min_sentence1_length": 26, + "average_sentence1_length": 141.74623871614844, + "max_sentence1_length": 355, + "unique_sentence1": 996, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-bod": { + "num_samples": 997, + "number_of_characters": 266495, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 26, + "average_sentence2_length": 141.72617853560683, + "max_sentence2_length": 355, + "unique_sentence2": 996 + }, + "boy-eng": { + "num_samples": 997, + "number_of_characters": 260174, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 135.38615847542627, + "max_sentence1_length": 312, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-boy": { + "num_samples": 997, + "number_of_characters": 260174, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 135.38615847542627, + "max_sentence2_length": 312, + "unique_sentence2": 997 + }, + "gbm-eng": { + "num_samples": 997, + "number_of_characters": 247009, + "unique_pairs": 997, + "min_sentence1_length": 30, + "average_sentence1_length": 122.18154463390171, + "max_sentence1_length": 344, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-gbm": { + 
"num_samples": 997, + "number_of_characters": 247009, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 30, + "average_sentence2_length": 122.18154463390171, + "max_sentence2_length": 344, + "unique_sentence2": 997 + }, + "gom-eng": { + "num_samples": 997, + "number_of_characters": 244553, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 119.71815446339016, + "max_sentence1_length": 306, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-gom": { + "num_samples": 997, + "number_of_characters": 244553, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 119.71815446339016, + "max_sentence2_length": 306, + "unique_sentence2": 997 + }, + "hne-eng": { + "num_samples": 997, + "number_of_characters": 246416, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 121.58676028084253, + "max_sentence1_length": 321, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-hne": { + "num_samples": 997, + "number_of_characters": 246405, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 121.57572718154464, + "max_sentence2_length": 321, + "unique_sentence2": 997 + }, + "raj-eng": { + "num_samples": 997, + "number_of_characters": 249541, + "unique_pairs": 997, + "min_sentence1_length": 32, + 
"average_sentence1_length": 124.72116349047141, + "max_sentence1_length": 313, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-raj": { + "num_samples": 997, + "number_of_characters": 249541, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 32, + "average_sentence2_length": 124.72116349047141, + "max_sentence2_length": 313, + "unique_sentence2": 997 + }, + "mai-eng": { + "num_samples": 997, + "number_of_characters": 247991, + "unique_pairs": 997, + "min_sentence1_length": 29, + "average_sentence1_length": 123.16649949849548, + "max_sentence1_length": 312, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mai": { + "num_samples": 997, + "number_of_characters": 247994, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 29, + "average_sentence2_length": 123.16950852557673, + "max_sentence2_length": 312, + "unique_sentence2": 997 + }, + "mni-eng": { + "num_samples": 997, + "number_of_characters": 254308, + "unique_pairs": 997, + "min_sentence1_length": 39, + "average_sentence1_length": 129.5025075225677, + "max_sentence1_length": 310, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mni": { + "num_samples": 997, + "number_of_characters": 254312, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + 
"min_sentence2_length": 39, + "average_sentence2_length": 129.50651955867602, + "max_sentence2_length": 310, + "unique_sentence2": 997 + }, + "mup-eng": { + "num_samples": 997, + "number_of_characters": 248486, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 123.6629889669007, + "max_sentence1_length": 312, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mup": { + "num_samples": 997, + "number_of_characters": 248486, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 123.6629889669007, + "max_sentence2_length": 312, + "unique_sentence2": 997 + }, + "mwr-eng": { + "num_samples": 997, + "number_of_characters": 248641, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 123.81845536609829, + "max_sentence1_length": 324, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mwr": { + "num_samples": 997, + "number_of_characters": 248641, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 123.81845536609829, + "max_sentence2_length": 324, + "unique_sentence2": 997 + }, + "sat-eng": { + "num_samples": 997, + "number_of_characters": 258279, + "unique_pairs": 997, + "min_sentence1_length": 37, + "average_sentence1_length": 133.4854563691073, + "max_sentence1_length": 333, + "unique_sentence1": 995, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 
997 + }, + "eng-sat": { + "num_samples": 997, + "number_of_characters": 258279, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 37, + "average_sentence2_length": 133.4854563691073, + "max_sentence2_length": 333, + "unique_sentence2": 995 + } + } + }, + "test": { + "num_samples": 58696, + "number_of_characters": 15359416, + "unique_pairs": 58690, + "min_sentence1_length": 33, + "average_sentence1_length": 130.84266389532507, + "max_sentence1_length": 431, + "unique_sentence1": 30351, + "min_sentence2_length": 33, + "average_sentence2_length": 130.834724683113, + "max_sentence2_length": 431, + "unique_sentence2": 30351, + "hf_subset_descriptive_stats": { + "ben-eng": { + "num_samples": 1012, + "number_of_characters": 261008, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-ben": { + "num_samples": 1012, + "number_of_characters": 261008, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "guj-eng": { + "num_samples": 1012, + "number_of_characters": 258394, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-guj": { + "num_samples": 1012, + "number_of_characters": 258394, 
+ "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hin-eng": { + "num_samples": 1012, + "number_of_characters": 263040, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.5197628458498, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-hin": { + "num_samples": 1012, + "number_of_characters": 263029, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.5088932806324, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kan-eng": { + "num_samples": 1012, + "number_of_characters": 270091, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.48715415019763, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-kan": { + "num_samples": 1012, + "number_of_characters": 270021, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.4179841897233, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mal-eng": { + "num_samples": 1012, + "number_of_characters": 281302, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + 
"max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-mal": { + "num_samples": 1012, + "number_of_characters": 281302, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mar-eng": { + "num_samples": 1012, + "number_of_characters": 265212, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.66600790513834, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-mar": { + "num_samples": 1012, + "number_of_characters": 265023, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47924901185772, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tam-eng": { + "num_samples": 1012, + "number_of_characters": 286099, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-tam": { + "num_samples": 1012, + "number_of_characters": 286099, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tel-eng": { + "num_samples": 1012, + "number_of_characters": 264460, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.92292490118578, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-tel": { + "num_samples": 1012, + "number_of_characters": 264447, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.9100790513834, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "urd-eng": { + "num_samples": 1012, + "number_of_characters": 261886, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37944664031622, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-urd": { + "num_samples": 1012, + "number_of_characters": 261885, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "asm-eng": { + "num_samples": 1012, + "number_of_characters": 257902, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-asm": { + "num_samples": 1012, 
+ "number_of_characters": 257909, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.449604743083, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bho-eng": { + "num_samples": 1012, + "number_of_characters": 260578, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08695652173913, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-bho": { + "num_samples": 1012, + "number_of_characters": 260601, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.1096837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nep-eng": { + "num_samples": 1012, + "number_of_characters": 258869, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-nep": { + "num_samples": 1012, + "number_of_characters": 258869, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ory-eng": { + "num_samples": 1012, + "number_of_characters": 266805, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-ory": { + "num_samples": 1012, + "number_of_characters": 266805, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pan-eng": { + "num_samples": 1012, + "number_of_characters": 265391, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-pan": { + "num_samples": 1012, + "number_of_characters": 265391, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pus-eng": { + "num_samples": 1012, + "number_of_characters": 254422, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00395256916995, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-pus": { + "num_samples": 1012, + "number_of_characters": 254421, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "san-eng": { + "num_samples": 1012, + "number_of_characters": 260339, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.85079051383399, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-san": { + "num_samples": 1012, + "number_of_characters": 260224, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73715415019763, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "awa-eng": { + "num_samples": 1012, + "number_of_characters": 260179, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.69268774703558, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-awa": { + "num_samples": 1012, + "number_of_characters": 260137, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bgc-eng": { + "num_samples": 1012, + "number_of_characters": 257450, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 123.99604743083005, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + 
"eng-bgc": { + "num_samples": 1012, + "number_of_characters": 257450, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 123.99604743083005, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bod-eng": { + "num_samples": 1012, + "number_of_characters": 280188, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.46442687747034, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-bod": { + "num_samples": 1012, + "number_of_characters": 280126, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.40316205533597, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "boy-eng": { + "num_samples": 1012, + "number_of_characters": 277538, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 143.84584980237153, + "max_sentence1_length": 396, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-boy": { + "num_samples": 1012, + "number_of_characters": 277538, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 143.84584980237153, + "max_sentence2_length": 396, + "unique_sentence2": 1011 + }, + "gbm-eng": { + "num_samples": 1012, + "number_of_characters": 261027, + "unique_pairs": 1012, + "min_sentence1_length": 
38, + "average_sentence1_length": 127.53063241106719, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-gbm": { + "num_samples": 1012, + "number_of_characters": 261027, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.53063241106719, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "gom-eng": { + "num_samples": 1012, + "number_of_characters": 259182, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 125.70750988142292, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-gom": { + "num_samples": 1012, + "number_of_characters": 259182, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 125.70750988142292, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "hne-eng": { + "num_samples": 1012, + "number_of_characters": 258911, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.43972332015811, + "max_sentence1_length": 327, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-hne": { + "num_samples": 1012, + "number_of_characters": 258915, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 125.44367588932806, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "raj-eng": { + "num_samples": 1012, + "number_of_characters": 261987, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 128.47924901185772, + "max_sentence1_length": 338, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-raj": { + "num_samples": 1012, + "number_of_characters": 261987, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 128.47924901185772, + "max_sentence2_length": 338, + "unique_sentence2": 1012 + }, + "mai-eng": { + "num_samples": 1012, + "number_of_characters": 261374, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-mai": { + "num_samples": 1012, + "number_of_characters": 261377, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87648221343873, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mni-eng": { + "num_samples": 1012, + "number_of_characters": 268767, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.17885375494072, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "eng-mni": { + "num_samples": 1012, + "number_of_characters": 268768, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1798418972332, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mup-eng": { + "num_samples": 1012, + "number_of_characters": 262034, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 128.52569169960475, + "max_sentence1_length": 340, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-mup": { + "num_samples": 1012, + "number_of_characters": 262034, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 128.52569169960475, + "max_sentence2_length": 340, + "unique_sentence2": 1012 + }, + "mwr-eng": { + "num_samples": 1012, + "number_of_characters": 263749, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.22035573122528, + "max_sentence1_length": 345, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-mwr": { + "num_samples": 1012, + "number_of_characters": 263749, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.22035573122528, + "max_sentence2_length": 345, + "unique_sentence2": 1012 + }, + "sat-eng": { + "num_samples": 1012, + "number_of_characters": 271757, + "unique_pairs": 
1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-sat": { + "num_samples": 1012, + "number_of_characters": 271757, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json b/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json new file mode 100644 index 0000000000..3adf27b3df --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json @@ -0,0 +1,24925 @@ +{ + "test": { + "num_samples": 3826252, + "number_of_characters": 988355274, + "unique_pairs": 3820263, + "min_sentence1_length": 1, + "average_sentence1_length": 129.15449296073547, + "max_sentence1_length": 773, + "unique_sentence1": 241259, + "min_sentence2_length": 1, + "average_sentence2_length": 129.15449296073547, + "max_sentence2_length": 773, + "unique_sentence2": 241259, + "hf_subset_descriptive_stats": { + "afr_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 520490, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "afr_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 564002, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 
437, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "afr_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 516072, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "afr_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 526155, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "afr_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 530560, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "afr_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 549109, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "afr_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 560267, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + 
"average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "afr_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 516709, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "afr_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 519796, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "afr_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 520179, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "amh_Ethi-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 415227, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "amh_Ethi-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 437473, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 
483, + "unique_sentence2": 1997 + }, + "amh_Ethi-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 413608, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "amh_Ethi-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 459006, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "amh_Ethi-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 404938, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "amh_Ethi-som_Latn": { + "num_samples": 1997, + "number_of_characters": 458799, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "amh_Ethi-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 455649, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "amh_Ethi-swa_Latn": { + 
"num_samples": 1997, + "number_of_characters": 440016, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "amh_Ethi-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 332745, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "amh_Ethi-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 501790, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "amh_Ethi-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 407310, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "amh_Ethi-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 435597, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "amh_Ethi-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 483595, + "unique_pairs": 
1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "amh_Ethi-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 425239, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "arb_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 474983, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "arb_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 483548, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "arb_Arab-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 526831, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "arb_Arab-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 530308, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 
115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "arb_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 478901, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "arb_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 474520, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "arb_Arab-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 500981, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "arb_Arab-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 524289, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "arb_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 431477, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + 
"unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "arb_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 492756, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "arb_Arab-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 509557, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "arb_Arab-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 518153, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "arb_Arab-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 342807, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "arb_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 477127, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + 
"average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "arb_Arab-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 364586, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "arb_Arab-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 490578, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "arb_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 445016, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "arb_Arab-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 523096, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "arb_Arab-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 509047, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 
468, + "unique_sentence2": 1996 + }, + "arb_Arab-por_Latn": { + "num_samples": 1997, + "number_of_characters": 508396, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "arb_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 473717, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "arb_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 473814, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "arb_Arab-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506074, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "arb_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 446094, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "arb_Arab-spa_Latn": { + 
"num_samples": 1997, + "number_of_characters": 519381, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "arb_Arab-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 503690, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "arb_Arab-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 483008, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "arb_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 541142, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "arb_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 505328, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "arb_Arab-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 496794, + 
"unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "arb_Arab-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 502302, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "arb_Arab-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 322659, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "arb_Arab-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 488913, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "aze_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515960, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "aze_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517354, + "unique_pairs": 1997, + "min_sentence1_length": 5, + 
"average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "aze_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 529910, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "aze_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520498, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "aze_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515560, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "aze_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 554908, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "aze_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 535247, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 
398, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "aze_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 580656, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "aze_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 563329, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "bak_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 515960, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "bak_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 494046, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "bak_Cyrl-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506602, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + 
"average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "bak_Cyrl-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 497190, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "bak_Cyrl-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 492252, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "bak_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 531600, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "bak_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 511939, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "bak_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 557348, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + 
"max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "bak_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 540021, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "bel_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 511000, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "bel_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525979, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "bel_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 497408, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "bel_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 503810, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + 
"bel_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 512015, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "bel_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 523981, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "bel_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 533956, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "bel_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530983, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "bel_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 509059, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "bel_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 
508986, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "bel_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508393, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "bel_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 512231, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "bel_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518873, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "bem_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 546212, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "bem_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 537470, + "unique_pairs": 1997, + "min_sentence1_length": 8, + 
"average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "bem_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 526972, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "bem_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 602279, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "bem_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 596231, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "bem_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 582774, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "bem_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 596822, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + 
"max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "bem_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 598248, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "ben_Beng-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 474983, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "ben_Beng-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 539452, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "ben_Beng-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 547650, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "ben_Beng-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 542929, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + 
"min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "ben_Beng-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 491522, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ben_Beng-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 519005, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "ben_Beng-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 487141, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "ben_Beng-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 513602, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "ben_Beng-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 536910, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 
146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "ben_Beng-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 488733, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "ben_Beng-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 444098, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "ben_Beng-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 505377, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "ben_Beng-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 522178, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "ben_Beng-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 530774, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 
1997 + }, + "ben_Beng-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 355428, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "ben_Beng-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 509338, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "ben_Beng-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 377207, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "ben_Beng-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 503199, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "ben_Beng-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 504689, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "ben_Beng-nep_Deva": { + "num_samples": 1997, + 
"number_of_characters": 492025, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "ben_Beng-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 535717, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "ben_Beng-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 494224, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "ben_Beng-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 521668, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ben_Beng-por_Latn": { + "num_samples": 1997, + "number_of_characters": 521017, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "ben_Beng-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518695, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "ben_Beng-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 502543, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "ben_Beng-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 464129, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "ben_Beng-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "ben_Beng-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 516311, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "ben_Beng-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 495629, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "ben_Beng-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 553763, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "ben_Beng-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 491329, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "ben_Beng-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 509415, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "ben_Beng-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 491800, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "ben_Beng-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 514923, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + 
"unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "ben_Beng-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 335280, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "ben_Beng-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 501534, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "bod_Tibt-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 543850, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "bod_Tibt-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 548349, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "bod_Tibt-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 589120, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + 
"average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "bod_Tibt-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 567609, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "bod_Tibt-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 559677, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "bod_Tibt-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 612483, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "bod_Tibt-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 538097, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "bos_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511000, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + 
"max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "bos_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 524799, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "bos_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 496228, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "bos_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 502630, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "bos_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 510835, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "bos_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522801, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + 
"bos_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 532776, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "bos_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 529803, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "bos_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 507879, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "bos_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 507806, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "bos_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 507213, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "bos_Latn-srp_Latn": { + "num_samples": 1997, + 
"number_of_characters": 511051, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "bos_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 517693, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "bul_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525979, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "bul_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 524799, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "bul_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 511207, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "bul_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517609, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "bul_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 525814, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "bul_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537780, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "bul_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 547755, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "bul_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544782, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "bul_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 522858, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 
135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "bul_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 522785, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "bul_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522192, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "bul_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 526030, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "bul_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 532672, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "cat_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 530680, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + 
"unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "cat_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 576068, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "cat_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 554946, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "cat_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 572177, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "cat_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 560435, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "cat_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 560175, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + 
"average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "cat_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 575445, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "cat_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 571160, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "ces_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 497408, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "ces_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 496228, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "ces_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511207, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 
493, + "unique_sentence2": 1996 + }, + "ces_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 489038, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ces_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 497243, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "ces_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 509209, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "ces_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 519184, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ces_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516211, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "ces_Latn-slk_Latn": { + 
"num_samples": 1997, + "number_of_characters": 494287, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "ces_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 494214, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "ces_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 493621, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "ces_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 497459, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "ces_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 504101, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "ckb_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 483548, + 
"unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "ckb_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500087, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ckb_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 495706, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "ckb_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 452663, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "ckb_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 498313, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "ckb_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 466202, + "unique_pairs": 1997, + "min_sentence1_length": 5, + 
"average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "ckb_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 494903, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "ckb_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 495000, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "ckb_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 467280, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "ckb_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 526514, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "cym_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 514225, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.4526790185278, + 
"max_sentence1_length": 444, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "cym_Latn-gle_Latn": { + "num_samples": 1997, + "number_of_characters": 561314, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.4526790185278, + "max_sentence1_length": 444, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 147.62593890836254, + "max_sentence2_length": 461, + "unique_sentence2": 1997 + }, + "dan_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 520490, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "dan_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 547788, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "dan_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 499858, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "dan_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 509941, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + 
"min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "dan_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 514346, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "dan_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 532895, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "dan_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 544053, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "dan_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 500495, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "dan_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 503582, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 
125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "dan_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 503965, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "deu_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 564002, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "deu_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 526831, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "deu_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 539452, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "deu_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 547788, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + 
"unique_sentence2": 1995 + }, + "deu_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 594777, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "deu_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 543370, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "deu_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 553453, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "deu_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 538989, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "deu_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 565450, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "deu_Latn-fra_Latn": { + 
"num_samples": 1997, + "number_of_characters": 588758, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "deu_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 495946, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "deu_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 557225, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "deu_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 574026, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "deu_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 582622, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "deu_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 557858, + "unique_pairs": 
1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "deu_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 407276, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "deu_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 429055, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "deu_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 555047, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "deu_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 576407, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "deu_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 587565, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 
148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "deu_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 544007, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "deu_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 547094, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "deu_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 573516, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "deu_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 572865, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "deu_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 570543, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + 
"unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "deu_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 583850, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "deu_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 568159, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "deu_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 547477, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "deu_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 605611, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "deu_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 561263, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + 
"average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "deu_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 566771, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "deu_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 387128, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "deu_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 553382, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "div_Thaa-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 547650, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "div_Thaa-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 551568, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 
437, + "unique_sentence2": 1997 + }, + "div_Thaa-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 579051, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "div_Thaa-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 548779, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "div_Thaa-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 565423, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "div_Thaa-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 569384, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "div_Thaa-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 564735, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "div_Thaa-nep_Deva": { + 
"num_samples": 1997, + "number_of_characters": 552071, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "div_Thaa-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 554270, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "div_Thaa-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 562589, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "div_Thaa-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 524175, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "div_Thaa-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 613809, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "div_Thaa-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 551375, + 
"unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "div_Thaa-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 551846, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "dzo_Tibt-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 543850, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "dzo_Tibt-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 490941, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "dzo_Tibt-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 531712, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "dzo_Tibt-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 510201, + "unique_pairs": 1997, + "min_sentence1_length": 5, + 
"average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "dzo_Tibt-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 502269, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "dzo_Tibt-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 555075, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "dzo_Tibt-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 480689, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "ell_Grek-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 530308, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "ell_Grek-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 542929, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + 
"max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "ell_Grek-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 594777, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "ell_Grek-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 546847, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ell_Grek-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 542466, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "ell_Grek-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 568927, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "ell_Grek-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 592235, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + 
"min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "ell_Grek-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 499423, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "ell_Grek-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 560702, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "ell_Grek-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 577503, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "ell_Grek-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 563842, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "ell_Grek-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 586099, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "ell_Grek-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 410753, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "ell_Grek-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 565719, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "unique_sentence2": 1995 + }, + "ell_Grek-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 432532, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "ell_Grek-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 558524, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "ell_Grek-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 591042, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 
1996 + }, + "ell_Grek-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 576993, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ell_Grek-por_Latn": { + "num_samples": 1997, + "number_of_characters": 576342, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "ell_Grek-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 574020, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "ell_Grek-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 587327, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "ell_Grek-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 582734, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "unique_sentence2": 1996 + }, + "ell_Grek-swa_Latn": { + "num_samples": 1997, + 
"number_of_characters": 571636, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "ell_Grek-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 550954, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "ell_Grek-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 609088, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "ell_Grek-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 564740, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "ell_Grek-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 570248, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "ell_Grek-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 390605, + "unique_pairs": 1996, + 
"min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "ell_Grek-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 556859, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "eng_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 516072, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "eng_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 415227, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "eng_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 478901, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "eng_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 517354, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "eng_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 494046, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "eng_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 503810, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "eng_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 546212, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "eng_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 491522, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "eng_Latn-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 548349, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + 
"unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "eng_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 502630, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "eng_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 517609, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "eng_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 530680, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "eng_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 489038, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "eng_Latn-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 500087, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + 
"average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "eng_Latn-cym_Latn": { + "num_samples": 1997, + "number_of_characters": 514225, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.4526790185278, + "max_sentence2_length": 444, + "unique_sentence2": 1997 + }, + "eng_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 499858, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "eng_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 543370, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "eng_Latn-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 551568, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "eng_Latn-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 490941, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + 
"max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "eng_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 546847, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "eng_Latn-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 522923, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "eng_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 486698, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "eng_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 505523, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "eng_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 491059, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + 
"eng_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 548225, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "eng_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 541140, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "eng_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 517520, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "eng_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 540828, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "eng_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 476200, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "eng_Latn-gle_Latn": { + "num_samples": 1997, + "number_of_characters": 
542529, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 147.62593890836254, + "max_sentence2_length": 461, + "unique_sentence2": 1997 + }, + "eng_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 519706, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "eng_Latn-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 492651, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "eng_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 517686, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "eng_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 448016, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "eng_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509295, + "unique_pairs": 1997, + "min_sentence1_length": 7, + 
"average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "eng_Latn-hmn_Latn": { + "num_samples": 1997, + "number_of_characters": 578510, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 165.6434651977967, + "max_sentence2_length": 643, + "unique_sentence2": 1997 + }, + "eng_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 503645, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "eng_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 526096, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "eng_Latn-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 512435, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "eng_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 493821, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 
437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "eng_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 534692, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "eng_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 509928, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "eng_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 536937, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "eng_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 359346, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "eng_Latn-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513256, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + 
"average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "eng_Latn-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 514312, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "unique_sentence2": 1995 + }, + "eng_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 507996, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "eng_Latn-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 536211, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "eng_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 551507, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "eng_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 498584, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + 
"max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "eng_Latn-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 493666, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "eng_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 381125, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "eng_Latn-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 514700, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "eng_Latn-lav_Latn": { + "num_samples": 1997, + "number_of_characters": 515908, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "unique_sentence2": 1994 + }, + "eng_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 507117, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + 
"eng_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 528477, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "eng_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 551872, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "eng_Latn-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 508607, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "eng_Latn-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 461555, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "eng_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515611, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "eng_Latn-mlg_Latn": { + "num_samples": 1997, + 
"number_of_characters": 568028, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "eng_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 525195, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "eng_Latn-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 506768, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "eng_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 521844, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "eng_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 524903, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "eng_Latn-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 559574, + "unique_pairs": 1997, + 
"min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "eng_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 545459, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "eng_Latn-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 495943, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "eng_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 539635, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "eng_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 496077, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "eng_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 499164, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "eng_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 539219, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "eng_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "eng_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 485151, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "eng_Latn-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 498142, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "eng_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 525586, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + 
"unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "eng_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 524935, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "eng_Latn-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 490256, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "eng_Latn-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 490353, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "eng_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 540205, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "eng_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522613, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + 
"average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "eng_Latn-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 462633, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "eng_Latn-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 506461, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "eng_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 500689, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "eng_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 500616, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "eng_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 525575, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + 
"max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "eng_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 546050, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "eng_Latn-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 468047, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "eng_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 539012, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "eng_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 535920, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "eng_Latn-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 531327, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "unique_sentence2": 1996 + }, + 
"eng_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 500023, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "eng_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 503861, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "eng_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 535862, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "eng_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 520229, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "eng_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 499547, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "eng_Latn-tah_Latn": { + "num_samples": 1997, + 
"number_of_characters": 557343, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "eng_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 557681, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "eng_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 493646, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "eng_Latn-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 495247, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "eng_Latn-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 521867, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "eng_Latn-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 485188, + "unique_pairs": 1997, + 
"min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "eng_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 412958, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "eng_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 561360, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "eng_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 582003, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "eng_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 532994, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "eng_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 513333, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "eng_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 558742, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "eng_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 510503, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "eng_Latn-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 495718, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "eng_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 541415, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "eng_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 547476, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + 
"unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "eng_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 518841, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "eng_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 487523, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "eng_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 515810, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "eng_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 563808, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "eng_Latn-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 326607, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + 
"average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "eng_Latn-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 332681, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "eng_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 339198, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "eng_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 505452, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "eus_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 519005, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "eus_Latn-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 579051, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + 
"max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "eus_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 522923, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "eus_Latn-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 520134, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "eus_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 536778, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "eus_Latn-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 540739, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "eus_Latn-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 536090, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + 
"eus_Latn-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 523426, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "eus_Latn-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 525625, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "eus_Latn-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 533944, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "eus_Latn-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 495530, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "eus_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 585164, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "eus_Latn-tel_Telu": { + "num_samples": 1997, + 
"number_of_characters": 522730, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "eus_Latn-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 523201, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "ewe_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 537470, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "ewe_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 486698, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ewe_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 467458, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "ewe_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 542765, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "ewe_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 536717, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "ewe_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 523260, + "unique_pairs": 1995, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "ewe_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 537308, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "ewe_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 538734, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "fao_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 526155, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 
129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "fao_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 509941, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "fao_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 553453, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "fao_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 505523, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fao_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 520011, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "fao_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 538560, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 
1997, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "fao_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 549718, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "fao_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 506160, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "fao_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 509247, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "fao_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 509630, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "fas_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 474520, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 
115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "fas_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 487141, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "fas_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 495706, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "fas_Arab-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 538989, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "fas_Arab-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 542466, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "fas_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 491059, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + 
"unique_sentence2": 1997 + }, + "fas_Arab-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 513139, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "fas_Arab-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 536447, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "fas_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 443635, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "fas_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 504914, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "fas_Arab-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 521715, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "fas_Arab-ind_Latn": { + "num_samples": 
1997, + "number_of_characters": 530311, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "fas_Arab-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 354965, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "fas_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 489285, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "fas_Arab-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 376744, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "fas_Arab-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 502736, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "fas_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 457174, + "unique_pairs": 1996, + 
"min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "fas_Arab-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 535254, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "fas_Arab-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 521205, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "fas_Arab-por_Latn": { + "num_samples": 1997, + "number_of_characters": 520554, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "fas_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 485875, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "fas_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 485972, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 
121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "fas_Arab-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518232, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "fas_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 458252, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "fas_Arab-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 531539, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "fas_Arab-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 515848, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "fas_Arab-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 495166, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + 
"unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "fas_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 553300, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "fas_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 517486, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "fas_Arab-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 508952, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "fas_Arab-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 514460, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "fas_Arab-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 334817, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + 
"average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "fas_Arab-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 501071, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "fij_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 548225, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fij_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 593925, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "fij_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 587477, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "fij_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 604657, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 
540, + "unique_sentence2": 1996 + }, + "fij_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 620813, + "unique_pairs": 1995, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "fij_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 574629, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "fij_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 577688, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "fij_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 578360, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "fij_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 610128, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "fij_Latn-ton_Latn": { + 
"num_samples": 1997, + "number_of_characters": 614145, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "fil_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 541140, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fil_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 593925, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "fil_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 580392, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "fil_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 597572, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "fil_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 613728, + 
"unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "fil_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 567544, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "fil_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 570603, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "fil_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 571275, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "fil_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 603043, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "fil_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 607060, + "unique_pairs": 1997, + "min_sentence1_length": 10, + 
"average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "fin_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 500981, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "fin_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 513602, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "fin_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 565450, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "fin_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 568927, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "fin_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517520, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + 
"max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fin_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 513139, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "fin_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 562908, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "fin_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 470096, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "fin_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 531375, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "fin_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 548176, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + 
"min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "fin_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 556772, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "fin_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 381426, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "fin_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 403205, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "fin_Latn-lav_Latn": { + "num_samples": 1997, + "number_of_characters": 537988, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "unique_sentence2": 1994 + }, + "fin_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 529197, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 
129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "fin_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 561715, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "fin_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 547666, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "fin_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 547015, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "fin_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544693, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "fin_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 558000, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 
1996 + }, + "fin_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 542309, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "fin_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 521627, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "fin_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 579761, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "fin_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 535413, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "fin_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 540921, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "fin_Latn-zho_Hant": { + "num_samples": 1997, + 
"number_of_characters": 361278, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "fin_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 527532, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "fra_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 524289, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "fra_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 536910, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "fra_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 576068, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "fra_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 588758, + "unique_pairs": 1996, + 
"min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "fra_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 592235, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "fra_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 540828, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fra_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 536447, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "fra_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 562908, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "fra_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 565094, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 
146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "fra_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 493404, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "fra_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 554683, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "fra_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 571484, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "fra_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 580080, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "fra_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 582325, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 
1996, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "fra_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 404734, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "fra_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 426513, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "fra_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 552505, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "fra_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 570583, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "fra_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 585023, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 
146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "fra_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 570974, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "fra_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 570323, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "fra_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 585593, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "fra_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 568001, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "fra_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 581308, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 
+ }, + "fra_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 565617, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "fra_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 544935, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "fra_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 603069, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "fra_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 558721, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "fra_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 564229, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "fra_Latn-zho_Hant": { + "num_samples": 1997, + 
"number_of_characters": 384586, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "fra_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 550840, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "fuc_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 526972, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "fuc_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 476200, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fuc_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 467458, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "fuc_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 532267, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "fuc_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 526219, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "fuc_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 512762, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "fuc_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 526810, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "fuc_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 528236, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "gle_Latn-cym_Latn": { + "num_samples": 1997, + "number_of_characters": 561314, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 
147.62593890836254, + "max_sentence1_length": 461, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.4526790185278, + "max_sentence2_length": 444, + "unique_sentence2": 1997 + }, + "gle_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 542529, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 147.62593890836254, + "max_sentence1_length": 461, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "glg_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 554946, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "glg_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 519706, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "glg_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 565094, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "glg_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 561203, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + 
"unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "glg_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 549461, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "glg_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 549201, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "glg_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 564471, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "glg_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 560186, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "guj_Gujr-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 488733, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + 
"average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "guj_Gujr-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 548779, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "guj_Gujr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 492651, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "guj_Gujr-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 520134, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "guj_Gujr-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 506506, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "guj_Gujr-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 510467, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + 
"max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "guj_Gujr-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 505818, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "guj_Gujr-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 493154, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "guj_Gujr-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 495353, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "guj_Gujr-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 503672, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "guj_Gujr-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 465258, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + 
"guj_Gujr-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 554892, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "guj_Gujr-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 492458, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "guj_Gujr-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 492929, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "hau_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 437473, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "hau_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517686, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "hau_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 
516067, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "hau_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 561465, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "hau_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 507397, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "hau_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 561258, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "hau_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 558108, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "hau_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 542475, + "unique_pairs": 1997, + "min_sentence1_length": 8, + 
"average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "hau_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 435204, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "hau_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 604249, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "hau_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 509769, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "hau_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 538056, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "hau_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 586054, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + 
"unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "hau_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 527698, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "heb_Hebr-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 431477, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "heb_Hebr-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 444098, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "heb_Hebr-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 452663, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "heb_Hebr-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 495946, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + 
"average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "heb_Hebr-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 499423, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "heb_Hebr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 448016, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "heb_Hebr-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 443635, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "heb_Hebr-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 470096, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "heb_Hebr-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 493404, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + 
"max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "heb_Hebr-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 461871, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "heb_Hebr-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 478672, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "heb_Hebr-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 487268, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "heb_Hebr-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 311922, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "heb_Hebr-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 446242, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + 
"heb_Hebr-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 333701, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "heb_Hebr-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 459693, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "heb_Hebr-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 414131, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "heb_Hebr-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 492211, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "heb_Hebr-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 478162, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "heb_Hebr-por_Latn": { + "num_samples": 1997, + "number_of_characters": 
477511, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "heb_Hebr-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 442832, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "heb_Hebr-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 442929, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "heb_Hebr-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 475189, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "heb_Hebr-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 415209, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "heb_Hebr-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 488496, + "unique_pairs": 1996, + "min_sentence1_length": 7, + 
"average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "heb_Hebr-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 472805, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "heb_Hebr-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 452123, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "heb_Hebr-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 510257, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "heb_Hebr-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 474443, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "heb_Hebr-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 465909, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + 
"max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "heb_Hebr-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 471417, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "heb_Hebr-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 291774, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "heb_Hebr-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 458028, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "hin_Deva-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 492756, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "hin_Deva-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 505377, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + 
"min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "hin_Deva-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 557225, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "hin_Deva-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 565423, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "hin_Deva-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 560702, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "hin_Deva-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 509295, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "hin_Deva-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 536778, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "hin_Deva-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 504914, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "hin_Deva-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 531375, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "hin_Deva-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 554683, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "hin_Deva-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 506506, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "hin_Deva-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 461871, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 
1996 + }, + "hin_Deva-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 539951, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "hin_Deva-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 548547, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "hin_Deva-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 373201, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "hin_Deva-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 527111, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "hin_Deva-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 394980, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "hin_Deva-lit_Latn": { + "num_samples": 1997, + 
"number_of_characters": 520972, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "hin_Deva-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 522462, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "hin_Deva-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 509798, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "hin_Deva-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 553490, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "hin_Deva-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 511997, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "hin_Deva-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 539441, + "unique_pairs": 1996, + 
"min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "hin_Deva-por_Latn": { + "num_samples": 1997, + "number_of_characters": 538790, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "hin_Deva-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 536468, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "hin_Deva-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 520316, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "hin_Deva-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 481902, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "hin_Deva-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 549775, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, 
+ "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "hin_Deva-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 534084, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "hin_Deva-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 513402, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "hin_Deva-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 571536, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "hin_Deva-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 509102, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "hin_Deva-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 527188, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + 
"min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "hin_Deva-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 509573, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "hin_Deva-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 532696, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "hin_Deva-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 353053, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "hin_Deva-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 519307, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "hmn_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 578510, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 165.6434651977967, + "max_sentence1_length": 643, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + 
"max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "hrv_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 512015, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "hrv_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 510835, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "hrv_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525814, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "hrv_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 497243, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "hrv_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 503645, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + 
"hrv_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 523816, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "hrv_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 533791, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "hrv_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530818, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "hrv_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 508894, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "hrv_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 508821, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "hrv_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 
508228, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "hrv_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 512066, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "hrv_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518708, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "hun_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 509557, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "hun_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 522178, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "hun_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 574026, + "unique_pairs": 1997, + "min_sentence1_length": 7, + 
"average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "hun_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 577503, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "hun_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 526096, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "hun_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 521715, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "hun_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 548176, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "hun_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 571484, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 
508, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "hun_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 478672, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "hun_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 539951, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "hun_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 565348, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "hun_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 390002, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "hun_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 411781, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + 
"average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "hun_Latn-lav_Latn": { + "num_samples": 1997, + "number_of_characters": 546564, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "unique_sentence2": 1994 + }, + "hun_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 537773, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "hun_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 570291, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "hun_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 556242, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "hun_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 555591, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, 
+ "unique_sentence2": 1996 + }, + "hun_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 553269, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "hun_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 566576, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "hun_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 550885, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "hun_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 530203, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "hun_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 588337, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "hun_Latn-tur_Latn": { + "num_samples": 
1997, + "number_of_characters": 543989, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "hun_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 549497, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "hun_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 369854, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "hun_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 536108, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "hye_Armn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 563842, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "hye_Armn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 512435, + "unique_pairs": 1997, + 
"min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "hye_Armn-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 531307, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "unique_sentence2": 1995 + }, + "hye_Armn-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 548322, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "unique_sentence2": 1996 + }, + "ibo_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 413608, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "ibo_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 493821, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ibo_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 516067, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "ibo_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 537600, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "ibo_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 483532, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "ibo_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 537393, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "ibo_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 534243, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "ibo_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 518610, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 
1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "ibo_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 411339, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "ibo_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 580384, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "ibo_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 485904, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "ibo_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 514191, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "ibo_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 562189, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 
158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "ibo_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 503833, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "ind_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 518153, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "ind_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 530774, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "ind_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 582622, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "ind_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 586099, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + 
"unique_sentence2": 1996 + }, + "ind_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 534692, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ind_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 530311, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "ind_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 587477, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "ind_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 580392, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "ind_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 556772, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "ind_Latn-fra_Latn": { + 
"num_samples": 1997, + "number_of_characters": 580080, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "ind_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 487268, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "ind_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 548547, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "ind_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 565348, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "ind_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 398598, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "ind_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 420377, + "unique_pairs": 
1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "ind_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 546369, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "ind_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 591124, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "ind_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 607280, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "ind_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 561096, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "ind_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 564155, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "ind_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 578887, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "ind_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 564838, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ind_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 564187, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "ind_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 561865, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "ind_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 564827, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + 
"unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "ind_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 575172, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "ind_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 559481, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "ind_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 538799, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "ind_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 596595, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "ind_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 596933, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + 
"average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "ind_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 600612, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "ind_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 552585, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "ind_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 558093, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "ind_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 378450, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "ind_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 544704, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 
494, + "unique_sentence2": 1996 + }, + "isl_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 530560, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "isl_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 514346, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "isl_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 557858, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "isl_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 509928, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "isl_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 520011, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "isl_Latn-ltz_Latn": { + 
"num_samples": 1997, + "number_of_characters": 542965, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "isl_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 554123, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "isl_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 510565, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "isl_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 513652, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "isl_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 514035, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "ita_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 572177, + 
"unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "ita_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 536937, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ita_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 582325, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "ita_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 561203, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ita_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 566692, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "ita_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 566432, + "unique_pairs": 1996, + "min_sentence1_length": 7, + 
"average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "ita_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 581702, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "ita_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 577417, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "jpn_Jpan-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 342807, + "unique_pairs": 1995, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "jpn_Jpan-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 355428, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "jpn_Jpan-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 407276, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 
189, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "jpn_Jpan-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 410753, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "jpn_Jpan-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 359346, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "jpn_Jpan-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 354965, + "unique_pairs": 1995, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "jpn_Jpan-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 381426, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "jpn_Jpan-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 404734, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + 
"average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "jpn_Jpan-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 311922, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "jpn_Jpan-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 373201, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "jpn_Jpan-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 390002, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "jpn_Jpan-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 398598, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "jpn_Jpan-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 245031, + "unique_pairs": 1995, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, 
+ "unique_sentence2": 1995 + }, + "jpn_Jpan-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 371023, + "unique_pairs": 1995, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "jpn_Jpan-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 403541, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "jpn_Jpan-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 389492, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "jpn_Jpan-por_Latn": { + "num_samples": 1997, + "number_of_characters": 388841, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "jpn_Jpan-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 386519, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "jpn_Jpan-spa_Latn": { + "num_samples": 
1997, + "number_of_characters": 399826, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "jpn_Jpan-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 384135, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "jpn_Jpan-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 363453, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "jpn_Jpan-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 421587, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "jpn_Jpan-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 377239, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "jpn_Jpan-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 382747, + "unique_pairs": 1996, + 
"min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "jpn_Jpan-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 190513, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "jpn_Jpan-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 196587, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "jpn_Jpan-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 203104, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "jpn_Jpan-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 369358, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "kan_Knda-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 509338, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + 
"max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "kan_Knda-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 569384, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "kan_Knda-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 513256, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kan_Knda-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 540739, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "kan_Knda-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 510467, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "kan_Knda-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 527111, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + 
"min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "kan_Knda-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 526423, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "kan_Knda-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 513759, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "kan_Knda-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 515958, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "kan_Knda-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 524277, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "kan_Knda-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 485863, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "kan_Knda-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 575497, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "kan_Knda-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 513063, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "kan_Knda-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 513534, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "kat_Geor-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 565719, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "kat_Geor-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 514312, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + 
"unique_sentence2": 1997 + }, + "kat_Geor-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 531307, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "kat_Geor-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 550199, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "unique_sentence2": 1996 + }, + "kaz_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 529910, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "kaz_Cyrl-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506602, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "kaz_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 507996, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kaz_Cyrl-kir_Cyrl": { + 
"num_samples": 1997, + "number_of_characters": 511140, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "kaz_Cyrl-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506202, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "kaz_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 545550, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "kaz_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 525889, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "kaz_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 571298, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "kaz_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 553971, + 
"unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "khm_Khmr-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 589120, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "khm_Khmr-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 531712, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "khm_Khmr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 536211, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "khm_Khmr-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 555471, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "khm_Khmr-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 547539, + "unique_pairs": 1997, + "min_sentence1_length": 6, + 
"average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "khm_Khmr-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 600345, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "khm_Khmr-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 525959, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "kin_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 602279, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "kin_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 551507, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kin_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 542765, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + 
"max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "kin_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 532267, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "kin_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 601526, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "kin_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 588069, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "kin_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 602117, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "kin_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 603543, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + 
"min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "kir_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 520498, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "kir_Cyrl-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 497190, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "kir_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 498584, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kir_Cyrl-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511140, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "kir_Cyrl-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 496790, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 
123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "kir_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 536138, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "kir_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 516477, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "kir_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 561886, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "kir_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 544559, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "kmr_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 477127, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + 
"unique_sentence2": 1995 + }, + "kmr_Latn-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 498313, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "kmr_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 493666, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kmr_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 489285, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "kmr_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 446242, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "kmr_Latn-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 459781, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "kmr_Latn-prs_Arab": { + 
"num_samples": 1997, + "number_of_characters": 488482, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "kmr_Latn-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 488579, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "kmr_Latn-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 460859, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "kmr_Latn-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520093, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "kor_Hang-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 364586, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "kor_Hang-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 377207, + 
"unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "kor_Hang-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 429055, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "kor_Hang-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 432532, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "kor_Hang-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 381125, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kor_Hang-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 376744, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "kor_Hang-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 403205, + "unique_pairs": 1996, + "min_sentence1_length": 5, + 
"average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "kor_Hang-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 426513, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "kor_Hang-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 333701, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "kor_Hang-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 394980, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "kor_Hang-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 411781, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "kor_Hang-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 420377, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, 
+ "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "kor_Hang-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 245031, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "kor_Hang-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 392802, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "kor_Hang-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 425320, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "kor_Hang-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 411271, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "kor_Hang-por_Latn": { + "num_samples": 1997, + "number_of_characters": 410620, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + 
"average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "kor_Hang-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 408298, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "kor_Hang-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 421605, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "kor_Hang-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 405914, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "kor_Hang-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 385232, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "kor_Hang-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 443366, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 
581, + "unique_sentence2": 1997 + }, + "kor_Hang-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 399018, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "kor_Hang-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 404526, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "kor_Hang-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 212292, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "kor_Hang-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 218366, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "kor_Hang-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 224883, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "kor_Hang-zul_Latn": { + "num_samples": 
1997, + "number_of_characters": 391137, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "lao_Laoo-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 567609, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "lao_Laoo-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 510201, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "lao_Laoo-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 514700, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "lao_Laoo-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 555471, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "lao_Laoo-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 526028, + "unique_pairs": 1997, + 
"min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "lao_Laoo-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 578834, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "lao_Laoo-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 504448, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "lav_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 515908, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 134.29544316474713, + "max_sentence1_length": 503, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "lav_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 537988, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.29544316474713, + "max_sentence1_length": 503, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "lav_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 546564, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
134.29544316474713, + "max_sentence1_length": 503, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "lav_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 527585, + "unique_pairs": 1995, + "min_sentence1_length": 7, + "average_sentence1_length": 134.29544316474713, + "max_sentence1_length": 503, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "lit_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 490578, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "lit_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 503199, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "lit_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 555047, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "lit_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 558524, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + 
"min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "lit_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 507117, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "lit_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 502736, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "lit_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 529197, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "lit_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 552505, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "lit_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 459693, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + 
"max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "lit_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 520972, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "lit_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 537773, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "lit_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 546369, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "lit_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 371023, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "lit_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 392802, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "lit_Latn-lav_Latn": 
{ + "num_samples": 1997, + "number_of_characters": 527585, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "unique_sentence2": 1994 + }, + "lit_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 551312, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "lit_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 537263, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "lit_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 536612, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "lit_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 534290, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "lit_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 547597, + "unique_pairs": 
1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "lit_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 531906, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "lit_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 511224, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "lit_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 569358, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "lit_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 525010, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "lit_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 530518, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 
129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "lit_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 350875, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "lit_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 517129, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "ltz_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 549109, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "ltz_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 532895, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "ltz_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 576407, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, 
+ "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "ltz_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 528477, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ltz_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 538560, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "ltz_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 542965, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "ltz_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 572672, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "ltz_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 529114, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 
124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "ltz_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 532201, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "ltz_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 532584, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "mal_Mlym-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 551872, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mal_Mlym-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 604657, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "mal_Mlym-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 597572, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + 
"unique_sentence2": 1997 + }, + "mal_Mlym-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 591124, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "mal_Mlym-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 624460, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "mal_Mlym-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 578276, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "mal_Mlym-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 581335, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "mal_Mlym-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 582007, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "mal_Mlym-tah_Latn": { + 
"num_samples": 1997, + "number_of_characters": 613775, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "mal_Mlym-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 617792, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "mar_Deva-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 504689, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "mar_Deva-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 564735, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "mar_Deva-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 508607, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mar_Deva-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 536090, + 
"unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "mar_Deva-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 505818, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "mar_Deva-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 522462, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "mar_Deva-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 526423, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "mar_Deva-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "mar_Deva-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 511309, + "unique_pairs": 1996, + "min_sentence1_length": 7, + 
"average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "mar_Deva-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 519628, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "mar_Deva-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 481214, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "mar_Deva-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 570848, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "mar_Deva-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 508414, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "mar_Deva-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 508885, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + 
"max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "mey_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 445016, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "mey_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 466202, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "mey_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 461555, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mey_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 457174, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "mey_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 414131, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + 
"min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "mey_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 459781, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "mey_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 456371, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "mey_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 456468, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "mey_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 428748, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "mey_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 487982, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 3, + "average_sentence2_length": 
137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "mkd_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 523981, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 522801, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537780, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 509209, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "mkd_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 515611, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 
1997 + }, + "mkd_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 523816, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "mkd_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 545757, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 542784, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 520860, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 520787, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-srp_Cyrl": { + "num_samples": 1997, + 
"number_of_characters": 520194, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "mkd_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 524032, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530674, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "mlg_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 568028, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mlg_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 620813, + "unique_pairs": 1995, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "mlg_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 613728, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "mlg_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 607280, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "mlg_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 624460, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "mlg_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 594432, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "mlg_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 597491, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "mlg_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 598163, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "mlg_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 629931, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "mlg_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 633948, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "mlt_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 560435, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "mlt_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 525195, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mlt_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 570583, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + 
"unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "mlt_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 549461, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "mlt_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 566692, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "mlt_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 554690, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "mlt_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 569960, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "mlt_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 565675, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + 
"average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "mon_Mong-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 559677, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "mon_Mong-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 502269, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "mon_Mong-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 506768, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mon_Mong-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 547539, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "mon_Mong-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 526028, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + 
"max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "mon_Mong-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 570902, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "mon_Mong-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 496516, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "mri_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 521844, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mri_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 574629, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "mri_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 567544, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + 
"mri_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 561096, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "mri_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 578276, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "mri_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 594432, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "mri_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 551307, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "mri_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 551979, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "mri_Latn-tah_Latn": { + "num_samples": 1997, + 
"number_of_characters": 583747, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "mri_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 587764, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "msa_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 524903, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "msa_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 577688, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "msa_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 570603, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "msa_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 564155, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "msa_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 581335, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "msa_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 597491, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "msa_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 551307, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "msa_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 555038, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "msa_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 586806, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "msa_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 590823, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "mya_Mymr-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 612483, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "mya_Mymr-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 555075, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "mya_Mymr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 559574, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mya_Mymr-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 600345, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + 
"unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "mya_Mymr-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 578834, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "mya_Mymr-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 570902, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "mya_Mymr-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 549322, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "nde_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 596231, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "nde_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 545459, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + 
"average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nde_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 536717, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "nde_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 526219, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "nde_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 601526, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "nde_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 582021, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "nde_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 596069, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + 
"max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "nde_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 597495, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "nep_Deva-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 492025, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "nep_Deva-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 552071, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "nep_Deva-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 495943, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nep_Deva-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 523426, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + 
"nep_Deva-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 493154, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "nep_Deva-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509798, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "nep_Deva-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513759, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "nep_Deva-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "nep_Deva-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 498645, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "nep_Deva-sin_Sinh": { + "num_samples": 1997, + 
"number_of_characters": 506964, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "nep_Deva-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 468550, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "nep_Deva-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 558184, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "nep_Deva-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 495750, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "nep_Deva-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 496221, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "nld_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 560267, + "unique_pairs": 1996, + 
"min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "nld_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 523096, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "nld_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 535717, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "nld_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 544053, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "nld_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 587565, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "nld_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 591042, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 
146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "nld_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 539635, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nld_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 549718, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "nld_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 535254, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "nld_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 561715, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "nld_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 585023, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 
1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "nld_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 492211, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "nld_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 553490, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "nld_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 570291, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "nld_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 578887, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "nld_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 554123, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 
131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "nld_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 403541, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "nld_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 425320, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "nld_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 551312, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "nld_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 572672, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "nld_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 540272, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 
+ }, + "nld_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 543359, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "nld_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 569781, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "nld_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 569130, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "nld_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 566808, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "nld_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 580115, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "nld_Latn-swa_Latn": { + "num_samples": 1997, + 
"number_of_characters": 564424, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "nld_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 543742, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "nld_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 601876, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "nld_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 557528, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "nld_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 563036, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "nld_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 383393, + "unique_pairs": 1996, + 
"min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "nld_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 549647, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "nno_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 516709, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "nno_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 500495, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "nno_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 544007, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "nno_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 496077, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nno_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 506160, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "nno_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 510565, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "nno_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 529114, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "nno_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 540272, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "nno_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 499801, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + 
"unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "nno_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 500184, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "nob_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 519796, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "nob_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 503582, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "nob_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 547094, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "nob_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 499164, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + 
"average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nob_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 509247, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "nob_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 513652, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "nob_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 532201, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "nob_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 543359, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "nob_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 499801, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + 
"max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "nob_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 503271, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "nso_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 459006, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "nso_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 539219, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nso_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 561465, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "nso_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 537600, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + 
"nso_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 528930, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "nso_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 582791, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "nso_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 579641, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "nso_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 564008, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "nso_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 456737, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "nso_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 
625782, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "nso_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 531302, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "nso_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 559589, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "nso_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 607587, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "nso_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 549231, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "nya_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 582774, + "unique_pairs": 1997, + "min_sentence1_length": 10, + 
"average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "nya_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nya_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 523260, + "unique_pairs": 1995, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "nya_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 512762, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "nya_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 588069, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "nya_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 582021, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + 
"max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "nya_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 582612, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "nya_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 584038, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "orm_Ethi-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 404938, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "orm_Ethi-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 485151, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "orm_Ethi-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 507397, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + 
"min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "orm_Ethi-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 483532, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "orm_Ethi-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 528930, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "orm_Ethi-som_Latn": { + "num_samples": 1997, + "number_of_characters": 528723, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "orm_Ethi-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 525573, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "orm_Ethi-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 509940, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 10, + "average_sentence2_length": 
136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "orm_Ethi-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 402669, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "orm_Ethi-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 571714, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "orm_Ethi-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 477234, + "unique_pairs": 1992, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "orm_Ethi-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 505521, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "orm_Ethi-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 553519, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 
1996 + }, + "orm_Ethi-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 495163, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "pan_Guru-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 494224, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "pan_Guru-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 554270, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "pan_Guru-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 498142, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "pan_Guru-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 525625, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "pan_Guru-guj_Gujr": { + "num_samples": 1997, + 
"number_of_characters": 495353, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "pan_Guru-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 511997, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "pan_Guru-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 515958, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "pan_Guru-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 511309, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "pan_Guru-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 498645, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "pan_Guru-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 509163, + "unique_pairs": 1996, + 
"min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "pan_Guru-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 470749, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "pan_Guru-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 560383, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "pan_Guru-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 497949, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "pan_Guru-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 498420, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "pol_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 509047, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 
139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "pol_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 533956, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "pol_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 521668, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "pol_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 532776, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "pol_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 547755, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "pol_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 519184, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + 
"unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "pol_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 573516, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "pol_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 576993, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "pol_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 525586, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "pol_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 521205, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "pol_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 547666, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + 
"average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "pol_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 570974, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "pol_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 478162, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "pol_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 539441, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "pol_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 533791, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "pol_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 556242, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 
508, + "unique_sentence2": 1997 + }, + "pol_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 564838, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "pol_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 389492, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "pol_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 411271, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "pol_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 537263, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "pol_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 545757, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "pol_Latn-nld_Latn": { + 
"num_samples": 1997, + "number_of_characters": 569781, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "pol_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 555081, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "pol_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 552759, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "pol_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 530835, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "pol_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 530762, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "pol_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 566066, + 
"unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "pol_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530169, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "pol_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 534007, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "pol_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 550375, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "pol_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 529693, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "pol_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 587827, + "unique_pairs": 1997, + "min_sentence1_length": 9, + 
"average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "pol_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 543479, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "pol_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 540649, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "pol_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 548987, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "pol_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 369344, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "pol_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 535598, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + 
"max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "por_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 508396, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "por_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 521017, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "por_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 560175, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "por_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 572865, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "por_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 576342, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + 
"min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "por_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 524935, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "por_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 520554, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "por_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 547015, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "por_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 570323, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "por_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 549201, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "por_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 477511, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "por_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 538790, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "por_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 555591, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "por_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 564187, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "por_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 566432, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + 
"unique_sentence2": 1996 + }, + "por_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 388841, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "por_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 410620, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "por_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 536612, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "por_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 554690, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "por_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 569130, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "por_Latn-pol_Latn": { + "num_samples": 
1997, + "number_of_characters": 555081, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "por_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 569700, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "por_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 552108, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "por_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 565415, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "por_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 549724, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "por_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 529042, + "unique_pairs": 1996, + 
"min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "por_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 587176, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "por_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 542828, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "por_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 548336, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "por_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 368693, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "por_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 534947, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "prs_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 473717, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "prs_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 494903, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "prs_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 490256, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "prs_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 485875, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "prs_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 442832, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + 
"unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "prs_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 488482, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "prs_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 456371, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "prs_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 485169, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "prs_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 457449, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "prs_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516683, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + 
"average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "pus_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 473814, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "pus_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 495000, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "pus_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 490353, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "pus_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 485972, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "pus_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 442929, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + 
"max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "pus_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 488579, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "pus_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 456468, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "pus_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 485169, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "pus_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 457546, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "pus_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516780, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + 
"ron_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 575445, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "ron_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 540205, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ron_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 585593, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "ron_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 564471, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ron_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 581702, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "ron_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 
569960, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "ron_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 569700, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "ron_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 580685, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "rus_Cyrl-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 506074, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "rus_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530983, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "rus_Cyrl-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 518695, + "unique_pairs": 1997, + "min_sentence1_length": 7, + 
"average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "rus_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 529803, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "rus_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544782, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "rus_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 516211, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "rus_Cyrl-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 570543, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "rus_Cyrl-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 574020, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 
419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "rus_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 522613, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "rus_Cyrl-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 518232, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "rus_Cyrl-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 544693, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "rus_Cyrl-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 568001, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "rus_Cyrl-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 475189, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + 
"average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "rus_Cyrl-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 536468, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "rus_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 530818, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "rus_Cyrl-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 553269, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "rus_Cyrl-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 561865, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "rus_Cyrl-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 386519, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, 
+ "unique_sentence2": 1994 + }, + "rus_Cyrl-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 408298, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "rus_Cyrl-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 534290, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "rus_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 542784, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "rus_Cyrl-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 566808, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "rus_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 552759, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "rus_Cyrl-por_Latn": { + "num_samples": 1997, 
+ "number_of_characters": 552108, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "rus_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 527862, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "rus_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 527789, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "rus_Cyrl-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 563093, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "rus_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527196, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "rus_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 531034, + "unique_pairs": 1996, + 
"min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "rus_Cyrl-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 547402, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "rus_Cyrl-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 526720, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "rus_Cyrl-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 584854, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "rus_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 540506, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "rus_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537676, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 
137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "rus_Cyrl-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 546014, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "rus_Cyrl-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 366371, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "rus_Cyrl-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 532625, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "shi_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 446094, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "shi_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 467280, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, 
+ "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "shi_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 462633, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "shi_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 458252, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "shi_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 415209, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "shi_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 460859, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "shi_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 428748, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "shi_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 457449, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "shi_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 457546, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "shi_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 489060, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "sin_Sinh-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 502543, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "sin_Sinh-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 562589, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 
1996 + }, + "sin_Sinh-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 506461, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "sin_Sinh-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 533944, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "sin_Sinh-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 503672, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "sin_Sinh-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 520316, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "sin_Sinh-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 524277, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "sin_Sinh-mar_Deva": { + "num_samples": 1997, + 
"number_of_characters": 519628, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "sin_Sinh-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 506964, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "sin_Sinh-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 509163, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "sin_Sinh-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 479068, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "sin_Sinh-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 568702, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "sin_Sinh-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 506268, + "unique_pairs": 1996, + 
"min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "sin_Sinh-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 506739, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "slk_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 509059, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "slk_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 507879, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "slk_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522858, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "slk_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 494287, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 
126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "slk_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500689, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "slk_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 508894, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "slk_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520860, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "slk_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 530835, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "slk_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527862, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + 
"unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "slk_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 505865, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "slk_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 505272, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "slk_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "slk_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515752, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "slv_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508986, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + 
"average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "slv_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 507806, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "slv_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522785, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "slv_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 494214, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "slv_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500616, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "slv_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 508821, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + 
"max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "slv_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520787, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "slv_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 530762, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "slv_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527789, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "slv_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 505865, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "slv_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 505199, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + 
"slv_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 509037, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "slv_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515679, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "smo_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 525575, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "smo_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 578360, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "smo_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 571275, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "smo_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 
564827, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "smo_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 582007, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "smo_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 598163, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "smo_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 551979, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "smo_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 555038, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "smo_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 587478, + "unique_pairs": 1997, + "min_sentence1_length": 8, + 
"average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "smo_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 591495, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "sna_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 596822, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "sna_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 546050, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "sna_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 537308, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "sna_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 526810, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + 
"max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "sna_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 602117, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "sna_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 596069, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "sna_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 582612, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "sna_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 598086, + "unique_pairs": 1995, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "snd_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 464129, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + 
"min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "snd_Arab-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 524175, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "snd_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 468047, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "snd_Arab-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 495530, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "snd_Arab-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 465258, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "snd_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 481902, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 
130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "snd_Arab-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 485863, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "snd_Arab-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 481214, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "snd_Arab-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 468550, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "snd_Arab-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 470749, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "snd_Arab-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 479068, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + 
"unique_sentence2": 1996 + }, + "snd_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 530288, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "snd_Arab-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 467854, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "snd_Arab-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 468325, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "som_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 458799, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "som_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 539012, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "som_Latn-hau_Latn": { + "num_samples": 
1997, + "number_of_characters": 561258, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "som_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 537393, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "som_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 582791, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "som_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 528723, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "som_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 579434, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "som_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 563801, + "unique_pairs": 1997, + 
"min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "som_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 456530, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "som_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 625575, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "som_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 531095, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "som_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 559382, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "som_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 607380, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + 
"max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "som_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 549024, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "spa_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 519381, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "spa_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "spa_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 571160, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "spa_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 583850, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + 
"min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "spa_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 587327, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "spa_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 535920, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "spa_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 531539, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "spa_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 558000, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "spa_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 581308, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 
146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "spa_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 560186, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "spa_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 488496, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "spa_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 549775, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "spa_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 566576, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "spa_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 575172, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 
+ }, + "spa_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 577417, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "spa_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 399826, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "spa_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 421605, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "spa_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 547597, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "spa_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 565675, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "spa_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 
580115, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "spa_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 566066, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "spa_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 565415, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "spa_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 580685, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "spa_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 563093, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "spa_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 560709, + "unique_pairs": 1997, + "min_sentence1_length": 1, + 
"average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "spa_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 540027, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "spa_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 598161, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "spa_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 553813, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "spa_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 559321, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "spa_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 379678, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 
504, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "spa_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 545932, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "sqi_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 582734, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "sqi_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 531327, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "sqi_Latn-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 548322, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "sqi_Latn-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 550199, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + 
"average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "unique_sentence2": 1995 + }, + "srp_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508393, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "srp_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 507213, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "srp_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522192, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "srp_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 493621, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "srp_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500023, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + 
"max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "srp_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 508228, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "srp_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520194, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "srp_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 530169, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "srp_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527196, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "srp_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 505272, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + 
"srp_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 505199, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "srp_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 508444, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "srp_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515086, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "srp_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 512231, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "srp_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 511051, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "srp_Latn-bul_Cyrl": { + "num_samples": 1997, + 
"number_of_characters": 526030, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "srp_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 497459, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "srp_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 503861, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "srp_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 512066, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "srp_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 524032, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "srp_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 534007, + "unique_pairs": 1996, + 
"min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "srp_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 531034, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "srp_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "srp_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 509037, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "srp_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508444, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "srp_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518924, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 
128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "ssw_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 455649, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "ssw_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 535862, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ssw_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 558108, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "ssw_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 534243, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "ssw_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 579641, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 
1996, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "ssw_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 525573, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "ssw_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 579434, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "ssw_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 560651, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "ssw_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 453380, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "ssw_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 622425, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "ssw_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 527945, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "ssw_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 556232, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "ssw_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 604230, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "ssw_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 545874, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "swa_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 440016, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 
1994 + }, + "swa_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 503690, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "swa_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 516311, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "swa_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 568159, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "swa_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 571636, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "swa_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 520229, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "swa_Latn-fas_Arab": { + "num_samples": 1997, + 
"number_of_characters": 515848, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "swa_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 542309, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "swa_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 565617, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "swa_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 542475, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "swa_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 472805, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "swa_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 534084, + "unique_pairs": 1997, + 
"min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "swa_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 550885, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "swa_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 518610, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "swa_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 559481, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "swa_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 384135, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "swa_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 405914, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 
136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "swa_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 531906, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "swa_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 564424, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "swa_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 564008, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "swa_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 509940, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "swa_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 550375, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + 
"unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "swa_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 549724, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "swa_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 547402, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "swa_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 563801, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "swa_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 560709, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "swa_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 560651, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + 
"average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "swa_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 524336, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "swa_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 582470, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "swa_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 437747, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "swa_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 606792, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "swa_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 538122, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + 
"max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "swa_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 543630, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "swa_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 512312, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "swa_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 540599, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "swa_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 588597, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "swa_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 363987, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + 
"swa_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 530241, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "swe_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 520179, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "swe_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 483008, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "swe_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 495629, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "swe_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 503965, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "swe_Latn-deu_Latn": { + "num_samples": 1997, + 
"number_of_characters": 547477, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "swe_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 550954, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "swe_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 499547, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "swe_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 509630, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "swe_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 495166, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "swe_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 521627, + "unique_pairs": 1996, + 
"min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "swe_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 544935, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "swe_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 452123, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "swe_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 513402, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "swe_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 530203, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "swe_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 538799, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 
126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "swe_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 514035, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "swe_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 363453, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "swe_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 385232, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "swe_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 511224, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "swe_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 532584, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 
1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "swe_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 543742, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "swe_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 500184, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "swe_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 503271, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "swe_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 529693, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "swe_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 529042, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 
138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "swe_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 526720, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "swe_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 540027, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "swe_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 524336, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "swe_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 561788, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "swe_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 517440, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + 
"unique_sentence2": 1997 + }, + "swe_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 522948, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "swe_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 343305, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "swe_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 509559, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "tah_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 557343, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tah_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 610128, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "tah_Latn-fil_Latn": { + "num_samples": 
1997, + "number_of_characters": 603043, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "tah_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 596595, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "tah_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 613775, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "tah_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 629931, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "tah_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 583747, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "tah_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 586806, + "unique_pairs": 1997, + 
"min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "tah_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 587478, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "tah_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 623263, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "tam_Taml-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 541142, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "tam_Taml-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 553763, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "tam_Taml-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 605611, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 
155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "tam_Taml-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 613809, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "tam_Taml-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 609088, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "tam_Taml-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 557681, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tam_Taml-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 585164, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "tam_Taml-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 553300, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + 
"unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "tam_Taml-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 579761, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "tam_Taml-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 603069, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "tam_Taml-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 554892, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "tam_Taml-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 510257, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "tam_Taml-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 571536, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + 
"average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "tam_Taml-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 588337, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "tam_Taml-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 596933, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "tam_Taml-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 421587, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "tam_Taml-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 575497, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "tam_Taml-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 443366, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + 
"max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "tam_Taml-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 569358, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "tam_Taml-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 570848, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "tam_Taml-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 558184, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "tam_Taml-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 601876, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "tam_Taml-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 560383, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + 
"tam_Taml-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 587827, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "tam_Taml-por_Latn": { + "num_samples": 1997, + "number_of_characters": 587176, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "tam_Taml-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 584854, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "tam_Taml-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 568702, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "tam_Taml-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 530288, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "tam_Taml-spa_Latn": { + "num_samples": 1997, + 
"number_of_characters": 598161, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "tam_Taml-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 582470, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "tam_Taml-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 561788, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "tam_Taml-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 557488, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "tam_Taml-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 575574, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "tam_Taml-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 557959, + "unique_pairs": 1997, + 
"min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "tam_Taml-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 581082, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "tam_Taml-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 401439, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "tam_Taml-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 567693, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "tat_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 515560, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "tat_Cyrl-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 492252, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "tat_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 493646, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tat_Cyrl-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506202, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "tat_Cyrl-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 496790, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "tat_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 531200, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "tat_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 511539, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + 
"unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "tat_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 556948, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "tat_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 539621, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "tel_Telu-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 491329, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "tel_Telu-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 551375, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "tel_Telu-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 495247, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + 
"average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tel_Telu-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 522730, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "tel_Telu-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 492458, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "tel_Telu-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509102, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "tel_Telu-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513063, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "tel_Telu-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 508414, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + 
"max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "tel_Telu-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 495750, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "tel_Telu-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 497949, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "tel_Telu-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 506268, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "tel_Telu-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 467854, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "tel_Telu-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 557488, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + 
"tel_Telu-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 495525, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "tgk_Cyrl-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 505328, + "unique_pairs": 1995, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "tgk_Cyrl-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 526514, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "tgk_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 521867, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tgk_Cyrl-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 517486, + "unique_pairs": 1995, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "tgk_Cyrl-heb_Hebr": { + "num_samples": 1997, + 
"number_of_characters": 474443, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "tgk_Cyrl-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 520093, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "tgk_Cyrl-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 487982, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "tgk_Cyrl-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 516683, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "tgk_Cyrl-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 516780, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "tgk_Cyrl-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 489060, + "unique_pairs": 1997, + 
"min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "tha_Thai-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 538097, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "tha_Thai-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 480689, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "tha_Thai-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 485188, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tha_Thai-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 525959, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "tha_Thai-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 504448, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "tha_Thai-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 496516, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "tha_Thai-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 549322, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "tir_Ethi-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 332745, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "tir_Ethi-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 412958, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tir_Ethi-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 435204, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, 
+ "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "tir_Ethi-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 411339, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "tir_Ethi-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 456737, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "tir_Ethi-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 402669, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "tir_Ethi-som_Latn": { + "num_samples": 1997, + "number_of_characters": 456530, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "tir_Ethi-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 453380, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + 
"max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "tir_Ethi-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 437747, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "tir_Ethi-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 499521, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "tir_Ethi-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 405041, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "tir_Ethi-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 433328, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "tir_Ethi-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 481326, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "tir_Ethi-zul_Latn": { 
+ "num_samples": 1997, + "number_of_characters": 422970, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "ton_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 561360, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ton_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 614145, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "ton_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 607060, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "ton_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 600612, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "ton_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 617792, + 
"unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "ton_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 633948, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "ton_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 587764, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "ton_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 590823, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "ton_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 591495, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "ton_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 623263, + "unique_pairs": 1997, + "min_sentence1_length": 7, + 
"average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "tsn_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 501790, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "tsn_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 582003, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tsn_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 604249, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "tsn_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 580384, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "tsn_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 625782, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + 
"max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "tsn_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 571714, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "tsn_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 625575, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "tsn_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 622425, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "tsn_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 606792, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "tsn_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 499521, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + 
"min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "tsn_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 574086, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "tsn_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 602373, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "tsn_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 650371, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "tsn_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 592015, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "tuk_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 554908, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, 
+ "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "tuk_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 531600, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "tuk_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 532994, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tuk_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 545550, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "tuk_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 536138, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "tuk_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 531200, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + 
"tuk_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 550887, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "tuk_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 596296, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "tuk_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 578969, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "tur_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 496794, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "tur_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 535247, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "tur_Latn-bak_Cyrl": { + "num_samples": 1997, + 
"number_of_characters": 511939, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "tur_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 509415, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "tur_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 561263, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "tur_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 564740, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "tur_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 513333, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tur_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 508952, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "tur_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 535413, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "tur_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 558721, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "tur_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 465909, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "tur_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 527188, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "tur_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 543989, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "tur_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 552585, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "tur_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 377239, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "tur_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525889, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "tur_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516477, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "tur_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 399018, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + 
"unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "tur_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 525010, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "tur_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 557528, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "tur_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 543479, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "tur_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 542828, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "tur_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 540506, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + 
"average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "tur_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 553813, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "tur_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 538122, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "tur_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 517440, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "tur_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 575574, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "tur_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511539, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + 
"max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "tur_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 550887, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "tur_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 576635, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "tur_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 559308, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "tur_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 536734, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "tur_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 357091, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + 
"tur_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 523345, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "uig_Arab-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 580656, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "uig_Arab-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 557348, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "uig_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 558742, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "uig_Arab-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 571298, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "uig_Arab-kir_Cyrl": { + "num_samples": 1997, + 
"number_of_characters": 561886, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "uig_Arab-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 556948, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "uig_Arab-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 596296, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "uig_Arab-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 576635, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "uig_Arab-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 604717, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518873, + "unique_pairs": 1996, + 
"min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 517693, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 532672, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 504101, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "ukr_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 510503, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ukr_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 518708, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "ukr_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530674, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "ukr_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 540649, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537676, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 515752, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 515679, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 
1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515086, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "ukr_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 518924, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "urd_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 491800, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "urd_Arab-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 551846, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "urd_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 495718, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 
124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "urd_Arab-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 523201, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "urd_Arab-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 492929, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "urd_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509573, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "urd_Arab-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513534, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "urd_Arab-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 508885, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + 
"unique_sentence2": 1995 + }, + "urd_Arab-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 496221, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "urd_Arab-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 498420, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "urd_Arab-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 506739, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "urd_Arab-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 468325, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "urd_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 557959, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "urd_Arab-tel_Telu": { + 
"num_samples": 1997, + "number_of_characters": 495525, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "uzb_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 563329, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "uzb_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 540021, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "uzb_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 541415, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "uzb_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 553971, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "uzb_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544559, + 
"unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "uzb_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 539621, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "uzb_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 578969, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "uzb_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 559308, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "uzb_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 604717, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "ven_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 598248, + "unique_pairs": 1997, + "min_sentence1_length": 10, + 
"average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "ven_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 547476, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ven_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 538734, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "ven_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 528236, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "ven_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 603543, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "ven_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 597495, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + 
"max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "ven_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 584038, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "ven_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 598086, + "unique_pairs": 1995, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "vie_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 502302, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "vie_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 514923, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "vie_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 566771, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + 
"min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "vie_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 570248, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "vie_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 518841, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "vie_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 514460, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "vie_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 540921, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "vie_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 564229, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + 
"max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "vie_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 471417, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "vie_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 532696, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "vie_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 549497, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "vie_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 558093, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "vie_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 382747, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + 
"vie_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 404526, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "vie_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 530518, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "vie_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 563036, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "vie_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 548987, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "vie_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 548336, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "vie_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 546014, + 
"unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "vie_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 559321, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "vie_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 543630, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "vie_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 522948, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "vie_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 581082, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "vie_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 536734, + "unique_pairs": 1997, + "min_sentence1_length": 7, + 
"average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "vie_Latn-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 350008, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "vie_Latn-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 356082, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "vie_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 362599, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "vie_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 528853, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "wol_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 407310, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + 
"unique_sentence1": 1990, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "wol_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 487523, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "wol_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 509769, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "wol_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 485904, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "wol_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 531302, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "wol_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 477234, + "unique_pairs": 1992, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 9, + 
"average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "wol_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 531095, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "wol_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 527945, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "wol_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 512312, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "wol_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 405041, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "wol_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 574086, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + 
"max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "wol_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 507893, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "wol_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 555891, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "wol_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 497535, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "xho_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 435597, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "xho_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 515810, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + 
"xho_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 538056, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "xho_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 514191, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "xho_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 559589, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "xho_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 505521, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "xho_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 559382, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "xho_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 
556232, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "xho_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 540599, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "xho_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 433328, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "xho_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 602373, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "xho_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 507893, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "xho_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 584178, + "unique_pairs": 1997, + "min_sentence1_length": 6, + 
"average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "xho_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 525822, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "yor_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 483595, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "yor_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 563808, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "yor_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 586054, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "yor_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 562189, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + 
"unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "yor_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 607587, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "yor_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 553519, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "yor_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 607380, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "yor_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 604230, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "yor_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 588597, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + 
"average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "yor_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 481326, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "yor_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 650371, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "yor_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 555891, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "yor_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 584178, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "yor_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 573820, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, 
+ "unique_sentence2": 1996 + }, + "yue_Hant-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 326607, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "yue_Hant-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 190513, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "yue_Hant-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 212292, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "yue_Hant-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 350008, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "yue_Hant-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 163848, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "yue_Hant-zho_Hant": { + "num_samples": 
1997, + "number_of_characters": 170365, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "zho_Hans-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 332681, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "zho_Hans-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 196587, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "zho_Hans-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 218366, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "zho_Hans-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 356082, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "zho_Hans-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 163848, + "unique_pairs": 1997, + 
"min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "zho_Hans-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 176439, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "zho_Hant-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 322659, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "zho_Hant-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 335280, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "zho_Hant-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 387128, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "zho_Hant-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 390605, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, 
+ "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "zho_Hant-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 339198, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "zho_Hant-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 334817, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "zho_Hant-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 361278, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "zho_Hant-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 384586, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "zho_Hant-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 291774, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + 
"min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "zho_Hant-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 353053, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "zho_Hant-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 369854, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "zho_Hant-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 378450, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "zho_Hant-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 203104, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "zho_Hant-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 224883, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + 
"max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "zho_Hant-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 350875, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "zho_Hant-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 383393, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "zho_Hant-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 369344, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "zho_Hant-por_Latn": { + "num_samples": 1997, + "number_of_characters": 368693, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "zho_Hant-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 366371, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + 
"zho_Hant-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 379678, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "zho_Hant-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 363987, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "zho_Hant-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 343305, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "zho_Hant-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 401439, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "zho_Hant-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 357091, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "zho_Hant-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 
362599, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "zho_Hant-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 170365, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "zho_Hant-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 176439, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "zho_Hant-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 349210, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "zul_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 425239, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "zul_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 488913, + "unique_pairs": 1997, + "min_sentence1_length": 8, + 
"average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "zul_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 501534, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "zul_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 553382, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "zul_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 556859, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "zul_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 505452, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "zul_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 501071, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 
494, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "zul_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 527532, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "zul_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 550840, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "zul_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 527698, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "zul_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 458028, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "zul_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 519307, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + 
"average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "zul_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 536108, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "zul_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 503833, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "zul_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 544704, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "zul_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 369358, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "zul_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 391137, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, 
+ "unique_sentence2": 1995 + }, + "zul_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 517129, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "zul_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 549647, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "zul_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 549231, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "zul_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 495163, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "zul_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 535598, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "zul_Latn-por_Latn": { + "num_samples": 
1997, + "number_of_characters": 534947, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "zul_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 532625, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "zul_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 549024, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "zul_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 545932, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "zul_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 545874, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "zul_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 530241, + "unique_pairs": 1997, + 
"min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "zul_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 509559, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "zul_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 567693, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "zul_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 422970, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "zul_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 592015, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "zul_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 523345, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, 
+ "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "zul_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 528853, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "zul_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 497535, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "zul_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 525822, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "zul_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 573820, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "zul_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 349210, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + 
"min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json b/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json new file mode 100644 index 0000000000..754f13c767 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json @@ -0,0 +1,69 @@ +{ + "train": { + "num_samples": 1640, + "number_of_characters": 445805, + "unique_pairs": 1632, + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "unique_sentence1": 405, + "min_sentence2_length": 3, + "average_sentence2_length": 135.515243902439, + "max_sentence2_length": 1728, + "unique_sentence2": 1631, + "hf_subset_descriptive_stats": { + "en-ha": { + "num_samples": 410, + "number_of_characters": 115348, + "unique_pairs": 407, + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "unique_sentence1": 405, + "min_sentence2_length": 4, + "average_sentence2_length": 145.01951219512196, + "max_sentence2_length": 1728, + "unique_sentence2": 407 + }, + "en-ig": { + "num_samples": 410, + "number_of_characters": 107173, + "unique_pairs": 409, + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "unique_sentence1": 405, + "min_sentence2_length": 5, + "average_sentence2_length": 125.08048780487805, + "max_sentence2_length": 1137, + "unique_sentence2": 408 + }, + "en-pcm": { + "num_samples": 410, + "number_of_characters": 109955, + "unique_pairs": 408, + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "unique_sentence1": 405, + "min_sentence2_length": 3, + "average_sentence2_length": 131.8658536585366, + "max_sentence2_length": 1552, + "unique_sentence2": 408 + }, + 
"en-yo": { + "num_samples": 410, + "number_of_characters": 113329, + "unique_pairs": 409, + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "unique_sentence1": 405, + "min_sentence2_length": 6, + "average_sentence2_length": 140.0951219512195, + "max_sentence2_length": 1338, + "unique_sentence2": 409 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json b/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json new file mode 100644 index 0000000000..96403e4c83 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json @@ -0,0 +1,15 @@ +{ + "test": { + "num_samples": 228, + "number_of_characters": 37441, + "unique_pairs": 228, + "min_sentence1_length": 13, + "average_sentence1_length": 82.19736842105263, + "max_sentence1_length": 272, + "unique_sentence1": 227, + "min_sentence2_length": 10, + "average_sentence2_length": 82.01754385964912, + "max_sentence2_length": 269, + "unique_sentence2": 226 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json b/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json index 60a8e055c4..9efdf2f8d7 100644 --- a/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json @@ -1,75 +1,159 @@ { "train": { - "average_sentence1_length": 145.4552390438247, - "average_sentence2_length": 148.56607569721115, "num_samples": 50200, "number_of_characters": 14759870, + "unique_pairs": 50140, + "min_sentence1_length": 5, + "average_sentence1_length": 145.4552390438247, + "max_sentence1_length": 873, + "unique_sentence1": 8258, + "min_sentence2_length": 5, + "average_sentence2_length": 148.56607569721115, + "max_sentence2_length": 980, + "unique_sentence2": 50102, "hf_subset_descriptive_stats": { "ind-abs": { + 
"num_samples": 1000, + "number_of_characters": 295680, + "unique_pairs": 999, + "min_sentence1_length": 5, "average_sentence1_length": 148.366, + "max_sentence1_length": 727, + "unique_sentence1": 998, + "min_sentence2_length": 6, "average_sentence2_length": 147.314, - "num_samples": 1000, - "number_of_characters": 295680 + "max_sentence2_length": 629, + "unique_sentence2": 998 }, "ind-btk": { + "num_samples": 6600, + "number_of_characters": 1927907, + "unique_pairs": 6597, + "min_sentence1_length": 5, "average_sentence1_length": 145.36666666666667, + "max_sentence1_length": 873, + "unique_sentence1": 6521, + "min_sentence2_length": 5, "average_sentence2_length": 146.74045454545455, - "num_samples": 6600, - "number_of_characters": 1927907 + "max_sentence2_length": 980, + "unique_sentence2": 6596 }, "ind-bew": { + "num_samples": 6600, + "number_of_characters": 1939300, + "unique_pairs": 6595, + "min_sentence1_length": 5, "average_sentence1_length": 145.4280303030303, + "max_sentence1_length": 873, + "unique_sentence1": 6512, + "min_sentence2_length": 6, "average_sentence2_length": 148.40530303030303, - "num_samples": 6600, - "number_of_characters": 1939300 + "max_sentence2_length": 840, + "unique_sentence2": 6590 }, "ind-bhp": { + "num_samples": 1000, + "number_of_characters": 261666, + "unique_pairs": 1000, + "min_sentence1_length": 11, "average_sentence1_length": 133.528, + "max_sentence1_length": 468, + "unique_sentence1": 999, + "min_sentence2_length": 10, "average_sentence2_length": 128.138, - "num_samples": 1000, - "number_of_characters": 261666 + "max_sentence2_length": 459, + "unique_sentence2": 999 }, "ind-jav": { + "num_samples": 6600, + "number_of_characters": 1922162, + "unique_pairs": 6594, + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "unique_sentence1": 6512, + "min_sentence2_length": 5, "average_sentence2_length": 145.8089393939394, - "num_samples": 6600, - "number_of_characters": 
1922162 + "max_sentence2_length": 854, + "unique_sentence2": 6585 }, "ind-mad": { + "num_samples": 6600, + "number_of_characters": 1973257, + "unique_pairs": 6598, + "min_sentence1_length": 5, "average_sentence1_length": 145.35545454545453, + "max_sentence1_length": 873, + "unique_sentence1": 6521, + "min_sentence2_length": 5, "average_sentence2_length": 153.6228787878788, - "num_samples": 6600, - "number_of_characters": 1973257 + "max_sentence2_length": 827, + "unique_sentence2": 6592 }, "ind-mak": { + "num_samples": 6600, + "number_of_characters": 1953868, + "unique_pairs": 6594, + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "unique_sentence1": 6512, + "min_sentence2_length": 6, "average_sentence2_length": 150.6128787878788, - "num_samples": 6600, - "number_of_characters": 1953868 + "max_sentence2_length": 888, + "unique_sentence2": 6586 }, "ind-min": { + "num_samples": 6600, + "number_of_characters": 1937033, + "unique_pairs": 6595, + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "unique_sentence1": 6512, + "min_sentence2_length": 6, "average_sentence2_length": 148.0621212121212, - "num_samples": 6600, - "number_of_characters": 1937033 + "max_sentence2_length": 837, + "unique_sentence2": 6591 }, "ind-mui": { + "num_samples": 1000, + "number_of_characters": 301448, + "unique_pairs": 1000, + "min_sentence1_length": 11, "average_sentence1_length": 150.454, + "max_sentence1_length": 451, + "unique_sentence1": 997, + "min_sentence2_length": 11, "average_sentence2_length": 150.994, - "num_samples": 1000, - "number_of_characters": 301448 + "max_sentence2_length": 450, + "unique_sentence2": 1000 }, "ind-rej": { + "num_samples": 1000, + "number_of_characters": 291205, + "unique_pairs": 1000, + "min_sentence1_length": 9, "average_sentence1_length": 151.622, + "max_sentence1_length": 873, + "unique_sentence1": 998, + "min_sentence2_length": 8, 
"average_sentence2_length": 139.583, - "num_samples": 1000, - "number_of_characters": 291205 + "max_sentence2_length": 784, + "unique_sentence2": 1000 }, "ind-sun": { + "num_samples": 6600, + "number_of_characters": 1956344, + "unique_pairs": 6591, + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "unique_sentence1": 6512, + "min_sentence2_length": 5, "average_sentence2_length": 150.9880303030303, - "num_samples": 6600, - "number_of_characters": 1956344 + "max_sentence2_length": 881, + "unique_sentence2": 6588 } } } diff --git a/mteb/descriptive_stats/BitextMining/PhincBitextMining.json b/mteb/descriptive_stats/BitextMining/PhincBitextMining.json new file mode 100644 index 0000000000..f4b237d87d --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/PhincBitextMining.json @@ -0,0 +1,30 @@ +{ + "train": { + "num_samples": 13738, + "number_of_characters": 2069457, + "unique_pairs": 13737, + "min_sentence1_length": 1, + "average_sentence1_length": 74.02300189256079, + "max_sentence1_length": 278, + "unique_sentence1": 13515, + "min_sentence2_length": 3, + "average_sentence2_length": 76.61442713640996, + "max_sentence2_length": 274, + "unique_sentence2": 13736, + "hf_subset_descriptive_stats": { + "eng-eng_hin": { + "num_samples": 13738, + "number_of_characters": 2069457, + "unique_pairs": 13737, + "min_sentence1_length": 1, + "average_sentence1_length": 74.02300189256079, + "max_sentence1_length": 278, + "unique_sentence1": 13515, + "min_sentence2_length": 3, + "average_sentence2_length": 76.61442713640996, + "max_sentence2_length": 274, + "unique_sentence2": 13736 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json b/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json new file mode 100644 index 0000000000..12f4003727 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json @@ -0,0 +1,43 
@@ +{ + "test": { + "num_samples": 3640, + "number_of_characters": 572146, + "unique_pairs": 3640, + "min_sentence1_length": 13, + "average_sentence1_length": 78.59148351648352, + "max_sentence1_length": 203, + "unique_sentence1": 3636, + "min_sentence2_length": 13, + "average_sentence2_length": 78.59148351648352, + "max_sentence2_length": 203, + "unique_sentence2": 3636, + "hf_subset_descriptive_stats": { + "kat_Geor-eng_Latn": { + "num_samples": 1820, + "number_of_characters": 286073, + "unique_pairs": 1820, + "min_sentence1_length": 30, + "average_sentence1_length": 76.06593406593407, + "max_sentence1_length": 189, + "unique_sentence1": 1820, + "min_sentence2_length": 13, + "average_sentence2_length": 81.11703296703297, + "max_sentence2_length": 203, + "unique_sentence2": 1816 + }, + "eng_Latn-kat_Geor": { + "num_samples": 1820, + "number_of_characters": 286073, + "unique_pairs": 1820, + "min_sentence1_length": 13, + "average_sentence1_length": 81.11703296703297, + "max_sentence1_length": 203, + "unique_sentence1": 1816, + "min_sentence2_length": 30, + "average_sentence2_length": 76.06593406593407, + "max_sentence2_length": 189, + "unique_sentence2": 1820 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json b/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json new file mode 100644 index 0000000000..2d97df573e --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json @@ -0,0 +1,15 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 575910, + "unique_pairs": 2048, + "min_sentence1_length": 11, + "average_sentence1_length": 139.22802734375, + "max_sentence1_length": 1291, + "unique_sentence1": 2048, + "min_sentence2_length": 11, + "average_sentence2_length": 141.97802734375, + "max_sentence2_length": 1217, + "unique_sentence2": 2047 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LanguageClassification.json 
b/mteb/descriptive_stats/Classification/LanguageClassification.json index cf8b83d5d1..6622d23be1 100644 --- a/mteb/descriptive_stats/Classification/LanguageClassification.json +++ b/mteb/descriptive_stats/Classification/LanguageClassification.json @@ -2,7 +2,11 @@ "test": { "num_samples": 2048, "number_of_characters": 224352, + "num_texts_in_train": 31, + "min_text_length": 14, "average_text_length": 109.546875, + "max_text_length": 1270, + "unique_text": 2025, "unique_labels": 20, "labels": { "17": { @@ -66,5 +70,77 @@ "count": 103 } } + }, + "train": { + "num_samples": 70000, + "number_of_characters": 7760299, + "num_texts_in_train": null, + "min_text_length": 2, + "average_text_length": 110.86141428571429, + "max_text_length": 2422, + "unique_text": 68978, + "unique_labels": 20, + "labels": { + "12": { + "count": 3500 + }, + "1": { + "count": 3500 + }, + "19": { + "count": 3500 + }, + "15": { + "count": 3500 + }, + "13": { + "count": 3500 + }, + "11": { + "count": 3500 + }, + "17": { + "count": 3500 + }, + "14": { + "count": 3500 + }, + "16": { + "count": 3500 + }, + "5": { + "count": 3500 + }, + "0": { + "count": 3500 + }, + "8": { + "count": 3500 + }, + "7": { + "count": 3500 + }, + "2": { + "count": 3500 + }, + "3": { + "count": 3500 + }, + "10": { + "count": 3500 + }, + "6": { + "count": 3500 + }, + "18": { + "count": 3500 + }, + "4": { + "count": 3500 + }, + "9": { + "count": 3500 + } + } } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json b/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json index 23225ae223..63fcfd3e51 100644 --- a/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json +++ b/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json @@ -2,7 +2,11 @@ "test": { "num_samples": 1319, "number_of_characters": 122279, + "num_texts_in_train": 46, + "min_text_length": 8, "average_text_length": 92.70583775587566, + 
"max_text_length": 1584, + "unique_text": 1315, "unique_labels": 2, "labels": { "1": { @@ -12,5 +16,23 @@ "count": 959 } } + }, + "train": { + "num_samples": 11870, + "number_of_characters": 1130860, + "num_texts_in_train": null, + "min_text_length": 7, + "average_text_length": 95.27042965459141, + "max_text_length": 2112, + "unique_text": 11655, + "unique_labels": 2, + "labels": { + "1": { + "count": 3245 + }, + "0": { + "count": 8625 + } + } } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json b/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json index 8a5118e0c6..e6066a83c2 100644 --- a/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json +++ b/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json @@ -2,8 +2,12 @@ "test": { "num_samples": 2048, "number_of_characters": 2065284, + "min_text_length": 103, "average_text_length": 1008.439453125, + "max_text_length": 2103, + "min_labels_per_text": 1, "average_labels_per_text": 1.46337890625, + "max_labels_per_text": 381, "unique_labels": 129, "labels": { "cs": { diff --git a/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json b/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json index f1dda79201..2d9a0a01bb 100644 --- a/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json +++ b/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json @@ -2,8 +2,13 @@ "test": { "num_samples": 10, "number_of_characters": 75000, + "min_text_length": 5000, "average_text_length": 7500.0, + "max_text_length": 10000, + "unique_texts": 41555, + "min_labels_per_text": 1, "average_labels_per_text": 7500.0, + "max_labels_per_text": 14251, "unique_labels": 26, "labels": { "neuroscience": { diff --git a/mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json new file mode 100644 index 0000000000..0370d5147e --- /dev/null +++ 
b/mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json @@ -0,0 +1,168 @@ +{ + "test": { + "num_samples": 37500, + "number_of_characters": 74294927, + "min_text_length": 148, + "average_text_length": 1981.1980533333333, + "max_text_length": 38759, + "min_labels_per_text": 6, + "average_labels_per_text": 1.0, + "max_labels_per_text": 8830, + "unique_labels": 51, + "labels": { + "epidemiology": { + "count": 6656 + }, + "public and global health": { + "count": 3595 + }, + "oncology": { + "count": 845 + }, + "allergy and immunology": { + "count": 464 + }, + "orthopedics": { + "count": 104 + }, + "health informatics": { + "count": 1107 + }, + "occupational and environmental health": { + "count": 415 + }, + "infectious diseases": { + "count": 8830 + }, + "genetic and genomic medicine": { + "count": 1918 + }, + "health policy": { + "count": 527 + }, + "gastroenterology": { + "count": 343 + }, + "radiology and imaging": { + "count": 541 + }, + "pain medicine": { + "count": 121 + }, + "neurology": { + "count": 1773 + }, + "primary care research": { + "count": 232 + }, + "rheumatology": { + "count": 189 + }, + "endocrinology": { + "count": 419 + }, + "hematology": { + "count": 202 + }, + "addiction medicine": { + "count": 178 + }, + "pediatrics": { + "count": 589 + }, + "cardiovascular medicine": { + "count": 855 + }, + "obstetrics and gynecology": { + "count": 373 + }, + "health systems and quality improvement": { + "count": 491 + }, + "nephrology": { + "count": 241 + }, + "respiratory medicine": { + "count": 482 + }, + "geriatric medicine": { + "count": 169 + }, + "dentistry and oral medicine": { + "count": 159 + }, + "psychiatry and clinical psychology": { + "count": 1781 + }, + "nutrition": { + "count": 240 + }, + "intensive care and critical care medicine": { + "count": 368 + }, + "rehabilitation medicine and physical therapy": { + "count": 322 + }, + "otolaryngology": { + "count": 166 + }, + "nursing": { + "count": 93 + }, + "transplantation": { + "count": 118 
+ }, + "health economics": { + "count": 327 + }, + "sports medicine": { + "count": 180 + }, + "hiv aids": { + "count": 363 + }, + "dermatology": { + "count": 98 + }, + "pathology": { + "count": 223 + }, + "emergency medicine": { + "count": 191 + }, + "pharmacology and therapeutics": { + "count": 221 + }, + "ophthalmology": { + "count": 220 + }, + "medical ethics": { + "count": 46 + }, + "palliative medicine": { + "count": 45 + }, + "sexual and reproductive health": { + "count": 156 + }, + "medical education": { + "count": 203 + }, + "surgery": { + "count": 162 + }, + "urology": { + "count": 65 + }, + "anesthesia": { + "count": 72 + }, + "toxicology": { + "count": 16 + }, + "forensic medicine": { + "count": 6 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json new file mode 100644 index 0000000000..7b55ddd4dc --- /dev/null +++ b/mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json @@ -0,0 +1,168 @@ +{ + "test": { + "num_samples": 37500, + "number_of_characters": 4301276, + "min_text_length": 18, + "average_text_length": 114.70069333333333, + "max_text_length": 339, + "min_labels_per_text": 6, + "average_labels_per_text": 1.0, + "max_labels_per_text": 8830, + "unique_labels": 51, + "labels": { + "epidemiology": { + "count": 6656 + }, + "public and global health": { + "count": 3595 + }, + "oncology": { + "count": 845 + }, + "allergy and immunology": { + "count": 464 + }, + "orthopedics": { + "count": 104 + }, + "health informatics": { + "count": 1107 + }, + "occupational and environmental health": { + "count": 415 + }, + "infectious diseases": { + "count": 8830 + }, + "genetic and genomic medicine": { + "count": 1918 + }, + "health policy": { + "count": 527 + }, + "gastroenterology": { + "count": 343 + }, + "radiology and imaging": { + "count": 541 + }, + "pain medicine": { + "count": 121 + }, + "neurology": { + "count": 1773 + 
}, + "primary care research": { + "count": 232 + }, + "rheumatology": { + "count": 189 + }, + "endocrinology": { + "count": 419 + }, + "hematology": { + "count": 202 + }, + "addiction medicine": { + "count": 178 + }, + "pediatrics": { + "count": 589 + }, + "cardiovascular medicine": { + "count": 855 + }, + "obstetrics and gynecology": { + "count": 373 + }, + "health systems and quality improvement": { + "count": 491 + }, + "nephrology": { + "count": 241 + }, + "respiratory medicine": { + "count": 482 + }, + "geriatric medicine": { + "count": 169 + }, + "dentistry and oral medicine": { + "count": 159 + }, + "psychiatry and clinical psychology": { + "count": 1781 + }, + "nutrition": { + "count": 240 + }, + "intensive care and critical care medicine": { + "count": 368 + }, + "rehabilitation medicine and physical therapy": { + "count": 322 + }, + "otolaryngology": { + "count": 166 + }, + "nursing": { + "count": 93 + }, + "transplantation": { + "count": 118 + }, + "health economics": { + "count": 327 + }, + "sports medicine": { + "count": 180 + }, + "hiv aids": { + "count": 363 + }, + "dermatology": { + "count": 98 + }, + "pathology": { + "count": 223 + }, + "emergency medicine": { + "count": 191 + }, + "pharmacology and therapeutics": { + "count": 221 + }, + "ophthalmology": { + "count": 220 + }, + "medical ethics": { + "count": 46 + }, + "palliative medicine": { + "count": 45 + }, + "sexual and reproductive health": { + "count": 156 + }, + "medical education": { + "count": 203 + }, + "surgery": { + "count": 162 + }, + "urology": { + "count": 65 + }, + "anesthesia": { + "count": 72 + }, + "toxicology": { + "count": 16 + }, + "forensic medicine": { + "count": 6 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json new file mode 100644 index 0000000000..ba997dbefc --- /dev/null +++ b/mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json 
@@ -0,0 +1,1335 @@ +{ + "test": { + "num_samples": 459389, + "number_of_characters": 334286895, + "min_text_length": 79, + "average_text_length": 727.6771864367671, + "max_text_length": 4359, + "min_labels_per_text": 2, + "average_labels_per_text": 1.0, + "max_labels_per_text": 77908, + "unique_labels": 440, + "labels": { + "FortNiteBR": { + "count": 436 + }, + "buildapc": { + "count": 8484 + }, + "offmychest": { + "count": 570 + }, + "nus": { + "count": 45 + }, + "relationship_advice": { + "count": 16651 + }, + "premed": { + "count": 201 + }, + "dogecoin": { + "count": 8108 + }, + "GamingLaptops": { + "count": 183 + }, + "asktransgender": { + "count": 326 + }, + "MachineLearning": { + "count": 61 + }, + "puppy101": { + "count": 1597 + }, + "GunAccessoriesForSale": { + "count": 2619 + }, + "Random_Acts_Of_Amazon": { + "count": 1115 + }, + "Catholicism": { + "count": 183 + }, + "MonsterHunter": { + "count": 218 + }, + "tipofmypenis": { + "count": 87 + }, + "samsung": { + "count": 69 + }, + "PersonalFinanceCanada": { + "count": 341 + }, + "Dyson_Sphere_Program": { + "count": 55 + }, + "bleach": { + "count": 41 + }, + "AmItheAsshole": { + "count": 3730 + }, + "WallStreetbetsELITE": { + "count": 328 + }, + "GlobalPowers": { + "count": 35 + }, + "ABraThatFits": { + "count": 159 + }, + "PokemonGoFriends": { + "count": 1165 + }, + "NoMansSkyTheGame": { + "count": 259 + }, + "masseffect": { + "count": 233 + }, + "dating_advice": { + "count": 559 + }, + "yoga": { + "count": 50 + }, + "depression": { + "count": 515 + }, + "COVID19positive": { + "count": 180 + }, + "generationology": { + "count": 37 + }, + "feedthebeast": { + "count": 192 + }, + "EliteDangerous": { + "count": 270 + }, + "alcoholicsanonymous": { + "count": 93 + }, + "GoRVing": { + "count": 35 + }, + "thedivision": { + "count": 111 + }, + "breakingmom": { + "count": 105 + }, + "AskAnAmerican": { + "count": 80 + }, + "HypnoFair": { + "count": 5 + }, + "JustUnsubbed": { + "count": 13 + }, + "socialanxiety": { + 
"count": 123 + }, + "dirtykikpals": { + "count": 202 + }, + "askTO": { + "count": 126 + }, + "AskCulinary": { + "count": 108 + }, + "Bogleheads": { + "count": 71 + }, + "dragonquest": { + "count": 45 + }, + "NoContract": { + "count": 30 + }, + "gorillaz": { + "count": 14 + }, + "MondoGore": { + "count": 8 + }, + "comicswap": { + "count": 56 + }, + "VirtualYoutubers": { + "count": 92 + }, + "Gta5Modding": { + "count": 28 + }, + "obs": { + "count": 61 + }, + "vcu": { + "count": 9 + }, + "KingkillerChronicle": { + "count": 17 + }, + "AmongUs": { + "count": 41 + }, + "wireshark": { + "count": 3 + }, + "Dodocodes": { + "count": 46 + }, + "Aliexpress": { + "count": 40 + }, + "LearnerDriverUK": { + "count": 12 + }, + "PanicAttack": { + "count": 23 + }, + "KassadinMains": { + "count": 10 + }, + "islam": { + "count": 93 + }, + "chronotrigger": { + "count": 4 + }, + "skincareexchange": { + "count": 13 + }, + "PokemonHome": { + "count": 21 + }, + "survivinginfidelity": { + "count": 71 + }, + "igcse": { + "count": 21 + }, + "C25K": { + "count": 21 + }, + "aorus": { + "count": 2 + }, + "idleon": { + "count": 19 + }, + "photography": { + "count": 22 + }, + "cryptocoins": { + "count": 7 + }, + "CanaryWharfBets": { + "count": 7 + }, + "KillingEve": { + "count": 7 + }, + "GameBuilderGarage": { + "count": 16 + }, + "SauceSharingCommunity": { + "count": 7 + }, + "turo": { + "count": 9 + }, + "foodscience": { + "count": 14 + }, + "HIMYM": { + "count": 20 + }, + "HauntingOfHillHouse": { + "count": 4 + }, + "GoodNotes": { + "count": 8 + }, + "RedditWritesSeinfeld": { + "count": 6 + }, + "AirReps": { + "count": 2 + }, + "ADHD": { + "count": 3811 + }, + "BuddyCrossing": { + "count": 446 + }, + "libraryofruina": { + "count": 98 + }, + "SluttyConfessions": { + "count": 2787 + }, + "tipofmytongue": { + "count": 7145 + }, + "fleshlight": { + "count": 128 + }, + "amcstock": { + "count": 13910 + }, + "teenagers": { + "count": 77908 + }, + "suggestmeabook": { + "count": 1540 + }, + 
"dirtypenpals": { + "count": 5587 + }, + "MinecraftServer": { + "count": 177 + }, + "CreditCards": { + "count": 669 + }, + "Guitar": { + "count": 10952 + }, + "rpg": { + "count": 529 + }, + "NoFap": { + "count": 14853 + }, + "lfg": { + "count": 1093 + }, + "MarsWallStreet": { + "count": 935 + }, + "SummonSign": { + "count": 931 + }, + "AssassinsCreedValhala": { + "count": 295 + }, + "hoi4": { + "count": 432 + }, + "Coins4Sale": { + "count": 260 + }, + "xbox": { + "count": 459 + }, + "TooAfraidToAsk": { + "count": 7404 + }, + "NBA2k": { + "count": 553 + }, + "KGBTR": { + "count": 943 + }, + "roblox": { + "count": 220 + }, + "salesforce": { + "count": 214 + }, + "TwoXChromosomes": { + "count": 1736 + }, + "mechmarket": { + "count": 4863 + }, + "Gaming_Headsets": { + "count": 103 + }, + "pittsburgh": { + "count": 189 + }, + "CryptoMars": { + "count": 1606 + }, + "FridayNightFunkin": { + "count": 378 + }, + "vaginismus": { + "count": 122 + }, + "transpositive": { + "count": 10 + }, + "comicbooks": { + "count": 274 + }, + "BDSMcommunity": { + "count": 185 + }, + "aliens": { + "count": 201 + }, + "Scotch": { + "count": 64 + }, + "KikRoleplay": { + "count": 141 + }, + "Kayaking": { + "count": 91 + }, + "196": { + "count": 47 + }, + "digimon": { + "count": 140 + }, + "Evernote": { + "count": 42 + }, + "logh": { + "count": 22 + }, + "arlington": { + "count": 15 + }, + "Adopted": { + "count": 8 + }, + "DissonautUniverse": { + "count": 4 + }, + "Midsommar": { + "count": 12 + }, + "SofiawithanF": { + "count": 83 + }, + "xmpp": { + "count": 6 + }, + "ZombsRoyale": { + "count": 16 + }, + "accesscontrol": { + "count": 8 + }, + "WetlanderHumor": { + "count": 2 + }, + "PoonamPandeyFanatics": { + "count": 2 + }, + "screenplaychallenge": { + "count": 2 + }, + "scatstories": { + "count": 2 + }, + "techsupport": { + "count": 290 + }, + "whatcarshouldIbuy": { + "count": 79 + }, + "Stormlight_Archive": { + "count": 15 + }, + "deadbydaylight": { + "count": 126 + }, + "bicycling": { + 
"count": 27 + }, + "oculus": { + "count": 64 + }, + "Cartalk": { + "count": 33 + }, + "Sims4": { + "count": 43 + }, + "NoFeeAC": { + "count": 95 + }, + "Crypto_com": { + "count": 37 + }, + "ITCareerQuestions": { + "count": 259 + }, + "aromantic": { + "count": 18 + }, + "Revu": { + "count": 3 + }, + "exalted": { + "count": 2 + }, + "HilariaBaldwin": { + "count": 20 + }, + "Testosterone": { + "count": 35 + }, + "Screenwriting": { + "count": 170 + }, + "LifeProTips": { + "count": 49 + }, + "steinsgate": { + "count": 13 + }, + "Baystreetbets": { + "count": 10 + }, + "AskGirls": { + "count": 7 + }, + "idlechampions": { + "count": 7 + }, + "facebook": { + "count": 17 + }, + "tf2trade": { + "count": 4 + }, + "mfdoom": { + "count": 3 + }, + "FiddlesticksMains": { + "count": 2 + }, + "HFY": { + "count": 10 + }, + "FiestaST": { + "count": 2 + }, + "whatsthatbook": { + "count": 994 + }, + "GearsOfWar": { + "count": 879 + }, + "KazuhaMains": { + "count": 175 + }, + "RepTime": { + "count": 211 + }, + "AstroGaming": { + "count": 141 + }, + "metalgearsolid": { + "count": 152 + }, + "qBittorrent": { + "count": 39 + }, + "ELLIPAL_Official": { + "count": 24 + }, + "raisedbynarcissists": { + "count": 4895 + }, + "unpopularopinion": { + "count": 14901 + }, + "ACTrade": { + "count": 5679 + }, + "askcarsales": { + "count": 1339 + }, + "AskVet": { + "count": 1357 + }, + "whowouldwin": { + "count": 4493 + }, + "playstation": { + "count": 1362 + }, + "anime": { + "count": 6531 + }, + "GME": { + "count": 12577 + }, + "DotA2": { + "count": 2004 + }, + "cryptostreetbets": { + "count": 2241 + }, + "MonsterHunterWorld": { + "count": 698 + }, + "Market76": { + "count": 14274 + }, + "DnD": { + "count": 5092 + }, + "leagueoflegends": { + "count": 3683 + }, + "doordash_drivers": { + "count": 1626 + }, + "theta_network": { + "count": 489 + }, + "exmuslim": { + "count": 1369 + }, + "gonewildaudio": { + "count": 2998 + }, + "conspiracy": { + "count": 3587 + }, + "heroesofthestorm": { + "count": 535 + 
}, + "FanFiction": { + "count": 2782 + }, + "Doom": { + "count": 1251 + }, + "texas": { + "count": 269 + }, + "Vent": { + "count": 1738 + }, + "selfimprovement": { + "count": 1284 + }, + "youtubers": { + "count": 706 + }, + "askseddit": { + "count": 237 + }, + "boardgames": { + "count": 1237 + }, + "bravelydefault": { + "count": 347 + }, + "ConquerorsBlade": { + "count": 238 + }, + "ChronicPain": { + "count": 527 + }, + "teenagersnew": { + "count": 256 + }, + "brasil": { + "count": 1092 + }, + "MatthiasSubmissions": { + "count": 921 + }, + "MarylandUnemployment": { + "count": 314 + }, + "SaltLakeCity": { + "count": 411 + }, + "BokunoheroFanfiction": { + "count": 155 + }, + "BenignExistence": { + "count": 125 + }, + "GayYoungOldDating": { + "count": 156 + }, + "Bible": { + "count": 202 + }, + "haskell": { + "count": 154 + }, + "seduction": { + "count": 400 + }, + "fantasywriters": { + "count": 262 + }, + "HiveOS": { + "count": 100 + }, + "PerkByDaylight": { + "count": 15 + }, + "Hedgehog": { + "count": 73 + }, + "xmen": { + "count": 263 + }, + "HyperRP": { + "count": 122 + }, + "emotestories": { + "count": 3 + }, + "tutanota": { + "count": 135 + }, + "CultoftheFranklin": { + "count": 46 + }, + "langrisser": { + "count": 62 + }, + "CozyGrove": { + "count": 61 + }, + "Sverigesforsvarsmakt": { + "count": 12 + }, + "silverbugbets": { + "count": 21 + }, + "WreckingBallMains": { + "count": 5 + }, + "capitalism_in_decay": { + "count": 8 + }, + "paintdotnet": { + "count": 11 + }, + "u_mawadom118": { + "count": 4 + }, + "xboxfindfriends": { + "count": 2 + }, + "CPTSD": { + "count": 540 + }, + "destiny2": { + "count": 318 + }, + "Wallstreetsilver": { + "count": 1013 + }, + "DestinyTheGame": { + "count": 1107 + }, + "blackopscoldwar": { + "count": 400 + }, + "InstacartShoppers": { + "count": 202 + }, + "RocketLeagueExchange": { + "count": 832 + }, + "apexlegends": { + "count": 3265 + }, + "kansascity": { + "count": 53 + }, + "namenerds": { + "count": 235 + }, + "help": { + 
"count": 152 + }, + "Kengan_Ashura": { + "count": 132 + }, + "thetagang": { + "count": 165 + }, + "GameSale": { + "count": 262 + }, + "Reduction": { + "count": 109 + }, + "sex": { + "count": 906 + }, + "bostonr4r": { + "count": 75 + }, + "LegendsOfRuneterra": { + "count": 231 + }, + "overlord": { + "count": 48 + }, + "madisonwi": { + "count": 53 + }, + "steelseries": { + "count": 79 + }, + "ClashOfClansRecruit": { + "count": 214 + }, + "CharacterRant": { + "count": 55 + }, + "AirForce": { + "count": 94 + }, + "sexstories": { + "count": 92 + }, + "NameThatSong": { + "count": 162 + }, + "depressed": { + "count": 74 + }, + "ibs": { + "count": 150 + }, + "40kLore": { + "count": 269 + }, + "podcasts": { + "count": 88 + }, + "miraculousladybug": { + "count": 150 + }, + "ask": { + "count": 224 + }, + "EverMerge": { + "count": 31 + }, + "TMJ": { + "count": 54 + }, + "BitLifeApp": { + "count": 39 + }, + "FireEmblemHeroes": { + "count": 100 + }, + "software": { + "count": 62 + }, + "ShieldAndroidTV": { + "count": 70 + }, + "GriefSupport": { + "count": 125 + }, + "onewheel": { + "count": 37 + }, + "MensRights": { + "count": 80 + }, + "nhl": { + "count": 22 + }, + "ClashOfClans": { + "count": 107 + }, + "ps3homebrew": { + "count": 33 + }, + "LightNovels": { + "count": 77 + }, + "redsox": { + "count": 34 + }, + "CryptoMarkets": { + "count": 44 + }, + "ugly": { + "count": 47 + }, + "GCXRep": { + "count": 12 + }, + "cscareerquestionsEU": { + "count": 65 + }, + "MindHunter": { + "count": 6 + }, + "starcraft2coop": { + "count": 15 + }, + "nanocurrency": { + "count": 1421 + }, + "ModelCars": { + "count": 8 + }, + "UKJobs": { + "count": 30 + }, + "Netherlands": { + "count": 44 + }, + "clonewars": { + "count": 8 + }, + "Julia": { + "count": 11 + }, + "Prolactinoma": { + "count": 9 + }, + "sofi": { + "count": 11 + }, + "royalfamily": { + "count": 6 + }, + "ConnecticutR4R": { + "count": 8 + }, + "weather": { + "count": 5 + }, + "oneui": { + "count": 7 + }, + "KTM": { + "count": 5 + }, + 
"Aerials": { + "count": 3 + }, + "seoul": { + "count": 2 + }, + "exjw": { + "count": 3281 + }, + "ModernMagic": { + "count": 699 + }, + "Paladins": { + "count": 1242 + }, + "kdramarecommends": { + "count": 1611 + }, + "hitbtc": { + "count": 330 + }, + "endocrinology": { + "count": 75 + }, + "Bath": { + "count": 43 + }, + "NassauCountyHookups": { + "count": 5 + }, + "feminineboys": { + "count": 1248 + }, + "dreamsmp": { + "count": 2018 + }, + "SquaredCircle": { + "count": 2255 + }, + "Minecraft": { + "count": 8753 + }, + "spirituality": { + "count": 1809 + }, + "Eldenring": { + "count": 1471 + }, + "Sat": { + "count": 1172 + }, + "bonnaroo": { + "count": 194 + }, + "gardening": { + "count": 1892 + }, + "Unemployment": { + "count": 6185 + }, + "mac": { + "count": 1847 + }, + "Bestbuy": { + "count": 437 + }, + "quittingkratom": { + "count": 1081 + }, + "lawschooladmissions": { + "count": 3436 + }, + "NiceHash": { + "count": 2135 + }, + "McMaster": { + "count": 815 + }, + "covidlonghaulers": { + "count": 1299 + }, + "stalker": { + "count": 758 + }, + "MLBTheShow": { + "count": 2721 + }, + "FortniteCompetitive": { + "count": 998 + }, + "dpdr": { + "count": 514 + }, + "appliancerepair": { + "count": 720 + }, + "thomasthetankengine": { + "count": 207 + }, + "delhi": { + "count": 217 + }, + "Huel": { + "count": 300 + }, + "leafs": { + "count": 203 + }, + "HotWheels": { + "count": 170 + }, + "90dayfianceuncensored": { + "count": 550 + }, + "Throwers": { + "count": 142 + }, + "Wavyhair": { + "count": 270 + }, + "CryptoHorde": { + "count": 128 + }, + "ShuumatsuNoValkyrie": { + "count": 453 + }, + "TeensMeetTeens": { + "count": 432 + }, + "dbrand": { + "count": 108 + }, + "SLFmeetups": { + "count": 18 + }, + "1200isplentyketo": { + "count": 48 + }, + "passive_income": { + "count": 211 + }, + "BroadCity": { + "count": 16 + }, + "RevenantMain": { + "count": 71 + }, + "extrarfl": { + "count": 25 + }, + "AgonGame": { + "count": 5 + }, + "FitnessDE": { + "count": 3 + }, + "gaming": 
{ + "count": 1277 + }, + "livesound": { + "count": 91 + }, + "IBO": { + "count": 1896 + }, + "EscapefromTarkov": { + "count": 1300 + }, + "amex": { + "count": 145 + }, + "DMAcademy": { + "count": 1411 + }, + "VinylCollectors": { + "count": 556 + }, + "cardano": { + "count": 716 + }, + "brave_browser": { + "count": 159 + }, + "dating": { + "count": 952 + }, + "OculusQuest": { + "count": 942 + }, + "Superstonk": { + "count": 3089 + }, + "MtF": { + "count": 957 + }, + "findaleague": { + "count": 207 + }, + "Nioh": { + "count": 398 + }, + "IRS": { + "count": 715 + }, + "transgendercirclejerk": { + "count": 353 + }, + "learnmath": { + "count": 489 + }, + "piano": { + "count": 263 + }, + "LeagueConnect": { + "count": 216 + }, + "eu4": { + "count": 561 + }, + "Wordpress": { + "count": 345 + }, + "RoleplayingForReddit": { + "count": 31 + }, + "LOONA": { + "count": 89 + }, + "newtothenavy": { + "count": 167 + }, + "HaircareScience": { + "count": 118 + }, + "appletv": { + "count": 167 + }, + "sissypersonals": { + "count": 102 + }, + "raleigh": { + "count": 168 + }, + "realonlyfansreviews": { + "count": 21 + }, + "AskGames": { + "count": 49 + }, + "PokemonTCG": { + "count": 325 + }, + "controlgame": { + "count": 109 + }, + "GoogleDataStudio": { + "count": 16 + }, + "WhiteWolfRPG": { + "count": 139 + }, + "MECoOp": { + "count": 31 + }, + "snuffrp": { + "count": 46 + }, + "lockpicking": { + "count": 103 + }, + "wicked_edge": { + "count": 105 + }, + "BMW": { + "count": 99 + }, + "choiceofgames": { + "count": 24 + }, + "hisdarkmaterials": { + "count": 12 + }, + "SakuraGakuin": { + "count": 24 + }, + "detrans": { + "count": 55 + }, + "Smallville": { + "count": 37 + }, + "kingofqueens": { + "count": 7 + }, + "JamesHoffmann": { + "count": 22 + }, + "stashinvest": { + "count": 16 + }, + "ABA": { + "count": 79 + }, + "ladybusiness": { + "count": 10 + }, + "gamegrumps": { + "count": 32 + }, + "GodEater": { + "count": 21 + }, + "tomorrow": { + "count": 39 + }, + "Tomorrowland": { + 
"count": 9 + }, + "BlackCountryNewRoad": { + "count": 5 + }, + "STAYC": { + "count": 3 + }, + "SatoshiStreetBets": { + "count": 3828 + }, + "AskLosAngeles": { + "count": 1036 + }, + "buildapcforme": { + "count": 1689 + }, + "ApplyingToCollege": { + "count": 10675 + }, + "watercooling": { + "count": 1209 + }, + "BreakUps": { + "count": 4914 + }, + "FIFA": { + "count": 3811 + }, + "emacs": { + "count": 712 + }, + "trakstocks": { + "count": 691 + }, + "Shittyaskflying": { + "count": 147 + }, + "AmazonFC": { + "count": 1178 + }, + "stocks": { + "count": 4610 + }, + "BangaloreMains": { + "count": 26 + }, + "pokemon": { + "count": 3953 + }, + "religion": { + "count": 684 + }, + "cuboulder": { + "count": 269 + }, + "self": { + "count": 1688 + }, + "tarot": { + "count": 912 + }, + "turtles": { + "count": 49 + }, + "TheMagnusArchives": { + "count": 300 + }, + "Superhero_Ideas": { + "count": 34 + }, + "NTU": { + "count": 308 + }, + "touhou": { + "count": 623 + }, + "JoJolion": { + "count": 50 + }, + "lasers": { + "count": 27 + }, + "popperpigs": { + "count": 67 + }, + "aggretsuko": { + "count": 20 + }, + "Library": { + "count": 5 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json b/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json index 9eff1b40d4..126cd893bc 100644 --- a/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json +++ b/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json @@ -2,8 +2,12 @@ "test": { "num_samples": 2048, "number_of_characters": 1822339, + "min_text_length": 84, "average_text_length": 889.81396484375, + "max_text_length": 3143, + "min_labels_per_text": 73, "average_labels_per_text": 1.0, + "max_labels_per_text": 74, "unique_labels": 28, "labels": { "3": { diff --git a/mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json b/mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json new file mode 100644 index 
0000000000..77be5a3b77 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json @@ -0,0 +1,75 @@ +{ + "test": { + "num_samples": 59545, + "number_of_characters": 1907719, + "min_text_length": 11, + "average_text_length": 32.03827357460744, + "max_text_length": 120, + "min_labels_per_text": 2082, + "average_labels_per_text": 1.0, + "max_labels_per_text": 3236, + "unique_labels": 20, + "labels": { + "12": { + "count": 3137 + }, + "6": { + "count": 3070 + }, + "0": { + "count": 2613 + }, + "2": { + "count": 3155 + }, + "10": { + "count": 3220 + }, + "17": { + "count": 2986 + }, + "14": { + "count": 3106 + }, + "13": { + "count": 3055 + }, + "1": { + "count": 3056 + }, + "16": { + "count": 2911 + }, + "9": { + "count": 2984 + }, + "3": { + "count": 3070 + }, + "15": { + "count": 3090 + }, + "7": { + "count": 3036 + }, + "5": { + "count": 3124 + }, + "11": { + "count": 3236 + }, + "18": { + "count": 2483 + }, + "8": { + "count": 3090 + }, + "19": { + "count": 2082 + }, + "4": { + "count": 3041 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json b/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json index 99b033bce0..4c1f303098 100644 --- a/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json +++ b/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json @@ -2,8 +2,13 @@ "test": { "num_samples": 140, "number_of_characters": 71680, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 49704, + "min_labels_per_text": 1, "average_labels_per_text": 512.0, + "max_labels_per_text": 3986, "unique_labels": 282, "labels": { "Nauke": { @@ -857,8 +862,13 @@ "bs": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 3860, + "min_labels_per_text": 6, "average_labels_per_text": 512.0, + "max_labels_per_text": 1492, "unique_labels": 17, 
"labels": { "Nauke": { @@ -917,8 +927,13 @@ "ca": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 4596, + "min_labels_per_text": 20, "average_labels_per_text": 512.0, + "max_labels_per_text": 1844, "unique_labels": 8, "labels": { "Llocs": { @@ -950,8 +965,13 @@ "cs": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 4782, + "min_labels_per_text": 21, "average_labels_per_text": 512.0, + "max_labels_per_text": 1559, "unique_labels": 21, "labels": { "Lid\u00c3\u00a9": { @@ -1022,8 +1042,13 @@ "da": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 4725, + "min_labels_per_text": 35, "average_labels_per_text": 512.0, + "max_labels_per_text": 911, "unique_labels": 20, "labels": { "Natur": { @@ -1091,8 +1116,13 @@ "eu": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 4474, + "min_labels_per_text": 110, "average_labels_per_text": 512.0, + "max_labels_per_text": 2486, "unique_labels": 5, "labels": { "Entitateak": { @@ -1115,8 +1145,13 @@ "gv": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 2717, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 1334, "unique_labels": 28, "labels": { "Chron-oaylleeaght": { @@ -1208,8 +1243,13 @@ "ilo": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 2258, + "min_labels_per_text": 1, "average_labels_per_text": 512.0, + "max_labels_per_text": 1405, "unique_labels": 34, "labels": { "Katutubo": { @@ 
-1319,8 +1359,13 @@ "ku": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 3365, + "min_labels_per_text": 5, "average_labels_per_text": 512.0, + "max_labels_per_text": 1078, "unique_labels": 39, "labels": { "Kes": { @@ -1445,8 +1490,13 @@ "lv": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 4540, + "min_labels_per_text": 13, "average_labels_per_text": 512.0, + "max_labels_per_text": 878, "unique_labels": 16, "labels": { "Kult\u00c5\u00abra": { @@ -1502,8 +1552,13 @@ "min": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 3881, + "min_labels_per_text": 1, "average_labels_per_text": 512.0, + "max_labels_per_text": 3986, "unique_labels": 16, "labels": { "Makaluak_iduik": { @@ -1559,8 +1614,13 @@ "mt": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 1887, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 1634, "unique_labels": 27, "labels": { "\u00c4\u00a0eografija": { @@ -1649,8 +1709,13 @@ "sco": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 2605, + "min_labels_per_text": 3, "average_labels_per_text": 512.0, + "max_labels_per_text": 1081, "unique_labels": 23, "labels": { "Life": { @@ -1727,8 +1792,13 @@ "sq": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 3741, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 1109, "unique_labels": 36, "labels": { "Gjeografi": { @@ -1844,8 +1914,13 @@ 
"wa": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 2317, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 3653, "unique_labels": 6, "labels": { "Economeye": { diff --git a/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json b/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json index 8a912bee43..897b23d7c7 100644 --- a/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json +++ b/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json @@ -4,11 +4,27 @@ "num_docs": 19899, "num_queries": 20, "number_of_characters": 44450333, + "min_document_length": 7, "average_document_length": 2233.0329664807277, + "max_document_length": 2959, + "unique_docs": 19143, + "min_query_length": 55, "average_query_length": 109.75, + "max_query_length": 278, + "unique_queries": 20, + "min_instruction_length": 102, "average_instruction_length": 295.55, + "max_instruction_length": 811, + "unique_instructions": 20, + "min_changed_instruction_length": 151, "average_changed_instruction_length": 355.2, + "max_changed_instruction_length": 837, + "unique_changed_instructions": 20, + "min_average_relevant_docs_per_query": 4, "average_relevant_docs_per_query": 32.7, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 55, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json b/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json index 2120a11139..d5d91adf50 100644 --- a/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json +++ b/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json @@ -1,9 +1,15 @@ { 
"test": { - "average_text_length": 91.20563230605738, + "num_samples": 1882, "number_of_characters": 171649, + "number_texts_in_train": 7, + "min_text_length": 6, + "average_text_length": 91.20563230605738, + "max_text_length": 220, + "unique_texts": 1875, + "min_labels_per_text": 0, "average_label_per_text": 0.620616365568544, - "num_samples": 1882, + "max_labels_per_text": 2, "unique_labels": 6, "labels": { "None": { @@ -25,5 +31,38 @@ "count": 125 } } + }, + "train": { + "num_samples": 7528, + "number_of_characters": 697322, + "number_texts_in_train": null, + "min_text_length": 5, + "average_text_length": 92.63044633368757, + "max_text_length": 280, + "unique_texts": 7500, + "min_labels_per_text": 0, + "average_label_per_text": 0.6101222104144527, + "max_labels_per_text": 3, + "unique_labels": 6, + "labels": { + "None": { + "count": 3043 + }, + "2": { + "count": 607 + }, + "0": { + "count": 1569 + }, + "3": { + "count": 589 + }, + "1": { + "count": 1417 + }, + "4": { + "count": 411 + } + } } } \ No newline at end of file diff --git a/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json b/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json deleted file mode 100644 index 2f4f979d02..0000000000 --- a/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json +++ /dev/null @@ -1,1732 +0,0 @@ -{ - "test": { - "average_text_length": 12014.408930434782, - "number_of_characters": 1381657027, - "average_label_per_text": 3.5938, - "num_samples": 115000, - "unique_labels": 21, - "labels": { - "18": { - "count": 50784 - }, - "15": { - "count": 30981 - }, - "5": { - "count": 24978 - }, - "6": { - "count": 45080 - }, - "3": { - "count": 63687 - }, - "17": { - "count": 37743 - }, - "1": { - "count": 15019 - }, - "20": { - "count": 14030 - }, - "0": { - "count": 17802 - }, - "2": { - "count": 22402 - }, - "19": { - "count": 10212 - }, - "9": { - "count": 3772 - }, - "4": { - 
"count": 9062 - }, - "10": { - "count": 7705 - }, - "11": { - "count": 12213 - }, - "7": { - "count": 14306 - }, - "12": { - "count": 11799 - }, - "8": { - "count": 13800 - }, - "13": { - "count": 2346 - }, - "14": { - "count": 4255 - }, - "16": { - "count": 1311 - } - }, - "hf_subset_descriptive_stats": { - "en": { - "average_text_length": 11720.2926, - "number_of_characters": 58601463, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "de": { - "average_text_length": 12865.4162, - "number_of_characters": 64327081, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "fr": { - "average_text_length": 13081.1098, - 
"number_of_characters": 65405549, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "it": { - "average_text_length": 12763.4786, - "number_of_characters": 63817393, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "es": { - "average_text_length": 13080.29, - "number_of_characters": 65401450, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - 
"0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "pl": { - "average_text_length": 12282.5926, - "number_of_characters": 61412963, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "ro": { - "average_text_length": 12836.9322, - "number_of_characters": 64184661, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 
- } - } - }, - "nl": { - "average_text_length": 12857.9742, - "number_of_characters": 64289871, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "el": { - "average_text_length": 12998.143, - "number_of_characters": 64990715, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "hu": { - "average_text_length": 12424.641, - "number_of_characters": 62123205, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - 
"1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "pt": { - "average_text_length": 12482.4616, - "number_of_characters": 62412308, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "cs": { - "average_text_length": 10783.4676, - "number_of_characters": 53917338, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 
- }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "sv": { - "average_text_length": 11612.4774, - "number_of_characters": 58062387, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "bg": { - "average_text_length": 12235.4268, - "number_of_characters": 61177134, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "da": { - "average_text_length": 11773.958, - "number_of_characters": 58869790, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - 
"3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "fi": { - "average_text_length": 12087.6862, - "number_of_characters": 60438431, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "sk": { - "average_text_length": 11130.814, - "number_of_characters": 55654070, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 
513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "lt": { - "average_text_length": 11245.3566, - "number_of_characters": 56226783, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "hr": { - "average_text_length": 11022.142, - "number_of_characters": 55110710, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "sl": { - "average_text_length": 10620.0594, - "number_of_characters": 53100297, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, 
- "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "et": { - "average_text_length": 10898.4312, - "number_of_characters": 54492156, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "lv": { - "average_text_length": 10938.5102, - "number_of_characters": 54692551, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - 
"count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "mt": { - "average_text_length": 12589.7442, - "number_of_characters": 62948721, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - } - } - } -} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json b/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json index 63180983ca..849724bdba 100644 --- a/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json +++ b/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json @@ -2,8 +2,14 @@ "test": { "num_samples": 14000, "number_of_characters": 2551922, - "avg_sentence1_len": 91.17892857142857, - "avg_sentence2_len": 91.10121428571429, + "min_sentence1_length": 2, + "avg_sentence1_length": 91.17892857142857, + "max_sentence1_length": 268, + "unique_sentence1": 13404, + "min_sentence2_length": 2, + "avg_sentence2_length": 91.10121428571429, + "max_sentence2_length": 247, + "unique_sentence2": 13462, "unique_labels": 2, "labels": { "1": { @@ -17,8 +23,14 @@ "de": { "num_samples": 2000, "number_of_characters": 478034, - "avg_sentence1_len": 119.7815, - 
"avg_sentence2_len": 119.2355, + "min_sentence1_length": 2, + "avg_sentence1_length": 119.7815, + "max_sentence1_length": 268, + "unique_sentence1": 1934, + "min_sentence2_length": 2, + "avg_sentence2_length": 119.2355, + "max_sentence2_length": 235, + "unique_sentence2": 1938, "unique_labels": 2, "labels": { "1": { @@ -32,8 +44,14 @@ "en": { "num_samples": 2000, "number_of_characters": 454362, - "avg_sentence1_len": 113.7575, - "avg_sentence2_len": 113.4235, + "min_sentence1_length": 25, + "avg_sentence1_length": 113.7575, + "max_sentence1_length": 209, + "unique_sentence1": 1761, + "min_sentence2_length": 25, + "avg_sentence2_length": 113.4235, + "max_sentence2_length": 209, + "unique_sentence2": 1800, "unique_labels": 2, "labels": { "1": { @@ -47,8 +65,14 @@ "es": { "num_samples": 2000, "number_of_characters": 471226, - "avg_sentence1_len": 117.815, - "avg_sentence2_len": 117.798, + "min_sentence1_length": 2, + "avg_sentence1_length": 117.815, + "max_sentence1_length": 226, + "unique_sentence1": 1955, + "min_sentence2_length": 22, + "avg_sentence2_length": 117.798, + "max_sentence2_length": 233, + "unique_sentence2": 1959, "unique_labels": 2, "labels": { "1": { @@ -62,8 +86,14 @@ "fr": { "num_samples": 2000, "number_of_characters": 480033, - "avg_sentence1_len": 120.028, - "avg_sentence2_len": 119.9885, + "min_sentence1_length": 2, + "avg_sentence1_length": 120.028, + "max_sentence1_length": 238, + "unique_sentence1": 1954, + "min_sentence2_length": 2, + "avg_sentence2_length": 119.9885, + "max_sentence2_length": 247, + "unique_sentence2": 1953, "unique_labels": 2, "labels": { "1": { @@ -77,8 +107,14 @@ "ja": { "num_samples": 2000, "number_of_characters": 235106, - "avg_sentence1_len": 58.678, - "avg_sentence2_len": 58.875, + "min_sentence1_length": 2, + "avg_sentence1_length": 58.678, + "max_sentence1_length": 192, + "unique_sentence1": 1944, + "min_sentence2_length": 2, + "avg_sentence2_length": 58.875, + "max_sentence2_length": 198, + "unique_sentence2": 
1941, "unique_labels": 2, "labels": { "1": { @@ -92,8 +128,14 @@ "ko": { "num_samples": 2000, "number_of_characters": 260149, - "avg_sentence1_len": 64.9605, - "avg_sentence2_len": 65.114, + "min_sentence1_length": 2, + "avg_sentence1_length": 64.9605, + "max_sentence1_length": 153, + "unique_sentence1": 1954, + "min_sentence2_length": 2, + "avg_sentence2_length": 65.114, + "max_sentence2_length": 159, + "unique_sentence2": 1969, "unique_labels": 2, "labels": { "1": { @@ -107,8 +149,14 @@ "zh": { "num_samples": 2000, "number_of_characters": 173012, - "avg_sentence1_len": 43.232, - "avg_sentence2_len": 43.274, + "min_sentence1_length": 2, + "avg_sentence1_length": 43.232, + "max_sentence1_length": 120, + "unique_sentence1": 1909, + "min_sentence2_length": 2, + "avg_sentence2_length": 43.274, + "max_sentence2_length": 113, + "unique_sentence2": 1909, "unique_labels": 2, "labels": { "1": { @@ -124,8 +172,14 @@ "validation": { "num_samples": 14000, "number_of_characters": 2524625, - "avg_sentence1_len": 90.12585714285714, - "avg_sentence2_len": 90.2045, + "min_sentence1_length": 2, + "avg_sentence1_length": 90.12585714285714, + "max_sentence1_length": 248, + "unique_sentence1": 13357, + "min_sentence2_length": 2, + "avg_sentence2_length": 90.2045, + "max_sentence2_length": 275, + "unique_sentence2": 13397, "unique_labels": 2, "labels": { "1": { @@ -139,8 +193,14 @@ "de": { "num_samples": 2000, "number_of_characters": 467643, - "avg_sentence1_len": 116.82, - "avg_sentence2_len": 117.0015, + "min_sentence1_length": 2, + "avg_sentence1_length": 116.82, + "max_sentence1_length": 248, + "unique_sentence1": 1914, + "min_sentence2_length": 2, + "avg_sentence2_length": 117.0015, + "max_sentence2_length": 275, + "unique_sentence2": 1920, "unique_labels": 2, "labels": { "1": { @@ -154,8 +214,14 @@ "en": { "num_samples": 2000, "number_of_characters": 451931, - "avg_sentence1_len": 113.1075, - "avg_sentence2_len": 112.858, + "min_sentence1_length": 25, + "avg_sentence1_length": 
113.1075, + "max_sentence1_length": 213, + "unique_sentence1": 1758, + "min_sentence2_length": 25, + "avg_sentence2_length": 112.858, + "max_sentence2_length": 213, + "unique_sentence2": 1771, "unique_labels": 2, "labels": { "1": { @@ -169,8 +235,14 @@ "es": { "num_samples": 2000, "number_of_characters": 466112, - "avg_sentence1_len": 116.3285, - "avg_sentence2_len": 116.7275, + "min_sentence1_length": 2, + "avg_sentence1_length": 116.3285, + "max_sentence1_length": 240, + "unique_sentence1": 1938, + "min_sentence2_length": 2, + "avg_sentence2_length": 116.7275, + "max_sentence2_length": 241, + "unique_sentence2": 1941, "unique_labels": 2, "labels": { "1": { @@ -184,8 +256,14 @@ "fr": { "num_samples": 2000, "number_of_characters": 478510, - "avg_sentence1_len": 119.5045, - "avg_sentence2_len": 119.7505, + "min_sentence1_length": 2, + "avg_sentence1_length": 119.5045, + "max_sentence1_length": 233, + "unique_sentence1": 1933, + "min_sentence2_length": 2, + "avg_sentence2_length": 119.7505, + "max_sentence2_length": 246, + "unique_sentence2": 1939, "unique_labels": 2, "labels": { "1": { @@ -199,8 +277,14 @@ "ja": { "num_samples": 2000, "number_of_characters": 229655, - "avg_sentence1_len": 57.5105, - "avg_sentence2_len": 57.317, + "min_sentence1_length": 2, + "avg_sentence1_length": 57.5105, + "max_sentence1_length": 126, + "unique_sentence1": 1957, + "min_sentence2_length": 2, + "avg_sentence2_length": 57.317, + "max_sentence2_length": 121, + "unique_sentence2": 1969, "unique_labels": 2, "labels": { "1": { @@ -214,8 +298,14 @@ "ko": { "num_samples": 2000, "number_of_characters": 261355, - "avg_sentence1_len": 65.162, - "avg_sentence2_len": 65.5155, + "min_sentence1_length": 2, + "avg_sentence1_length": 65.162, + "max_sentence1_length": 178, + "unique_sentence1": 1963, + "min_sentence2_length": 2, + "avg_sentence2_length": 65.5155, + "max_sentence2_length": 174, + "unique_sentence2": 1968, "unique_labels": 2, "labels": { "1": { @@ -229,8 +319,14 @@ "zh": { 
"num_samples": 2000, "number_of_characters": 169419, - "avg_sentence1_len": 42.448, - "avg_sentence2_len": 42.2615, + "min_sentence1_length": 2, + "avg_sentence1_length": 42.448, + "max_sentence1_length": 101, + "unique_sentence1": 1899, + "min_sentence2_length": 2, + "avg_sentence2_length": 42.2615, + "max_sentence2_length": 120, + "unique_sentence2": 1895, "unique_labels": 2, "labels": { "1": { diff --git a/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json b/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json index 6ca4a56161..473a765dd9 100644 --- a/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json +++ b/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json @@ -2,8 +2,14 @@ "test": { "num_samples": 51534, "number_of_characters": 8659940, - "avg_sentence1_len": 79.48919160166103, - "avg_sentence2_len": 88.5540419916948, + "min_sentence1_length": 24, + "avg_sentence1_length": 79.48919160166103, + "max_sentence1_length": 126, + "unique_sentence1": 4329, + "min_sentence2_length": 6, + "avg_sentence2_length": 88.5540419916948, + "max_sentence2_length": 608, + "unique_sentence2": 41304, "unique_labels": 2, "labels": { "0": { diff --git a/mteb/descriptive_stats/PairClassification/XNLI.json b/mteb/descriptive_stats/PairClassification/XNLI.json index 91ef224350..867fafdc85 100644 --- a/mteb/descriptive_stats/PairClassification/XNLI.json +++ b/mteb/descriptive_stats/PairClassification/XNLI.json @@ -2,8 +2,14 @@ "test": { "num_samples": 19110, "number_of_characters": 2907145, - "avg_sentence1_len": 103.23793825222397, - "avg_sentence2_len": 48.88895866038723, + "min_sentence1_length": 3, + "avg_sentence1_length": 103.23793825222397, + "max_sentence1_length": 401, + "unique_sentence1": 15328, + "min_sentence2_length": 2, + "avg_sentence2_length": 48.88895866038723, + "max_sentence2_length": 187, + "unique_sentence2": 19104, "unique_labels": 2, "labels": { "0": { @@ -17,8 +23,14 @@ "ar": { "num_samples": 1365, 
"number_of_characters": 179591, - "avg_sentence1_len": 89.57362637362637, - "avg_sentence2_len": 41.99487179487179, + "min_sentence1_length": 11, + "avg_sentence1_length": 89.57362637362637, + "max_sentence1_length": 242, + "unique_sentence1": 1095, + "min_sentence2_length": 8, + "avg_sentence2_length": 41.99487179487179, + "max_sentence2_length": 115, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -32,8 +44,14 @@ "bg": { "num_samples": 1365, "number_of_characters": 220646, - "avg_sentence1_len": 110.01611721611722, - "avg_sentence2_len": 51.62930402930403, + "min_sentence1_length": 14, + "avg_sentence1_length": 110.01611721611722, + "max_sentence1_length": 303, + "unique_sentence1": 1095, + "min_sentence2_length": 8, + "avg_sentence2_length": 51.62930402930403, + "max_sentence2_length": 150, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -47,8 +65,14 @@ "de": { "num_samples": 1365, "number_of_characters": 241224, - "avg_sentence1_len": 119.92600732600732, - "avg_sentence2_len": 56.794871794871796, + "min_sentence1_length": 3, + "avg_sentence1_length": 119.92600732600732, + "max_sentence1_length": 301, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + "avg_sentence2_length": 56.794871794871796, + "max_sentence2_length": 187, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -62,8 +86,14 @@ "el": { "num_samples": 1365, "number_of_characters": 240222, - "avg_sentence1_len": 119.05421245421246, - "avg_sentence2_len": 56.93260073260073, + "min_sentence1_length": 13, + "avg_sentence1_length": 119.05421245421246, + "max_sentence1_length": 344, + "unique_sentence1": 1095, + "min_sentence2_length": 13, + "avg_sentence2_length": 56.93260073260073, + "max_sentence2_length": 172, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -77,8 +107,14 @@ "en": { "num_samples": 1365, "number_of_characters": 212223, - "avg_sentence1_len": 105.67032967032966, - "avg_sentence2_len": 
49.8043956043956, + "min_sentence1_length": 19, + "avg_sentence1_length": 105.67032967032966, + "max_sentence1_length": 268, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + "avg_sentence2_length": 49.8043956043956, + "max_sentence2_length": 137, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -92,8 +128,14 @@ "es": { "num_samples": 1365, "number_of_characters": 232207, - "avg_sentence1_len": 115.43296703296703, - "avg_sentence2_len": 54.68205128205128, + "min_sentence1_length": 11, + "avg_sentence1_length": 115.43296703296703, + "max_sentence1_length": 385, + "unique_sentence1": 1094, + "min_sentence2_length": 8, + "avg_sentence2_length": 54.68205128205128, + "max_sentence2_length": 163, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -107,8 +149,14 @@ "fr": { "num_samples": 1365, "number_of_characters": 245259, - "avg_sentence1_len": 121.0967032967033, - "avg_sentence2_len": 58.58021978021978, + "min_sentence1_length": 9, + "avg_sentence1_length": 121.0967032967033, + "max_sentence1_length": 327, + "unique_sentence1": 1095, + "min_sentence2_length": 10, + "avg_sentence2_length": 58.58021978021978, + "max_sentence2_length": 169, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -122,8 +170,14 @@ "hi": { "num_samples": 1365, "number_of_characters": 211312, - "avg_sentence1_len": 104.63443223443224, - "avg_sentence2_len": 50.17289377289377, + "min_sentence1_length": 16, + "avg_sentence1_length": 104.63443223443224, + "max_sentence1_length": 401, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + "avg_sentence2_length": 50.17289377289377, + "max_sentence2_length": 162, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -137,8 +191,14 @@ "ru": { "num_samples": 1365, "number_of_characters": 222797, - "avg_sentence1_len": 110.76923076923077, - "avg_sentence2_len": 52.452014652014654, + "min_sentence1_length": 11, + "avg_sentence1_length": 110.76923076923077, + 
"max_sentence1_length": 306, + "unique_sentence1": 1095, + "min_sentence2_length": 8, + "avg_sentence2_length": 52.452014652014654, + "max_sentence2_length": 167, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -152,8 +212,14 @@ "sw": { "num_samples": 1365, "number_of_characters": 210103, - "avg_sentence1_len": 104.43956043956044, - "avg_sentence2_len": 49.48205128205128, + "min_sentence1_length": 10, + "avg_sentence1_length": 104.43956043956044, + "max_sentence1_length": 266, + "unique_sentence1": 1094, + "min_sentence2_length": 2, + "avg_sentence2_length": 49.48205128205128, + "max_sentence2_length": 146, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -167,8 +233,14 @@ "th": { "num_samples": 1365, "number_of_characters": 192788, - "avg_sentence1_len": 96.6923076923077, - "avg_sentence2_len": 44.544322344322346, + "min_sentence1_length": 12, + "avg_sentence1_length": 96.6923076923077, + "max_sentence1_length": 262, + "unique_sentence1": 1095, + "min_sentence2_length": 6, + "avg_sentence2_length": 44.544322344322346, + "max_sentence2_length": 129, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -182,8 +254,14 @@ "tr": { "num_samples": 1365, "number_of_characters": 208658, - "avg_sentence1_len": 103.67765567765568, - "avg_sentence2_len": 49.18534798534799, + "min_sentence1_length": 15, + "avg_sentence1_length": 103.67765567765568, + "max_sentence1_length": 255, + "unique_sentence1": 1095, + "min_sentence2_length": 6, + "avg_sentence2_length": 49.18534798534799, + "max_sentence2_length": 140, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -197,8 +275,14 @@ "vi": { "num_samples": 1365, "number_of_characters": 223549, - "avg_sentence1_len": 111.31208791208792, - "avg_sentence2_len": 52.46007326007326, + "min_sentence1_length": 14, + "avg_sentence1_length": 111.31208791208792, + "max_sentence1_length": 265, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + 
"avg_sentence2_length": 52.46007326007326, + "max_sentence2_length": 143, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -212,8 +296,14 @@ "zh": { "num_samples": 1365, "number_of_characters": 66566, - "avg_sentence1_len": 33.03589743589744, - "avg_sentence2_len": 15.73040293040293, + "min_sentence1_length": 4, + "avg_sentence1_length": 33.03589743589744, + "max_sentence1_length": 112, + "unique_sentence1": 1095, + "min_sentence2_length": 3, + "avg_sentence2_length": 15.73040293040293, + "max_sentence2_length": 59, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -229,8 +319,14 @@ "validation": { "num_samples": 19110, "number_of_characters": 2909058, - "avg_sentence1_len": 103.20790162218734, - "avg_sentence2_len": 49.01909994767138, + "min_sentence1_length": 5, + "avg_sentence1_length": 103.20790162218734, + "max_sentence1_length": 323, + "unique_sentence1": 11171, + "min_sentence2_length": 3, + "avg_sentence2_length": 49.01909994767138, + "max_sentence2_length": 172, + "unique_sentence2": 19101, "unique_labels": 2, "labels": { "0": { @@ -244,8 +340,14 @@ "ar": { "num_samples": 1365, "number_of_characters": 177355, - "avg_sentence1_len": 88.31868131868131, - "avg_sentence2_len": 41.61172161172161, + "min_sentence1_length": 13, + "avg_sentence1_length": 88.31868131868131, + "max_sentence1_length": 214, + "unique_sentence1": 798, + "min_sentence2_length": 6, + "avg_sentence2_length": 41.61172161172161, + "max_sentence2_length": 137, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -259,8 +361,14 @@ "bg": { "num_samples": 1365, "number_of_characters": 219988, - "avg_sentence1_len": 109.196336996337, - "avg_sentence2_len": 51.967032967032964, + "min_sentence1_length": 16, + "avg_sentence1_length": 109.196336996337, + "max_sentence1_length": 316, + "unique_sentence1": 798, + "min_sentence2_length": 10, + "avg_sentence2_length": 51.967032967032964, + "max_sentence2_length": 151, + "unique_sentence2": 1365, 
"unique_labels": 2, "labels": { "0": { @@ -274,8 +382,14 @@ "de": { "num_samples": 1365, "number_of_characters": 241852, - "avg_sentence1_len": 119.81172161172161, - "avg_sentence2_len": 57.36923076923077, + "min_sentence1_length": 20, + "avg_sentence1_length": 119.81172161172161, + "max_sentence1_length": 298, + "unique_sentence1": 798, + "min_sentence2_length": 12, + "avg_sentence2_length": 57.36923076923077, + "max_sentence2_length": 162, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -289,8 +403,14 @@ "el": { "num_samples": 1365, "number_of_characters": 241275, - "avg_sentence1_len": 119.87545787545787, - "avg_sentence2_len": 56.88278388278388, + "min_sentence1_length": 16, + "avg_sentence1_length": 119.87545787545787, + "max_sentence1_length": 302, + "unique_sentence1": 798, + "min_sentence2_length": 6, + "avg_sentence2_length": 56.88278388278388, + "max_sentence2_length": 171, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -304,8 +424,14 @@ "en": { "num_samples": 1365, "number_of_characters": 212384, - "avg_sentence1_len": 105.71648351648352, - "avg_sentence2_len": 49.87619047619047, + "min_sentence1_length": 20, + "avg_sentence1_length": 105.71648351648352, + "max_sentence1_length": 271, + "unique_sentence1": 798, + "min_sentence2_length": 8, + "avg_sentence2_length": 49.87619047619047, + "max_sentence2_length": 139, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -319,8 +445,14 @@ "es": { "num_samples": 1365, "number_of_characters": 232451, - "avg_sentence1_len": 115.17289377289377, - "avg_sentence2_len": 55.120879120879124, + "min_sentence1_length": 14, + "avg_sentence1_length": 115.17289377289377, + "max_sentence1_length": 265, + "unique_sentence1": 798, + "min_sentence2_length": 7, + "avg_sentence2_length": 55.120879120879124, + "max_sentence2_length": 148, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -334,8 +466,14 @@ "fr": { "num_samples": 1365, 
"number_of_characters": 246857, - "avg_sentence1_len": 121.75897435897436, - "avg_sentence2_len": 59.08864468864469, + "min_sentence1_length": 19, + "avg_sentence1_length": 121.75897435897436, + "max_sentence1_length": 323, + "unique_sentence1": 798, + "min_sentence2_length": 11, + "avg_sentence2_length": 59.08864468864469, + "max_sentence2_length": 172, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -349,8 +487,14 @@ "hi": { "num_samples": 1365, "number_of_characters": 212269, - "avg_sentence1_len": 105.06446886446886, - "avg_sentence2_len": 50.44395604395604, + "min_sentence1_length": 18, + "avg_sentence1_length": 105.06446886446886, + "max_sentence1_length": 277, + "unique_sentence1": 798, + "min_sentence2_length": 7, + "avg_sentence2_length": 50.44395604395604, + "max_sentence2_length": 152, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -364,8 +508,14 @@ "ru": { "num_samples": 1365, "number_of_characters": 221152, - "avg_sentence1_len": 109.74725274725274, - "avg_sentence2_len": 52.26886446886447, + "min_sentence1_length": 15, + "avg_sentence1_length": 109.74725274725274, + "max_sentence1_length": 310, + "unique_sentence1": 798, + "min_sentence2_length": 8, + "avg_sentence2_length": 52.26886446886447, + "max_sentence2_length": 140, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -379,8 +529,14 @@ "sw": { "num_samples": 1365, "number_of_characters": 210482, - "avg_sentence1_len": 104.32234432234432, - "avg_sentence2_len": 49.87692307692308, + "min_sentence1_length": 13, + "avg_sentence1_length": 104.32234432234432, + "max_sentence1_length": 264, + "unique_sentence1": 798, + "min_sentence2_length": 8, + "avg_sentence2_length": 49.87692307692308, + "max_sentence2_length": 153, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -394,8 +550,14 @@ "th": { "num_samples": 1365, "number_of_characters": 192640, - "avg_sentence1_len": 97.28498168498169, - "avg_sentence2_len": 
43.843223443223444, + "min_sentence1_length": 7, + "avg_sentence1_length": 97.28498168498169, + "max_sentence1_length": 255, + "unique_sentence1": 798, + "min_sentence2_length": 3, + "avg_sentence2_length": 43.843223443223444, + "max_sentence2_length": 140, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -409,8 +571,14 @@ "tr": { "num_samples": 1365, "number_of_characters": 208305, - "avg_sentence1_len": 102.96630036630036, - "avg_sentence2_len": 49.63809523809524, + "min_sentence1_length": 15, + "avg_sentence1_length": 102.96630036630036, + "max_sentence1_length": 269, + "unique_sentence1": 798, + "min_sentence2_length": 10, + "avg_sentence2_length": 49.63809523809524, + "max_sentence2_length": 139, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -424,8 +592,14 @@ "vi": { "num_samples": 1365, "number_of_characters": 224811, - "avg_sentence1_len": 112.26373626373626, - "avg_sentence2_len": 52.432967032967035, + "min_sentence1_length": 18, + "avg_sentence1_length": 112.26373626373626, + "max_sentence1_length": 323, + "unique_sentence1": 798, + "min_sentence2_length": 9, + "avg_sentence2_length": 52.432967032967035, + "max_sentence2_length": 159, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -439,8 +613,14 @@ "zh": { "num_samples": 1365, "number_of_characters": 67237, - "avg_sentence1_len": 33.41098901098901, - "avg_sentence2_len": 15.846886446886447, + "min_sentence1_length": 5, + "avg_sentence1_length": 33.41098901098901, + "max_sentence1_length": 135, + "unique_sentence1": 798, + "min_sentence2_length": 3, + "avg_sentence2_length": 15.846886446886447, + "max_sentence2_length": 66, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { diff --git a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json index c12f4f292f..a0ced7def7 100644 --- a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json +++ 
b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json @@ -4,8 +4,17 @@ "number_of_characters": 413674, "num_positive": 2255, "num_negative": 5245, - "avg_query_len": 50.205333333333336, - "avg_positive_len": 52.54013303769401, - "avg_negative_len": 52.69189704480458 + "min_query_length": 17, + "avg_query_length": 50.205333333333336, + "max_query_length": 148, + "unique_query": 374, + "min_positive_length": 15, + "avg_positive_length": 52.54013303769401, + "max_positive_length": 152, + "unique_positive": 2165, + "min_negative_length": 15, + "avg_negative_length": 52.69189704480458, + "max_negative_length": 148, + "unique_negative": 5002 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/ESCIReranking.json b/mteb/descriptive_stats/Reranking/ESCIReranking.json index 419b228ebf..9c9556be9d 100644 --- a/mteb/descriptive_stats/Reranking/ESCIReranking.json +++ b/mteb/descriptive_stats/Reranking/ESCIReranking.json @@ -4,36 +4,72 @@ "number_of_characters": 254538331, "num_positive": 271416, "num_negative": 44235, - "avg_query_len": 19.691890046098685, - "avg_positive_len": 803.9230995961918, - "avg_negative_len": 808.501458121397, + "min_query_length": 1, + "avg_query_length": 19.691890046098685, + "max_query_length": 151, + "unique_query": 29269, + "min_positive_length": 1, + "avg_positive_length": 803.9230995961918, + "max_positive_length": 8640, + "unique_positive": 217712, + "min_negative_length": 1, + "avg_negative_length": 808.501458121397, + "max_negative_length": 4441, + "unique_negative": 39551, "hf_subset_descriptive_stats": { "us": { "num_samples": 21296, "number_of_characters": 186915609, "num_positive": 189375, "num_negative": 25463, - "avg_query_len": 21.440833959429, - "avg_positive_len": 868.3698006600661, - "avg_negative_len": 864.4493578918431 + "min_query_length": 1, + "avg_query_length": 21.440833959429, + "max_query_length": 151, + "unique_query": 21296, + "min_positive_length": 1, + "avg_positive_length": 
868.3698006600661, + "max_positive_length": 5545, + "unique_positive": 150734, + "min_negative_length": 1, + "avg_negative_length": 864.4493578918431, + "max_negative_length": 3779, + "unique_negative": 23073 }, "es": { "num_samples": 3703, "number_of_characters": 48861389, "num_positive": 39110, "num_negative": 10183, - "avg_query_len": 20.681609505806104, - "avg_positive_len": 980.9613142418818, - "avg_negative_len": 1023.2159481488756 + "min_query_length": 3, + "avg_query_length": 20.681609505806104, + "max_query_length": 59, + "unique_query": 3703, + "min_positive_length": 1, + "avg_positive_length": 980.9613142418818, + "max_positive_length": 8640, + "unique_positive": 32921, + "min_negative_length": 1, + "avg_negative_length": 1023.2159481488756, + "max_negative_length": 4441, + "unique_negative": 9285 }, "jp": { "num_samples": 4286, "number_of_characters": 18761333, "num_positive": 42931, "num_negative": 8589, - "avg_query_len": 10.146756882874476, - "avg_positive_len": 358.35792317905475, - "avg_negative_len": 388.075445337059 + "min_query_length": 1, + "avg_query_length": 10.146756882874476, + "max_query_length": 60, + "unique_query": 4286, + "min_positive_length": 1, + "avg_positive_length": 358.35792317905475, + "max_positive_length": 3488, + "unique_positive": 35165, + "min_negative_length": 1, + "avg_negative_length": 388.075445337059, + "max_negative_length": 3940, + "unique_negative": 7289 } } } diff --git a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json index 1c5fe0f039..0506ff39e5 100644 --- a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json +++ b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json @@ -4,153 +4,306 @@ "number_of_characters": 83866932, "num_positive": 24000, "num_negative": 192000, - "avg_query_len": 59.091208333333334, - "avg_positive_len": 385.45120833333334, - "avg_negative_len": 381.23913541666667, + 
"min_query_length": 7, + "avg_query_length": 59.091208333333334, + "max_query_length": 180, + "unique_query": 23997, + "min_positive_length": 100, + "avg_positive_length": 385.45120833333334, + "max_positive_length": 3515, + "unique_positive": 23993, + "min_negative_length": 100, + "avg_negative_length": 381.23913541666667, + "max_negative_length": 9461, + "unique_negative": 191783, "hf_subset_descriptive_stats": { "bg": { "num_samples": 1500, "number_of_characters": 5145316, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 60.82666666666667, - "avg_positive_len": 375.88866666666667, - "avg_negative_len": 374.18691666666666 + "min_query_length": 18, + "avg_query_length": 60.82666666666667, + "max_query_length": 166, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 375.88866666666667, + "max_positive_length": 2241, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 374.18691666666666, + "max_negative_length": 4869, + "unique_negative": 11996 }, "bn": { "num_samples": 1500, "number_of_characters": 5390581, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 47.266666666666666, - "avg_positive_len": 394.5946666666667, - "avg_negative_len": 393.98241666666667 + "min_query_length": 7, + "avg_query_length": 47.266666666666666, + "max_query_length": 123, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 394.5946666666667, + "max_positive_length": 2338, + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 393.98241666666667, + "max_negative_length": 5104, + "unique_negative": 11996 }, "cs": { "num_samples": 1500, "number_of_characters": 5079180, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 56.272, - "avg_positive_len": 383.8446666666667, - "avg_negative_len": 368.2504166666667 + "min_query_length": 17, + "avg_query_length": 56.272, + "max_query_length": 137, + "unique_query": 1500, + "min_positive_length": 100, 
+ "avg_positive_length": 383.8446666666667, + "max_positive_length": 2300, + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 368.2504166666667, + "max_negative_length": 3487, + "unique_negative": 11982 }, "da": { "num_samples": 1500, "number_of_characters": 4746132, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 56.75066666666667, - "avg_positive_len": 351.6813333333333, - "avg_negative_len": 344.457 + "min_query_length": 17, + "avg_query_length": 56.75066666666667, + "max_query_length": 137, + "unique_query": 1499, + "min_positive_length": 100, + "avg_positive_length": 351.6813333333333, + "max_positive_length": 2159, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 344.457, + "max_negative_length": 2563, + "unique_negative": 11972 }, "de": { "num_samples": 1500, "number_of_characters": 5483592, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 70.004, - "avg_positive_len": 391.5366666666667, - "avg_negative_len": 399.27341666666666 + "min_query_length": 20, + "avg_query_length": 70.004, + "max_query_length": 180, + "unique_query": 1499, + "min_positive_length": 100, + "avg_positive_length": 391.5366666666667, + "max_positive_length": 2674, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 399.27341666666666, + "max_negative_length": 3083, + "unique_negative": 12000 }, "en": { "num_samples": 1500, "number_of_characters": 6217884, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 68.372, - "avg_positive_len": 451.72866666666664, - "avg_negative_len": 453.14441666666664 + "min_query_length": 18, + "avg_query_length": 68.372, + "max_query_length": 162, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 451.72866666666664, + "max_positive_length": 3515, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 453.14441666666664, + "max_negative_length": 3662, + 
"unique_negative": 12000 }, "fa": { "num_samples": 1500, "number_of_characters": 4732619, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 48.66733333333333, - "avg_positive_len": 347.704, - "avg_negative_len": 344.8385 + "min_query_length": 12, + "avg_query_length": 48.66733333333333, + "max_query_length": 119, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 347.704, + "max_positive_length": 2571, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 344.8385, + "max_negative_length": 4707, + "unique_negative": 11978 }, "fi": { "num_samples": 1500, "number_of_characters": 5209132, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 55.343333333333334, - "avg_positive_len": 394.7126666666667, - "avg_negative_len": 377.83733333333333 + "min_query_length": 14, + "avg_query_length": 55.343333333333334, + "max_query_length": 132, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 394.7126666666667, + "max_positive_length": 2129, + "unique_positive": 1498, + "min_negative_length": 100, + "avg_negative_length": 377.83733333333333, + "max_negative_length": 2574, + "unique_negative": 11972 }, "hi": { "num_samples": 1500, "number_of_characters": 5620959, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 50.77733333333333, - "avg_positive_len": 420.3786666666667, - "avg_negative_len": 409.51875 + "min_query_length": 13, + "avg_query_length": 50.77733333333333, + "max_query_length": 125, + "unique_query": 1499, + "min_positive_length": 100, + "avg_positive_length": 420.3786666666667, + "max_positive_length": 2361, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 409.51875, + "max_negative_length": 5912, + "unique_negative": 11996 }, "it": { "num_samples": 1500, "number_of_characters": 5420496, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 70.05466666666666, - "avg_positive_len": 396.97333333333336, - 
"avg_negative_len": 393.3295 + "min_query_length": 23, + "avg_query_length": 70.05466666666666, + "max_query_length": 156, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 396.97333333333336, + "max_positive_length": 2082, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 393.3295, + "max_negative_length": 9461, + "unique_negative": 11993 }, "nl": { "num_samples": 1500, "number_of_characters": 5169556, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 65.34466666666667, - "avg_positive_len": 380.79133333333334, - "avg_negative_len": 375.02933333333334 + "min_query_length": 18, + "avg_query_length": 65.34466666666667, + "max_query_length": 136, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 380.79133333333334, + "max_positive_length": 1864, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 375.02933333333334, + "max_negative_length": 3641, + "unique_negative": 11985 }, "pt": { "num_samples": 1500, "number_of_characters": 5474356, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 65.11933333333333, - "avg_positive_len": 404.01933333333335, - "avg_negative_len": 397.554 + "min_query_length": 18, + "avg_query_length": 65.11933333333333, + "max_query_length": 176, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 404.01933333333335, + "max_positive_length": 3057, + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 397.554, + "max_negative_length": 2877, + "unique_negative": 11991 }, "ro": { "num_samples": 1500, "number_of_characters": 4796113, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 61.973333333333336, - "avg_positive_len": 346.70866666666666, - "avg_negative_len": 348.5908333333333 + "min_query_length": 14, + "avg_query_length": 61.973333333333336, + "max_query_length": 169, + "unique_query": 1500, + "min_positive_length": 100, + 
"avg_positive_length": 346.70866666666666, + "max_positive_length": 1917, + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 348.5908333333333, + "max_negative_length": 4213, + "unique_negative": 11971 }, "sr": { "num_samples": 1500, "number_of_characters": 5271732, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 55.669333333333334, - "avg_positive_len": 386.34933333333333, - "avg_negative_len": 384.0586666666667 + "min_query_length": 15, + "avg_query_length": 55.669333333333334, + "max_query_length": 146, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 386.34933333333333, + "max_positive_length": 2421, + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 384.0586666666667, + "max_negative_length": 3668, + "unique_negative": 11974 }, "no": { "num_samples": 1500, "number_of_characters": 5036586, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 55.288, - "avg_positive_len": 367.72, - "avg_negative_len": 366.8395 + "min_query_length": 14, + "avg_query_length": 55.288, + "max_query_length": 129, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 367.72, + "max_positive_length": 1450, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 366.8395, + "max_negative_length": 2841, + "unique_negative": 11996 }, "sv": { "num_samples": 1500, "number_of_characters": 5072698, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 57.73, - "avg_positive_len": 372.58733333333333, - "avg_negative_len": 368.93516666666665 + "min_query_length": 17, + "avg_query_length": 57.73, + "max_query_length": 133, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 372.58733333333333, + "max_positive_length": 2493, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 368.93516666666665, + "max_negative_length": 3680, + "unique_negative": 11999 } } } diff 
--git a/mteb/descriptive_stats/Retrieval/AppsRetrieval.json b/mteb/descriptive_stats/Retrieval/AppsRetrieval.json index 8a71a1ad1a..caaab2453b 100644 --- a/mteb/descriptive_stats/Retrieval/AppsRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/AppsRetrieval.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 2245.837090504686, + "number_of_characters": 11335620, "num_samples": 12530, "num_queries": 3765, "num_documents": 8765, - "average_document_length": 0.0657169048317138, - "average_query_length": 0.4435135244766838, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 152, + "average_document_length": 717.2737022247576, + "max_document_length": 5742, + "unique_documents": 8765, + "min_query_length": 6, + "average_query_length": 1340.9604249667996, + "max_query_length": 289049, + "unique_queries": 3765, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3765 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json b/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json index fe213d96d7..78c8a7e121 100644 --- a/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json @@ -1,3396 +1,6789 @@ { "test": { - "number_of_characters": 76.49551684802204, + "number_of_characters": 25574620, "num_samples": 521866, "num_queries": 338378, "num_documents": 183488, - "average_document_length": 1.0899895361004534e-05, - "average_query_length": 0.000220154728877238, + "min_document_length": 4, + "average_document_length": 137.38034094872688, + "max_document_length": 237, + "unique_documents": 183488, + "min_query_length": 2, + "average_query_length": 1.0845149507355678, + "max_query_length": 2, + "unique_queries": 338378, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0000413738481817, + "max_relevant_docs_per_query": 2, + 
"unique_relevant_docs": 183488, "hf_subset_descriptive_stats": { "acm_Arab-acm_Arab": { - "number_of_characters": 57.84, + "number_of_characters": 51232, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06204444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 102.98360655737704, + "max_document_length": 129, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "acm_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-acm_Arab": { - "number_of_characters": 57.84, + "number_of_characters": 51232, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06204444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 102.98360655737704, + "max_document_length": 129, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + 
"unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "afr_Latn-afr_Latn": { - "number_of_characters": 80.04555555555555, + "number_of_characters": 71217, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08671728395061729, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 143.93647540983608, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "afr_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-afr_Latn": { - "number_of_characters": 80.04555555555555, + "number_of_characters": 71217, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08671728395061729, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 143.93647540983608, + "max_document_length": 159, + 
"unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "als_Latn-als_Latn": { - "number_of_characters": 78.13555555555556, + "number_of_characters": 69498, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08459506172839507, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 140.4139344262295, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "als_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-als_Latn": { - "number_of_characters": 78.13555555555556, + "number_of_characters": 69498, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08459506172839507, - 
"average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 140.4139344262295, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "amh_Ethi-amh_Ethi": { - "number_of_characters": 51.16111111111111, + "number_of_characters": 45221, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.05462345679012346, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 90.66598360655738, + "max_document_length": 100, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "amh_Ethi-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-amh_Ethi": { - "number_of_characters": 51.16111111111111, + "number_of_characters": 45221, "num_samples": 1388, "num_queries": 900, 
"num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.05462345679012346, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 90.66598360655738, + "max_document_length": 100, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "apc_Arab-apc_Arab": { - "number_of_characters": 57.85777777777778, + "number_of_characters": 51248, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.062064197530864194, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 103.01639344262296, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "apc_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-apc_Arab": { 
- "number_of_characters": 57.85777777777778, + "number_of_characters": 51248, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.062064197530864194, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 103.01639344262296, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Arab-arb_Arab": { - "number_of_characters": 60.55, + "number_of_characters": 53671, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06505555555555555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 107.98155737704919, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-arb_Arab": { - "number_of_characters": 60.55, + "number_of_characters": 53671, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06505555555555555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 107.98155737704919, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Latn-arb_Latn": { - "number_of_characters": 69.02444444444444, + "number_of_characters": 61298, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0744716049382716, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 123.61065573770492, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 
1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-arb_Latn": { - "number_of_characters": 69.02444444444444, + "number_of_characters": 61298, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0744716049382716, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 123.61065573770492, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ars_Arab-ars_Arab": { - "number_of_characters": 58.43222222222222, + "number_of_characters": 51765, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06270246913580246, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 104.07581967213115, + "max_document_length": 119, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ars_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 
142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ars_Arab": { - "number_of_characters": 58.43222222222222, + "number_of_characters": 51765, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06270246913580246, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 104.07581967213115, + "max_document_length": 119, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ary_Arab-ary_Arab": { - "number_of_characters": 68.01893095768374, + "number_of_characters": 60261, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07351774048739837, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 121.48565573770492, + "max_document_length": 138, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ary_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 
0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ary_Arab": { - "number_of_characters": 68.01893095768374, + "number_of_characters": 60261, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07351774048739837, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 121.48565573770492, + "max_document_length": 138, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arz_Arab-arz_Arab": { - "number_of_characters": 59.14111111111111, + "number_of_characters": 52403, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06349012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 105.38319672131148, + "max_document_length": 115, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arz_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, 
"num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-arz_Arab": { - "number_of_characters": 59.14111111111111, + "number_of_characters": 52403, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06349012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 105.38319672131148, + "max_document_length": 115, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "asm_Beng-asm_Beng": { - "number_of_characters": 70.26, + "number_of_characters": 62410, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07584444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 4, + "average_document_length": 125.88934426229508, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, 
"asm_Beng-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-asm_Beng": { - "number_of_characters": 70.26, + "number_of_characters": 62410, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07584444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 4, + "average_document_length": 125.88934426229508, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "azj_Latn-azj_Latn": { - "number_of_characters": 75.51222222222222, + "number_of_characters": 67137, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08168024691358025, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.57581967213116, + "max_document_length": 156, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "azj_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-azj_Latn": { - "number_of_characters": 75.51222222222222, + "number_of_characters": 67137, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08168024691358025, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.57581967213116, + "max_document_length": 156, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bam_Latn-bam_Latn": { - "number_of_characters": 74.34222222222222, + "number_of_characters": 66084, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08038024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 133.41803278688525, + "max_document_length": 166, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bam_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-bam_Latn": { - "number_of_characters": 74.34222222222222, + "number_of_characters": 66084, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08038024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 133.41803278688525, + "max_document_length": 166, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Beng-ben_Beng": { - "number_of_characters": 71.48444444444445, + "number_of_characters": 63512, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07720493827160495, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + 
"average_document_length": 128.14754098360655, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Beng-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ben_Beng": { - "number_of_characters": 71.48444444444445, + "number_of_characters": 63512, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07720493827160495, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 128.14754098360655, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Latn-ben_Latn": { - "number_of_characters": 76.78777777777778, + "number_of_characters": 68285, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.08309753086419754, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 137.92827868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ben_Latn": { - "number_of_characters": 76.78777777777778, + "number_of_characters": 68285, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08309753086419754, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 137.92827868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bod_Tibt-bod_Tibt": { - "number_of_characters": 88.90222222222222, + "number_of_characters": 79188, 
"num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09655802469135802, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.2704918032787, + "max_document_length": 213, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bod_Tibt-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-bod_Tibt": { - "number_of_characters": 88.90222222222222, + "number_of_characters": 79188, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09655802469135802, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.2704918032787, + "max_document_length": 213, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "bul_Cyrl-bul_Cyrl": { - "number_of_characters": 74.89, + "number_of_characters": 66577, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08098888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.42827868852459, + "max_document_length": 177, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bul_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-bul_Cyrl": { - "number_of_characters": 74.89, + "number_of_characters": 66577, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08098888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.42827868852459, + "max_document_length": 177, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "cat_Latn-cat_Latn": { - "number_of_characters": 77.40666666666667, + "number_of_characters": 68842, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08378518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.06967213114754, + "max_document_length": 163, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "cat_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-cat_Latn": { - "number_of_characters": 77.40666666666667, + "number_of_characters": 68842, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08378518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.06967213114754, + "max_document_length": 163, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ceb_Latn-ceb_Latn": { - "number_of_characters": 83.19666666666667, + "number_of_characters": 74053, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09021851851851853, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 149.74795081967213, + "max_document_length": 184, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ceb_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ceb_Latn": { - "number_of_characters": 83.19666666666667, + "number_of_characters": 74053, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09021851851851853, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 15, + "average_document_length": 149.74795081967213, + "max_document_length": 184, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ces_Latn-ces_Latn": { - "number_of_characters": 69.73333333333333, + "number_of_characters": 61936, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07525925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 124.91803278688525, + "max_document_length": 139, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ces_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ces_Latn": { - "number_of_characters": 69.73333333333333, + "number_of_characters": 61936, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - 
"average_document_length": 0.004098360655737705, - "average_query_length": 0.07525925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 124.91803278688525, + "max_document_length": 139, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ckb_Arab-ckb_Arab": { - "number_of_characters": 73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 131.0266393442623, + "max_document_length": 178, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ckb_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ckb_Arab": { - "number_of_characters": 
73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 131.0266393442623, + "max_document_length": 178, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "dan_Latn-dan_Latn": { - "number_of_characters": 74.96888888888888, + "number_of_characters": 66648, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08107654320987653, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.5737704918033, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "dan_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-dan_Latn": { - "number_of_characters": 74.96888888888888, + "number_of_characters": 66648, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08107654320987653, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.5737704918033, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "deu_Latn-deu_Latn": { - "number_of_characters": 77.32444444444444, + "number_of_characters": 68768, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08369382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 138.91803278688525, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "deu_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + 
"max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-deu_Latn": { - "number_of_characters": 77.32444444444444, + "number_of_characters": 68768, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08369382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 138.91803278688525, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ell_Grek-ell_Grek": { - "number_of_characters": 88.92666666666666, + "number_of_characters": 79210, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09658518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.3155737704918, + "max_document_length": 212, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ell_Grek-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + 
"max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ell_Grek": { - "number_of_characters": 88.92666666666666, + "number_of_characters": 79210, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09658518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.3155737704918, + "max_document_length": 212, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "est_Latn-est_Latn": { - "number_of_characters": 69.55888888888889, + "number_of_characters": 61779, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07506543209876543, - 
"average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 124.59631147540983, + "max_document_length": 164, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "est_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-est_Latn": { - "number_of_characters": 69.55888888888889, + "number_of_characters": 61779, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07506543209876543, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 124.59631147540983, + "max_document_length": 164, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eus_Latn-eus_Latn": { - "number_of_characters": 76.44777777777777, + "number_of_characters": 67979, "num_samples": 1388, "num_queries": 900, 
"num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08271975308641975, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 137.3012295081967, + "max_document_length": 169, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eus_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-eus_Latn": { - "number_of_characters": 76.44777777777777, + "number_of_characters": 67979, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08271975308641975, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 137.3012295081967, + "max_document_length": 169, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fin_Latn-fin_Latn": { - 
"number_of_characters": 74.50888888888889, + "number_of_characters": 66234, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08056543209876543, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.72540983606558, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-fin_Latn": { - "number_of_characters": 74.50888888888889, + "number_of_characters": 66234, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08056543209876543, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.72540983606558, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fra_Latn-fra_Latn": { - "number_of_characters": 92.54222222222222, + "number_of_characters": 82464, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10060246913580247, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 166.98360655737704, + "max_document_length": 204, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fra_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-fra_Latn": { - "number_of_characters": 92.54222222222222, + "number_of_characters": 82464, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10060246913580247, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 166.98360655737704, + "max_document_length": 204, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fuv_Latn-fuv_Latn": { - "number_of_characters": 60.42111111111111, + "number_of_characters": 53555, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06491234567901234, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 107.7438524590164, + "max_document_length": 122, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fuv_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-fuv_Latn": { - "number_of_characters": 60.42111111111111, + "number_of_characters": 53555, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06491234567901234, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + 
"average_document_length": 107.7438524590164, + "max_document_length": 122, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "gaz_Latn-gaz_Latn": { - "number_of_characters": 87.93222222222222, + "number_of_characters": 78315, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09548024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 158.48155737704917, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "gaz_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-gaz_Latn": { - "number_of_characters": 87.93222222222222, + "number_of_characters": 78315, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.09548024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 158.48155737704917, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "grn_Latn-grn_Latn": { - "number_of_characters": 77.10666666666667, + "number_of_characters": 68572, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08345185185185186, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 138.51639344262296, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "grn_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-grn_Latn": { - "number_of_characters": 77.10666666666667, + "number_of_characters": 
68572, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08345185185185186, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 138.51639344262296, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "guj_Gujr-guj_Gujr": { - "number_of_characters": 64.25666666666666, + "number_of_characters": 57007, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06917407407407407, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 114.81762295081967, + "max_document_length": 138, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "guj_Gujr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "eng_Latn-guj_Gujr": { - "number_of_characters": 64.25666666666666, + "number_of_characters": 57007, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06917407407407407, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 114.81762295081967, + "max_document_length": 138, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hat_Latn-hat_Latn": { - "number_of_characters": 72.64666666666666, + "number_of_characters": 64558, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07849629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.29098360655738, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hat_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 
900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hat_Latn": { - "number_of_characters": 72.64666666666666, + "number_of_characters": 64558, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07849629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.29098360655738, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hau_Latn-hau_Latn": { - "number_of_characters": 87.8488888888889, + "number_of_characters": 78240, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09538765432098766, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.327868852459, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hau_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, 
+ "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hau_Latn": { - "number_of_characters": 87.8488888888889, + "number_of_characters": 78240, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09538765432098766, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.327868852459, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "heb_Hebr-heb_Hebr": { - "number_of_characters": 57.135555555555555, + "number_of_characters": 50598, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06126172839506173, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 101.68442622950819, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "heb_Hebr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-heb_Hebr": { - "number_of_characters": 57.135555555555555, + "number_of_characters": 50598, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06126172839506173, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 101.68442622950819, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Deva-hin_Deva": { - "number_of_characters": 74.61777777777777, + "number_of_characters": 66332, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08068641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.9262295081967, + "max_document_length": 165, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Deva-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - 
"average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hin_Deva": { - "number_of_characters": 74.61777777777777, + "number_of_characters": 66332, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08068641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.9262295081967, + "max_document_length": 165, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Latn-hin_Latn": { - "number_of_characters": 76.81222222222222, + "number_of_characters": 68307, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312469135802468, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.9733606557377, + "max_document_length": 170, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Latn-eng_Latn": { - "number_of_characters": 
79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hin_Latn": { - "number_of_characters": 76.81222222222222, + "number_of_characters": 68307, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312469135802468, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.9733606557377, + "max_document_length": 170, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hrv_Latn-hrv_Latn": { - "number_of_characters": 70.83555555555556, + "number_of_characters": 62928, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07648395061728396, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.95081967213115, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, 
+ "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hrv_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hrv_Latn": { - "number_of_characters": 70.83555555555556, + "number_of_characters": 62928, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07648395061728396, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.95081967213115, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hun_Latn-hun_Latn": { - "number_of_characters": 76.40555555555555, + "number_of_characters": 67941, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08267283950617284, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 137.2233606557377, + "max_document_length": 176, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + 
"max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hun_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hun_Latn": { - "number_of_characters": 76.40555555555555, + "number_of_characters": 67941, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08267283950617284, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 137.2233606557377, + "max_document_length": 176, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hye_Armn-hye_Armn": { - "number_of_characters": 77.42555555555556, + "number_of_characters": 68859, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08380617283950619, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.1045081967213, + 
"max_document_length": 193, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hye_Armn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hye_Armn": { - "number_of_characters": 77.42555555555556, + "number_of_characters": 68859, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08380617283950619, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.1045081967213, + "max_document_length": 193, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ibo_Latn-ibo_Latn": { - "number_of_characters": 74.51501668520578, + "number_of_characters": 66167, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08066186505584626, - 
"average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 19, + "average_document_length": 133.58811475409837, + "max_document_length": 156, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "ibo_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ibo_Latn": { - "number_of_characters": 74.51501668520578, + "number_of_characters": 66167, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08066186505584626, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 19, + "average_document_length": 133.58811475409837, + "max_document_length": 156, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "ilo_Latn-ilo_Latn": { - "number_of_characters": 87.7611111111111, + 
"number_of_characters": 78161, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09529012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.16598360655738, + "max_document_length": 187, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ilo_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ilo_Latn": { - "number_of_characters": 87.7611111111111, + "number_of_characters": 78161, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09529012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.16598360655738, + "max_document_length": 187, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ind_Latn-ind_Latn": { - "number_of_characters": 84.10555555555555, + "number_of_characters": 74871, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09122839506172839, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 151.42418032786884, + "max_document_length": 207, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ind_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ind_Latn": { - "number_of_characters": 84.10555555555555, + "number_of_characters": 74871, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09122839506172839, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 151.42418032786884, + "max_document_length": 207, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + 
"max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "isl_Latn-isl_Latn": { - "number_of_characters": 79.27333333333333, + "number_of_characters": 70522, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08585925925925925, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 142.5122950819672, + "max_document_length": 170, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "isl_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-isl_Latn": { - "number_of_characters": 79.27333333333333, + "number_of_characters": 70522, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08585925925925925, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 142.5122950819672, + 
"max_document_length": 170, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ita_Latn-ita_Latn": { - "number_of_characters": 85.49777777777778, + "number_of_characters": 76124, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09277530864197532, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 153.99180327868854, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ita_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ita_Latn": { - "number_of_characters": 85.49777777777778, + "number_of_characters": 76124, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09277530864197532, - 
"average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 153.99180327868854, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jav_Latn-jav_Latn": { - "number_of_characters": 80.60666666666667, + "number_of_characters": 71722, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734074074074075, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 144.97131147540983, + "max_document_length": 174, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jav_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-jav_Latn": { - "number_of_characters": 80.60666666666667, + "number_of_characters": 71722, "num_samples": 1388, "num_queries": 900, 
"num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734074074074075, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 144.97131147540983, + "max_document_length": 174, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jpn_Jpan-jpn_Jpan": { - "number_of_characters": 37.79, + "number_of_characters": 33187, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039766666666666665, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 66.0061475409836, + "max_document_length": 76, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jpn_Jpan-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-jpn_Jpan": { - 
"number_of_characters": 37.79, + "number_of_characters": 33187, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039766666666666665, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 66.0061475409836, + "max_document_length": 76, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kac_Latn-kac_Latn": { - "number_of_characters": 100.64182424916574, + "number_of_characters": 89655, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10972394243511205, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 181.71926229508196, + "max_document_length": 195, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kac_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kac_Latn": { - "number_of_characters": 100.64182424916574, + "number_of_characters": 89655, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10972394243511205, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 181.71926229508196, + "max_document_length": 195, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kan_Knda-kan_Knda": { - "number_of_characters": 74.13666666666667, + "number_of_characters": 65899, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08015185185185185, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.0389344262295, + "max_document_length": 165, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kan_Knda-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kan_Knda": { - "number_of_characters": 74.13666666666667, + "number_of_characters": 65899, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08015185185185185, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.0389344262295, + "max_document_length": 165, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kat_Geor-kat_Geor": { - "number_of_characters": 76.81444444444445, + "number_of_characters": 68309, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312716049382717, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.97745901639345, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kat_Geor-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + 
"average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kat_Geor": { - "number_of_characters": 76.81444444444445, + "number_of_characters": 68309, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312716049382717, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.97745901639345, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kaz_Cyrl-kaz_Cyrl": { - "number_of_characters": 72.75666666666666, + "number_of_characters": 64657, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07861851851851852, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.49385245901638, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kaz_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, 
- "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kaz_Cyrl": { - "number_of_characters": 72.75666666666666, + "number_of_characters": 64657, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07861851851851852, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.49385245901638, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kea_Latn-kea_Latn": { - "number_of_characters": 77.94111111111111, + "number_of_characters": 69323, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08437901234567902, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.05532786885246, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kea_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 
70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kea_Latn": { - "number_of_characters": 77.94111111111111, + "number_of_characters": 69323, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08437901234567902, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.05532786885246, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "khk_Cyrl-khk_Cyrl": { - "number_of_characters": 75.33444444444444, + "number_of_characters": 66977, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08148271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 135.24795081967213, + "max_document_length": 162, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "khk_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-khk_Cyrl": { - "number_of_characters": 75.33444444444444, + "number_of_characters": 66977, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08148271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 135.24795081967213, + "max_document_length": 162, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "khm_Khmr-khm_Khmr": { - "number_of_characters": 77.74888888888889, + "number_of_characters": 69150, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08416543209876542, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 139.70081967213116, + "max_document_length": 169, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 
900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "khm_Khmr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-khm_Khmr": { - "number_of_characters": 77.74888888888889, + "number_of_characters": 69150, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08416543209876542, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 139.70081967213116, + "max_document_length": 169, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kin_Latn-kin_Latn": { - "number_of_characters": 81.89655172413794, + "number_of_characters": 72803, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08887269379770626, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 147.18647540983608, + "max_document_length": 194, + 
"unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "kin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kin_Latn": { - "number_of_characters": 81.89655172413794, + "number_of_characters": 72803, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08887269379770626, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 147.18647540983608, + "max_document_length": 194, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "kir_Cyrl-kir_Cyrl": { - "number_of_characters": 76.42333333333333, + "number_of_characters": 67957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 
0.0826925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 137.25614754098362, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kir_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kir_Cyrl": { - "number_of_characters": 76.42333333333333, + "number_of_characters": 67957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0826925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 137.25614754098362, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kor_Hang-kor_Hang": { - "number_of_characters": 37.257777777777775, + "number_of_characters": 32708, "num_samples": 1388, 
"num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039175308641975305, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 65.02459016393442, + "max_document_length": 88, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kor_Hang-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kor_Hang": { - "number_of_characters": 37.257777777777775, + "number_of_characters": 32708, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039175308641975305, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 65.02459016393442, + "max_document_length": 88, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, 
"lao_Laoo-lao_Laoo": { - "number_of_characters": 65.31333333333333, + "number_of_characters": 57958, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07034814814814815, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 116.76639344262296, + "max_document_length": 142, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lao_Laoo-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lao_Laoo": { - "number_of_characters": 65.31333333333333, + "number_of_characters": 57958, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07034814814814815, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 116.76639344262296, + "max_document_length": 142, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lin_Latn-lin_Latn": { - "number_of_characters": 83.56681514476615, + "number_of_characters": 74223, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09083164270018503, - "average_relevant_docs_per_query": 1.0022271714922049 + "min_document_length": 17, + "average_document_length": 150.09631147540983, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0022271714922049, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "lin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lin_Latn": { - "number_of_characters": 83.56681514476615, + "number_of_characters": 74223, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09083164270018503, - "average_relevant_docs_per_query": 1.0022271714922049 + "min_document_length": 17, + "average_document_length": 150.09631147540983, + 
"max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0022271714922049, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "lit_Latn-lit_Latn": { - "number_of_characters": 70.69888888888889, + "number_of_characters": 62805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0763320987654321, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 126.69877049180327, + "max_document_length": 167, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lit_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lit_Latn": { - "number_of_characters": 70.69888888888889, + "number_of_characters": 62805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 
0.0763320987654321, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 126.69877049180327, + "max_document_length": 167, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lug_Latn-lug_Latn": { - "number_of_characters": 80.52057842046719, + "number_of_characters": 71566, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734213394935171, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 12, + "average_document_length": 144.6516393442623, + "max_document_length": 237, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "lug_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lug_Latn": { - "number_of_characters": 80.52057842046719, + "number_of_characters": 
71566, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734213394935171, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 12, + "average_document_length": 144.6516393442623, + "max_document_length": 237, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "luo_Latn-luo_Latn": { - "number_of_characters": 75.14333333333333, + "number_of_characters": 66805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08127037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 134.8954918032787, + "max_document_length": 178, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "luo_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-luo_Latn": { - "number_of_characters": 75.14333333333333, + "number_of_characters": 66805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08127037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 134.8954918032787, + "max_document_length": 178, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lvs_Latn-lvs_Latn": { - "number_of_characters": 71.97888888888889, + "number_of_characters": 63957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07775432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 129.0594262295082, + "max_document_length": 172, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lvs_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + 
"max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lvs_Latn": { - "number_of_characters": 71.97888888888889, + "number_of_characters": 63957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07775432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 129.0594262295082, + "max_document_length": 172, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mal_Mlym-mal_Mlym": { - "number_of_characters": 82.69222222222223, + "number_of_characters": 73599, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08965802469135803, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.81762295081967, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mal_Mlym-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + 
"max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mal_Mlym": { - "number_of_characters": 82.69222222222223, + "number_of_characters": 73599, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08965802469135803, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.81762295081967, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mar_Deva-mar_Deva": { - "number_of_characters": 70.62625139043382, + "number_of_characters": 62671, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07633620844319669, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 15, + "average_document_length": 126.42418032786885, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "mar_Deva-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mar_Deva": { - "number_of_characters": 70.62625139043382, + "number_of_characters": 62671, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07633620844319669, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 15, + "average_document_length": 126.42418032786885, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "mkd_Cyrl-mkd_Cyrl": { - "number_of_characters": 76.01333333333334, + "number_of_characters": 67588, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08223703703703704, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 136.5, + "max_document_length": 180, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mkd_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + 
"number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mkd_Cyrl": { - "number_of_characters": 76.01333333333334, + "number_of_characters": 67588, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08223703703703704, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 136.5, + "max_document_length": 180, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mlt_Latn-mlt_Latn": { - "number_of_characters": 77.00444444444445, + "number_of_characters": 68480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08333827160493827, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 138.327868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, 
+ "unique_relevant_docs": 488 }, "mlt_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mlt_Latn": { - "number_of_characters": 77.00444444444445, + "number_of_characters": 68480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08333827160493827, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 138.327868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mri_Latn-mri_Latn": { - "number_of_characters": 83.71444444444444, + "number_of_characters": 74519, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09079382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 150.702868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, 
+ "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mri_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mri_Latn": { - "number_of_characters": 83.71444444444444, + "number_of_characters": 74519, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09079382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 150.702868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mya_Mymr-mya_Mymr": { - "number_of_characters": 91.28333333333333, + "number_of_characters": 81331, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0992037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 164.66188524590163, + "max_document_length": 171, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mya_Mymr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mya_Mymr": { - "number_of_characters": 91.28333333333333, + "number_of_characters": 81331, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0992037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 164.66188524590163, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nld_Latn-nld_Latn": { - "number_of_characters": 77.34777777777778, + "number_of_characters": 68789, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08371975308641975, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 16, + "average_document_length": 138.9610655737705, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nld_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nld_Latn": { - "number_of_characters": 77.34777777777778, + "number_of_characters": 68789, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08371975308641975, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 138.9610655737705, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nob_Latn-nob_Latn": { - "number_of_characters": 73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - 
"average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 131.0266393442623, + "max_document_length": 168, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nob_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nob_Latn": { - "number_of_characters": 73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 131.0266393442623, + "max_document_length": 168, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Deva-npi_Deva": { - "number_of_characters": 
68.89666666666666, + "number_of_characters": 61183, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07432962962962962, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 123.375, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Deva-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-npi_Deva": { - "number_of_characters": 68.89666666666666, + "number_of_characters": 61183, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07432962962962962, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 123.375, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Latn-npi_Latn": { - "number_of_characters": 73.89666666666666, + "number_of_characters": 65683, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07988518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 132.59631147540983, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-npi_Latn": { - "number_of_characters": 73.89666666666666, + "number_of_characters": 65683, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07988518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 132.59631147540983, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + 
"max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nso_Latn-nso_Latn": { - "number_of_characters": 88.77444444444444, + "number_of_characters": 79073, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09641604938271604, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 160.03483606557376, + "max_document_length": 235, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nso_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nso_Latn": { - "number_of_characters": 88.77444444444444, + "number_of_characters": 79073, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09641604938271604, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 160.03483606557376, + 
"max_document_length": 235, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nya_Latn-nya_Latn": { - "number_of_characters": 92.78777777777778, + "number_of_characters": 82685, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1008753086419753, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.43647540983608, + "max_document_length": 215, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nya_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nya_Latn": { - "number_of_characters": 92.78777777777778, + "number_of_characters": 82685, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1008753086419753, - 
"average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.43647540983608, + "max_document_length": 215, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ory_Orya-ory_Orya": { - "number_of_characters": 74.95777777777778, + "number_of_characters": 66638, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0810641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 10, + "average_document_length": 134.55327868852459, + "max_document_length": 168, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ory_Orya-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ory_Orya": { - "number_of_characters": 74.95777777777778, + "number_of_characters": 66638, "num_samples": 1388, "num_queries": 900, 
"num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0810641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 10, + "average_document_length": 134.55327868852459, + "max_document_length": 168, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pan_Guru-pan_Guru": { - "number_of_characters": 75.29777777777778, + "number_of_characters": 66944, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08144197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.18032786885246, + "max_document_length": 157, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pan_Guru-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pan_Guru": { - 
"number_of_characters": 75.29777777777778, + "number_of_characters": 66944, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08144197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.18032786885246, + "max_document_length": 157, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pbt_Arab-pbt_Arab": { - "number_of_characters": 69.67111111111112, + "number_of_characters": 61880, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07519012345679013, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 124.80327868852459, + "max_document_length": 155, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pbt_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pbt_Arab": { - "number_of_characters": 69.67111111111112, + "number_of_characters": 61880, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07519012345679013, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 124.80327868852459, + "max_document_length": 155, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pes_Arab-pes_Arab": { - "number_of_characters": 66.75111111111111, + "number_of_characters": 59252, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07194567901234568, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 119.41803278688525, + "max_document_length": 152, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pes_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pes_Arab": { - "number_of_characters": 66.75111111111111, + "number_of_characters": 59252, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07194567901234568, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 119.41803278688525, + "max_document_length": 152, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "plt_Latn-plt_Latn": { - "number_of_characters": 96.99555555555555, + "number_of_characters": 86472, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10555061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 175.19672131147541, + "max_document_length": 222, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "plt_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + 
"average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-plt_Latn": { - "number_of_characters": 96.99555555555555, + "number_of_characters": 86472, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10555061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 175.19672131147541, + "max_document_length": 222, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pol_Latn-pol_Latn": { - "number_of_characters": 76.09777777777778, + "number_of_characters": 67664, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08233086419753087, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 136.65573770491804, + "max_document_length": 196, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pol_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, 
- "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pol_Latn": { - "number_of_characters": 76.09777777777778, + "number_of_characters": 67664, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08233086419753087, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 136.65573770491804, + "max_document_length": 196, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "por_Latn-por_Latn": { - "number_of_characters": 80.11666666666666, + "number_of_characters": 71281, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08679629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.06762295081967, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "por_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 
70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-por_Latn": { - "number_of_characters": 80.11666666666666, + "number_of_characters": 71281, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08679629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.06762295081967, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ron_Latn-ron_Latn": { - "number_of_characters": 80.74222222222222, + "number_of_characters": 71844, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08749135802469137, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 145.22131147540983, + "max_document_length": 181, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "ron_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ron_Latn": { - "number_of_characters": 80.74222222222222, + "number_of_characters": 71844, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08749135802469137, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 145.22131147540983, + "max_document_length": 181, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "rus_Cyrl-rus_Cyrl": { - "number_of_characters": 85.16333333333333, + "number_of_characters": 75823, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0924037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 153.375, + "max_document_length": 196, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "rus_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-rus_Cyrl": { - "number_of_characters": 85.16333333333333, + "number_of_characters": 75823, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0924037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 153.375, + "max_document_length": 196, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "shn_Mymr-shn_Mymr": { - "number_of_characters": 77.90222222222222, + "number_of_characters": 69288, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0843358024691358, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 139.98360655737704, + "max_document_length": 159, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "shn_Mymr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-shn_Mymr": { - "number_of_characters": 77.90222222222222, + "number_of_characters": 69288, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0843358024691358, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 139.98360655737704, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Latn-sin_Latn": { - "number_of_characters": 96.46666666666667, + "number_of_characters": 85996, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10496296296296297, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 19, + "average_document_length": 174.22131147540983, + "max_document_length": 224, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sin_Latn": { - "number_of_characters": 96.46666666666667, + "number_of_characters": 85996, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10496296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 174.22131147540983, + "max_document_length": 224, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Sinh-sin_Sinh": { - "number_of_characters": 71.91777777777777, + "number_of_characters": 63902, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - 
"average_document_length": 0.004098360655737705, - "average_query_length": 0.07768641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 128.94672131147541, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Sinh-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sin_Sinh": { - "number_of_characters": 71.91777777777777, + "number_of_characters": 63902, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07768641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 128.94672131147541, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slk_Latn-slk_Latn": { - 
"number_of_characters": 70.5411111111111, + "number_of_characters": 62663, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07615679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 126.4077868852459, + "max_document_length": 146, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slk_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-slk_Latn": { - "number_of_characters": 70.5411111111111, + "number_of_characters": 62663, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07615679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 126.4077868852459, + "max_document_length": 146, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slv_Latn-slv_Latn": { - "number_of_characters": 70.79888888888888, + "number_of_characters": 62895, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0764432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.88319672131148, + "max_document_length": 176, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slv_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-slv_Latn": { - "number_of_characters": 70.79888888888888, + "number_of_characters": 62895, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0764432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.88319672131148, + "max_document_length": 176, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sna_Latn-sna_Latn": { - "number_of_characters": 83.30700778642937, + "number_of_characters": 74071, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09044161044096703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 149.78483606557376, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sna_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sna_Latn": { - "number_of_characters": 83.30700778642937, + "number_of_characters": 74071, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09044161044096703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + 
"average_document_length": 149.78483606557376, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "snd_Arab-snd_Arab": { - "number_of_characters": 65.42333333333333, + "number_of_characters": 58057, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07047037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 116.96926229508196, + "max_document_length": 164, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "snd_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-snd_Arab": { - "number_of_characters": 65.42333333333333, + "number_of_characters": 58057, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.07047037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 116.96926229508196, + "max_document_length": 164, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "som_Latn-som_Latn": { - "number_of_characters": 92.95777777777778, + "number_of_characters": 82838, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1010641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.75, + "max_document_length": 201, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "som_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-som_Latn": { - "number_of_characters": 92.95777777777778, + "number_of_characters": 82838, 
"num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1010641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.75, + "max_document_length": 201, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sot_Latn-sot_Latn": { - "number_of_characters": 85.13111111111111, + "number_of_characters": 75794, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0923679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 153.3155737704918, + "max_document_length": 186, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sot_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 
}, "eng_Latn-sot_Latn": { - "number_of_characters": 85.13111111111111, + "number_of_characters": 75794, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0923679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 153.3155737704918, + "max_document_length": 186, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "spa_Latn-spa_Latn": { - "number_of_characters": 84.16, + "number_of_characters": 74920, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09128888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 151.52459016393442, + "max_document_length": 180, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "spa_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-spa_Latn": { - "number_of_characters": 84.16, + "number_of_characters": 74920, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09128888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 151.52459016393442, + "max_document_length": 180, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "srp_Cyrl-srp_Cyrl": { - "number_of_characters": 69.49833147942158, + "number_of_characters": 61657, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07508157005497394, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 124.34631147540983, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "srp_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 
2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-srp_Cyrl": { - "number_of_characters": 69.49833147942158, + "number_of_characters": 61657, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07508157005497394, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 124.34631147540983, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "ssw_Latn-ssw_Latn": { - "number_of_characters": 83.09777777777778, + "number_of_characters": 73964, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09010864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 149.5655737704918, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ssw_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ssw_Latn": { - "number_of_characters": 83.09777777777778, + "number_of_characters": 73964, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09010864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 149.5655737704918, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sun_Latn-sun_Latn": { - "number_of_characters": 80.16, + "number_of_characters": 71320, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08684444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 144.14754098360655, + "max_document_length": 173, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sun_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 
0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sun_Latn": { - "number_of_characters": 80.16, + "number_of_characters": 71320, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08684444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 144.14754098360655, + "max_document_length": 173, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "swe_Latn-swe_Latn": { - "number_of_characters": 70.67666666666666, + "number_of_characters": 62785, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07630740740740741, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 126.6577868852459, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "swe_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + 
"number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-swe_Latn": { - "number_of_characters": 70.67666666666666, + "number_of_characters": 62785, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07630740740740741, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 126.6577868852459, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "swh_Latn-swh_Latn": { - "number_of_characters": 82.56, + "number_of_characters": 73480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08951111111111111, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.5737704918033, + "max_document_length": 194, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 
1, + "unique_relevant_docs": 488 }, "swh_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-swh_Latn": { - "number_of_characters": 82.56, + "number_of_characters": 73480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08951111111111111, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.5737704918033, + "max_document_length": 194, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tam_Taml-tam_Taml": { - "number_of_characters": 83.12777777777778, + "number_of_characters": 73991, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09014197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 149.62090163934425, + "max_document_length": 181, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tam_Taml-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tam_Taml": { - "number_of_characters": 83.12777777777778, + "number_of_characters": 73991, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09014197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 149.62090163934425, + "max_document_length": 181, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tel_Telu-tel_Telu": { - "number_of_characters": 74.18777777777778, + "number_of_characters": 65945, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08020864197530865, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 133.13319672131146, + "max_document_length": 149, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tel_Telu-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tel_Telu": { - "number_of_characters": 74.18777777777778, + "number_of_characters": 65945, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08020864197530865, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 133.13319672131146, + "max_document_length": 149, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgk_Cyrl-tgk_Cyrl": { - "number_of_characters": 76.28111111111112, + "number_of_characters": 67829, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08253456790123458, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 11, + "average_document_length": 136.99385245901638, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgk_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tgk_Cyrl": { - "number_of_characters": 76.28111111111112, + "number_of_characters": 67829, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08253456790123458, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 136.99385245901638, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgl_Latn-tgl_Latn": { - "number_of_characters": 84.34555555555555, + "number_of_characters": 75087, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - 
"average_document_length": 0.004098360655737705, - "average_query_length": 0.09149506172839506, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 151.86680327868854, + "max_document_length": 184, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgl_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tgl_Latn": { - "number_of_characters": 84.34555555555555, + "number_of_characters": 75087, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09149506172839506, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 151.86680327868854, + "max_document_length": 184, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tha_Thai-tha_Thai": { - 
"number_of_characters": 61.46666666666667, + "number_of_characters": 54496, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06607407407407408, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 109.67213114754098, + "max_document_length": 123, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tha_Thai-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tha_Thai": { - "number_of_characters": 61.46666666666667, + "number_of_characters": 54496, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06607407407407408, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 109.67213114754098, + "max_document_length": 123, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tir_Ethi-tir_Ethi": { - "number_of_characters": 53.99888888888889, + "number_of_characters": 47775, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.057776543209876546, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 95.89959016393442, + "max_document_length": 110, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tir_Ethi-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tir_Ethi": { - "number_of_characters": 53.99888888888889, + "number_of_characters": 47775, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.057776543209876546, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 95.89959016393442, + "max_document_length": 110, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tsn_Latn-tsn_Latn": { - "number_of_characters": 89.12777777777778, + "number_of_characters": 79391, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09680864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 160.68647540983608, + "max_document_length": 204, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tsn_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tsn_Latn": { - "number_of_characters": 89.12777777777778, + "number_of_characters": 79391, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09680864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + 
"average_document_length": 160.68647540983608, + "max_document_length": 204, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tso_Latn-tso_Latn": { - "number_of_characters": 93.69444444444444, + "number_of_characters": 83501, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10188271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 169.10860655737704, + "max_document_length": 215, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tso_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tso_Latn": { - "number_of_characters": 93.69444444444444, + "number_of_characters": 83501, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, 
- "average_query_length": 0.10188271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 169.10860655737704, + "max_document_length": 215, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tur_Latn-tur_Latn": { - "number_of_characters": 73.56222222222222, + "number_of_characters": 65382, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07951358024691357, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 131.9795081967213, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tur_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tur_Latn": { - "number_of_characters": 73.56222222222222, + "number_of_characters": 
65382, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07951358024691357, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 131.9795081967213, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ukr_Cyrl-ukr_Cyrl": { - "number_of_characters": 74.08222222222223, + "number_of_characters": 65850, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08009135802469136, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 132.93852459016392, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ukr_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "eng_Latn-ukr_Cyrl": { - "number_of_characters": 74.08222222222223, + "number_of_characters": 65850, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08009135802469136, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 132.93852459016392, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Arab-urd_Arab": { - "number_of_characters": 72.52666666666667, + "number_of_characters": 64450, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07836296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 130.06967213114754, + "max_document_length": 187, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 
900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-urd_Arab": { - "number_of_characters": 72.52666666666667, + "number_of_characters": 64450, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07836296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 130.06967213114754, + "max_document_length": 187, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Latn-urd_Latn": { - "number_of_characters": 92.07, + "number_of_characters": 82039, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10007777777777777, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 166.1127049180328, + "max_document_length": 230, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-urd_Latn": { - "number_of_characters": 92.07, + "number_of_characters": 82039, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10007777777777777, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 166.1127049180328, + "max_document_length": 230, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "uzn_Latn-uzn_Latn": { - "number_of_characters": 79.61333333333333, + "number_of_characters": 70828, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08623703703703703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 143.13934426229508, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "uzn_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + 
"average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-uzn_Latn": { - "number_of_characters": 79.61333333333333, + "number_of_characters": 70828, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08623703703703703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 143.13934426229508, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "vie_Latn-vie_Latn": { - "number_of_characters": 75.05333333333333, + "number_of_characters": 66724, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08117037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 134.7295081967213, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "vie_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-vie_Latn": { - "number_of_characters": 75.05333333333333, + "number_of_characters": 66724, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08117037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 134.7295081967213, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "war_Latn-war_Latn": { - "number_of_characters": 88.07555555555555, + "number_of_characters": 78444, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0956395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 158.74590163934425, + "max_document_length": 207, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "war_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, 
"num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-war_Latn": { - "number_of_characters": 88.07555555555555, + "number_of_characters": 78444, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0956395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 158.74590163934425, + "max_document_length": 207, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "wol_Latn-wol_Latn": { - "number_of_characters": 72.60555555555555, + "number_of_characters": 64521, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07845061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 130.21516393442624, + "max_document_length": 139, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "wol_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-wol_Latn": { - "number_of_characters": 72.60555555555555, + "number_of_characters": 64521, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07845061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 130.21516393442624, + "max_document_length": 139, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "xho_Latn-xho_Latn": { - "number_of_characters": 80.50333333333333, + "number_of_characters": 71629, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08722592592592593, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.78073770491804, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 
900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "xho_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-xho_Latn": { - "number_of_characters": 80.50333333333333, + "number_of_characters": 71629, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08722592592592593, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.78073770491804, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "yor_Latn-yor_Latn": { - "number_of_characters": 70.64, + "number_of_characters": 62752, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07626666666666666, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 126.59016393442623, + "max_document_length": 143, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "yor_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-yor_Latn": { - "number_of_characters": 70.64, + "number_of_characters": 62752, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07626666666666666, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 126.59016393442623, + "max_document_length": 143, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hans-zho_Hans": { - "number_of_characters": 23.747777777777777, + "number_of_characters": 20549, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.024164197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, 
+ "average_document_length": 40.10860655737705, + "max_document_length": 64, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hans-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zho_Hans": { - "number_of_characters": 23.747777777777777, + "number_of_characters": 20549, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.024164197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, + "average_document_length": 40.10860655737705, + "max_document_length": 64, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hant-zho_Hant": { - "number_of_characters": 23.07888888888889, + "number_of_characters": 19947, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.02342098765432099, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, + "average_document_length": 38.875, + "max_document_length": 45, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hant-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zho_Hant": { - "number_of_characters": 23.07888888888889, + "number_of_characters": 19947, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.02342098765432099, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, + "average_document_length": 38.875, + "max_document_length": 45, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zsm_Latn-zsm_Latn": { - "number_of_characters": 80.92444444444445, + "number_of_characters": 72008, "num_samples": 1388, 
"num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08769382716049383, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 145.55737704918033, + "max_document_length": 210, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zsm_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zsm_Latn": { - "number_of_characters": 80.92444444444445, + "number_of_characters": 72008, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08769382716049383, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 145.55737704918033, + "max_document_length": 210, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, 
"zul_Latn-zul_Latn": { - "number_of_characters": 78.0411111111111, + "number_of_characters": 69413, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08449012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.23975409836066, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zul_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zul_Latn": { - "number_of_characters": 78.0411111111111, + "number_of_characters": 69413, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08449012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.23975409836066, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Arab-arb_Latn": { - "number_of_characters": 69.02444444444444, + "number_of_characters": 61298, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0744716049382716, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 123.61065573770492, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Latn-arb_Arab": { - "number_of_characters": 60.55, + "number_of_characters": 53671, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06505555555555555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 107.98155737704919, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Beng-ben_Latn": { - "number_of_characters": 76.78777777777778, + "number_of_characters": 68285, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08309753086419754, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 137.92827868852459, + "max_document_length": 185, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Latn-ben_Beng": { - "number_of_characters": 71.48444444444445, + "number_of_characters": 63512, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07720493827160495, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 128.14754098360655, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Deva-hin_Latn": { - "number_of_characters": 76.81222222222222, + "number_of_characters": 68307, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312469135802468, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.9733606557377, + "max_document_length": 170, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Latn-hin_Deva": { - "number_of_characters": 74.61777777777777, + "number_of_characters": 66332, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08068641975308642, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 14, + "average_document_length": 133.9262295081967, + "max_document_length": 165, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Deva-npi_Latn": { - "number_of_characters": 73.89666666666666, + "number_of_characters": 65683, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07988518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 132.59631147540983, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Latn-npi_Deva": { - "number_of_characters": 68.89666666666666, + "number_of_characters": 61183, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07432962962962962, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 123.375, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Sinh-sin_Latn": { - "number_of_characters": 96.46666666666667, + "number_of_characters": 85996, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 
0.004098360655737705, - "average_query_length": 0.10496296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 174.22131147540983, + "max_document_length": 224, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Latn-sin_Sinh": { - "number_of_characters": 71.91777777777777, + "number_of_characters": 63902, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07768641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 128.94672131147541, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Arab-urd_Latn": { - "number_of_characters": 92.07, + "number_of_characters": 82039, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10007777777777777, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 166.1127049180328, + "max_document_length": 230, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Latn-urd_Arab": { - "number_of_characters": 72.52666666666667, - "num_samples": 
1388, - "num_queries": 900, - "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07836296296296297, - "average_relevant_docs_per_query": 1.0 + "number_of_characters": 64450, + "num_samples": 1388, + "num_queries": 900, + "num_documents": 488, + "min_document_length": 11, + "average_document_length": 130.06967213114754, + "max_document_length": 187, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 } } } diff --git a/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json b/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json index e9ff1f7874..3d27f624b9 100644 --- a/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json @@ -1,66 +1,129 @@ { "test": { - "number_of_characters": 664.7686497593272, + "number_of_characters": 36843313, "num_samples": 1056326, "num_queries": 52561, "num_documents": 1003765, - "average_document_length": 1.9924982441109223e-06, - "average_query_length": 0.012609513703303347, + "min_document_length": 54, + "average_document_length": 34.70511822986456, + "max_document_length": 334374, + "unique_documents": 1003765, + "min_query_length": 2, + "average_query_length": 38.19428854093339, + "max_query_length": 2, + "unique_queries": 52561, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 52561, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 941.4041426464673, + "number_of_characters": 14574651, "num_samples": 295228, "num_queries": 14918, "num_documents": 280310, - "average_document_length": 7.134957725375477e-06, - "average_query_length": 
0.0629711853228628, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 95, + "average_document_length": 49.994759373550714, + "max_document_length": 14008, + "unique_documents": 280310, + "min_query_length": 2, + "average_query_length": 37.5801045716584, + "max_query_length": 2, + "unique_queries": 14918, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 14918 }, "javascript": { - "number_of_characters": 748.8343968398663, + "number_of_characters": 2587540, "num_samples": 68145, "num_queries": 3291, "num_documents": 64854, - "average_document_length": 3.0838498781879296e-05, - "average_query_length": 0.2269323600242681, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 87, + "average_document_length": 37.89792456903198, + "max_document_length": 334374, + "unique_documents": 64854, + "min_query_length": 2, + "average_query_length": 39.412944393801276, + "max_query_length": 2, + "unique_queries": 3291, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3291 }, "go": { - "number_of_characters": 405.3770007387343, + "number_of_characters": 3641108, "num_samples": 190562, "num_queries": 8122, "num_documents": 182440, - "average_document_length": 1.0962508221881167e-05, - "average_query_length": 0.049664737840277556, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 54, + "average_document_length": 17.957838193378645, + "max_document_length": 5280, + "unique_documents": 182440, + "min_query_length": 2, + "average_query_length": 44.9248953459739, + "max_query_length": 2, + "unique_queries": 8122, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 8122 }, "ruby": { - "number_of_characters": 457.43695479777955, + "number_of_characters": 629446, "num_samples": 
28831, "num_queries": 1261, "num_documents": 27570, - "average_document_length": 7.254261878853827e-05, - "average_query_length": 0.3611712567785722, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 83, + "average_document_length": 20.830830612985128, + "max_document_length": 3992, + "unique_documents": 27570, + "min_query_length": 2, + "average_query_length": 43.72720063441713, + "max_query_length": 2, + "unique_queries": 1261, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1261 }, "java": { - "number_of_characters": 588.8922866271109, + "number_of_characters": 6791137, "num_samples": 191821, "num_queries": 10955, "num_documents": 180866, - "average_document_length": 1.1057910276116019e-05, - "average_query_length": 0.053573006538303145, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 77, + "average_document_length": 35.54789180940586, + "max_document_length": 7615, + "unique_documents": 180866, + "min_query_length": 2, + "average_query_length": 33.019808306709265, + "max_query_length": 2, + "unique_queries": 10955, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 10955 }, "php": { - "number_of_characters": 578.8503639217925, + "number_of_characters": 8619431, "num_samples": 281739, "num_queries": 14014, "num_documents": 267725, - "average_document_length": 7.470352040339901e-06, - "average_query_length": 0.041162434987997175, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 94, + "average_document_length": 30.195091978709495, + "max_document_length": 4904, + "unique_documents": 267725, + "min_query_length": 2, + "average_query_length": 38.20822035107749, + "max_query_length": 2, + "unique_queries": 14014, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 14014 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json index a0325c6385..6d73096d42 100644 --- a/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json @@ -1,129 +1,255 @@ { "train": { - "number_of_characters": 71.98776923076923, + "number_of_characters": 935841, "num_samples": 26000, "num_queries": 13000, "num_documents": 13000, - "average_document_length": 7.692307692307693e-05, - "average_query_length": 0.005460597633136095, + "min_document_length": 18, + "average_document_length": 70.98776923076923, + "max_document_length": 2532, + "unique_documents": 13000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 13000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 13000, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 70.519, + "number_of_characters": 70519, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.06951900000000001, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 21, + "average_document_length": 69.519, + "max_document_length": 1811, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "javascript": { - "number_of_characters": 57.88, + "number_of_characters": 57880, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.05688, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + 
"average_document_length": 56.88, + "max_document_length": 601, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "typescript": { - "number_of_characters": 61.092, + "number_of_characters": 61092, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.060092, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 60.092, + "max_document_length": 659, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "go": { - "number_of_characters": 71.797, + "number_of_characters": 71797, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.070797, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 70.797, + "max_document_length": 1529, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "ruby": { - "number_of_characters": 67.9, + "number_of_characters": 67900, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.0669, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 66.9, + "max_document_length": 751, + "unique_documents": 1000, + 
"min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "java": { - "number_of_characters": 63.984, + "number_of_characters": 63984, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.062984, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 23, + "average_document_length": 62.984, + "max_document_length": 807, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "php": { - "number_of_characters": 62.927, + "number_of_characters": 62927, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.061927, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 21, + "average_document_length": 61.927, + "max_document_length": 766, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "c": { - "number_of_characters": 98.588, + "number_of_characters": 98588, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.097588, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 97.588, + "max_document_length": 1672, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 
1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "c++": { - "number_of_characters": 115.48, + "number_of_characters": 115480, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.11448, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 22, + "average_document_length": 114.48, + "max_document_length": 1856, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "rust": { - "number_of_characters": 68.503, + "number_of_characters": 68503, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.067503, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 67.503, + "max_document_length": 2532, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "swift": { - "number_of_characters": 58.279, + "number_of_characters": 58279, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.057279000000000004, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 57.279, + "max_document_length": 727, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, 
+ "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "scala": { - "number_of_characters": 65.833, + "number_of_characters": 65833, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.064833, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 22, + "average_document_length": 64.833, + "max_document_length": 685, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "shell": { - "number_of_characters": 73.059, + "number_of_characters": 73059, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.072059, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 72.059, + "max_document_length": 813, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json b/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json index c498011923..1be18319cd 100644 --- a/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json +++ b/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 5894.401984777533, + "number_of_characters": 156266302, "num_samples": 79660, "num_queries": 13277, "num_documents": 66383, - "average_document_length": 0.022127347788495202, - "average_query_length": 0.3333224566192555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 127, + "average_document_length": 
885.131117906693, + "max_document_length": 32432, + "unique_documents": 66383, + "min_query_length": 2, + "average_query_length": 7344.177374406869, + "max_query_length": 9403, + "unique_queries": 13277, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 13277 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json b/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json index a7e6531492..4511605dd5 100644 --- a/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json +++ b/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 2246.575885305433, + "number_of_characters": 260957682, "num_samples": 187832, "num_queries": 31306, "num_documents": 156526, - "average_document_length": 0.009725743421916316, - "average_query_length": 0.02313435668710662, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 26, + "average_document_length": 144.85253568097312, + "max_document_length": 13851, + "unique_documents": 156526, + "min_query_length": 1, + "average_query_length": 7611.464064396601, + "max_query_length": 11354, + "unique_queries": 31306, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 31306 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json index 96802f81df..a817119b43 100644 --- a/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json @@ -1,66 +1,129 @@ { "test": { - "number_of_characters": 390.06276516809044, + "number_of_characters": 22407915, "num_samples": 1058035, "num_queries": 52561, "num_documents": 1005474, - "average_document_length": 1.9891116030847143e-06, - 
"average_query_length": 0.007383093266263778, + "min_document_length": 23, + "average_document_length": 20.28592186371801, + "max_document_length": 214210, + "unique_documents": 1005474, + "min_query_length": 2, + "average_query_length": 38.259317745096176, + "max_query_length": 2, + "unique_queries": 52561, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 52561, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 553.7934039415471, + "number_of_characters": 8792958, "num_samples": 295570, "num_queries": 14918, "num_documents": 280652, - "average_document_length": 7.126263130139817e-06, - "average_query_length": 0.0369884303486759, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 38, + "average_document_length": 29.330466200133973, + "max_document_length": 8326, + "unique_documents": 280652, + "min_query_length": 2, + "average_query_length": 37.62595522187961, + "max_query_length": 2, + "unique_queries": 14918, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 14918 }, "javascript": { - "number_of_characters": 445.70707991491946, + "number_of_characters": 1590642, "num_samples": 68492, "num_queries": 3291, "num_documents": 65201, - "average_document_length": 3.0674376159874846e-05, - "average_query_length": 0.1348243937754237, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 40, + "average_document_length": 22.395975521847824, + "max_document_length": 214210, + "unique_documents": 65201, + "min_query_length": 2, + "average_query_length": 39.6238225463385, + "max_query_length": 2, + "unique_queries": 3291, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3291 }, "go": { - "number_of_characters": 235.76803742920464, + "number_of_characters": 2264134, 
"num_samples": 190857, "num_queries": 8122, "num_documents": 182735, - "average_document_length": 1.0944810791583441e-05, - "average_query_length": 0.028782077989313547, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 23, + "average_document_length": 10.390259118395491, + "max_document_length": 3589, + "unique_documents": 182735, + "min_query_length": 2, + "average_query_length": 44.99753755232701, + "max_query_length": 2, + "unique_queries": 8122, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 8122 }, "ruby": { - "number_of_characters": 268.8731165741475, + "number_of_characters": 391703, "num_samples": 28849, "num_queries": 1261, "num_documents": 27588, - "average_document_length": 7.24952878062926e-05, - "average_query_length": 0.21163609561788066, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 36, + "average_document_length": 12.198310859794113, + "max_document_length": 2244, + "unique_documents": 27588, + "min_query_length": 2, + "average_query_length": 43.75574940523394, + "max_query_length": 2, + "unique_queries": 1261, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1261 }, "java": { - "number_of_characters": 344.5341853035144, + "number_of_characters": 4114584, "num_samples": 192016, "num_queries": 10955, "num_documents": 181061, - "average_document_length": 1.1046001071462105e-05, - "average_query_length": 0.03126738341428703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 38, + "average_document_length": 20.724849636310413, + "max_document_length": 5066, + "unique_documents": 181061, + "min_query_length": 2, + "average_query_length": 33.055408489274306, + "max_query_length": 2, + "unique_queries": 10955, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 10955 }, "php": { - "number_of_characters": 338.62194947909234, + "number_of_characters": 5253894, "num_samples": 282251, "num_queries": 14014, "num_documents": 268237, - "average_document_length": 7.456092932742314e-06, - "average_query_length": 0.024020404558234076, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 40, + "average_document_length": 17.586760961388624, + "max_document_length": 2995, + "unique_documents": 268237, + "min_query_length": 2, + "average_query_length": 38.28129013843299, + "max_query_length": 2, + "unique_queries": 14014, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 14014 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json index 188d4eb7ec..853c4c79c6 100644 --- a/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json @@ -1,66 +1,129 @@ { "test": { - "number_of_characters": 325.01233333333334, + "number_of_characters": 1950074, "num_samples": 12000, "num_queries": 6000, "num_documents": 6000, - "average_document_length": 0.00016666666666666666, - "average_query_length": 0.05400205555555556, + "min_document_length": 2, + "average_document_length": 324.01233333333334, + "max_document_length": 17533, + "unique_documents": 6000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 6000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 6000, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 467.546, + "number_of_characters": 467546, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.466546, - 
"average_relevant_docs_per_query": 1.0 + "min_document_length": 8, + "average_document_length": 466.546, + "max_document_length": 8636, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "javascript": { - "number_of_characters": 187.018, + "number_of_characters": 187018, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.186018, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 2, + "average_document_length": 186.018, + "max_document_length": 7657, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "go": { - "number_of_characters": 126.213, + "number_of_characters": 126213, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.125213, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 125.213, + "max_document_length": 1501, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "ruby": { - "number_of_characters": 314.818, + "number_of_characters": 314818, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.313818, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 5, + 
"average_document_length": 313.818, + "max_document_length": 17533, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "java": { - "number_of_characters": 691.36, + "number_of_characters": 691360, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.69036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 2, + "average_document_length": 690.36, + "max_document_length": 6473, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "php": { - "number_of_characters": 163.119, + "number_of_characters": 163119, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.162119, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 5, + "average_document_length": 162.119, + "max_document_length": 1240, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json b/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json index da6aa81741..07081e69c3 100644 --- a/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json +++ b/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 
2520.6537967751206, + "number_of_characters": 1744286, "num_samples": 1229, "num_queries": 221, "num_documents": 1008, - "average_document_length": 1.4965681295666415, - "average_query_length": 4.57969738539342, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 8, + "average_document_length": 221.90178571428572, + "max_document_length": 4147, + "unique_documents": 1008, + "min_query_length": 8, + "average_query_length": 6880.58371040724, + "max_query_length": 10852, + "unique_queries": 221, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 221 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json b/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json index cf266671f9..042658caad 100644 --- a/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json +++ b/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 3347.695751633987, + "number_of_characters": 1543912, "num_samples": 996, "num_queries": 180, "num_documents": 816, - "average_document_length": 1.8138155997693195, - "average_query_length": 10.37567901234568, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 376, + "average_document_length": 411.97549019607845, + "max_document_length": 8285, + "unique_documents": 816, + "min_query_length": 58, + "average_query_length": 6709.666666666667, + "max_query_length": 8469, + "unique_queries": 180, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 180 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CosQA.json b/mteb/descriptive_stats/Retrieval/CosQA.json index 5dd3a9637d..d8f17d4b21 100644 --- a/mteb/descriptive_stats/Retrieval/CosQA.json +++ b/mteb/descriptive_stats/Retrieval/CosQA.json @@ -1,11 +1,20 @@ { "test": { - 
"number_of_characters": 313.946741215298, + "number_of_characters": 5728450, "num_samples": 21104, "num_queries": 500, "num_documents": 20604, - "average_document_length": 0.013450433955314403, - "average_query_length": 0.073628, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 0.8933702193748787, + "max_document_length": 83, + "unique_documents": 20604, + "min_query_length": 88, + "average_query_length": 11420.086, + "max_query_length": 6396, + "unique_queries": 500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 500 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json b/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json index 6498bb736c..4598b2af77 100644 --- a/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 3799.701347237398, + "number_of_characters": 428294530, "num_samples": 115226, "num_queries": 997, "num_documents": 114229, - "average_document_length": 0.03281999517532617, - "average_query_length": 0.050858694438380335, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 0.4425671239352529, + "max_document_length": 98, + "unique_documents": 114229, + "min_query_length": 8, + "average_query_length": 429532.5737211635, + "max_query_length": 188424, + "unique_queries": 997, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 989 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/StackOverflowQA.json b/mteb/descriptive_stats/Retrieval/StackOverflowQA.json index d0949feac1..51972461e6 100644 --- a/mteb/descriptive_stats/Retrieval/StackOverflowQA.json +++ 
b/mteb/descriptive_stats/Retrieval/StackOverflowQA.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 2506.1079405241967, + "number_of_characters": 26584028, "num_samples": 21925, "num_queries": 1994, "num_documents": 19931, - "average_document_length": 0.060382397340162784, - "average_query_length": 0.6532730085944896, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 61, + "average_document_length": 130.32145903366614, + "max_document_length": 22234, + "unique_documents": 19931, + "min_query_length": 5, + "average_query_length": 12029.38365095286, + "max_query_length": 46028, + "unique_queries": 1994, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1994 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json b/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json index c833692b92..56c3964a58 100644 --- a/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json +++ b/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 210.9770886090626, + "number_of_characters": 14041553, "num_samples": 111702, "num_queries": 5851, "num_documents": 105851, - "average_document_length": 0.0012099201759594499, - "average_query_length": 0.014169514281931103, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 4.582686984534865, + "max_document_length": 281, + "unique_documents": 105851, + "min_query_length": 17, + "average_query_length": 2316.9494103572038, + "max_query_length": 762, + "unique_queries": 5851, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 5851 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Touche2020.json b/mteb/descriptive_stats/Retrieval/Touche2020.json index 
76798710b0..a3c37a54ee 100644 --- a/mteb/descriptive_stats/Retrieval/Touche2020.json +++ b/mteb/descriptive_stats/Retrieval/Touche2020.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 1763.7633372731125, + "number_of_characters": 658107591, "num_samples": 382594, "num_queries": 49, "num_documents": 382545, - "average_document_length": 0.00449707816294695, - "average_query_length": 0.8862973760932945, - "average_relevant_docs_per_query": 19.020408163265305 + "min_document_length": 16, + "average_document_length": 0.0055627442523101854, + "max_document_length": 83, + "unique_documents": 382545, + "min_query_length": 3, + "average_query_length": 13430723.734693877, + "max_query_length": 106072, + "unique_queries": 49, + "min_relevant_docs_per_query": 40, + "average_relevant_docs_per_query": 45.183673469387756, + "max_relevant_docs_per_query": 52, + "unique_relevant_docs": 2099 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json b/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json index 3d04c572c6..1b436abd75 100644 --- a/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json +++ b/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 2140.8203839475027, + "number_of_characters": 637047138, "num_samples": 303781, "num_queries": 49, "num_documents": 303732, - "average_document_length": 0.006905402830518125, - "average_query_length": 0.8862973760932945, - "average_relevant_docs_per_query": 34.93877551020408 + "min_document_length": 16, + "average_document_length": 0.007006176497701922, + "max_document_length": 83, + "unique_documents": 303732, + "min_query_length": 41, + "average_query_length": 13000918.57142857, + "max_query_length": 105983, + "unique_queries": 49, + "min_relevant_docs_per_query": 40, + "average_relevant_docs_per_query": 58.142857142857146, + "max_relevant_docs_per_query": 87, + "unique_relevant_docs": 2732 } } 
\ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json index 6a48e9b08e..f23a5ea1be 100644 --- a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json @@ -4,48 +4,112 @@ "num_docs": 121635, "num_queries": 123, "number_of_characters": 283654099, + "min_document_length": 74, "average_document_length": 2331.0777818884367, + "max_document_length": 24179, + "unique_docs": 121635, + "min_query_length": 32, "average_query_length": 81.8780487804878, + "max_query_length": 173, + "unique_queries": 75, + "min_instruction_length": 93, "average_instruction_length": 389.9512195121951, + "max_instruction_length": 887, + "unique_instructions": 75, + "min_changed_instruction_length": 180, "average_changed_instruction_length": 450.5528455284553, + "max_changed_instruction_length": 974, + "unique_changed_instructions": 123, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 10.43089430894309, + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000, "hf_subset_descriptive_stats": { "eng-fas": { "num_samples": 41229, "num_docs": 41189, "num_queries": 40, "number_of_characters": 129597567, + "min_document_length": 99, "average_document_length": 3145.4990895627475, + "max_document_length": 24179, + "unique_docs": 41189, + "min_query_length": 34, "average_query_length": 80.075, + "max_query_length": 124, + "unique_queries": 40, + "min_instruction_length": 150, "average_instruction_length": 396.875, + "max_instruction_length": 887, + "unique_instructions": 40, + "min_changed_instruction_length": 205, "average_changed_instruction_length": 463.175, + "max_changed_instruction_length": 974, + 
"unique_changed_instructions": 40, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.85, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 22, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "eng-rus": { "num_samples": 39366, "num_docs": 39326, "num_queries": 40, "number_of_characters": 109522175, + "min_document_length": 75, "average_document_length": 2784.0813456746173, + "max_document_length": 24061, + "unique_docs": 39326, + "min_query_length": 32, "average_query_length": 81.875, + "max_query_length": 173, + "unique_queries": 40, + "min_instruction_length": 93, "average_instruction_length": 371.125, + "max_instruction_length": 887, + "unique_instructions": 40, + "min_changed_instruction_length": 180, "average_changed_instruction_length": 431.8, + "max_changed_instruction_length": 957, + "unique_changed_instructions": 40, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 9.775, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "eng-zho": { "num_samples": 41163, "num_docs": 41120, "num_queries": 43, "number_of_characters": 44534357, + "min_document_length": 74, "average_document_length": 1082.0501215953307, + "max_document_length": 23840, + "unique_docs": 41120, + "min_query_length": 32, "average_query_length": 83.55813953488372, + "max_query_length": 159, + "unique_queries": 43, + "min_instruction_length": 157, "average_instruction_length": 401.0232558139535, + "max_instruction_length": 731, + "unique_instructions": 43, + "min_changed_instruction_length": 209, "average_changed_instruction_length": 456.25581395348837, + "max_changed_instruction_length": 822, + "unique_changed_instructions": 43, + 
"min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.651162790697674, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 } } } diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json b/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json index 893dfde705..54ae5d1ec2 100644 --- a/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json @@ -4,48 +4,112 @@ "num_docs": 121635, "num_queries": 123, "number_of_characters": 283622456, + "min_document_length": 74, "average_document_length": 2331.0777818884367, + "max_document_length": 24179, + "unique_docs": 121635, + "min_query_length": 10, "average_query_length": 57.113821138211385, + "max_query_length": 136, + "unique_queries": 123, + "min_instruction_length": 37, "average_instruction_length": 281.0650406504065, + "max_instruction_length": 1009, + "unique_instructions": 123, + "min_changed_instruction_length": 44, "average_changed_instruction_length": 326.9430894308943, + "max_changed_instruction_length": 1083, + "unique_changed_instructions": 123, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 10.43089430894309, + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000, "hf_subset_descriptive_stats": { "fas": { "num_samples": 41229, "num_docs": 41189, "num_queries": 40, "number_of_characters": 129593838, + "min_document_length": 99, "average_document_length": 3145.4990895627475, + "max_document_length": 24179, + "unique_docs": 41189, + "min_query_length": 34, "average_query_length": 72.65, + "max_query_length": 124, + "unique_queries": 40, + "min_instruction_length": 
121, "average_instruction_length": 358.925, + "max_instruction_length": 759, + "unique_instructions": 40, + "min_changed_instruction_length": 163, "average_changed_instruction_length": 415.325, + "max_changed_instruction_length": 842, + "unique_changed_instructions": 40, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.85, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 22, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "rus": { "num_samples": 39366, "num_docs": 39326, "num_queries": 40, "number_of_characters": 109523683, + "min_document_length": 75, "average_document_length": 2784.0813456746173, + "max_document_length": 24061, + "unique_docs": 39326, + "min_query_length": 26, "average_query_length": 77.5, + "max_query_length": 136, + "unique_queries": 40, + "min_instruction_length": 78, "average_instruction_length": 387.0, + "max_instruction_length": 1009, + "unique_instructions": 40, + "min_changed_instruction_length": 187, "average_changed_instruction_length": 458.0, + "max_changed_instruction_length": 1083, + "unique_changed_instructions": 40, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 9.775, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "zho": { "num_samples": 41163, "num_docs": 41120, "num_queries": 43, "number_of_characters": 44504935, + "min_document_length": 74, "average_document_length": 1082.0501215953307, + "max_document_length": 23840, + "unique_docs": 41120, + "min_query_length": 10, "average_query_length": 23.697674418604652, + "max_query_length": 44, + "unique_queries": 43, + "min_instruction_length": 37, "average_instruction_length": 110.09302325581395, + "max_instruction_length": 209, + 
"unique_instructions": 43, + "min_changed_instruction_length": 44, "average_changed_instruction_length": 122.81395348837209, + "max_changed_instruction_length": 229, + "unique_changed_instructions": 43, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.651162790697674, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 } } } diff --git a/mteb/descriptive_stats/STS/STS12.json b/mteb/descriptive_stats/STS/STS12.json index e9205c1727..a7e11197ac 100644 --- a/mteb/descriptive_stats/STS/STS12.json +++ b/mteb/descriptive_stats/STS/STS12.json @@ -2,8 +2,16 @@ "test": { "num_samples": 3108, "number_of_characters": 402118, + "min_sentence1_length": 3, "average_sentence1_len": 63.78893178893179, + "max_sentence1_length": 220, + "unique_sentence1": 2236, + "min_sentence2_length": 7, "average_sentence2_len": 65.5926640926641, - "avg_score": 3.5060643500643507 + "max_sentence2_length": 204, + "unique_sentence2": 2797, + "min_score": 0.0, + "avg_score": 3.5060643500643507, + "max_score": 5.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/STS17.json b/mteb/descriptive_stats/STS/STS17.json index 164cc9d1e6..912738035b 100644 --- a/mteb/descriptive_stats/STS/STS17.json +++ b/mteb/descriptive_stats/STS/STS17.json @@ -2,86 +2,182 @@ "test": { "num_samples": 5346, "number_of_characters": 400264, + "min_sentence1_length": 6, "average_sentence1_len": 38.14665170220726, + "max_sentence1_length": 976, + "unique_sentence1": 4900, + "min_sentence2_length": 6, "average_sentence2_len": 36.72502805836139, + "max_sentence2_length": 1007, + "unique_sentence2": 4470, + "min_score": 0.0, "avg_score": 2.3554804214989464, + "max_score": 5.0, "hf_subset_descriptive_stats": { "ko-ko": { "num_samples": 2846, "number_of_characters": 183387, + "min_sentence1_length": 6, 
"average_sentence1_len": 31.991918482080113, + "max_sentence1_length": 976, + "unique_sentence1": 2650, + "min_sentence2_length": 6, "average_sentence2_len": 32.44483485593816, - "avg_score": 2.469359920356055 + "max_sentence2_length": 1007, + "unique_sentence2": 2720, + "min_score": 0.0, + "avg_score": 2.469359920356055, + "max_score": 5.0 }, "ar-ar": { "num_samples": 250, "number_of_characters": 16247, + "min_sentence1_length": 11, "average_sentence1_len": 32.208, + "max_sentence1_length": 99, + "unique_sentence1": 250, + "min_sentence2_length": 9, "average_sentence2_len": 32.78, - "avg_score": 2.216800000000001 + "max_sentence2_length": 83, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.216800000000001, + "max_score": 5.0 }, "en-ar": { "num_samples": 250, "number_of_characters": 18764, + "min_sentence1_length": 13, "average_sentence1_len": 42.36, + "max_sentence1_length": 105, + "unique_sentence1": 250, + "min_sentence2_length": 10, "average_sentence2_len": 32.696, - "avg_score": 2.1423999999999994 + "max_sentence2_length": 104, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.1423999999999994, + "max_score": 5.0 }, "en-de": { "num_samples": 250, "number_of_characters": 22177, + "min_sentence1_length": 12, "average_sentence1_len": 43.952, + "max_sentence1_length": 94, + "unique_sentence1": 250, + "min_sentence2_length": 15, "average_sentence2_len": 44.756, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 104, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "en-en": { "num_samples": 250, "number_of_characters": 21669, + "min_sentence1_length": 12, "average_sentence1_len": 43.952, + "max_sentence1_length": 94, + "unique_sentence1": 250, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, 
"en-tr": { "num_samples": 250, "number_of_characters": 20879, + "min_sentence1_length": 15, "average_sentence1_len": 41.916, + "max_sentence1_length": 101, + "unique_sentence1": 250, + "min_sentence2_length": 10, "average_sentence2_len": 41.6, - "avg_score": 2.1335999999999986 + "max_sentence2_length": 107, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.1335999999999986, + "max_score": 5.0 }, "es-en": { "num_samples": 250, "number_of_characters": 23216, + "min_sentence1_length": 12, "average_sentence1_len": 50.84, + "max_sentence1_length": 160, + "unique_sentence1": 250, + "min_sentence2_length": 14, "average_sentence2_len": 42.024, - "avg_score": 2.1464000000000003 + "max_sentence2_length": 117, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.1464000000000003, + "max_score": 5.0 }, "es-es": { "num_samples": 250, "number_of_characters": 25265, + "min_sentence1_length": 18, "average_sentence1_len": 49.836, + "max_sentence1_length": 136, + "unique_sentence1": 250, + "min_sentence2_length": 13, "average_sentence2_len": 51.224, - "avg_score": 2.2312000000000007 + "max_sentence2_length": 129, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2312000000000007, + "max_score": 5.0 }, "fr-en": { "num_samples": 250, "number_of_characters": 23087, + "min_sentence1_length": 19, "average_sentence1_len": 49.624, + "max_sentence1_length": 115, + "unique_sentence1": 250, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "it-en": { "num_samples": 250, "number_of_characters": 23188, + "min_sentence1_length": 15, "average_sentence1_len": 50.028, + "max_sentence1_length": 113, + "unique_sentence1": 250, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "unique_sentence2": 250, + 
"min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "nl-en": { "num_samples": 250, "number_of_characters": 22385, + "min_sentence1_length": 14, "average_sentence1_len": 46.816, + "max_sentence1_length": 123, + "unique_sentence1": 250, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 } } } diff --git a/mteb/descriptive_stats/Summarization/SummEval.json b/mteb/descriptive_stats/Summarization/SummEval.json index e9c0b172e8..4c2f133abb 100644 --- a/mteb/descriptive_stats/Summarization/SummEval.json +++ b/mteb/descriptive_stats/Summarization/SummEval.json @@ -2,9 +2,54 @@ "test": { "num_samples": 100, "number_of_characters": 212735, - "avg_text_len": 2100.35, - "avg_human_summaries_len": 11.0, - "avg_machine_summaries_len": 16.0, - "avg_relevance": 3.7770833333333336 + "min_text_length": 626, + "avg_text_length": 2100.35, + "max_text_length": 3153, + "unique_texts": 100, + "min_human_summaries_length": 11, + "avg_human_summaries_length": 11.0, + "max_human_summaries_length": 11, + "unique_human_summaries": 1100, + "min_machine_summaries_length": 16, + "avg_machine_summaries_length": 16.0, + "max_machine_summaries_length": 16, + "unique_machine_summaries": 1548, + "min_relevance": [ + 1.0, + 1.3333333333333333, + 3.6666666666666665, + 2.3333333333333335, + 3.6666666666666665, + 3.0, + 4.333333333333333, + 4.0, + 2.6666666666666665, + 4.0, + 2.0, + 4.666666666666667, + 4.333333333333333, + 1.0, + 2.0, + 1.0 + ], + "avg_relevance": 3.7770833333333336, + "max_relevance": [ + 5.0, + 4.666666666666667, + 4.333333333333333, + 2.6666666666666665, + 4.666666666666667, + 4.666666666666667, + 4.666666666666667, + 4.333333333333333, + 4.0, + 4.333333333333333, + 4.666666666666667, + 4.666666666666667, + 4.333333333333333, + 2.3333333333333335, + 4.666666666666667, + 
4.666666666666667 + ] } } \ No newline at end of file diff --git a/mteb/leaderboard/figures.py b/mteb/leaderboard/figures.py index e8419d9a31..7a354f7c82 100644 --- a/mteb/leaderboard/figures.py +++ b/mteb/leaderboard/figures.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import numpy as np import pandas as pd import plotly.express as px @@ -68,7 +70,7 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: hover_name="Model", ) fig.update_layout( - coloraxis_colorbar=dict( + coloraxis_colorbar=dict( # noqa title="Max Tokens", tickvals=[2, 3, 4, 5], ticktext=[ @@ -78,7 +80,7 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: "100K", ], ), - hoverlabel=dict( + hoverlabel=dict( # noqa bgcolor="white", font_size=16, ), @@ -87,7 +89,7 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: textposition="top center", ) fig.update_layout( - font=dict(size=16, color="black"), - margin=dict(b=20, t=10, l=20, r=10), + font=dict(size=16, color="black"), # noqa + margin=dict(b=20, t=10, l=20, r=10), # noqa ) return fig diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py index 034b33b4f8..d9b830d236 100644 --- a/mteb/leaderboard/table.py +++ b/mteb/leaderboard/table.py @@ -88,7 +88,7 @@ def get_means_per_types(df: pd.DataFrame) -> pd.DataFrame: [name_to_score.get(task_name, np.nan) for task_name in task_names] ) records.append( - dict( + dict( # noqa model_name=model_name, model_revision=model_revision, task_type=task_type, diff --git a/pyproject.toml b/pyproject.toml index 460fc8aed4..223e6f9673 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.19.4" +version = "1.19.5" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index f599a21990..c9fdf22865 100644 --- a/tests/test_benchmark/mock_tasks.py +++ 
b/tests/test_benchmark/mock_tasks.py @@ -51,10 +51,25 @@ class MockClassificationTask(AbsTaskClassification): "test": { "num_samples": 2, "number_of_characters": 52, + "number_texts_intersect_with_train": 2, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, - } + }, + "train": { + "num_samples": 2, + "number_of_characters": 52, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, } metadata = TaskMetadata( @@ -92,26 +107,73 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "test": { "num_samples": 4, "number_of_characters": 104, + "number_texts_intersect_with_train": 2, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 2}, "1": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 52, + "number_texts_intersect_with_train": 2, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, "fra": { "num_samples": 2, "number_of_characters": 52, + "number_texts_intersect_with_train": 2, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, }, - } + }, + "train": { + "num_samples": 4, + "number_of_characters": 104, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 2}, "1": {"count": 2}}, + "hf_subset_descriptive_stats": { + "eng": { + 
"num_samples": 2, + "number_of_characters": 52, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + "fra": { + "num_samples": 2, + "number_of_characters": 52, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + }, + }, } metadata = TaskMetadata( @@ -152,10 +214,17 @@ def load_data(self, **kwargs): class MockBitextMiningTask(AbsTaskBitextMining): expected_stats = { "test": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, } } @@ -189,22 +258,43 @@ def load_data(self, **kwargs): class MockMultilingualBitextMiningTask(AbsTaskBitextMining, MultilingualTask): expected_stats = { "test": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 4, "number_of_characters": 226, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "hf_subset_descriptive_stats": { "eng": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 2, "number_of_characters": 113, - }, - "fra": { + "unique_pairs": 2, + "min_sentence1_length": 23, "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 
24, "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + }, + "fra": { "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, }, }, } @@ -245,22 +335,43 @@ class MockMultilingualParallelBitextMiningTask(AbsTaskBitextMining, Multilingual parallel_subsets = True expected_stats = { "test": { - "average_sentence1_length": 28.25, - "average_sentence2_length": 28.25, "num_samples": 4, "number_of_characters": 226, + "unique_pairs": 4, + "min_sentence1_length": 23, + "average_sentence1_length": 28.25, + "max_sentence1_length": 37, + "unique_sentence1": 4, + "min_sentence2_length": 23, + "average_sentence2_length": 28.25, + "max_sentence2_length": 37, + "unique_sentence2": 4, "hf_subset_descriptive_stats": { "eng_Latn-fra_Latn": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, }, "fra_Latn-eng_Latn": { - "average_sentence1_length": 30.5, - "average_sentence2_length": 26.0, "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 24, + "average_sentence1_length": 30.5, + "max_sentence1_length": 37, + "unique_sentence1": 2, + "min_sentence2_length": 23, + "average_sentence2_length": 26.0, + "max_sentence2_length": 29, + "unique_sentence2": 2, }, }, } @@ -302,8 +413,13 @@ class MockClusteringTask(AbsTaskClustering): "test": { "num_samples": 1, "number_of_characters": 3, + "min_text_length": 3, "average_text_length": 3.0, + 
"max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 1, "average_labels_per_text": 3.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, } @@ -344,24 +460,39 @@ class MockMultilingualClusteringTask(AbsTaskClustering, MultilingualTask): "test": { "num_samples": 2, "number_of_characters": 6, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 2, "average_labels_per_text": 3.0, + "max_labels_per_text": 2, "unique_labels": 3, "labels": {"0": {"count": 2}, "1": {"count": 2}, "2": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 1, "number_of_characters": 3, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 1, "average_labels_per_text": 3.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, "fra": { "num_samples": 1, "number_of_characters": 3, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 1, "average_labels_per_text": 3.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, @@ -411,8 +542,13 @@ class MockClusteringFastTask(AbsTaskClusteringFast): "test": { "num_samples": 3, "number_of_characters": 81, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 1, "average_labels_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, } @@ -453,24 +589,39 @@ class MockMultilingualClusteringFastTask(AbsTaskClusteringFast, MultilingualTask "test": { "num_samples": 6, "number_of_characters": 162, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + 
"unique_texts": 2, + "min_labels_per_text": 2, "average_labels_per_text": 1.0, + "max_labels_per_text": 2, "unique_labels": 3, "labels": {"0": {"count": 2}, "1": {"count": 2}, "2": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 3, "number_of_characters": 81, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 1, "average_labels_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, "fra": { "num_samples": 3, "number_of_characters": 81, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 1, "average_labels_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, @@ -516,8 +667,15 @@ class MockPairClassificationTask(AbsTaskPairClassification): "test": { "num_samples": 2, "number_of_characters": 113, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "unique_pairs": 2, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 1}, "0": {"count": 1}}, } @@ -561,24 +719,45 @@ class MockMultilingualPairClassificationTask( "test": { "num_samples": 4, "number_of_characters": 226, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "unique_pairs": 2, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 2}, "0": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, 
"number_of_characters": 113, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "unique_pairs": 2, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 1}, "0": {"count": 1}}, }, "fra": { "num_samples": 2, "number_of_characters": 113, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "unique_pairs": 2, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 1}, "0": {"count": 1}}, }, @@ -626,9 +805,18 @@ class MockSTSTask(AbsTaskSTS): "test": { "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, } } @@ -673,23 +861,50 @@ class MockMultilingualSTSTask(AbsTaskSTS, MultilingualTask): "test": { "num_samples": 4, "number_of_characters": 226, + "unique_pairs": 2, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, 
"average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, }, "fra": { "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, }, }, } @@ -741,10 +956,21 @@ class MockSummarizationTask(AbsTaskSummarization): "test": { "num_samples": 2, "number_of_characters": 60, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], } } @@ -794,26 +1020,59 @@ class MockMultilingualSummarizationTask(AbsTaskSummarization, MultilingualTask): "test": { "num_samples": 4, "number_of_characters": 120, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 60, 
- "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], }, "fra": { "num_samples": 2, "number_of_characters": 60, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], }, }, } @@ -867,16 +1126,30 @@ def metadata_dict(self) -> dict[str, str]: class MockRerankingTask(AbsTaskReranking): expected_stats = { "test": { - "average_document_length": 13.5, - "average_query_length": 13.0, - "average_instruction_length": 0, - "num_documents": 2, + "number_of_characters": 106, + "num_samples": 4, "num_queries": 2, - "average_top_ranked_per_query": 2.0, - "num_instructions": 0, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 27.0, + "max_query_length": 27, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - "number_of_characters": 53.0, - "num_samples": 4, + "max_relevant_docs_per_query": 2, + 
"unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 2.0, + "max_top_ranked_per_query": 2, } } @@ -921,40 +1194,82 @@ def load_data(self, **kwargs): class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): expected_stats = { "test": { - "average_document_length": 7.5, - "average_query_length": 6.5, - "num_documents": 4, + "number_of_characters": 224, + "num_samples": 8, "num_queries": 4, - "num_instructions": 0, + "num_documents": 4, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 4, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 4, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - "average_instruction_length": 0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, - "num_samples": 8, - "number_of_characters": 56.0, + "max_top_ranked_per_query": 2, "hf_subset_descriptive_stats": { "eng": { - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_instruction_length": 0, - "average_top_ranked_per_query": 2.0, - "num_instructions": 0, - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 
1.0, - "num_samples": 4, - "number_of_characters": 56.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 2.0, + "max_top_ranked_per_query": 2, }, "fra": { - "average_document_length": 15.0, - "average_query_length": 13.0, - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - "average_instruction_length": 0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, - "num_instructions": 0, - "num_samples": 4, - "number_of_characters": 56.0, + "max_top_ranked_per_query": 2, }, }, } @@ -1011,16 +1326,30 @@ def load_data(self, **kwargs): class MockRetrievalTask(AbsTaskRetrieval): expected_stats = { "test": { - "average_document_length": 15.0, - "average_query_length": 13.0, - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - "average_top_ranked_per_query": 0, - 
"average_instruction_length": 0, - "num_instructions": 0, - "num_samples": 4, - "number_of_characters": 56.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, } } @@ -1059,40 +1388,82 @@ def load_data(self, **kwargs): class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): expected_stats = { "test": { - "average_document_length": 7.5, - "average_query_length": 6.5, - "num_documents": 4, + "number_of_characters": 224, + "num_samples": 8, "num_queries": 4, + "num_documents": 4, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 4, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 4, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - "num_instructions": 0, - "average_top_ranked_per_query": 0, - "average_instruction_length": 0, - "num_samples": 8, - "number_of_characters": 56.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, "hf_subset_descriptive_stats": { "eng": { - "average_document_length": 15.0, - "average_query_length": 13.0, - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + 
"max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - "average_top_ranked_per_query": 0, - "average_instruction_length": 0, - "num_instructions": 0, - "num_samples": 4, - "number_of_characters": 56.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, }, "fra": { - "average_document_length": 15.0, - "average_query_length": 13.0, - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - "average_top_ranked_per_query": 0, - "average_instruction_length": 0, - "num_instructions": 0, - "num_samples": 4, - "number_of_characters": 56.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, }, }, } @@ -1140,13 +1511,33 @@ def load_data(self, **kwargs): class MockMultilabelClassification(AbsTaskMultilabelClassification): expected_stats = { "test": { - "average_text_length": 26.0, + "num_samples": 6, "number_of_characters": 156, + "number_texts_intersect_with_train": 2, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + 
"min_labels_per_text": 2, "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + "train": { "num_samples": 6, + "number_of_characters": 156, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 6}, "1": {"count": 6}}, - } + }, } metadata = TaskMetadata( @@ -1184,31 +1575,93 @@ class MockMultilingualMultilabelClassification( ): expected_stats = { "test": { - "average_text_length": 26.0, + "num_samples": 12, "number_of_characters": 312, + "number_texts_intersect_with_train": 2, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, "average_label_per_text": 2.0, - "num_samples": 12, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 12}, "1": {"count": 12}}, "hf_subset_descriptive_stats": { "eng": { - "average_text_length": 26.0, + "num_samples": 6, "number_of_characters": 156, + "number_texts_intersect_with_train": 2, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, "average_label_per_text": 2.0, - "num_samples": 6, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 6}, "1": {"count": 6}}, }, "fra": { + "num_samples": 6, + "number_of_characters": 156, + "number_texts_intersect_with_train": 2, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + }, + }, + "train": { + "num_samples": 12, + "number_of_characters": 312, + 
"number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 12}, "1": {"count": 12}}, + "hf_subset_descriptive_stats": { + "eng": { + "num_samples": 6, "number_of_characters": 156, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + "fra": { "num_samples": 6, + "number_of_characters": 156, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 6}, "1": {"count": 6}}, }, }, - } + }, } metadata = TaskMetadata( @@ -1250,16 +1703,30 @@ def load_data(self, **kwargs): class MockInstructionRetrieval(AbsTaskRetrieval): expected_stats = { "test": { - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_instruction_length": 29.0, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - "average_top_ranked_per_query": 0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, "num_instructions": 2, - "num_samples": 4, - "number_of_characters": 56.0, + "min_instruction_length": 26, + 
"average_instruction_length": 58, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, } } @@ -1303,16 +1770,30 @@ def load_data(self, **kwargs): class MockInstructionReranking(AbsTaskReranking): expected_stats = { "test": { - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, - "num_instructions": 2, - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_instruction_length": 29.0, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": 2, + "min_instruction_length": 26, + "average_instruction_length": 58, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, - "num_samples": 4, - "number_of_characters": 56.0, + "max_top_ranked_per_query": 2, } } @@ -1361,40 +1842,82 @@ def load_data(self, **kwargs): class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): expected_stats = { "test": { - "num_documents": 4, + "number_of_characters": 224, + "num_samples": 8, "num_queries": 4, - "num_instructions": 4, - "average_document_length": 7.5, - "average_query_length": 6.5, - "average_instruction_length": 29.0, + "num_documents": 4, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 4, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 4, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - 
"average_top_ranked_per_query": 0, - "num_samples": 8, - "number_of_characters": 56.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4, + "num_instructions": 4, + "min_instruction_length": 26, + "average_instruction_length": 116, + "max_instruction_length": 32, + "unique_instructions": 4, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, "hf_subset_descriptive_stats": { "eng": { - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, - "num_instructions": 2, - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_instruction_length": 29.0, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - "average_top_ranked_per_query": 0, - "num_samples": 4, - "number_of_characters": 56.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": 2, + "min_instruction_length": 26, + "average_instruction_length": 58, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, }, "fra": { - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, - "num_instructions": 2, - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_instruction_length": 29.0, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, - 
"average_top_ranked_per_query": 0, - "num_samples": 4, - "number_of_characters": 56.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": 2, + "min_instruction_length": 26, + "average_instruction_length": 58, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, }, }, } @@ -1457,40 +1980,82 @@ def load_data(self, **kwargs): class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): expected_stats = { "test": { - "num_documents": 4, + "number_of_characters": 224, + "num_samples": 8, "num_queries": 4, - "num_instructions": 4, - "average_document_length": 7.5, - "average_query_length": 6.5, - "average_instruction_length": 29.0, + "num_documents": 4, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 4, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 4, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4, + "num_instructions": 4, + "min_instruction_length": 26, + "average_instruction_length": 116, + "max_instruction_length": 32, + "unique_instructions": 4, + "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, - "num_samples": 8, - "number_of_characters": 56.0, + "max_top_ranked_per_query": 2, "hf_subset_descriptive_stats": { "eng": { - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, - "num_instructions": 2, - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_instruction_length": 29.0, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + 
"unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": 2, + "min_instruction_length": 26, + "average_instruction_length": 58, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, - "num_samples": 4, - "number_of_characters": 56.0, + "max_top_ranked_per_query": 2, }, "fra": { - "num_documents": 2, + "number_of_characters": 112, + "num_samples": 4, "num_queries": 2, - "num_instructions": 2, - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_instruction_length": 29.0, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": 2, + "min_instruction_length": 26, + "average_instruction_length": 58, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, - "num_samples": 4, - "number_of_characters": 56.0, + "max_top_ranked_per_query": 2, }, }, } diff --git a/tests/test_tasks/test_metadata.py b/tests/test_tasks/test_metadata.py index 1e7e1b24df..9c11b4ccf3 100644 --- a/tests/test_tasks/test_metadata.py +++ b/tests/test_tasks/test_metadata.py @@ -8,9 +8,11 @@ @pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID) def test_descriptive_stats(task): result_stat = task.calculate_metadata_metrics() + # remove descriptive task file + task.metadata.descriptive_stat_path.unlink() task_stat = task.expected_stats + print(task.metadata.name) + print(result_stat) for key, value in result_stat.items(): assert key in task_stat assert value == 
task_stat[key] - # remove descriptive task file - task.metadata.descriptive_stat_path.unlink() From 70a3ff2c6cf7d9a642233e75daba8cfe86061fc6 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Fri, 15 Nov 2024 20:53:12 +0500 Subject: [PATCH 06/40] feat: enable codecarbon by default (#1428) * enable codecarbon by default * lint * update flag * add allow_multiple_runs param * make lint * add warning * lint * negate the flag --------- Co-authored-by: Isaac Chung --- mteb/cli.py | 10 ++++++---- mteb/evaluation/MTEB.py | 18 +++++++++--------- pyproject.toml | 2 +- tests/test_cli.py | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/mteb/cli.py b/mteb/cli.py index 65d6938416..f9dc1a352f 100644 --- a/mteb/cli.py +++ b/mteb/cli.py @@ -142,12 +142,14 @@ def run(args: argparse.Namespace) -> None: args.save_predictions if hasattr(args, "save_predictions") else False ) + enable_co2_tracker = not args.disable_co2_tracker + eval.run( model, verbosity=args.verbosity, output_folder=args.output_folder, eval_splits=args.eval_splits, - co2_tracker=args.co2_tracker, + co2_tracker=enable_co2_tracker, overwrite_results=args.overwrite, encode_kwargs=encode_kwargs, save_predictions=save_predictions, @@ -263,10 +265,10 @@ def add_run_parser(subparsers) -> None: "-v", "--verbosity", type=int, default=2, help="Verbosity level" ) parser.add_argument( - "--co2_tracker", - type=bool, + "--disable_co2_tracker", + action="store_true", default=False, - help="Enable CO₂ tracker, disabled by default", + help="Disable CO₂ tracker, enabled by default", ) parser.add_argument( "--eval_splits", diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index 64d5e9071b..a04e55f96b 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -13,6 +13,7 @@ from typing import Any import datasets +from codecarbon import EmissionsTracker from sentence_transformers import SentenceTransformer from mteb.encoder_interface import Encoder @@ -322,7 +323,7 @@ def run( 
eval_splits=None, overwrite_results: bool = False, raise_error: bool = True, - co2_tracker: bool = False, + co2_tracker: bool = True, encode_kwargs: dict[str, Any] = {}, **kwargs, ) -> list[TaskResult]: @@ -418,15 +419,14 @@ def run( kg_co2_emissions: int | None = 0 if co2_tracker else None for split in task_eval_splits: if co2_tracker: - try: - from codecarbon import EmissionsTracker - except ImportError: - raise ImportError( - "To use the CO2 emissions tracker, please install codecarbon using 'pip install codecarbon'" - ) - + logger.warning( + "Evaluating multiple MTEB runs simultaniously will produce incorrect CO₂ results" + ) with EmissionsTracker( - save_to_file=False, save_to_api=False, logging_logger=logger + save_to_file=False, + save_to_api=False, + logging_logger=logger, + allow_multiple_runs=True, ) as tracker: results, tick, tock = self._run_eval( task, diff --git a/pyproject.toml b/pyproject.toml index 223e6f9673..ebcf63dba6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ "typing_extensions>=0.0.0", "eval_type_backport>=0.0.0", "polars>=0.20.22", + "codecarbon>=2.0.0", ] @@ -54,7 +55,6 @@ mteb = "mteb.cli:main" [project.optional-dependencies] dev = ["ruff==0.6.4", # locked so we don't get PRs which fail only due to a lint update "pytest", "pytest-xdist", "pytest-coverage"] -codecarbon = ["codecarbon"] speedtask = ["GPUtil>=1.4.0", "psutil>=5.9.8"] peft = ["peft>=0.11.0"] leaderboard = ["gradio>=5.5.0", "gradio_rangeslider>=0.0.8"] diff --git a/tests/test_cli.py b/tests/test_cli.py index 842fa9368d..407afdeb4a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -62,7 +62,7 @@ def test_run_task( task_types=None, languages=None, batch_size=None, - co2_tracker=None, + disable_co2_tracker=None, overwrite=True, eval_splits=None, benchmarks=None, From 0e9b6fdc0a13d1c54b112ee228b1101bb63579df Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Mon, 18 Nov 2024 16:19:29 +0500 Subject: [PATCH 07/40] Add decriptive stat 
almost to all datasets (#1466) * run tasks * remove test script * lint * remove cache * fix sickbrsts * fix tests * add datasets --- mteb/abstasks/AbsTaskPairClassification.py | 5 +- mteb/abstasks/AbsTaskRetrieval.py | 18 +- mteb/abstasks/dataloaders.py | 4 +- .../BitextMining/DiaBlaBitextMining.json | 43 + .../BitextMining/LinceMTBitextMining.json | 30 + .../BitextMining/NusaXBitextMining.json | 160 + .../BitextMining/RomaTalesBitextMining.json | 30 + .../BitextMining/SRNCorpusBitextMining.json | 43 + .../BitextMining/Tatoeba.json | 1473 + .../Classification/AJGT.json | 20 + .../AfriSentiClassification.json | 543 + .../AfriSentiLangClassification.json | 98 + .../Classification/AllegroReviews.json | 56 + .../AmazonCounterfactualClassification.json | 278 + .../AmazonPolarityClassification.json | 38 + .../AmazonReviewsClassification.json | 575 + .../AngryTweetsClassification.json | 44 + .../Classification/ArxivClassification.json | 92 + .../Banking77Classification.json | 488 + .../BengaliDocumentClassification.json | 104 + .../BengaliHateSpeechClassification.json | 29 + .../BengaliSentimentAnalysis.json | 20 + ...rianStoreReviewSentimentClassfication.json | 50 + .../descriptive_stats/Classification/CBD.json | 38 + ...DCZMovieReviewSentimentClassification.json | 62 + ...DSKMovieReviewSentimentClassification.json | 62 + ...censeLicenseeLegalBenchClassification.json | 38 + ...censeLicensorLegalBenchClassification.json | 38 + ...ntiAssignmentLegalBenchClassification.json | 38 + ...ADAuditRightsLegalBenchClassification.json | 38 + ...apOnLiabilityLegalBenchClassification.json | 38 + ...angeOfControlLegalBenchClassification.json | 38 + ...tionExceptionLegalBenchClassification.json | 38 + ...enantNotToSueLegalBenchClassification.json | 38 + ...EffectiveDateLegalBenchClassification.json | 38 + ...ADExclusivityLegalBenchClassification.json | 38 + ...xpirationDateLegalBenchClassification.json | 38 + ...DGoverningLawLegalBenchClassification.json | 38 + 
...hipAssignmentLegalBenchClassification.json | 38 + ...CUADInsuranceLegalBenchClassification.json | 38 + ...petualLicenseLegalBenchClassification.json | 38 + ...ntIPOwnershipLegalBenchClassification.json | 38 + ...DLicenseGrantLegalBenchClassification.json | 38 + ...idatedDamagesLegalBenchClassification.json | 38 + ...mumCommitmentLegalBenchClassification.json | 38 + ...FavoredNationLegalBenchClassification.json | 38 + ...itOfCustomersLegalBenchClassification.json | 38 + ...itOfEmployeesLegalBenchClassification.json | 38 + ...UADNonCompeteLegalBenchClassification.json | 38 + ...DisparagementLegalBenchClassification.json | 38 + ...erableLicenseLegalBenchClassification.json | 38 + ...minateRenewalLegalBenchClassification.json | 38 + ...ationServicesLegalBenchClassification.json | 38 + ...eRestrictionsLegalBenchClassification.json | 38 + ...ADRenewalTermLegalBenchClassification.json | 38 + ...ProfitSharingLegalBenchClassification.json | 38 + ...DRofrRofoRofnLegalBenchClassification.json | 38 + ...rceCodeEscrowLegalBenchClassification.json | 38 + ...orConvenienceLegalBenchClassification.json | 38 + ...tyBeneficiaryLegalBenchClassification.json | 38 + ...ppedLiabilityLegalBenchClassification.json | 38 + ...CanEatLicenseLegalBenchClassification.json | 38 + ...meRestrictionLegalBenchClassification.json | 38 + ...rantyDurationLegalBenchClassification.json | 38 + ...CourtOutcomesLegalBenchClassification.json | 44 + .../CataloniaTweetClassification.json | 197 + ...tyOfAgreementLegalBenchClassification.json | 38 + ...dentificationLegalBenchClassification.json | 38 + ...edInformationLegalBenchClassification.json | 38 + ...NLILimitedUseLegalBenchClassification.json | 38 + ...LINoLicensingLegalBenchClassification.json | 38 + ...ledDisclosureLegalBenchClassification.json | 38 + ...arInformationLegalBenchClassification.json | 38 + ...rmissibleCopyLegalBenchClassification.json | 38 + ...arInformationLegalBenchClassification.json | 38 + ...entPossessionLegalBenchClassification.json 
| 38 + ...alInformationLegalBenchClassification.json | 38 + ...WithEmployeesLegalBenchClassification.json | 38 + ...hThirdPartiesLegalBenchClassification.json | 38 + ...OfObligationsLegalBenchClassification.json | 38 + ...orateLobbyingLegalBenchClassification.json | 38 + .../CyrillicTurkicLangClassification.json | 80 + ...hProductReviewSentimentClassification.json | 44 + .../CzechSoMeSentimentClassification.json | 44 + .../CzechSubjectivityClassification.json | 56 + .../Classification/DBpediaClassification.json | 110 + .../Classification/DKHateClassification.json | 38 + .../Classification/DalajClassification.json | 38 + ...DanishPoliticalCommentsClassification.json | 29 + ...lassificationLegalBenchClassification.json | 38 + .../Diversity1LegalBenchClassification.json | 38 + .../Diversity2LegalBenchClassification.json | 38 + .../Diversity3LegalBenchClassification.json | 38 + .../Diversity4LegalBenchClassification.json | 38 + .../Diversity5LegalBenchClassification.json | 38 + .../Diversity6LegalBenchClassification.json | 38 + ...utchBookReviewSentimentClassification.json | 38 + .../Classification/EmotionClassification.json | 92 + .../EstonianValenceClassification.json | 50 + .../FilipinoShopeeReviewsClassification.json | 83 + .../FinToxicityClassification.json | 38 + .../FinancialPhrasebankClassification.json | 23 + .../Classification/FrenchBookReviews.json | 23 + .../Classification/FrenkEnClassification.json | 38 + .../Classification/FrenkHrClassification.json | 38 + .../Classification/FrenkSlClassification.json | 38 + ...cisionSectionLegalBenchClassification.json | 68 + .../GeoreviewClassification.json | 56 + ...ticiansTwitterSentimentClassification.json | 44 + .../GreekLegalCodeClassification.json | 3146 ++ .../GujaratiNewsClassification.json | 44 + .../HateSpeechPortugueseClassification.json | 20 + .../HeadlineClassification.json | 62 + .../HebrewSentimentAnalysis.json | 44 + .../HinDialectClassification.json | 152 + .../HindiDiscourseClassification.json | 32 + 
.../HotelReviewSentimentClassification.json | 26 + .../Classification/IFlyTek.json | 740 + .../Classification/ImdbClassification.json | 38 + .../InappropriatenessClassification.json | 38 + .../IndicLangClassification.json | 140 + .../IndicNLPNewsClassification.json | 408 + .../IndicSentimentClassification.json | 510 + .../IndonesianIdClickbaitClassification.json | 20 + ...ianMongabayConservationClassification.json | 65 + ...nterpretationLegalBenchClassification.json | 44 + ...shipQuestionsLegalBenchClassification.json | 38 + .../IsiZuluNewsClassification.json | 62 + .../ItaCaseholdClassification.json | 431 + .../Classification/Itacola.json | 38 + .../JCrewBlockerLegalBenchClassification.json | 38 + .../Classification/JDReview.json | 38 + .../JavaneseIMDBClassification.json | 38 + .../Classification/KLUE-TC.json | 68 + .../KannadaNewsClassification.json | 23 + .../KinopoiskClassification.json | 44 + .../Classification/KorFin.json | 23 + .../Classification/KorHateClassification.json | 23 + .../KorSarcasmClassification.json | 20 + .../KurdishSentimentClassification.json | 38 + .../LccSentimentClassification.json | 44 + ...HandsBenefitsLegalBenchClassification.json | 38 + ...HandsBusinessLegalBenchClassification.json | 38 + ...HandsConsumerLegalBenchClassification.json | 38 + ...edHandsCourtsLegalBenchClassification.json | 38 + ...nedHandsCrimeLegalBenchClassification.json | 38 + ...dHandsDivorceLegalBenchClassification.json | 38 + ...esticViolenceLegalBenchClassification.json | 38 + ...andsEducationLegalBenchClassification.json | 38 + ...ndsEmploymentLegalBenchClassification.json | 38 + ...dHandsEstatesLegalBenchClassification.json | 38 + ...edHandsFamilyLegalBenchClassification.json | 38 + ...edHandsHealthLegalBenchClassification.json | 38 + ...dHandsHousingLegalBenchClassification.json | 38 + ...dsImmigrationLegalBenchClassification.json | 38 + ...nedHandsTortsLegalBenchClassification.json | 38 + ...dHandsTrafficLegalBenchClassification.json | 38 + 
...ningCausalityLegalBenchClassification.json | 38 + .../MAUDLegalBenchClassification.json | 86 + .../MTOPDomainClassification.json | 953 + .../MTOPIntentClassification.json | 6398 ++++ ...acedonianTweetSentimentClassification.json | 44 + .../MalayalamNewsClassification.json | 44 + .../MarathiNewsClassification.json | 44 + .../MasakhaNEWSClassification.json | 978 + .../MassiveIntentClassification.json | 29648 ++++++++++++++++ .../MassiveScenarioClassification.json | 10304 ++++++ .../Classification/Moroco.json | 62 + .../MovieReviewSentimentClassification.json | 56 + .../MultiHateClassification.json | 438 + .../Classification/MultilingualSentiment.json | 65 + .../MultilingualSentimentClassification.json | 1086 + .../Classification/MyanmarNews.json | 26 + ...udicialEthicsLegalBenchClassification.json | 38 + .../Classification/NaijaSenti.json | 216 + .../NepaliNewsClassification.json | 23 + .../Classification/NewsClassification.json | 50 + .../Classification/NoRecClassification.json | 44 + .../NordicLangClassification.json | 62 + .../NorwegianParliamentClassification.json | 56 + .../NusaParagraphEmotionClassification.json | 732 + .../NusaParagraphTopicClassification.json | 792 + .../Classification/NusaX-senti.json | 552 + ...DataRetentionLegalBenchClassification.json | 38 + ...5DataSecurityLegalBenchClassification.json | 38 + ...115DoNotTrackLegalBenchClassification.json | 38 + ...CollectionUseLegalBenchClassification.json | 38 + ...ificAudiencesLegalBenchClassification.json | 38 + ...5PolicyChangeLegalBenchClassification.json | 38 + ...ingCollectionLegalBenchClassification.json | 38 + ...itAndDeletionLegalBenchClassification.json | 38 + ...ChoiceControlLegalBenchClassification.json | 38 + .../OdiaNewsClassification.json | 44 + .../Classification/OnlineShopping.json | 38 + ...ineStoreReviewSentimentClassification.json | 29 + ...estionPurposeLegalBenchClassification.json | 68 + .../OverrulingLegalBenchClassification.json | 38 + 
.../descriptive_stats/Classification/PAC.json | 38 + .../PROALegalBenchClassification.json | 38 + .../Classification/PatentClassification.json | 80 + .../PersianFoodSentimentClassification.json | 56 + ...lJurisdictionLegalBenchClassification.json | 38 + .../PoemSentimentClassification.json | 68 + .../Classification/PolEmo2.0-IN.json | 50 + .../Classification/PolEmo2.0-OUT.json | 50 + .../PunjabiNewsClassification.json | 38 + ...staurantReviewSentimentClassification.json | 20 + .../RomanianReviewsSentiment.json | 50 + .../RomanianSentimentClassification.json | 38 + .../RuReviewsClassification.json | 44 + .../RuSciBenchGRNTIClassification.json | 194 + .../RuSciBenchOECDClassification.json | 200 + ...ccountabilityLegalBenchClassification.json | 38 + .../SCDBPAuditsLegalBenchClassification.json | 38 + ...CertificationLegalBenchClassification.json | 38 + ...SCDBPTrainingLegalBenchClassification.json | 38 + ...PVerificationLegalBenchClassification.json | 38 + ...ccountabilityLegalBenchClassification.json | 38 + .../SCDDAuditsLegalBenchClassification.json | 38 + ...CertificationLegalBenchClassification.json | 38 + .../SCDDTrainingLegalBenchClassification.json | 38 + ...DVerificationLegalBenchClassification.json | 38 + .../Classification/SIB200Classification.json | 19610 ++++++++++ .../SanskritShlokasClassification.json | 44 + .../Classification/ScalaClassification.json | 186 + .../SentimentAnalysisHindi.json | 23 + .../SinhalaNewsClassification.json | 29 + .../SinhalaNewsSourceClassification.json | 41 + .../SiswatiNewsClassification.json | 50 + ...vakMovieReviewSentimentClassification.json | 38 + .../SouthAfricanLangClassification.json | 92 + .../SpanishNewsClassification.json | 50 + .../SpanishSentimentClassification.json | 56 + .../SwahiliNewsClassification.json | 32 + .../Classification/SweRecClassification.json | 44 + .../SwedishSentimentClassification.json | 56 + .../Classification/TNews.json | 116 + .../TamilNewsClassification.json | 62 + 
...tingSalesRuleLegalBenchClassification.json | 38 + .../TeluguAndhraJyotiNewsClassification.json | 56 + .../TenKGnadClassification.json | 80 + ...lDictionariesLegalBenchClassification.json | 38 + ...lismToolPlainLegalBenchClassification.json | 38 + .../ToxicChatClassification.json | 38 + .../ToxicConversationsClassification.json | 38 + .../TswanaNewsClassification.json | 86 + .../Classification/TurkicClassification.json | 616 + .../TurkishMovieSentimentClassification.json | 38 + ...TurkishProductSentimentClassification.json | 38 + .../TweetEmotionClassification.json | 38 + .../TweetSarcasmClassification.json | 38 + .../TweetSentimentClassification.json | 384 + ...weetSentimentExtractionClassification.json | 44 + .../TweetTopicSingleClassification.json | 62 + ...UCCVCommonLawLegalBenchClassification.json | 38 + .../UkrFormalityClassification.json | 38 + .../UnfairTOSLegalBenchClassification.json | 80 + .../UrduRomanSentimentClassification.json | 23 + .../VieStudentFeedbackClassification.json | 44 + .../Classification/WRIMEClassification.json | 56 + .../Classification/Waimai.json | 38 + .../WisesightSentimentClassification.json | 50 + .../YahooAnswersTopicsClassification.json | 86 + .../YelpReviewFullClassification.json | 56 + .../YueOpenriceReviewClassification.json | 56 + .../Clustering/AlloProfClusteringP2P.v2.json | 55 + .../Clustering/AlloProfClusteringS2S.v2.json | 55 + .../ArXivHierarchicalClusteringS2S.json | 403 + .../Clustering/BigPatentClustering.v2.json | 43 + .../Clustering/BiorxivClusteringP2P.v2.json | 94 + .../Clustering/BiorxivClusteringS2S.v2.json | 94 + .../Clustering/BlurbsClusteringP2P.v2.json | 121 + .../Clustering/BlurbsClusteringS2S.v2.json | 301 + .../Clustering/CLSClusteringP2P.v2.json | 55 + .../Clustering/CLSClusteringS2S.v2.json | 55 + .../Clustering/EightTagsClustering.v2.json | 40 + .../Clustering/GeoreviewClusteringP2P.json | 166 + .../Clustering/HALClusteringS2S.v2.json | 49 + .../Clustering/IndicReviewsClusteringP2P.json | 830 + 
.../Clustering/LivedoorNewsClustering.v2.json | 43 + .../Clustering/MLSUMClusteringP2P.v2.json | 773 + .../Clustering/MLSUMClusteringS2S.v2.json | 1529 + .../Clustering/MasakhaNEWSClusteringP2P.json | 524 + .../Clustering/MasakhaNEWSClusteringS2S.json | 524 + .../Clustering/MewsC16JaClustering.json | 52 + .../Clustering/PlscClusteringP2P.v2.json | 169 + .../Clustering/PlscClusteringS2S.v2.json | 169 + .../Clustering/RedditClustering.v2.json | 166 + .../Clustering/RomaniBibleClustering.json | 211 + .../RuSciBenchOECDClusteringP2P.json | 103 + .../Clustering/SIB200ClusteringS2S.json | 6934 ++++ .../SNLHierarchicalClusteringP2P.json | 3754 ++ .../SNLHierarchicalClusteringS2S.json | 3754 ++ .../Clustering/SpanishNewsClusteringP2P.json | 37 + .../StackExchangeClustering.v2.json | 379 + .../StackExchangeClusteringP2P.v2.json | 1588 + .../Clustering/SwednClusteringP2P.json | 54 + .../Clustering/SwednClusteringS2S.json | 28 + .../Clustering/TenKGnadClusteringP2P.v2.json | 43 + .../Clustering/TenKGnadClusteringS2S.v2.json | 43 + .../Clustering/ThuNewsClusteringP2P.v2.json | 58 + .../Clustering/ThuNewsClusteringS2S.v2.json | 58 + .../VGHierarchicalClusteringP2P.json | 229 + .../VGHierarchicalClusteringS2S.json | 229 + .../Clustering/WikiCitiesClustering.json | 394 + .../Clustering/WikiClusteringP2P.v2.json | 1201 + .../Core17InstructionRetrieval.json | 29 + .../News21InstructionRetrieval.json | 29 + .../Robust04InstructionRetrieval.json | 29 + .../BrazilianToxicTweetsClassification.json | 74 + .../KorHateSpeechMLClassification.json | 86 + .../MalteseNewsClassification.json | 134 + .../SensitiveTopicsClassification.json | 146 + .../PairClassification/ArEntail.json | 24 + .../ArmenianParaphrasePC.json | 24 + .../PairClassification/Assin2RTE.json | 24 + .../PairClassification/CDSC-E.json | 24 + .../PairClassification/CTKFactsNLI.json | 46 + .../PairClassification/Cmnli.json | 24 + .../FalseFriendsGermanEnglish.json | 24 + .../PairClassification/FarsTail.json | 24 + 
.../PairClassification/KLUE-NLI.json | 24 + .../PairClassification/LegalBenchPC.json | 24 + .../PairClassification/Ocnli.json | 24 + .../PairClassification/OpusparcusPC.json | 314 + .../PairClassification/PSC.json | 24 + .../PairClassification/PpcPC.json | 24 + .../PairClassification/RTE3.json | 114 + .../PairClassification/SICK-BR-PC.json | 24 + .../PairClassification/SICK-E-PL.json | 24 + .../SprintDuplicateQuestions.json | 46 + .../PairClassification/TERRa.json | 24 + .../TwitterSemEval2015.json | 24 + .../PairClassification/XNLIV2.json | 312 + .../PairClassification/XStance.json | 92 + .../PairClassification/indonli.json | 24 + .../Reranking/AlloprofReranking.json | 29 + .../Reranking/AskUbuntuDupQuestions.json | 39 +- .../Reranking/CMedQAv1-reranking.json | 29 + .../Reranking/CMedQAv2-reranking.json | 29 + .../Reranking/ESCIReranking.json | 158 +- .../Reranking/InstructIR.json | 28 + .../Reranking/MMarcoReranking.json | 29 + mteb/descriptive_stats/Reranking/NevIR.json | 28 + .../Reranking/RuBQReranking.json | 29 + .../Reranking/SciDocsRR.json | 29 + .../Reranking/StackOverflowDupQuestions.json | 29 + .../Reranking/SyntecReranking.json | 29 + .../Reranking/T2Reranking.json | 29 + .../WikipediaRerankingMultilingual.json | 663 +- .../Retrieval/AILACasedocs.json | 28 + .../Retrieval/AILAStatutes.json | 28 + .../Retrieval/ARCChallenge.json | 28 + .../Retrieval/AlloprofRetrieval.json | 28 + .../descriptive_stats/Retrieval/AlphaNLI.json | 28 + .../Retrieval/ArguAna-PL.json | 28 + mteb/descriptive_stats/Retrieval/ArguAna.json | 28 + .../Retrieval/AutoRAGRetrieval.json | 25 +- .../Retrieval/BSARDRetrieval.json | 28 + .../CQADupstackAndroidRetrieval.json | 28 + .../CQADupstackEnglishRetrieval.json | 28 + .../Retrieval/CQADupstackGamingRetrieval.json | 28 + .../Retrieval/CQADupstackGisRetrieval.json | 28 + .../CQADupstackMathematicaRetrieval.json | 28 + .../CQADupstackPhysicsRetrieval.json | 28 + .../CQADupstackProgrammersRetrieval.json | 28 + 
.../Retrieval/CQADupstackStatsRetrieval.json | 28 + .../Retrieval/CQADupstackTexRetrieval.json | 28 + .../Retrieval/CQADupstackUnixRetrieval.json | 28 + .../CQADupstackWebmastersRetrieval.json | 28 + .../CQADupstackWordpressRetrieval.json | 28 + .../Retrieval/ClimateFEVER.json | 28 + .../Retrieval/ClimateFEVERHardNegatives.json | 28 + .../Retrieval/CmedqaRetrieval.json | 28 + .../Retrieval/CovidRetrieval.json | 28 + ...ossLingualSemanticDiscriminationWMT19.json | 82 + ...ossLingualSemanticDiscriminationWMT21.json | 82 + .../Retrieval/DBPedia-PL.json | 28 + .../Retrieval/DBPedia-PLHardNegatives.json | 28 + mteb/descriptive_stats/Retrieval/DBPedia.json | 54 + .../Retrieval/DBPediaHardNegatives.json | 28 + .../Retrieval/DanFeverRetrieval.json | 28 + .../Retrieval/DuRetrieval.json | 28 + .../Retrieval/EcomRetrieval.json | 28 + mteb/descriptive_stats/Retrieval/EstQA.json | 28 + .../Retrieval/FEVERHardNegatives.json | 28 + .../Retrieval/FQuADRetrieval.json | 54 + .../Retrieval/FaithDial.json | 28 + .../Retrieval/FeedbackQARetrieval.json | 28 + mteb/descriptive_stats/Retrieval/FiQA-PL.json | 28 + .../descriptive_stats/Retrieval/FiQA2018.json | 80 + .../Retrieval/GeorgianFAQRetrieval.json | 28 + .../descriptive_stats/Retrieval/GerDaLIR.json | 28 + .../Retrieval/GerDaLIRSmall.json | 28 + .../Retrieval/GermanDPR.json | 28 + .../Retrieval/GermanGovServiceRetrieval.json | 28 + .../Retrieval/GermanQuAD-Retrieval.json | 28 + .../Retrieval/GreekCivicsQA.json | 28 + .../Retrieval/HagridRetrieval.json | 28 + .../Retrieval/HellaSwag.json | 28 + .../Retrieval/HotpotQA-PL.json | 28 + .../Retrieval/HotpotQA-PLHardNegatives.json | 28 + .../Retrieval/HotpotQAHardNegatives.json | 28 + .../HunSum2AbstractiveRetrieval.json | 28 + .../Retrieval/JaGovFaqsRetrieval.json | 28 + .../Retrieval/JaQuADRetrieval.json | 28 + .../Retrieval/Ko-StrategyQA.json | 28 + .../Retrieval/LEMBNarrativeQARetrieval.json | 28 + .../Retrieval/LEMBNeedleRetrieval.json | 210 + .../Retrieval/LEMBPasskeyRetrieval.json 
| 210 + .../Retrieval/LEMBQMSumRetrieval.json | 28 + .../Retrieval/LEMBSummScreenFDRetrieval.json | 28 + .../Retrieval/LEMBWikimQARetrieval.json | 28 + .../descriptive_stats/Retrieval/LeCaRDv2.json | 28 + .../LegalBenchConsumerContractsQA.json | 28 + .../LegalBenchCorporateLobbying.json | 28 + .../Retrieval/LegalQuAD.json | 28 + .../Retrieval/LegalSummarization.json | 28 + .../Retrieval/LitSearchRetrieval.json | 28 + .../MIRACLRetrievalHardNegatives.json | 498 + .../Retrieval/MLQARetrieval.json | 2606 ++ .../Retrieval/MLQuestions.json | 54 + .../Retrieval/MMarcoRetrieval.json | 28 + .../Retrieval/MSMARCO-PL.json | 28 + .../Retrieval/MSMARCO-PLHardNegatives.json | 28 + .../Retrieval/MSMARCOHardNegatives.json | 28 + .../Retrieval/MedicalQARetrieval.json | 28 + .../Retrieval/MedicalRetrieval.json | 28 + .../Retrieval/MintakaRetrieval.json | 238 + .../Retrieval/NFCorpus-PL.json | 28 + .../NLPJournalAbsIntroRetrieval.json | 28 + .../NLPJournalTitleAbsRetrieval.json | 28 + .../NLPJournalTitleIntroRetrieval.json | 28 + mteb/descriptive_stats/Retrieval/NQ-PL.json | 28 + .../Retrieval/NQ-PLHardNegatives.json | 28 + mteb/descriptive_stats/Retrieval/NQ.json | 28 + .../Retrieval/NQHardNegatives.json | 28 + .../Retrieval/NarrativeQARetrieval.json | 28 + .../NeuCLIR2022RetrievalHardNegatives.json | 108 + .../NeuCLIR2023RetrievalHardNegatives.json | 108 + .../Retrieval/NorQuadRetrieval.json | 28 + mteb/descriptive_stats/Retrieval/PIQA.json | 28 + .../Retrieval/PublicHealthQA.json | 247 + mteb/descriptive_stats/Retrieval/Quail.json | 28 + .../descriptive_stats/Retrieval/Quora-PL.json | 54 + .../Retrieval/Quora-PLHardNegatives.json | 28 + .../Retrieval/QuoraRetrieval.json | 54 + .../QuoraRetrievalHardNegatives.json | 28 + .../descriptive_stats/Retrieval/RARbCode.json | 28 + .../descriptive_stats/Retrieval/RARbMath.json | 28 + .../Retrieval/RiaNewsRetrieval.json | 28 + .../RiaNewsRetrievalHardNegatives.json | 28 + .../Retrieval/RuBQRetrieval.json | 28 + .../Retrieval/SCIDOCS-PL.json 
| 28 + mteb/descriptive_stats/Retrieval/SCIDOCS.json | 28 + mteb/descriptive_stats/Retrieval/SIQA.json | 28 + .../Retrieval/SKQuadRetrieval.json | 28 + .../Retrieval/SNLRetrieval.json | 28 + .../Retrieval/SadeemQuestionRetrieval.json | 28 + .../Retrieval/SciFact-PL.json | 28 + mteb/descriptive_stats/Retrieval/SciFact.json | 54 + .../Retrieval/SlovakSumRetrieval.json | 28 + .../Retrieval/SpanishPassageRetrievalS2P.json | 28 + .../Retrieval/SpanishPassageRetrievalS2S.json | 28 + mteb/descriptive_stats/Retrieval/SpartQA.json | 28 + .../StatcanDialogueDatasetRetrieval.json | 162 + .../Retrieval/SweFaqRetrieval.json | 28 + .../Retrieval/SwednRetrieval.json | 28 + .../Retrieval/SyntecRetrieval.json | 28 + .../Retrieval/T2Retrieval.json | 28 + .../Retrieval/TRECCOVID-PL.json | 28 + .../Retrieval/TRECCOVID.json | 28 + .../Retrieval/TV2Nordretrieval.json | 28 + .../Retrieval/TempReasonL1.json | 28 + .../Retrieval/TempReasonL2Context.json | 28 + .../Retrieval/TempReasonL2Fact.json | 28 + .../Retrieval/TempReasonL2Pure.json | 28 + .../Retrieval/TempReasonL3Context.json | 28 + .../Retrieval/TempReasonL3Fact.json | 28 + .../Retrieval/TempReasonL3Pure.json | 28 + .../Retrieval/TopiOCQAHardNegatives.json | 28 + .../Retrieval/TurHistQuadRetrieval.json | 28 + .../Retrieval/TwitterHjerneRetrieval.json | 28 + .../Retrieval/VideoRetrieval.json | 28 + .../Retrieval/VieQuADRetrieval.json | 28 + .../WikipediaRetrievalMultilingual.json | 446 + .../Retrieval/WinoGrande.json | 28 + mteb/descriptive_stats/Retrieval/XMarket.json | 112 + .../Retrieval/XPQARetrieval.json | 966 + .../Retrieval/XQuADRetrieval.json | 342 + .../Retrieval/mFollowIR.json | 108 + .../Retrieval/mFollowIRCrossLingual.json | 108 + mteb/descriptive_stats/STS/AFQMC.json | 18 + mteb/descriptive_stats/STS/ATEC.json | 34 + mteb/descriptive_stats/STS/Assin2STS.json | 18 + mteb/descriptive_stats/STS/BIOSSES.json | 18 + mteb/descriptive_stats/STS/BQ.json | 34 + mteb/descriptive_stats/STS/CDSC-R.json | 18 + 
mteb/descriptive_stats/STS/FaroeseSTS.json | 18 + mteb/descriptive_stats/STS/FinParaSTS.json | 34 + .../STS/GermanSTSBenchmark.json | 34 + .../STS/IndicCrosslingualSTS.json | 212 + mteb/descriptive_stats/STS/JSICK.json | 18 + mteb/descriptive_stats/STS/JSTS.json | 18 + mteb/descriptive_stats/STS/KLUE-STS.json | 18 + mteb/descriptive_stats/STS/KorSTS.json | 18 + mteb/descriptive_stats/STS/LCQMC.json | 18 + mteb/descriptive_stats/STS/PAWSX.json | 18 + mteb/descriptive_stats/STS/QBQTC.json | 18 + .../STS/RUParaPhraserSTS.json | 18 + mteb/descriptive_stats/STS/RonSTS.json | 18 + .../STS/RuSTSBenchmarkSTS.json | 18 + mteb/descriptive_stats/STS/SICK-BR-STS.json | 18 + mteb/descriptive_stats/STS/SICK-R-PL.json | 18 + mteb/descriptive_stats/STS/SICK-R.json | 18 + mteb/descriptive_stats/STS/SICKFr.json | 34 + mteb/descriptive_stats/STS/STS13.json | 18 + mteb/descriptive_stats/STS/STS14.json | 18 + mteb/descriptive_stats/STS/STS15.json | 18 + mteb/descriptive_stats/STS/STS16.json | 18 + mteb/descriptive_stats/STS/STS22.v2.json | 308 + mteb/descriptive_stats/STS/STSB.json | 34 + mteb/descriptive_stats/STS/STSBenchmark.json | 18 + .../STS/STSBenchmarkMultilingualSTS.json | 358 + mteb/descriptive_stats/STS/STSES.json | 18 + mteb/descriptive_stats/STS/SemRel24STS.json | 212 + .../SummEvalFrSummarization.v2.json | 55 + .../SummEvalSummarization.v2.json | 55 + mteb/tasks/Classification/kor/KorFin.py | 4 +- .../multilingual/OpusparcusPC.py | 2 +- .../multilingual/XStance.py | 33 +- mteb/tasks/Retrieval/eng/HagridRetrieval.py | 1 + mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py | 1 + .../spa/SpanishPassageRetrievalS2P.py | 3 - mteb/tasks/STS/por/SickBrSTS.py | 2 +- tests/test_benchmark/mock_tasks.py | 16 + tests/test_tasks/test_metadata.py | 2 - 537 files changed, 132941 insertions(+), 375 deletions(-) create mode 100644 mteb/descriptive_stats/BitextMining/DiaBlaBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/LinceMTBitextMining.json create mode 100644 
mteb/descriptive_stats/BitextMining/NusaXBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/RomaTalesBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/SRNCorpusBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/Tatoeba.json create mode 100644 mteb/descriptive_stats/Classification/AJGT.json create mode 100644 mteb/descriptive_stats/Classification/AfriSentiClassification.json create mode 100644 mteb/descriptive_stats/Classification/AfriSentiLangClassification.json create mode 100644 mteb/descriptive_stats/Classification/AllegroReviews.json create mode 100644 mteb/descriptive_stats/Classification/AmazonCounterfactualClassification.json create mode 100644 mteb/descriptive_stats/Classification/AmazonPolarityClassification.json create mode 100644 mteb/descriptive_stats/Classification/AmazonReviewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/AngryTweetsClassification.json create mode 100644 mteb/descriptive_stats/Classification/ArxivClassification.json create mode 100644 mteb/descriptive_stats/Classification/Banking77Classification.json create mode 100644 mteb/descriptive_stats/Classification/BengaliDocumentClassification.json create mode 100644 mteb/descriptive_stats/Classification/BengaliHateSpeechClassification.json create mode 100644 mteb/descriptive_stats/Classification/BengaliSentimentAnalysis.json create mode 100644 mteb/descriptive_stats/Classification/BulgarianStoreReviewSentimentClassfication.json create mode 100644 mteb/descriptive_stats/Classification/CBD.json create mode 100644 mteb/descriptive_stats/Classification/CSFDCZMovieReviewSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/CSFDSKMovieReviewSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADAffiliateLicenseLicenseeLegalBenchClassification.json create mode 100644 
mteb/descriptive_stats/Classification/CUADAffiliateLicenseLicensorLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADAntiAssignmentLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADAuditRightsLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADCapOnLiabilityLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADChangeOfControlLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADCompetitiveRestrictionExceptionLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADCovenantNotToSueLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADEffectiveDateLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADExclusivityLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADExpirationDateLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADGoverningLawLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADIPOwnershipAssignmentLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADInsuranceLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADIrrevocableOrPerpetualLicenseLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADJointIPOwnershipLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADLicenseGrantLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADLiquidatedDamagesLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADMinimumCommitmentLegalBenchClassification.json create mode 100644 
mteb/descriptive_stats/Classification/CUADMostFavoredNationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADNoSolicitOfCustomersLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADNoSolicitOfEmployeesLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADNonCompeteLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADNonDisparagementLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADNonTransferableLicenseLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADNoticePeriodToTerminateRenewalLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADPostTerminationServicesLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADPriceRestrictionsLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADRenewalTermLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADRevenueProfitSharingLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADRofrRofoRofnLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADSourceCodeEscrowLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADTerminationForConvenienceLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADThirdPartyBeneficiaryLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADUncappedLiabilityLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CUADVolumeRestrictionLegalBenchClassification.json create mode 100644 
mteb/descriptive_stats/Classification/CUADWarrantyDurationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CanadaTaxCourtOutcomesLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CataloniaTweetClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLIConfidentialityOfAgreementLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLIExplicitIdentificationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLILimitedUseLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLINoLicensingLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLINoticeOnCompelledDisclosureLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLIPermissibleCopyLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLIReturnOfConfidentialInformationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLISharingWithEmployeesLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ContractNLISharingWithThirdPartiesLegalBenchClassification.json create mode 100644 
mteb/descriptive_stats/Classification/ContractNLISurvivalOfObligationsLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CorporateLobbyingLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/CyrillicTurkicLangClassification.json create mode 100644 mteb/descriptive_stats/Classification/CzechProductReviewSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/CzechSoMeSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/CzechSubjectivityClassification.json create mode 100644 mteb/descriptive_stats/Classification/DBpediaClassification.json create mode 100644 mteb/descriptive_stats/Classification/DKHateClassification.json create mode 100644 mteb/descriptive_stats/Classification/DalajClassification.json create mode 100644 mteb/descriptive_stats/Classification/DanishPoliticalCommentsClassification.json create mode 100644 mteb/descriptive_stats/Classification/DefinitionClassificationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/Diversity1LegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/Diversity2LegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/Diversity3LegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/Diversity4LegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/Diversity5LegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/Diversity6LegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/DutchBookReviewSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/EmotionClassification.json create mode 100644 mteb/descriptive_stats/Classification/EstonianValenceClassification.json create mode 100644 
mteb/descriptive_stats/Classification/FilipinoShopeeReviewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/FinToxicityClassification.json create mode 100644 mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json create mode 100644 mteb/descriptive_stats/Classification/FrenchBookReviews.json create mode 100644 mteb/descriptive_stats/Classification/FrenkEnClassification.json create mode 100644 mteb/descriptive_stats/Classification/FrenkHrClassification.json create mode 100644 mteb/descriptive_stats/Classification/FrenkSlClassification.json create mode 100644 mteb/descriptive_stats/Classification/FunctionOfDecisionSectionLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/GeoreviewClassification.json create mode 100644 mteb/descriptive_stats/Classification/GermanPoliticiansTwitterSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/GreekLegalCodeClassification.json create mode 100644 mteb/descriptive_stats/Classification/GujaratiNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/HateSpeechPortugueseClassification.json create mode 100644 mteb/descriptive_stats/Classification/HeadlineClassification.json create mode 100644 mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.json create mode 100644 mteb/descriptive_stats/Classification/HinDialectClassification.json create mode 100644 mteb/descriptive_stats/Classification/HindiDiscourseClassification.json create mode 100644 mteb/descriptive_stats/Classification/HotelReviewSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/IFlyTek.json create mode 100644 mteb/descriptive_stats/Classification/ImdbClassification.json create mode 100644 mteb/descriptive_stats/Classification/InappropriatenessClassification.json create mode 100644 mteb/descriptive_stats/Classification/IndicLangClassification.json create mode 100644 
mteb/descriptive_stats/Classification/IndicNLPNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/IndicSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/IndonesianIdClickbaitClassification.json create mode 100644 mteb/descriptive_stats/Classification/IndonesianMongabayConservationClassification.json create mode 100644 mteb/descriptive_stats/Classification/InsurancePolicyInterpretationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/InternationalCitizenshipQuestionsLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/IsiZuluNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/ItaCaseholdClassification.json create mode 100644 mteb/descriptive_stats/Classification/Itacola.json create mode 100644 mteb/descriptive_stats/Classification/JCrewBlockerLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/JDReview.json create mode 100644 mteb/descriptive_stats/Classification/JavaneseIMDBClassification.json create mode 100644 mteb/descriptive_stats/Classification/KLUE-TC.json create mode 100644 mteb/descriptive_stats/Classification/KannadaNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/KinopoiskClassification.json create mode 100644 mteb/descriptive_stats/Classification/KorFin.json create mode 100644 mteb/descriptive_stats/Classification/KorHateClassification.json create mode 100644 mteb/descriptive_stats/Classification/KorSarcasmClassification.json create mode 100644 mteb/descriptive_stats/Classification/KurdishSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/LccSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsBenefitsLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsBusinessLegalBenchClassification.json create mode 
100644 mteb/descriptive_stats/Classification/LearnedHandsConsumerLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsCourtsLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsCrimeLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsDivorceLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsDomesticViolenceLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsEducationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsEmploymentLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsEstatesLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsFamilyLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsHealthLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsHousingLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsImmigrationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsTortsLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LearnedHandsTrafficLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/LegalReasoningCausalityLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/MAUDLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/MTOPDomainClassification.json create mode 100644 mteb/descriptive_stats/Classification/MTOPIntentClassification.json create mode 100644 mteb/descriptive_stats/Classification/MacedonianTweetSentimentClassification.json create mode 100644 
mteb/descriptive_stats/Classification/MalayalamNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/MarathiNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/MasakhaNEWSClassification.json create mode 100644 mteb/descriptive_stats/Classification/MassiveIntentClassification.json create mode 100644 mteb/descriptive_stats/Classification/MassiveScenarioClassification.json create mode 100644 mteb/descriptive_stats/Classification/Moroco.json create mode 100644 mteb/descriptive_stats/Classification/MovieReviewSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/MultiHateClassification.json create mode 100644 mteb/descriptive_stats/Classification/MultilingualSentiment.json create mode 100644 mteb/descriptive_stats/Classification/MultilingualSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/MyanmarNews.json create mode 100644 mteb/descriptive_stats/Classification/NYSJudicialEthicsLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/NaijaSenti.json create mode 100644 mteb/descriptive_stats/Classification/NepaliNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/NewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/NoRecClassification.json create mode 100644 mteb/descriptive_stats/Classification/NordicLangClassification.json create mode 100644 mteb/descriptive_stats/Classification/NorwegianParliamentClassification.json create mode 100644 mteb/descriptive_stats/Classification/NusaParagraphEmotionClassification.json create mode 100644 mteb/descriptive_stats/Classification/NusaParagraphTopicClassification.json create mode 100644 mteb/descriptive_stats/Classification/NusaX-senti.json create mode 100644 mteb/descriptive_stats/Classification/OPP115DataRetentionLegalBenchClassification.json create mode 100644 
mteb/descriptive_stats/Classification/OPP115DataSecurityLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/OPP115DoNotTrackLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/OPP115FirstPartyCollectionUseLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/OPP115InternationalAndSpecificAudiencesLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/OPP115PolicyChangeLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/OPP115ThirdPartySharingCollectionLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/OPP115UserAccessEditAndDeletionLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/OPP115UserChoiceControlLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/OdiaNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/OnlineShopping.json create mode 100644 mteb/descriptive_stats/Classification/OnlineStoreReviewSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/OralArgumentQuestionPurposeLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/OverrulingLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/PAC.json create mode 100644 mteb/descriptive_stats/Classification/PROALegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/PatentClassification.json create mode 100644 mteb/descriptive_stats/Classification/PersianFoodSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/PersonalJurisdictionLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/PoemSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/PolEmo2.0-IN.json create mode 100644 
mteb/descriptive_stats/Classification/PolEmo2.0-OUT.json create mode 100644 mteb/descriptive_stats/Classification/PunjabiNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/RestaurantReviewSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/RomanianReviewsSentiment.json create mode 100644 mteb/descriptive_stats/Classification/RomanianSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/RuReviewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/RuSciBenchGRNTIClassification.json create mode 100644 mteb/descriptive_stats/Classification/RuSciBenchOECDClassification.json create mode 100644 mteb/descriptive_stats/Classification/SCDBPAccountabilityLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/SCDBPAuditsLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/SCDBPCertificationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/SCDBPTrainingLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/SCDBPVerificationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/SCDDAccountabilityLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/SCDDAuditsLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/SCDDCertificationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/SCDDTrainingLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/SCDDVerificationLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/SIB200Classification.json create mode 100644 mteb/descriptive_stats/Classification/SanskritShlokasClassification.json create mode 100644 mteb/descriptive_stats/Classification/ScalaClassification.json create mode 100644 
mteb/descriptive_stats/Classification/SentimentAnalysisHindi.json create mode 100644 mteb/descriptive_stats/Classification/SinhalaNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/SinhalaNewsSourceClassification.json create mode 100644 mteb/descriptive_stats/Classification/SiswatiNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/SlovakMovieReviewSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/SouthAfricanLangClassification.json create mode 100644 mteb/descriptive_stats/Classification/SpanishNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/SpanishSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/SwahiliNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/SweRecClassification.json create mode 100644 mteb/descriptive_stats/Classification/SwedishSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/TNews.json create mode 100644 mteb/descriptive_stats/Classification/TamilNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/TelemarketingSalesRuleLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/TeluguAndhraJyotiNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/TenKGnadClassification.json create mode 100644 mteb/descriptive_stats/Classification/TextualismToolDictionariesLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/TextualismToolPlainLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/ToxicChatClassification.json create mode 100644 mteb/descriptive_stats/Classification/ToxicConversationsClassification.json create mode 100644 mteb/descriptive_stats/Classification/TswanaNewsClassification.json create mode 100644 mteb/descriptive_stats/Classification/TurkicClassification.json create 
mode 100644 mteb/descriptive_stats/Classification/TurkishMovieSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/TurkishProductSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/TweetEmotionClassification.json create mode 100644 mteb/descriptive_stats/Classification/TweetSarcasmClassification.json create mode 100644 mteb/descriptive_stats/Classification/TweetSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/TweetSentimentExtractionClassification.json create mode 100644 mteb/descriptive_stats/Classification/TweetTopicSingleClassification.json create mode 100644 mteb/descriptive_stats/Classification/UCCVCommonLawLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/UkrFormalityClassification.json create mode 100644 mteb/descriptive_stats/Classification/UnfairTOSLegalBenchClassification.json create mode 100644 mteb/descriptive_stats/Classification/UrduRomanSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/VieStudentFeedbackClassification.json create mode 100644 mteb/descriptive_stats/Classification/WRIMEClassification.json create mode 100644 mteb/descriptive_stats/Classification/Waimai.json create mode 100644 mteb/descriptive_stats/Classification/WisesightSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/YahooAnswersTopicsClassification.json create mode 100644 mteb/descriptive_stats/Classification/YelpReviewFullClassification.json create mode 100644 mteb/descriptive_stats/Classification/YueOpenriceReviewClassification.json create mode 100644 mteb/descriptive_stats/Clustering/AlloProfClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/AlloProfClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringS2S.json create mode 100644 mteb/descriptive_stats/Clustering/BigPatentClustering.v2.json create 
mode 100644 mteb/descriptive_stats/Clustering/BiorxivClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/BlurbsClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/BlurbsClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/CLSClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/CLSClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/EightTagsClustering.v2.json create mode 100644 mteb/descriptive_stats/Clustering/GeoreviewClusteringP2P.json create mode 100644 mteb/descriptive_stats/Clustering/HALClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/IndicReviewsClusteringP2P.json create mode 100644 mteb/descriptive_stats/Clustering/LivedoorNewsClustering.v2.json create mode 100644 mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/MLSUMClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/MasakhaNEWSClusteringP2P.json create mode 100644 mteb/descriptive_stats/Clustering/MasakhaNEWSClusteringS2S.json create mode 100644 mteb/descriptive_stats/Clustering/MewsC16JaClustering.json create mode 100644 mteb/descriptive_stats/Clustering/PlscClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/PlscClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/RedditClustering.v2.json create mode 100644 mteb/descriptive_stats/Clustering/RomaniBibleClustering.json create mode 100644 mteb/descriptive_stats/Clustering/RuSciBenchOECDClusteringP2P.json create mode 100644 mteb/descriptive_stats/Clustering/SIB200ClusteringS2S.json create mode 100644 mteb/descriptive_stats/Clustering/SNLHierarchicalClusteringP2P.json create mode 100644 mteb/descriptive_stats/Clustering/SNLHierarchicalClusteringS2S.json create mode 100644 
mteb/descriptive_stats/Clustering/SpanishNewsClusteringP2P.json create mode 100644 mteb/descriptive_stats/Clustering/StackExchangeClustering.v2.json create mode 100644 mteb/descriptive_stats/Clustering/StackExchangeClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/SwednClusteringP2P.json create mode 100644 mteb/descriptive_stats/Clustering/SwednClusteringS2S.json create mode 100644 mteb/descriptive_stats/Clustering/TenKGnadClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/TenKGnadClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/ThuNewsClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/ThuNewsClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/VGHierarchicalClusteringP2P.json create mode 100644 mteb/descriptive_stats/Clustering/VGHierarchicalClusteringS2S.json create mode 100644 mteb/descriptive_stats/Clustering/WikiCitiesClustering.json create mode 100644 mteb/descriptive_stats/Clustering/WikiClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/InstructionReranking/Core17InstructionRetrieval.json create mode 100644 mteb/descriptive_stats/InstructionReranking/News21InstructionRetrieval.json create mode 100644 mteb/descriptive_stats/InstructionReranking/Robust04InstructionRetrieval.json create mode 100644 mteb/descriptive_stats/MultilabelClassification/BrazilianToxicTweetsClassification.json create mode 100644 mteb/descriptive_stats/MultilabelClassification/KorHateSpeechMLClassification.json create mode 100644 mteb/descriptive_stats/MultilabelClassification/MalteseNewsClassification.json create mode 100644 mteb/descriptive_stats/MultilabelClassification/SensitiveTopicsClassification.json create mode 100644 mteb/descriptive_stats/PairClassification/ArEntail.json create mode 100644 mteb/descriptive_stats/PairClassification/ArmenianParaphrasePC.json create mode 100644 mteb/descriptive_stats/PairClassification/Assin2RTE.json create mode 
100644 mteb/descriptive_stats/PairClassification/CDSC-E.json create mode 100644 mteb/descriptive_stats/PairClassification/CTKFactsNLI.json create mode 100644 mteb/descriptive_stats/PairClassification/Cmnli.json create mode 100644 mteb/descriptive_stats/PairClassification/FalseFriendsGermanEnglish.json create mode 100644 mteb/descriptive_stats/PairClassification/FarsTail.json create mode 100644 mteb/descriptive_stats/PairClassification/KLUE-NLI.json create mode 100644 mteb/descriptive_stats/PairClassification/LegalBenchPC.json create mode 100644 mteb/descriptive_stats/PairClassification/Ocnli.json create mode 100644 mteb/descriptive_stats/PairClassification/OpusparcusPC.json create mode 100644 mteb/descriptive_stats/PairClassification/PSC.json create mode 100644 mteb/descriptive_stats/PairClassification/PpcPC.json create mode 100644 mteb/descriptive_stats/PairClassification/RTE3.json create mode 100644 mteb/descriptive_stats/PairClassification/SICK-BR-PC.json create mode 100644 mteb/descriptive_stats/PairClassification/SICK-E-PL.json create mode 100644 mteb/descriptive_stats/PairClassification/SprintDuplicateQuestions.json create mode 100644 mteb/descriptive_stats/PairClassification/TERRa.json create mode 100644 mteb/descriptive_stats/PairClassification/TwitterSemEval2015.json create mode 100644 mteb/descriptive_stats/PairClassification/XNLIV2.json create mode 100644 mteb/descriptive_stats/PairClassification/XStance.json create mode 100644 mteb/descriptive_stats/PairClassification/indonli.json create mode 100644 mteb/descriptive_stats/Reranking/AlloprofReranking.json create mode 100644 mteb/descriptive_stats/Reranking/CMedQAv1-reranking.json create mode 100644 mteb/descriptive_stats/Reranking/CMedQAv2-reranking.json create mode 100644 mteb/descriptive_stats/Reranking/InstructIR.json create mode 100644 mteb/descriptive_stats/Reranking/MMarcoReranking.json create mode 100644 mteb/descriptive_stats/Reranking/NevIR.json create mode 100644 
mteb/descriptive_stats/Reranking/RuBQReranking.json create mode 100644 mteb/descriptive_stats/Reranking/SciDocsRR.json create mode 100644 mteb/descriptive_stats/Reranking/StackOverflowDupQuestions.json create mode 100644 mteb/descriptive_stats/Reranking/SyntecReranking.json create mode 100644 mteb/descriptive_stats/Reranking/T2Reranking.json create mode 100644 mteb/descriptive_stats/Retrieval/AILACasedocs.json create mode 100644 mteb/descriptive_stats/Retrieval/AILAStatutes.json create mode 100644 mteb/descriptive_stats/Retrieval/ARCChallenge.json create mode 100644 mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/AlphaNLI.json create mode 100644 mteb/descriptive_stats/Retrieval/ArguAna-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/ArguAna.json create mode 100644 mteb/descriptive_stats/Retrieval/BSARDRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackAndroidRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackEnglishRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackGamingRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackGisRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackMathematicaRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackPhysicsRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackProgrammersRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackStatsRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackTexRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackUnixRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackWebmastersRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CQADupstackWordpressRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/ClimateFEVER.json create 
mode 100644 mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/CmedqaRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CovidRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT19.json create mode 100644 mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT21.json create mode 100644 mteb/descriptive_stats/Retrieval/DBPedia-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/DBPedia-PLHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/DBPedia.json create mode 100644 mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/DanFeverRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/DuRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/EcomRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/EstQA.json create mode 100644 mteb/descriptive_stats/Retrieval/FEVERHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/FQuADRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/FaithDial.json create mode 100644 mteb/descriptive_stats/Retrieval/FeedbackQARetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/FiQA-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/FiQA2018.json create mode 100644 mteb/descriptive_stats/Retrieval/GeorgianFAQRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/GerDaLIR.json create mode 100644 mteb/descriptive_stats/Retrieval/GerDaLIRSmall.json create mode 100644 mteb/descriptive_stats/Retrieval/GermanDPR.json create mode 100644 mteb/descriptive_stats/Retrieval/GermanGovServiceRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/GermanQuAD-Retrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/GreekCivicsQA.json create mode 100644 mteb/descriptive_stats/Retrieval/HagridRetrieval.json 
create mode 100644 mteb/descriptive_stats/Retrieval/HellaSwag.json create mode 100644 mteb/descriptive_stats/Retrieval/HotpotQA-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/HotpotQA-PLHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/HunSum2AbstractiveRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/JaGovFaqsRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/JaQuADRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/Ko-StrategyQA.json create mode 100644 mteb/descriptive_stats/Retrieval/LEMBNarrativeQARetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/LEMBNeedleRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/LEMBPasskeyRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/LEMBQMSumRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/LEMBSummScreenFDRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/LEMBWikimQARetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/LeCaRDv2.json create mode 100644 mteb/descriptive_stats/Retrieval/LegalBenchConsumerContractsQA.json create mode 100644 mteb/descriptive_stats/Retrieval/LegalBenchCorporateLobbying.json create mode 100644 mteb/descriptive_stats/Retrieval/LegalQuAD.json create mode 100644 mteb/descriptive_stats/Retrieval/LegalSummarization.json create mode 100644 mteb/descriptive_stats/Retrieval/LitSearchRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/MIRACLRetrievalHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/MLQARetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/MLQuestions.json create mode 100644 mteb/descriptive_stats/Retrieval/MMarcoRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/MSMARCO-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/MSMARCO-PLHardNegatives.json create mode 100644 
mteb/descriptive_stats/Retrieval/MSMARCOHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/MedicalQARetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/MedicalRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/MintakaRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NFCorpus-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/NLPJournalAbsIntroRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NLPJournalTitleAbsRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NLPJournalTitleIntroRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NQ-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/NQ-PLHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/NQ.json create mode 100644 mteb/descriptive_stats/Retrieval/NQHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/NarrativeQARetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NeuCLIR2022RetrievalHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/NeuCLIR2023RetrievalHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/NorQuadRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/PIQA.json create mode 100644 mteb/descriptive_stats/Retrieval/PublicHealthQA.json create mode 100644 mteb/descriptive_stats/Retrieval/Quail.json create mode 100644 mteb/descriptive_stats/Retrieval/Quora-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/Quora-PLHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/QuoraRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/RARbCode.json create mode 100644 mteb/descriptive_stats/Retrieval/RARbMath.json create mode 100644 mteb/descriptive_stats/Retrieval/RiaNewsRetrieval.json create mode 100644 
mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/RuBQRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/SCIDOCS-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/SCIDOCS.json create mode 100644 mteb/descriptive_stats/Retrieval/SIQA.json create mode 100644 mteb/descriptive_stats/Retrieval/SKQuadRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/SNLRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/SadeemQuestionRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/SciFact-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/SciFact.json create mode 100644 mteb/descriptive_stats/Retrieval/SlovakSumRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2P.json create mode 100644 mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2S.json create mode 100644 mteb/descriptive_stats/Retrieval/SpartQA.json create mode 100644 mteb/descriptive_stats/Retrieval/StatcanDialogueDatasetRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/SweFaqRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/SwednRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/SyntecRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/T2Retrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/TRECCOVID-PL.json create mode 100644 mteb/descriptive_stats/Retrieval/TRECCOVID.json create mode 100644 mteb/descriptive_stats/Retrieval/TV2Nordretrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/TempReasonL1.json create mode 100644 mteb/descriptive_stats/Retrieval/TempReasonL2Context.json create mode 100644 mteb/descriptive_stats/Retrieval/TempReasonL2Fact.json create mode 100644 mteb/descriptive_stats/Retrieval/TempReasonL2Pure.json create mode 100644 mteb/descriptive_stats/Retrieval/TempReasonL3Context.json create mode 100644 
mteb/descriptive_stats/Retrieval/TempReasonL3Fact.json create mode 100644 mteb/descriptive_stats/Retrieval/TempReasonL3Pure.json create mode 100644 mteb/descriptive_stats/Retrieval/TopiOCQAHardNegatives.json create mode 100644 mteb/descriptive_stats/Retrieval/TurHistQuadRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/TwitterHjerneRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/VideoRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/VieQuADRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/WikipediaRetrievalMultilingual.json create mode 100644 mteb/descriptive_stats/Retrieval/WinoGrande.json create mode 100644 mteb/descriptive_stats/Retrieval/XMarket.json create mode 100644 mteb/descriptive_stats/Retrieval/XPQARetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/XQuADRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/mFollowIR.json create mode 100644 mteb/descriptive_stats/Retrieval/mFollowIRCrossLingual.json create mode 100644 mteb/descriptive_stats/STS/AFQMC.json create mode 100644 mteb/descriptive_stats/STS/ATEC.json create mode 100644 mteb/descriptive_stats/STS/Assin2STS.json create mode 100644 mteb/descriptive_stats/STS/BIOSSES.json create mode 100644 mteb/descriptive_stats/STS/BQ.json create mode 100644 mteb/descriptive_stats/STS/CDSC-R.json create mode 100644 mteb/descriptive_stats/STS/FaroeseSTS.json create mode 100644 mteb/descriptive_stats/STS/FinParaSTS.json create mode 100644 mteb/descriptive_stats/STS/GermanSTSBenchmark.json create mode 100644 mteb/descriptive_stats/STS/IndicCrosslingualSTS.json create mode 100644 mteb/descriptive_stats/STS/JSICK.json create mode 100644 mteb/descriptive_stats/STS/JSTS.json create mode 100644 mteb/descriptive_stats/STS/KLUE-STS.json create mode 100644 mteb/descriptive_stats/STS/KorSTS.json create mode 100644 mteb/descriptive_stats/STS/LCQMC.json create mode 100644 mteb/descriptive_stats/STS/PAWSX.json create mode 100644 
mteb/descriptive_stats/STS/QBQTC.json create mode 100644 mteb/descriptive_stats/STS/RUParaPhraserSTS.json create mode 100644 mteb/descriptive_stats/STS/RonSTS.json create mode 100644 mteb/descriptive_stats/STS/RuSTSBenchmarkSTS.json create mode 100644 mteb/descriptive_stats/STS/SICK-BR-STS.json create mode 100644 mteb/descriptive_stats/STS/SICK-R-PL.json create mode 100644 mteb/descriptive_stats/STS/SICK-R.json create mode 100644 mteb/descriptive_stats/STS/SICKFr.json create mode 100644 mteb/descriptive_stats/STS/STS13.json create mode 100644 mteb/descriptive_stats/STS/STS14.json create mode 100644 mteb/descriptive_stats/STS/STS15.json create mode 100644 mteb/descriptive_stats/STS/STS16.json create mode 100644 mteb/descriptive_stats/STS/STS22.v2.json create mode 100644 mteb/descriptive_stats/STS/STSB.json create mode 100644 mteb/descriptive_stats/STS/STSBenchmark.json create mode 100644 mteb/descriptive_stats/STS/STSBenchmarkMultilingualSTS.json create mode 100644 mteb/descriptive_stats/STS/STSES.json create mode 100644 mteb/descriptive_stats/STS/SemRel24STS.json create mode 100644 mteb/descriptive_stats/Summarization/SummEvalFrSummarization.v2.json create mode 100644 mteb/descriptive_stats/Summarization/SummEvalSummarization.v2.json diff --git a/mteb/abstasks/AbsTaskPairClassification.py b/mteb/abstasks/AbsTaskPairClassification.py index d11f96b938..2b4c82c01f 100644 --- a/mteb/abstasks/AbsTaskPairClassification.py +++ b/mteb/abstasks/AbsTaskPairClassification.py @@ -102,8 +102,6 @@ def _calculate_metrics_from_split( ) -> PairClassificationDescriptiveStatistics: if hf_subset: dataset = self.dataset[hf_subset][split] - if isinstance(dataset, list): - dataset = dataset[0] elif compute_overall: dataset = defaultdict(list) for hf_subset in self.metadata.eval_langs: @@ -115,6 +113,9 @@ def _calculate_metrics_from_split( else: dataset = self.dataset[split] + if isinstance(dataset, list): + dataset = dataset[0] + sentence1 = ( dataset["sentence1"][0] if 
len(dataset["sentence1"]) == 1 diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index 839966f7ac..41b0702509 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -37,13 +37,13 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): average_query_length: Average length of queries max_query_length: Maximum length of queries unique_queries: Number of unique queries + none_queries: Number of none queries min_relevant_docs_per_query: Minimum number of relevant documents per query average_relevant_docs_per_query: Average number of relevant documents per query max_relevant_docs_per_query: Maximum number of relevant documents per query unique_relevant_docs: Number of unique relevant documents - num_instructions: Number of instructions min_instruction_length: Minimum length of instructions average_instruction_length: Average length of instructions @@ -69,6 +69,7 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): average_query_length: float max_query_length: int unique_queries: int + none_queries: int min_relevant_docs_per_query: int average_relevant_docs_per_query: float @@ -347,6 +348,10 @@ def _calculate_metrics_from_split( if self.top_ranked is not None: top_ranked.update(process_docs(self.top_ranked, hf_subset, split)) else: + if "default" in self.queries: + return self._calculate_metrics_from_split( + split=split, hf_subset="default" + ) queries = self.queries[split] corpus = self.corpus[split] relevant_docs = self.relevant_docs[split] @@ -358,6 +363,7 @@ def _calculate_metrics_from_split( query_len, doc_len = calculate_length(queries, corpus) num_documents = len(corpus) num_queries = len(queries) + none_queries = sum(q is None or len(q) == 0 for q in queries.values()) # create a list of number of relevant docs per query qrels_lengths = [ @@ -389,10 +395,8 @@ def _calculate_metrics_from_split( max_instruction_length = None unique_instructions = None - if self.top_ranked is not 
None: - top_ranked_per_query = ( - [len(docs) for docs in top_ranked.values()] if num_queries else None - ) + if self.top_ranked is not None and num_queries: + top_ranked_per_query = [len(docs) for docs in top_ranked.values()] min_top_ranked_per_query = min(top_ranked_per_query) average_top_ranked_per_query = sum(top_ranked_per_query) / num_queries max_top_ranked_per_query = max(top_ranked_per_query) @@ -414,6 +418,7 @@ def _calculate_metrics_from_split( average_query_length=sum(query_len) / num_queries, max_query_length=max(query_len), unique_queries=len(set(queries)), + none_queries=none_queries, min_relevant_docs_per_query=min(qrels_lengths), average_relevant_docs_per_query=qrels_per_doc, max_relevant_docs_per_query=max(qrels_lengths), @@ -435,6 +440,9 @@ def calculate_length( queries_lens = [] doc_lens = [] for query in queries.values(): + if query is None or len(query) == 0: + continue + if isinstance(query[0], str): queries_lens.append(len(query)) else: diff --git a/mteb/abstasks/dataloaders.py b/mteb/abstasks/dataloaders.py index 0b6505c5ee..25a6150a5e 100644 --- a/mteb/abstasks/dataloaders.py +++ b/mteb/abstasks/dataloaders.py @@ -300,9 +300,7 @@ def _load_instructions(self): "instruction", keep_in_memory=self.keep_in_memory, streaming=self.streaming, - trust_remote_code=self.metadata_dict["dataset"].get( - "trust_remote_code", False - ), + trust_remote_code=self.trust_remote_code, ) else: instructions_ds = load_dataset( diff --git a/mteb/descriptive_stats/BitextMining/DiaBlaBitextMining.json b/mteb/descriptive_stats/BitextMining/DiaBlaBitextMining.json new file mode 100644 index 0000000000..203af250d8 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/DiaBlaBitextMining.json @@ -0,0 +1,43 @@ +{ + "test": { + "num_samples": 11496, + "number_of_characters": 1029116, + "unique_pairs": 5644, + "min_sentence1_length": 1, + "average_sentence1_length": 47.947807933194156, + "max_sentence1_length": 250, + "unique_sentence1": 5572, + "min_sentence2_length": 1, 
+ "average_sentence2_length": 41.57167710508003, + "max_sentence2_length": 213, + "unique_sentence2": 5506, + "hf_subset_descriptive_stats": { + "fr-en": { + "num_samples": 5748, + "number_of_characters": 514558, + "unique_pairs": 5644, + "min_sentence1_length": 1, + "average_sentence1_length": 47.947807933194156, + "max_sentence1_length": 250, + "unique_sentence1": 5572, + "min_sentence2_length": 1, + "average_sentence2_length": 41.57167710508003, + "max_sentence2_length": 213, + "unique_sentence2": 5506 + }, + "en-fr": { + "num_samples": 5748, + "number_of_characters": 514558, + "unique_pairs": 5644, + "min_sentence1_length": 1, + "average_sentence1_length": 47.947807933194156, + "max_sentence1_length": 250, + "unique_sentence1": 5572, + "min_sentence2_length": 1, + "average_sentence2_length": 41.57167710508003, + "max_sentence2_length": 213, + "unique_sentence2": 5506 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/LinceMTBitextMining.json b/mteb/descriptive_stats/BitextMining/LinceMTBitextMining.json new file mode 100644 index 0000000000..464e4ea727 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/LinceMTBitextMining.json @@ -0,0 +1,30 @@ +{ + "train": { + "num_samples": 8059, + "number_of_characters": 945706, + "unique_pairs": 7546, + "min_sentence1_length": 1, + "average_sentence1_length": 56.28266534309468, + "max_sentence1_length": 1508, + "unique_sentence1": 6052, + "min_sentence2_length": 1, + "average_sentence2_length": 61.06514455887827, + "max_sentence2_length": 1881, + "unique_sentence2": 7389, + "hf_subset_descriptive_stats": { + "eng-eng_hin": { + "num_samples": 8059, + "number_of_characters": 945706, + "unique_pairs": 7546, + "min_sentence1_length": 1, + "average_sentence1_length": 56.28266534309468, + "max_sentence1_length": 1508, + "unique_sentence1": 6052, + "min_sentence2_length": 1, + "average_sentence2_length": 61.06514455887827, + "max_sentence2_length": 1881, + "unique_sentence2": 7389 + } 
+ } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NusaXBitextMining.json b/mteb/descriptive_stats/BitextMining/NusaXBitextMining.json new file mode 100644 index 0000000000..d04b27afce --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/NusaXBitextMining.json @@ -0,0 +1,160 @@ +{ + "train": { + "num_samples": 5500, + "number_of_characters": 1728596, + "unique_pairs": 5499, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 7, + "average_sentence2_length": 152.63018181818182, + "max_sentence2_length": 550, + "unique_sentence2": 5498, + "hf_subset_descriptive_stats": { + "eng-ace": { + "num_samples": 500, + "number_of_characters": 154722, + "unique_pairs": 500, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 20, + "average_sentence2_length": 147.784, + "max_sentence2_length": 550, + "unique_sentence2": 500 + }, + "eng-ban": { + "num_samples": 500, + "number_of_characters": 157380, + "unique_pairs": 500, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 22, + "average_sentence2_length": 153.1, + "max_sentence2_length": 491, + "unique_sentence2": 500 + }, + "eng-bbc": { + "num_samples": 500, + "number_of_characters": 159184, + "unique_pairs": 500, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 25, + "average_sentence2_length": 156.708, + "max_sentence2_length": 521, + "unique_sentence2": 500 + }, + "eng-bjn": { + "num_samples": 500, + "number_of_characters": 157328, + "unique_pairs": 500, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + 
"min_sentence2_length": 22, + "average_sentence2_length": 152.996, + "max_sentence2_length": 526, + "unique_sentence2": 500 + }, + "eng-bug": { + "num_samples": 500, + "number_of_characters": 161552, + "unique_pairs": 500, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 7, + "average_sentence2_length": 161.444, + "max_sentence2_length": 549, + "unique_sentence2": 500 + }, + "eng-ind": { + "num_samples": 500, + "number_of_characters": 157480, + "unique_pairs": 500, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 27, + "average_sentence2_length": 153.3, + "max_sentence2_length": 498, + "unique_sentence2": 500 + }, + "eng-jav": { + "num_samples": 500, + "number_of_characters": 155271, + "unique_pairs": 500, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 21, + "average_sentence2_length": 148.882, + "max_sentence2_length": 507, + "unique_sentence2": 500 + }, + "eng-mad": { + "num_samples": 500, + "number_of_characters": 157942, + "unique_pairs": 500, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 24, + "average_sentence2_length": 154.224, + "max_sentence2_length": 496, + "unique_sentence2": 500 + }, + "eng-min": { + "num_samples": 500, + "number_of_characters": 154912, + "unique_pairs": 500, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 23, + "average_sentence2_length": 148.164, + "max_sentence2_length": 500, + "unique_sentence2": 500 + }, + "eng-nij": { + "num_samples": 500, + "number_of_characters": 155800, + "unique_pairs": 500, + 
"min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 27, + "average_sentence2_length": 149.94, + "max_sentence2_length": 479, + "unique_sentence2": 500 + }, + "eng-sun": { + "num_samples": 500, + "number_of_characters": 157025, + "unique_pairs": 500, + "min_sentence1_length": 18, + "average_sentence1_length": 161.66, + "max_sentence1_length": 562, + "unique_sentence1": 500, + "min_sentence2_length": 7, + "average_sentence2_length": 152.39, + "max_sentence2_length": 493, + "unique_sentence2": 500 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/RomaTalesBitextMining.json b/mteb/descriptive_stats/BitextMining/RomaTalesBitextMining.json new file mode 100644 index 0000000000..ae0423df5a --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/RomaTalesBitextMining.json @@ -0,0 +1,30 @@ +{ + "test": { + "num_samples": 215, + "number_of_characters": 131478, + "unique_pairs": 215, + "min_sentence1_length": 13, + "average_sentence1_length": 316.8046511627907, + "max_sentence1_length": 8361, + "unique_sentence1": 215, + "min_sentence2_length": 8, + "average_sentence2_length": 294.72093023255815, + "max_sentence2_length": 8811, + "unique_sentence2": 215, + "hf_subset_descriptive_stats": { + "rom-hun": { + "num_samples": 215, + "number_of_characters": 131478, + "unique_pairs": 215, + "min_sentence1_length": 13, + "average_sentence1_length": 316.8046511627907, + "max_sentence1_length": 8361, + "unique_sentence1": 215, + "min_sentence2_length": 8, + "average_sentence2_length": 294.72093023255815, + "max_sentence2_length": 8811, + "unique_sentence2": 215 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/SRNCorpusBitextMining.json b/mteb/descriptive_stats/BitextMining/SRNCorpusBitextMining.json new file mode 100644 index 0000000000..cfba9580d6 --- /dev/null +++ 
b/mteb/descriptive_stats/BitextMining/SRNCorpusBitextMining.json @@ -0,0 +1,43 @@ +{ + "test": { + "num_samples": 512, + "number_of_characters": 56148, + "unique_pairs": 512, + "min_sentence1_length": 8, + "average_sentence1_length": 54.83203125, + "max_sentence1_length": 204, + "unique_sentence1": 512, + "min_sentence2_length": 8, + "average_sentence2_length": 54.83203125, + "max_sentence2_length": 204, + "unique_sentence2": 512, + "hf_subset_descriptive_stats": { + "srn_Latn-nld_Latn": { + "num_samples": 256, + "number_of_characters": 28074, + "unique_pairs": 256, + "min_sentence1_length": 8, + "average_sentence1_length": 50.3203125, + "max_sentence1_length": 204, + "unique_sentence1": 256, + "min_sentence2_length": 12, + "average_sentence2_length": 59.34375, + "max_sentence2_length": 194, + "unique_sentence2": 256 + }, + "nld_Latn-srn_Latn": { + "num_samples": 256, + "number_of_characters": 28074, + "unique_pairs": 256, + "min_sentence1_length": 12, + "average_sentence1_length": 59.34375, + "max_sentence1_length": 194, + "unique_sentence1": 256, + "min_sentence2_length": 8, + "average_sentence2_length": 50.3203125, + "max_sentence2_length": 204, + "unique_sentence2": 256 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/Tatoeba.json b/mteb/descriptive_stats/BitextMining/Tatoeba.json new file mode 100644 index 0000000000..7a96292b6b --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/Tatoeba.json @@ -0,0 +1,1473 @@ +{ + "test": { + "num_samples": 88877, + "number_of_characters": 5716305, + "unique_pairs": 88840, + "min_sentence1_length": 3, + "average_sentence1_length": 31.77314715843244, + "max_sentence1_length": 704, + "unique_sentence1": 88838, + "min_sentence2_length": 9, + "average_sentence2_length": 32.54388649481868, + "max_sentence2_length": 661, + "unique_sentence2": 69241, + "hf_subset_descriptive_stats": { + "sqi-eng": { + "num_samples": 1000, + "number_of_characters": 70260, + "unique_pairs": 1000, + 
"min_sentence1_length": 7, + "average_sentence1_length": 35.741, + "max_sentence1_length": 172, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 34.519, + "max_sentence2_length": 184, + "unique_sentence2": 1000 + }, + "fry-eng": { + "num_samples": 173, + "number_of_characters": 12129, + "unique_pairs": 173, + "min_sentence1_length": 4, + "average_sentence1_length": 34.884393063583815, + "max_sentence1_length": 242, + "unique_sentence1": 173, + "min_sentence2_length": 11, + "average_sentence2_length": 35.225433526011564, + "max_sentence2_length": 206, + "unique_sentence2": 173 + }, + "kur-eng": { + "num_samples": 410, + "number_of_characters": 24499, + "unique_pairs": 410, + "min_sentence1_length": 6, + "average_sentence1_length": 29.302439024390242, + "max_sentence1_length": 123, + "unique_sentence1": 410, + "min_sentence2_length": 10, + "average_sentence2_length": 30.451219512195124, + "max_sentence2_length": 144, + "unique_sentence2": 410 + }, + "tur-eng": { + "num_samples": 1000, + "number_of_characters": 68792, + "unique_pairs": 1000, + "min_sentence1_length": 11, + "average_sentence1_length": 34.516, + "max_sentence1_length": 142, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 34.276, + "max_sentence2_length": 127, + "unique_sentence2": 1000 + }, + "deu-eng": { + "num_samples": 1000, + "number_of_characters": 104754, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 56.318, + "max_sentence1_length": 415, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 48.436, + "max_sentence2_length": 335, + "unique_sentence2": 1000 + }, + "nld-eng": { + "num_samples": 1000, + "number_of_characters": 73204, + "unique_pairs": 1000, + "min_sentence1_length": 10, + "average_sentence1_length": 37.815, + "max_sentence1_length": 704, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 35.389, + 
"max_sentence2_length": 661, + "unique_sentence2": 1000 + }, + "ron-eng": { + "num_samples": 1000, + "number_of_characters": 70333, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_length": 35.843, + "max_sentence1_length": 241, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 34.49, + "max_sentence2_length": 277, + "unique_sentence2": 1000 + }, + "ang-eng": { + "num_samples": 134, + "number_of_characters": 14292, + "unique_pairs": 134, + "min_sentence1_length": 10, + "average_sentence1_length": 53.17164179104478, + "max_sentence1_length": 291, + "unique_sentence1": 134, + "min_sentence2_length": 11, + "average_sentence2_length": 53.485074626865675, + "max_sentence2_length": 270, + "unique_sentence2": 134 + }, + "ido-eng": { + "num_samples": 1000, + "number_of_characters": 65907, + "unique_pairs": 1000, + "min_sentence1_length": 11, + "average_sentence1_length": 32.765, + "max_sentence1_length": 341, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 33.142, + "max_sentence2_length": 334, + "unique_sentence2": 1000 + }, + "jav-eng": { + "num_samples": 205, + "number_of_characters": 13311, + "unique_pairs": 205, + "min_sentence1_length": 10, + "average_sentence1_length": 33.4390243902439, + "max_sentence1_length": 93, + "unique_sentence1": 205, + "min_sentence2_length": 12, + "average_sentence2_length": 31.492682926829268, + "max_sentence2_length": 80, + "unique_sentence2": 205 + }, + "isl-eng": { + "num_samples": 1000, + "number_of_characters": 75201, + "unique_pairs": 1000, + "min_sentence1_length": 11, + "average_sentence1_length": 38.543, + "max_sentence1_length": 177, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 36.658, + "max_sentence2_length": 166, + "unique_sentence2": 1000 + }, + "slv-eng": { + "num_samples": 823, + "number_of_characters": 52443, + "unique_pairs": 823, + "min_sentence1_length": 7, + 
"average_sentence1_length": 30.462940461725395, + "max_sentence1_length": 254, + "unique_sentence1": 823, + "min_sentence2_length": 11, + "average_sentence2_length": 33.2588092345079, + "max_sentence2_length": 321, + "unique_sentence2": 823 + }, + "cym-eng": { + "num_samples": 575, + "number_of_characters": 32514, + "unique_pairs": 575, + "min_sentence1_length": 7, + "average_sentence1_length": 29.577391304347827, + "max_sentence1_length": 293, + "unique_sentence1": 575, + "min_sentence2_length": 10, + "average_sentence2_length": 26.968695652173913, + "max_sentence2_length": 252, + "unique_sentence2": 575 + }, + "kaz-eng": { + "num_samples": 575, + "number_of_characters": 34015, + "unique_pairs": 575, + "min_sentence1_length": 4, + "average_sentence1_length": 28.74434782608696, + "max_sentence1_length": 210, + "unique_sentence1": 575, + "min_sentence2_length": 11, + "average_sentence2_length": 30.41217391304348, + "max_sentence2_length": 171, + "unique_sentence2": 575 + }, + "est-eng": { + "num_samples": 1000, + "number_of_characters": 58808, + "unique_pairs": 1000, + "min_sentence1_length": 5, + "average_sentence1_length": 29.076, + "max_sentence1_length": 176, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 29.732, + "max_sentence2_length": 171, + "unique_sentence2": 1000 + }, + "heb-eng": { + "num_samples": 1000, + "number_of_characters": 62885, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 27.045, + "max_sentence1_length": 131, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 35.84, + "max_sentence2_length": 169, + "unique_sentence2": 1000 + }, + "gla-eng": { + "num_samples": 829, + "number_of_characters": 54430, + "unique_pairs": 829, + "min_sentence1_length": 10, + "average_sentence1_length": 35.62967430639325, + "max_sentence1_length": 304, + "unique_sentence1": 829, + "min_sentence2_length": 11, + "average_sentence2_length": 
30.027744270205066, + "max_sentence2_length": 252, + "unique_sentence2": 829 + }, + "mar-eng": { + "num_samples": 1000, + "number_of_characters": 56010, + "unique_pairs": 1000, + "min_sentence1_length": 5, + "average_sentence1_length": 28.124, + "max_sentence1_length": 146, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 27.886, + "max_sentence2_length": 141, + "unique_sentence2": 1000 + }, + "lat-eng": { + "num_samples": 1000, + "number_of_characters": 69629, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 33.661, + "max_sentence1_length": 377, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 35.968, + "max_sentence2_length": 445, + "unique_sentence2": 1000 + }, + "bel-eng": { + "num_samples": 1000, + "number_of_characters": 70542, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 34.52, + "max_sentence1_length": 419, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 36.022, + "max_sentence2_length": 394, + "unique_sentence2": 1000 + }, + "pms-eng": { + "num_samples": 525, + "number_of_characters": 37360, + "unique_pairs": 525, + "min_sentence1_length": 6, + "average_sentence1_length": 36.7047619047619, + "max_sentence1_length": 292, + "unique_sentence1": 525, + "min_sentence2_length": 12, + "average_sentence2_length": 34.457142857142856, + "max_sentence2_length": 286, + "unique_sentence2": 525 + }, + "gle-eng": { + "num_samples": 1000, + "number_of_characters": 56238, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 29.778, + "max_sentence1_length": 236, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 26.46, + "max_sentence2_length": 252, + "unique_sentence2": 1000 + }, + "pes-eng": { + "num_samples": 1000, + "number_of_characters": 68001, + "unique_pairs": 1000, + "min_sentence1_length": 5, + 
"average_sentence1_length": 33.158, + "max_sentence1_length": 145, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 34.843, + "max_sentence2_length": 172, + "unique_sentence2": 1000 + }, + "nob-eng": { + "num_samples": 1000, + "number_of_characters": 67672, + "unique_pairs": 1000, + "min_sentence1_length": 11, + "average_sentence1_length": 33.66, + "max_sentence1_length": 235, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 34.012, + "max_sentence2_length": 223, + "unique_sentence2": 1000 + }, + "bul-eng": { + "num_samples": 1000, + "number_of_characters": 73434, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 36.657, + "max_sentence1_length": 164, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 36.777, + "max_sentence2_length": 187, + "unique_sentence2": 1000 + }, + "cbk-eng": { + "num_samples": 1000, + "number_of_characters": 61350, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 32.182, + "max_sentence1_length": 384, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 29.168, + "max_sentence2_length": 362, + "unique_sentence2": 1000 + }, + "hun-eng": { + "num_samples": 1000, + "number_of_characters": 66397, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 32.846, + "max_sentence1_length": 125, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 33.551, + "max_sentence2_length": 142, + "unique_sentence2": 1000 + }, + "uig-eng": { + "num_samples": 1000, + "number_of_characters": 63764, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 31.607, + "max_sentence1_length": 99, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 32.157, + "max_sentence2_length": 121, + "unique_sentence2": 1000 + }, + "rus-eng": { 
+ "num_samples": 1000, + "number_of_characters": 66659, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 33.013, + "max_sentence1_length": 170, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 33.646, + "max_sentence2_length": 199, + "unique_sentence2": 1000 + }, + "spa-eng": { + "num_samples": 1000, + "number_of_characters": 71211, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 36.438, + "max_sentence1_length": 285, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 34.773, + "max_sentence2_length": 253, + "unique_sentence2": 1000 + }, + "hye-eng": { + "num_samples": 742, + "number_of_characters": 48984, + "unique_pairs": 742, + "min_sentence1_length": 5, + "average_sentence1_length": 33.64824797843666, + "max_sentence1_length": 161, + "unique_sentence1": 742, + "min_sentence2_length": 12, + "average_sentence2_length": 32.367924528301884, + "max_sentence2_length": 137, + "unique_sentence2": 742 + }, + "tel-eng": { + "num_samples": 234, + "number_of_characters": 13687, + "unique_pairs": 234, + "min_sentence1_length": 13, + "average_sentence1_length": 29.405982905982906, + "max_sentence1_length": 80, + "unique_sentence1": 234, + "min_sentence2_length": 12, + "average_sentence2_length": 29.085470085470085, + "max_sentence2_length": 87, + "unique_sentence2": 234 + }, + "afr-eng": { + "num_samples": 1000, + "number_of_characters": 59558, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 30.51, + "max_sentence1_length": 168, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 29.048, + "max_sentence2_length": 198, + "unique_sentence2": 1000 + }, + "mon-eng": { + "num_samples": 440, + "number_of_characters": 31665, + "unique_pairs": 440, + "min_sentence1_length": 10, + "average_sentence1_length": 36.247727272727275, + "max_sentence1_length": 178, + 
"unique_sentence1": 440, + "min_sentence2_length": 12, + "average_sentence2_length": 35.71818181818182, + "max_sentence2_length": 171, + "unique_sentence2": 440 + }, + "arz-eng": { + "num_samples": 477, + "number_of_characters": 23588, + "unique_pairs": 477, + "min_sentence1_length": 6, + "average_sentence1_length": 21.366876310272538, + "max_sentence1_length": 153, + "unique_sentence1": 477, + "min_sentence2_length": 9, + "average_sentence2_length": 28.08385744234801, + "max_sentence2_length": 194, + "unique_sentence2": 477 + }, + "hrv-eng": { + "num_samples": 1000, + "number_of_characters": 63926, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_length": 30.853, + "max_sentence1_length": 149, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 33.073, + "max_sentence2_length": 154, + "unique_sentence2": 1000 + }, + "nov-eng": { + "num_samples": 257, + "number_of_characters": 13706, + "unique_pairs": 257, + "min_sentence1_length": 10, + "average_sentence1_length": 26.80933852140078, + "max_sentence1_length": 112, + "unique_sentence1": 257, + "min_sentence2_length": 10, + "average_sentence2_length": 26.52140077821012, + "max_sentence2_length": 98, + "unique_sentence2": 257 + }, + "gsw-eng": { + "num_samples": 117, + "number_of_characters": 4602, + "unique_pairs": 117, + "min_sentence1_length": 7, + "average_sentence1_length": 19.65811965811966, + "max_sentence1_length": 37, + "unique_sentence1": 117, + "min_sentence2_length": 11, + "average_sentence2_length": 19.675213675213676, + "max_sentence2_length": 42, + "unique_sentence2": 117 + }, + "nds-eng": { + "num_samples": 1000, + "number_of_characters": 62504, + "unique_pairs": 1000, + "min_sentence1_length": 4, + "average_sentence1_length": 31.962, + "max_sentence1_length": 97, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 30.542, + "max_sentence2_length": 88, + "unique_sentence2": 1000 + }, + "ukr-eng": { + 
"num_samples": 1000, + "number_of_characters": 59074, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_length": 29.231, + "max_sentence1_length": 130, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 29.843, + "max_sentence2_length": 126, + "unique_sentence2": 1000 + }, + "uzb-eng": { + "num_samples": 428, + "number_of_characters": 22030, + "unique_pairs": 428, + "min_sentence1_length": 7, + "average_sentence1_length": 25.296728971962615, + "max_sentence1_length": 225, + "unique_sentence1": 428, + "min_sentence2_length": 12, + "average_sentence2_length": 26.175233644859812, + "max_sentence2_length": 207, + "unique_sentence2": 428 + }, + "lit-eng": { + "num_samples": 1000, + "number_of_characters": 63164, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 31.3, + "max_sentence1_length": 103, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 31.864, + "max_sentence2_length": 109, + "unique_sentence2": 1000 + }, + "ina-eng": { + "num_samples": 1000, + "number_of_characters": 87440, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 45.782, + "max_sentence1_length": 578, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 41.658, + "max_sentence2_length": 522, + "unique_sentence2": 1000 + }, + "lfn-eng": { + "num_samples": 1000, + "number_of_characters": 64888, + "unique_pairs": 1000, + "min_sentence1_length": 4, + "average_sentence1_length": 32.594, + "max_sentence1_length": 299, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 32.294, + "max_sentence2_length": 323, + "unique_sentence2": 1000 + }, + "zsm-eng": { + "num_samples": 1000, + "number_of_characters": 79196, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 42.791, + "max_sentence1_length": 583, + "unique_sentence1": 1000, + 
"min_sentence2_length": 12, + "average_sentence2_length": 36.405, + "max_sentence2_length": 450, + "unique_sentence2": 1000 + }, + "ita-eng": { + "num_samples": 1000, + "number_of_characters": 65489, + "unique_pairs": 1000, + "min_sentence1_length": 11, + "average_sentence1_length": 33.896, + "max_sentence1_length": 164, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 31.593, + "max_sentence2_length": 146, + "unique_sentence2": 1000 + }, + "cmn-eng": { + "num_samples": 1000, + "number_of_characters": 47475, + "unique_pairs": 1000, + "min_sentence1_length": 3, + "average_sentence1_length": 11.962, + "max_sentence1_length": 66, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 35.513, + "max_sentence2_length": 185, + "unique_sentence2": 1000 + }, + "lvs-eng": { + "num_samples": 1000, + "number_of_characters": 60776, + "unique_pairs": 1000, + "min_sentence1_length": 4, + "average_sentence1_length": 30.508, + "max_sentence1_length": 327, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 30.268, + "max_sentence2_length": 252, + "unique_sentence2": 1000 + }, + "glg-eng": { + "num_samples": 1000, + "number_of_characters": 75443, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 37.593, + "max_sentence1_length": 425, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 37.85, + "max_sentence2_length": 360, + "unique_sentence2": 1000 + }, + "ceb-eng": { + "num_samples": 600, + "number_of_characters": 36136, + "unique_pairs": 600, + "min_sentence1_length": 6, + "average_sentence1_length": 30.416666666666668, + "max_sentence1_length": 153, + "unique_sentence1": 600, + "min_sentence2_length": 11, + "average_sentence2_length": 29.81, + "max_sentence2_length": 151, + "unique_sentence2": 600 + }, + "bre-eng": { + "num_samples": 1000, + "number_of_characters": 50550, + "unique_pairs": 1000, + 
"min_sentence1_length": 4, + "average_sentence1_length": 26.237, + "max_sentence1_length": 348, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 24.313, + "max_sentence2_length": 286, + "unique_sentence2": 1000 + }, + "ben-eng": { + "num_samples": 1000, + "number_of_characters": 50985, + "unique_pairs": 1000, + "min_sentence1_length": 5, + "average_sentence1_length": 25.272, + "max_sentence1_length": 94, + "unique_sentence1": 1000, + "min_sentence2_length": 9, + "average_sentence2_length": 25.713, + "max_sentence2_length": 97, + "unique_sentence2": 1000 + }, + "swg-eng": { + "num_samples": 112, + "number_of_characters": 6667, + "unique_pairs": 112, + "min_sentence1_length": 10, + "average_sentence1_length": 30.866071428571427, + "max_sentence1_length": 94, + "unique_sentence1": 112, + "min_sentence2_length": 12, + "average_sentence2_length": 28.660714285714285, + "max_sentence2_length": 92, + "unique_sentence2": 112 + }, + "arq-eng": { + "num_samples": 911, + "number_of_characters": 57287, + "unique_pairs": 911, + "min_sentence1_length": 6, + "average_sentence1_length": 27.972557628979143, + "max_sentence1_length": 138, + "unique_sentence1": 911, + "min_sentence2_length": 11, + "average_sentence2_length": 34.911086717892424, + "max_sentence2_length": 173, + "unique_sentence2": 911 + }, + "kab-eng": { + "num_samples": 1000, + "number_of_characters": 59944, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_length": 29.677, + "max_sentence1_length": 133, + "unique_sentence1": 1000, + "min_sentence2_length": 9, + "average_sentence2_length": 30.267, + "max_sentence2_length": 153, + "unique_sentence2": 1000 + }, + "fra-eng": { + "num_samples": 1000, + "number_of_characters": 78704, + "unique_pairs": 1000, + "min_sentence1_length": 11, + "average_sentence1_length": 42.418, + "max_sentence1_length": 496, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 36.286, + 
"max_sentence2_length": 445, + "unique_sentence2": 1000 + }, + "por-eng": { + "num_samples": 1000, + "number_of_characters": 73977, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 38.125, + "max_sentence1_length": 414, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 35.852, + "max_sentence2_length": 445, + "unique_sentence2": 1000 + }, + "tat-eng": { + "num_samples": 1000, + "number_of_characters": 66528, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 32.811, + "max_sentence1_length": 313, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 33.717, + "max_sentence2_length": 354, + "unique_sentence2": 1000 + }, + "oci-eng": { + "num_samples": 1000, + "number_of_characters": 60636, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 31.036, + "max_sentence1_length": 155, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 29.6, + "max_sentence2_length": 144, + "unique_sentence2": 1000 + }, + "pol-eng": { + "num_samples": 1000, + "number_of_characters": 68740, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_length": 34.105, + "max_sentence1_length": 175, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 34.635, + "max_sentence2_length": 179, + "unique_sentence2": 1000 + }, + "war-eng": { + "num_samples": 1000, + "number_of_characters": 70934, + "unique_pairs": 1000, + "min_sentence1_length": 10, + "average_sentence1_length": 37.586, + "max_sentence1_length": 229, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 33.348, + "max_sentence2_length": 247, + "unique_sentence2": 1000 + }, + "aze-eng": { + "num_samples": 1000, + "number_of_characters": 55410, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_length": 27.775, + 
"max_sentence1_length": 124, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 27.635, + "max_sentence2_length": 116, + "unique_sentence2": 1000 + }, + "vie-eng": { + "num_samples": 1000, + "number_of_characters": 78576, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 39.893, + "max_sentence1_length": 162, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 38.683, + "max_sentence2_length": 183, + "unique_sentence2": 1000 + }, + "nno-eng": { + "num_samples": 1000, + "number_of_characters": 67792, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 33.498, + "max_sentence1_length": 172, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 34.294, + "max_sentence2_length": 193, + "unique_sentence2": 1000 + }, + "cha-eng": { + "num_samples": 137, + "number_of_characters": 10957, + "unique_pairs": 137, + "min_sentence1_length": 12, + "average_sentence1_length": 39.54014598540146, + "max_sentence1_length": 174, + "unique_sentence1": 137, + "min_sentence2_length": 12, + "average_sentence2_length": 40.43795620437956, + "max_sentence2_length": 179, + "unique_sentence2": 137 + }, + "mhr-eng": { + "num_samples": 1000, + "number_of_characters": 60973, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 30.136, + "max_sentence1_length": 127, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 30.837, + "max_sentence2_length": 164, + "unique_sentence2": 1000 + }, + "dan-eng": { + "num_samples": 1000, + "number_of_characters": 69988, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 35.117, + "max_sentence1_length": 268, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 34.871, + "max_sentence2_length": 279, + "unique_sentence2": 1000 + }, + "ell-eng": { + "num_samples": 
1000, + "number_of_characters": 55982, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_length": 29.225, + "max_sentence1_length": 98, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 26.757, + "max_sentence2_length": 116, + "unique_sentence2": 1000 + }, + "amh-eng": { + "num_samples": 168, + "number_of_characters": 6146, + "unique_pairs": 168, + "min_sentence1_length": 7, + "average_sentence1_length": 14.25, + "max_sentence1_length": 44, + "unique_sentence1": 168, + "min_sentence2_length": 10, + "average_sentence2_length": 22.333333333333332, + "max_sentence2_length": 63, + "unique_sentence2": 168 + }, + "pam-eng": { + "num_samples": 1000, + "number_of_characters": 59952, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 30.177, + "max_sentence1_length": 84, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 29.775, + "max_sentence2_length": 75, + "unique_sentence2": 1000 + }, + "hsb-eng": { + "num_samples": 483, + "number_of_characters": 27450, + "unique_pairs": 483, + "min_sentence1_length": 8, + "average_sentence1_length": 27.428571428571427, + "max_sentence1_length": 110, + "unique_sentence1": 483, + "min_sentence2_length": 10, + "average_sentence2_length": 29.403726708074533, + "max_sentence2_length": 144, + "unique_sentence2": 483 + }, + "srp-eng": { + "num_samples": 1000, + "number_of_characters": 64746, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_length": 31.718, + "max_sentence1_length": 359, + "unique_sentence1": 1000, + "min_sentence2_length": 13, + "average_sentence2_length": 33.028, + "max_sentence2_length": 384, + "unique_sentence2": 1000 + }, + "epo-eng": { + "num_samples": 1000, + "number_of_characters": 83966, + "unique_pairs": 1000, + "min_sentence1_length": 10, + "average_sentence1_length": 42.135, + "max_sentence1_length": 433, + "unique_sentence1": 1000, + "min_sentence2_length": 
13, + "average_sentence2_length": 41.831, + "max_sentence2_length": 445, + "unique_sentence2": 1000 + }, + "kzj-eng": { + "num_samples": 1000, + "number_of_characters": 73297, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 39.279, + "max_sentence1_length": 502, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 34.018, + "max_sentence2_length": 450, + "unique_sentence2": 1000 + }, + "awa-eng": { + "num_samples": 231, + "number_of_characters": 9405, + "unique_pairs": 231, + "min_sentence1_length": 7, + "average_sentence1_length": 19.1991341991342, + "max_sentence1_length": 50, + "unique_sentence1": 231, + "min_sentence2_length": 12, + "average_sentence2_length": 21.515151515151516, + "max_sentence2_length": 53, + "unique_sentence2": 231 + }, + "fao-eng": { + "num_samples": 262, + "number_of_characters": 15853, + "unique_pairs": 262, + "min_sentence1_length": 12, + "average_sentence1_length": 30.83587786259542, + "max_sentence1_length": 481, + "unique_sentence1": 262, + "min_sentence2_length": 11, + "average_sentence2_length": 29.671755725190838, + "max_sentence2_length": 252, + "unique_sentence2": 262 + }, + "mal-eng": { + "num_samples": 687, + "number_of_characters": 48623, + "unique_pairs": 687, + "min_sentence1_length": 12, + "average_sentence1_length": 39.09461426491994, + "max_sentence1_length": 165, + "unique_sentence1": 687, + "min_sentence2_length": 11, + "average_sentence2_length": 31.68122270742358, + "max_sentence2_length": 198, + "unique_sentence2": 687 + }, + "ile-eng": { + "num_samples": 1000, + "number_of_characters": 50569, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 25.263, + "max_sentence1_length": 81, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 25.306, + "max_sentence2_length": 84, + "unique_sentence2": 1000 + }, + "bos-eng": { + "num_samples": 354, + "number_of_characters": 17438, + 
"unique_pairs": 354, + "min_sentence1_length": 4, + "average_sentence1_length": 23.83050847457627, + "max_sentence1_length": 165, + "unique_sentence1": 354, + "min_sentence2_length": 10, + "average_sentence2_length": 25.429378531073446, + "max_sentence2_length": 171, + "unique_sentence2": 354 + }, + "cor-eng": { + "num_samples": 1000, + "number_of_characters": 44659, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 22.87, + "max_sentence1_length": 127, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 21.789, + "max_sentence2_length": 80, + "unique_sentence2": 1000 + }, + "cat-eng": { + "num_samples": 1000, + "number_of_characters": 73024, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 36.799, + "max_sentence1_length": 214, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 36.225, + "max_sentence2_length": 183, + "unique_sentence2": 1000 + }, + "eus-eng": { + "num_samples": 1000, + "number_of_characters": 66063, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 34.058, + "max_sentence1_length": 137, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 32.005, + "max_sentence2_length": 128, + "unique_sentence2": 1000 + }, + "yue-eng": { + "num_samples": 1000, + "number_of_characters": 51576, + "unique_pairs": 1000, + "min_sentence1_length": 4, + "average_sentence1_length": 12.93, + "max_sentence1_length": 61, + "unique_sentence1": 1000, + "min_sentence2_length": 9, + "average_sentence2_length": 38.646, + "max_sentence2_length": 226, + "unique_sentence2": 1000 + }, + "swe-eng": { + "num_samples": 1000, + "number_of_characters": 60740, + "unique_pairs": 1000, + "min_sentence1_length": 10, + "average_sentence1_length": 30.712, + "max_sentence1_length": 204, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 30.028, + 
"max_sentence2_length": 205, + "unique_sentence2": 1000 + }, + "dtp-eng": { + "num_samples": 1000, + "number_of_characters": 65031, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_length": 34.895, + "max_sentence1_length": 584, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 30.136, + "max_sentence2_length": 450, + "unique_sentence2": 1000 + }, + "kat-eng": { + "num_samples": 746, + "number_of_characters": 40638, + "unique_pairs": 746, + "min_sentence1_length": 6, + "average_sentence1_length": 26.071045576407506, + "max_sentence1_length": 199, + "unique_sentence1": 746, + "min_sentence2_length": 11, + "average_sentence2_length": 28.403485254691688, + "max_sentence2_length": 171, + "unique_sentence2": 746 + }, + "jpn-eng": { + "num_samples": 1000, + "number_of_characters": 57904, + "unique_pairs": 1000, + "min_sentence1_length": 4, + "average_sentence1_length": 18.668, + "max_sentence1_length": 65, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 39.236, + "max_sentence2_length": 173, + "unique_sentence2": 1000 + }, + "csb-eng": { + "num_samples": 253, + "number_of_characters": 16058, + "unique_pairs": 253, + "min_sentence1_length": 11, + "average_sentence1_length": 31.897233201581027, + "max_sentence1_length": 93, + "unique_sentence1": 253, + "min_sentence2_length": 13, + "average_sentence2_length": 31.57312252964427, + "max_sentence2_length": 82, + "unique_sentence2": 253 + }, + "xho-eng": { + "num_samples": 142, + "number_of_characters": 7427, + "unique_pairs": 142, + "min_sentence1_length": 6, + "average_sentence1_length": 25.838028169014084, + "max_sentence1_length": 101, + "unique_sentence1": 142, + "min_sentence2_length": 12, + "average_sentence2_length": 26.464788732394368, + "max_sentence2_length": 89, + "unique_sentence2": 142 + }, + "orv-eng": { + "num_samples": 835, + "number_of_characters": 44790, + "unique_pairs": 835, + "min_sentence1_length": 6, + 
"average_sentence1_length": 24.861077844311378, + "max_sentence1_length": 346, + "unique_sentence1": 835, + "min_sentence2_length": 10, + "average_sentence2_length": 28.779640718562874, + "max_sentence2_length": 286, + "unique_sentence2": 835 + }, + "ind-eng": { + "num_samples": 1000, + "number_of_characters": 74844, + "unique_pairs": 1000, + "min_sentence1_length": 5, + "average_sentence1_length": 39.969, + "max_sentence1_length": 335, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 34.875, + "max_sentence2_length": 361, + "unique_sentence2": 1000 + }, + "tuk-eng": { + "num_samples": 203, + "number_of_characters": 8786, + "unique_pairs": 203, + "min_sentence1_length": 7, + "average_sentence1_length": 20.70935960591133, + "max_sentence1_length": 56, + "unique_sentence1": 203, + "min_sentence2_length": 10, + "average_sentence2_length": 22.571428571428573, + "max_sentence2_length": 67, + "unique_sentence2": 203 + }, + "max-eng": { + "num_samples": 284, + "number_of_characters": 17672, + "unique_pairs": 284, + "min_sentence1_length": 9, + "average_sentence1_length": 33.313380281690144, + "max_sentence1_length": 129, + "unique_sentence1": 284, + "min_sentence2_length": 12, + "average_sentence2_length": 28.911971830985916, + "max_sentence2_length": 102, + "unique_sentence2": 284 + }, + "swh-eng": { + "num_samples": 390, + "number_of_characters": 20466, + "unique_pairs": 390, + "min_sentence1_length": 6, + "average_sentence1_length": 24.73076923076923, + "max_sentence1_length": 284, + "unique_sentence1": 390, + "min_sentence2_length": 10, + "average_sentence2_length": 27.746153846153845, + "max_sentence2_length": 280, + "unique_sentence2": 390 + }, + "hin-eng": { + "num_samples": 1000, + "number_of_characters": 68646, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 34.559, + "max_sentence1_length": 166, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 34.087, 
+ "max_sentence2_length": 171, + "unique_sentence2": 1000 + }, + "dsb-eng": { + "num_samples": 479, + "number_of_characters": 27354, + "unique_pairs": 479, + "min_sentence1_length": 8, + "average_sentence1_length": 27.958246346555324, + "max_sentence1_length": 84, + "unique_sentence1": 479, + "min_sentence2_length": 10, + "average_sentence2_length": 29.1482254697286, + "max_sentence2_length": 93, + "unique_sentence2": 479 + }, + "ber-eng": { + "num_samples": 1000, + "number_of_characters": 61535, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 31.421, + "max_sentence1_length": 145, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "average_sentence2_length": 30.114, + "max_sentence2_length": 121, + "unique_sentence2": 1000 + }, + "tam-eng": { + "num_samples": 307, + "number_of_characters": 20077, + "unique_pairs": 307, + "min_sentence1_length": 10, + "average_sentence1_length": 36.44625407166124, + "max_sentence1_length": 110, + "unique_sentence1": 307, + "min_sentence2_length": 11, + "average_sentence2_length": 28.95114006514658, + "max_sentence2_length": 132, + "unique_sentence2": 307 + }, + "slk-eng": { + "num_samples": 1000, + "number_of_characters": 58261, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 27.835, + "max_sentence1_length": 172, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 30.426, + "max_sentence2_length": 184, + "unique_sentence2": 1000 + }, + "tgl-eng": { + "num_samples": 1000, + "number_of_characters": 68135, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 36.503, + "max_sentence1_length": 138, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 31.632, + "max_sentence2_length": 98, + "unique_sentence2": 1000 + }, + "ast-eng": { + "num_samples": 127, + "number_of_characters": 9134, + "unique_pairs": 127, + "min_sentence1_length": 6, + 
"average_sentence1_length": 35.53543307086614, + "max_sentence1_length": 138, + "unique_sentence1": 127, + "min_sentence2_length": 12, + "average_sentence2_length": 36.38582677165354, + "max_sentence2_length": 144, + "unique_sentence2": 127 + }, + "mkd-eng": { + "num_samples": 1000, + "number_of_characters": 60377, + "unique_pairs": 1000, + "min_sentence1_length": 7, + "average_sentence1_length": 29.865, + "max_sentence1_length": 128, + "unique_sentence1": 1000, + "min_sentence2_length": 13, + "average_sentence2_length": 30.512, + "max_sentence2_length": 137, + "unique_sentence2": 1000 + }, + "khm-eng": { + "num_samples": 722, + "number_of_characters": 39209, + "unique_pairs": 722, + "min_sentence1_length": 5, + "average_sentence1_length": 26.627423822714682, + "max_sentence1_length": 111, + "unique_sentence1": 722, + "min_sentence2_length": 10, + "average_sentence2_length": 27.678670360110804, + "max_sentence2_length": 149, + "unique_sentence2": 722 + }, + "ces-eng": { + "num_samples": 1000, + "number_of_characters": 61206, + "unique_pairs": 1000, + "min_sentence1_length": 5, + "average_sentence1_length": 29.098, + "max_sentence1_length": 151, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 32.108, + "max_sentence2_length": 153, + "unique_sentence2": 1000 + }, + "tzl-eng": { + "num_samples": 104, + "number_of_characters": 4091, + "unique_pairs": 104, + "min_sentence1_length": 7, + "average_sentence1_length": 18.66346153846154, + "max_sentence1_length": 52, + "unique_sentence1": 104, + "min_sentence2_length": 11, + "average_sentence2_length": 20.673076923076923, + "max_sentence2_length": 48, + "unique_sentence2": 104 + }, + "urd-eng": { + "num_samples": 1000, + "number_of_characters": 63925, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "average_sentence1_length": 32.04, + "max_sentence1_length": 157, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 31.885, + 
"max_sentence2_length": 171, + "unique_sentence2": 1000 + }, + "ara-eng": { + "num_samples": 1000, + "number_of_characters": 53640, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_length": 24.576, + "max_sentence1_length": 144, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 29.064, + "max_sentence2_length": 150, + "unique_sentence2": 1000 + }, + "kor-eng": { + "num_samples": 1000, + "number_of_characters": 53674, + "unique_pairs": 1000, + "min_sentence1_length": 3, + "average_sentence1_length": 18.592, + "max_sentence1_length": 120, + "unique_sentence1": 1000, + "min_sentence2_length": 11, + "average_sentence2_length": 35.082, + "max_sentence2_length": 201, + "unique_sentence2": 1000 + }, + "yid-eng": { + "num_samples": 848, + "number_of_characters": 57110, + "unique_pairs": 848, + "min_sentence1_length": 9, + "average_sentence1_length": 34.83372641509434, + "max_sentence1_length": 196, + "unique_sentence1": 848, + "min_sentence2_length": 11, + "average_sentence2_length": 32.512971698113205, + "max_sentence2_length": 194, + "unique_sentence2": 848 + }, + "fin-eng": { + "num_samples": 1000, + "number_of_characters": 73694, + "unique_pairs": 1000, + "min_sentence1_length": 9, + "average_sentence1_length": 37.884, + "max_sentence1_length": 377, + "unique_sentence1": 1000, + "min_sentence2_length": 9, + "average_sentence2_length": 35.81, + "max_sentence2_length": 302, + "unique_sentence2": 1000 + }, + "tha-eng": { + "num_samples": 548, + "number_of_characters": 33871, + "unique_pairs": 548, + "min_sentence1_length": 6, + "average_sentence1_length": 28.173357664233578, + "max_sentence1_length": 194, + "unique_sentence1": 548, + "min_sentence2_length": 11, + "average_sentence2_length": 33.63503649635037, + "max_sentence2_length": 248, + "unique_sentence2": 548 + }, + "wuu-eng": { + "num_samples": 1000, + "number_of_characters": 51238, + "unique_pairs": 1000, + "min_sentence1_length": 4, + 
"average_sentence1_length": 12.857, + "max_sentence1_length": 67, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 38.381, + "max_sentence2_length": 175, + "unique_sentence2": 1000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/AJGT.json b/mteb/descriptive_stats/Classification/AJGT.json new file mode 100644 index 0000000000..52137af53b --- /dev/null +++ b/mteb/descriptive_stats/Classification/AJGT.json @@ -0,0 +1,20 @@ +{ + "train": { + "num_samples": 1800, + "number_of_characters": 84249, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 46.805, + "max_text_length": 864, + "unique_text": 1800, + "unique_labels": 2, + "labels": { + "1": { + "count": 900 + }, + "0": { + "count": 900 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/AfriSentiClassification.json b/mteb/descriptive_stats/Classification/AfriSentiClassification.json new file mode 100644 index 0000000000..2a458bd3d0 --- /dev/null +++ b/mteb/descriptive_stats/Classification/AfriSentiClassification.json @@ -0,0 +1,543 @@ +{ + "test": { + "num_samples": 18222, + "number_of_characters": 1378570, + "number_texts_intersect_with_train": 595, + "min_text_length": 6, + "average_text_length": 75.65415431895511, + "max_text_length": 414, + "unique_text": 18222, + "unique_labels": 3, + "labels": { + "0": { + "count": 9206 + }, + "2": { + "count": 3876 + }, + "1": { + "count": 5140 + } + }, + "hf_subset_descriptive_stats": { + "amh": { + "num_samples": 1999, + "number_of_characters": 161648, + "number_texts_intersect_with_train": 0, + "min_text_length": 11, + "average_text_length": 80.86443221610806, + "max_text_length": 142, + "unique_text": 1999, + "unique_labels": 3, + "labels": { + "0": { + "count": 438 + }, + "2": { + "count": 1337 + }, + "1": { + "count": 224 + } + } + }, + "arq": { + "num_samples": 958, + "number_of_characters": 63912, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 11, + "average_text_length": 66.71398747390397, + "max_text_length": 150, + "unique_text": 958, + "unique_labels": 3, + "labels": { + "0": { + "count": 329 + }, + "1": { + "count": 154 + }, + "2": { + "count": 475 + } + } + }, + "ary": { + "num_samples": 2048, + "number_of_characters": 154042, + "number_texts_intersect_with_train": 0, + "min_text_length": 10, + "average_text_length": 75.2158203125, + "max_text_length": 319, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "0": { + "count": 798 + }, + "2": { + "count": 590 + }, + "1": { + "count": 660 + } + } + }, + "hau": { + "num_samples": 2048, + "number_of_characters": 137138, + "number_texts_intersect_with_train": 0, + "min_text_length": 11, + "average_text_length": 66.9619140625, + "max_text_length": 273, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1755 + }, + "1": { + "count": 293 + } + } + }, + "ibo": { + "num_samples": 2048, + "number_of_characters": 103924, + "number_texts_intersect_with_train": 0, + "min_text_length": 9, + "average_text_length": 50.744140625, + "max_text_length": 269, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1118 + }, + "1": { + "count": 930 + } + } + }, + "kin": { + "num_samples": 1026, + "number_of_characters": 113162, + "number_texts_intersect_with_train": 29, + "min_text_length": 21, + "average_text_length": 110.2943469785575, + "max_text_length": 414, + "unique_text": 1026, + "unique_labels": 3, + "labels": { + "1": { + "count": 393 + }, + "2": { + "count": 355 + }, + "0": { + "count": 278 + } + } + }, + "por": { + "num_samples": 2048, + "number_of_characters": 161831, + "number_texts_intersect_with_train": 0, + "min_text_length": 17, + "average_text_length": 79.01904296875, + "max_text_length": 297, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "2": { + "count": 376 + }, + "1": { + "count": 1292 + }, + "0": { + "count": 380 
+ } + } + }, + "pcm": { + "num_samples": 2048, + "number_of_characters": 170833, + "number_texts_intersect_with_train": 371, + "min_text_length": 8, + "average_text_length": 83.41455078125, + "max_text_length": 276, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "0": { + "count": 1397 + }, + "1": { + "count": 431 + }, + "2": { + "count": 220 + } + } + }, + "swa": { + "num_samples": 748, + "number_of_characters": 83744, + "number_texts_intersect_with_train": 0, + "min_text_length": 10, + "average_text_length": 111.9572192513369, + "max_text_length": 274, + "unique_text": 748, + "unique_labels": 3, + "labels": { + "0": { + "count": 224 + }, + "1": { + "count": 444 + }, + "2": { + "count": 80 + } + } + }, + "twi": { + "num_samples": 949, + "number_of_characters": 41424, + "number_texts_intersect_with_train": 195, + "min_text_length": 11, + "average_text_length": 43.65015806111697, + "max_text_length": 271, + "unique_text": 949, + "unique_labels": 3, + "labels": { + "0": { + "count": 450 + }, + "2": { + "count": 353 + }, + "1": { + "count": 146 + } + } + }, + "tso": { + "num_samples": 254, + "number_of_characters": 13844, + "number_texts_intersect_with_train": 0, + "min_text_length": 7, + "average_text_length": 54.503937007874015, + "max_text_length": 229, + "unique_text": 254, + "unique_labels": 3, + "labels": { + "2": { + "count": 90 + }, + "0": { + "count": 121 + }, + "1": { + "count": 43 + } + } + }, + "yor": { + "num_samples": 2048, + "number_of_characters": 173068, + "number_texts_intersect_with_train": 0, + "min_text_length": 6, + "average_text_length": 84.505859375, + "max_text_length": 266, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1918 + }, + "1": { + "count": 130 + } + } + } + } + }, + "train": { + "num_samples": 63685, + "number_of_characters": 5446582, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 85.52378111015153, + "max_text_length": 771, + "unique_text": 
62635, + "unique_labels": 3, + "labels": { + "2": { + "count": 20108 + }, + "1": { + "count": 22794 + }, + "0": { + "count": 20783 + } + }, + "hf_subset_descriptive_stats": { + "amh": { + "num_samples": 5984, + "number_of_characters": 427540, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 71.44719251336899, + "max_text_length": 146, + "unique_text": 5983, + "unique_labels": 3, + "labels": { + "2": { + "count": 1548 + }, + "1": { + "count": 3104 + }, + "0": { + "count": 1332 + } + } + }, + "arq": { + "num_samples": 1651, + "number_of_characters": 105426, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 63.855844942459115, + "max_text_length": 141, + "unique_text": 1641, + "unique_labels": 3, + "labels": { + "2": { + "count": 892 + }, + "1": { + "count": 342 + }, + "0": { + "count": 417 + } + } + }, + "ary": { + "num_samples": 5583, + "number_of_characters": 420832, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 75.37739566541286, + "max_text_length": 771, + "unique_text": 5563, + "unique_labels": 3, + "labels": { + "1": { + "count": 2161 + }, + "0": { + "count": 1758 + }, + "2": { + "count": 1664 + } + } + }, + "hau": { + "num_samples": 14172, + "number_of_characters": 1106209, + "number_texts_intersect_with_train": null, + "min_text_length": 17, + "average_text_length": 78.055955405024, + "max_text_length": 337, + "unique_text": 14172, + "unique_labels": 3, + "labels": { + "2": { + "count": 4573 + }, + "1": { + "count": 4912 + }, + "0": { + "count": 4687 + } + } + }, + "ibo": { + "num_samples": 10192, + "number_of_characters": 709705, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 69.6335361067504, + "max_text_length": 354, + "unique_text": 10192, + "unique_labels": 3, + "labels": { + "2": { + "count": 2600 + }, + "1": { + "count": 4508 + }, + "0": { + "count": 3084 + } + } + }, + 
"kin": { + "num_samples": 3302, + "number_of_characters": 361836, + "number_texts_intersect_with_train": null, + "min_text_length": 23, + "average_text_length": 109.5808600847971, + "max_text_length": 458, + "unique_text": 3259, + "unique_labels": 3, + "labels": { + "2": { + "count": 1146 + }, + "1": { + "count": 1257 + }, + "0": { + "count": 899 + } + } + }, + "por": { + "num_samples": 3063, + "number_of_characters": 319410, + "number_texts_intersect_with_train": null, + "min_text_length": 15, + "average_text_length": 104.28011753183154, + "max_text_length": 307, + "unique_text": 2983, + "unique_labels": 3, + "labels": { + "2": { + "count": 782 + }, + "1": { + "count": 1600 + }, + "0": { + "count": 681 + } + } + }, + "pcm": { + "num_samples": 5121, + "number_of_characters": 594073, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 116.00722515133764, + "max_text_length": 279, + "unique_text": 4609, + "unique_labels": 3, + "labels": { + "2": { + "count": 3241 + }, + "1": { + "count": 72 + }, + "0": { + "count": 1808 + } + } + }, + "swa": { + "num_samples": 1810, + "number_of_characters": 195694, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 108.11823204419889, + "max_text_length": 277, + "unique_text": 1802, + "unique_labels": 3, + "labels": { + "2": { + "count": 191 + }, + "1": { + "count": 1072 + }, + "0": { + "count": 547 + } + } + }, + "twi": { + "num_samples": 3481, + "number_of_characters": 157623, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 45.28095374892273, + "max_text_length": 273, + "unique_text": 3105, + "unique_labels": 3, + "labels": { + "2": { + "count": 1315 + }, + "1": { + "count": 522 + }, + "0": { + "count": 1644 + } + } + }, + "tso": { + "num_samples": 804, + "number_of_characters": 45865, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 57.04601990049751, + 
"max_text_length": 291, + "unique_text": 804, + "unique_labels": 3, + "labels": { + "2": { + "count": 284 + }, + "1": { + "count": 136 + }, + "0": { + "count": 384 + } + } + }, + "yor": { + "num_samples": 8522, + "number_of_characters": 1002369, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 117.62133302041774, + "max_text_length": 354, + "unique_text": 8522, + "unique_labels": 3, + "labels": { + "2": { + "count": 1872 + }, + "1": { + "count": 3108 + }, + "0": { + "count": 3542 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/AfriSentiLangClassification.json b/mteb/descriptive_stats/Classification/AfriSentiLangClassification.json new file mode 100644 index 0000000000..d84ec3be60 --- /dev/null +++ b/mteb/descriptive_stats/Classification/AfriSentiLangClassification.json @@ -0,0 +1,98 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 162858, + "number_texts_intersect_with_train": 80, + "min_text_length": 6, + "average_text_length": 79.5205078125, + "max_text_length": 283, + "unique_text": 2048, + "unique_labels": 12, + "labels": { + "10": { + "count": 178 + }, + "7": { + "count": 178 + }, + "8": { + "count": 178 + }, + "2": { + "count": 178 + }, + "0": { + "count": 178 + }, + "5": { + "count": 178 + }, + "9": { + "count": 90 + }, + "4": { + "count": 178 + }, + "3": { + "count": 178 + }, + "6": { + "count": 178 + }, + "1": { + "count": 178 + }, + "11": { + "count": 178 + } + } + }, + "train": { + "num_samples": 97128, + "number_of_characters": 8081377, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 83.20337080965324, + "max_text_length": 771, + "unique_text": 94411, + "unique_labels": 12, + "labels": { + "3": { + "count": 21152 + }, + "2": { + "count": 8532 + }, + "11": { + "count": 14127 + }, + "0": { + "count": 8480 + }, + "4": { + "count": 14715 + }, + "6": { + "count": 9556 + }, + "7": { + "count": 6492 
+ }, + "10": { + "count": 4206 + }, + "5": { + "count": 4155 + }, + "1": { + "count": 2437 + }, + "8": { + "count": 2464 + }, + "9": { + "count": 812 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/AllegroReviews.json b/mteb/descriptive_stats/Classification/AllegroReviews.json new file mode 100644 index 0000000000..7c1c7d07bf --- /dev/null +++ b/mteb/descriptive_stats/Classification/AllegroReviews.json @@ -0,0 +1,56 @@ +{ + "test": { + "num_samples": 1006, + "number_of_characters": 480112, + "number_texts_intersect_with_train": 16, + "min_text_length": 51, + "average_text_length": 477.2485089463221, + "max_text_length": 2096, + "unique_text": 1004, + "unique_labels": 5, + "labels": { + "4.0": { + "count": 162 + }, + "2.0": { + "count": 125 + }, + "5.0": { + "count": 372 + }, + "1.0": { + "count": 203 + }, + "3.0": { + "count": 144 + } + } + }, + "train": { + "num_samples": 9577, + "number_of_characters": 4576784, + "number_texts_intersect_with_train": null, + "min_text_length": 50, + "average_text_length": 477.89328599770283, + "max_text_length": 5000, + "unique_text": 9099, + "unique_labels": 5, + "labels": { + "3.0": { + "count": 1208 + }, + "1.0": { + "count": 1733 + }, + "4.0": { + "count": 1644 + }, + "2.0": { + "count": 1069 + }, + "5.0": { + "count": 3923 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/AmazonCounterfactualClassification.json b/mteb/descriptive_stats/Classification/AmazonCounterfactualClassification.json new file mode 100644 index 0000000000..176d0d00cf --- /dev/null +++ b/mteb/descriptive_stats/Classification/AmazonCounterfactualClassification.json @@ -0,0 +1,278 @@ +{ + "validation": { + "num_samples": 1933, + "number_of_characters": 183142, + "number_texts_intersect_with_train": 552, + "min_text_length": 9, + "average_text_length": 94.74495602690119, + "max_text_length": 525, + "unique_text": 1903, + "unique_labels": 2, + "labels": { + "0": { + 
"count": 1437 + }, + "1": { + "count": 496 + } + }, + "hf_subset_descriptive_stats": { + "en-ext": { + "num_samples": 666, + "number_of_characters": 68028, + "number_texts_intersect_with_train": 0, + "min_text_length": 31, + "average_text_length": 102.14414414414415, + "max_text_length": 370, + "unique_text": 666, + "unique_labels": 2, + "labels": { + "0": { + "count": 599 + }, + "1": { + "count": 67 + } + } + }, + "en": { + "num_samples": 335, + "number_of_characters": 36583, + "number_texts_intersect_with_train": 0, + "min_text_length": 36, + "average_text_length": 109.20298507462687, + "max_text_length": 470, + "unique_text": 335, + "unique_labels": 2, + "labels": { + "0": { + "count": 277 + }, + "1": { + "count": 58 + } + } + }, + "de": { + "num_samples": 466, + "number_of_characters": 58251, + "number_texts_intersect_with_train": 3, + "min_text_length": 22, + "average_text_length": 125.00214592274678, + "max_text_length": 525, + "unique_text": 466, + "unique_labels": 2, + "labels": { + "0": { + "count": 141 + }, + "1": { + "count": 325 + } + } + }, + "ja": { + "num_samples": 466, + "number_of_characters": 20280, + "number_texts_intersect_with_train": 13, + "min_text_length": 9, + "average_text_length": 43.51931330472103, + "max_text_length": 191, + "unique_text": 464, + "unique_labels": 2, + "labels": { + "0": { + "count": 420 + }, + "1": { + "count": 46 + } + } + } + } + }, + "test": { + "num_samples": 3872, + "number_of_characters": 361556, + "number_texts_intersect_with_train": 1128, + "min_text_length": 6, + "average_text_length": 93.37706611570248, + "max_text_length": 568, + "unique_text": 3779, + "unique_labels": 2, + "labels": { + "1": { + "count": 1016 + }, + "0": { + "count": 2856 + } + }, + "hf_subset_descriptive_stats": { + "en-ext": { + "num_samples": 1334, + "number_of_characters": 135364, + "number_texts_intersect_with_train": 1, + "min_text_length": 6, + "average_text_length": 101.47226386806597, + "max_text_length": 420, + "unique_text": 1333, 
+ "unique_labels": 2, + "labels": { + "1": { + "count": 139 + }, + "0": { + "count": 1195 + } + } + }, + "en": { + "num_samples": 670, + "number_of_characters": 71118, + "number_texts_intersect_with_train": 0, + "min_text_length": 32, + "average_text_length": 106.14626865671642, + "max_text_length": 541, + "unique_text": 670, + "unique_labels": 2, + "labels": { + "0": { + "count": 539 + }, + "1": { + "count": 131 + } + } + }, + "de": { + "num_samples": 934, + "number_of_characters": 115432, + "number_texts_intersect_with_train": 3, + "min_text_length": 23, + "average_text_length": 123.58886509635974, + "max_text_length": 568, + "unique_text": 933, + "unique_labels": 2, + "labels": { + "0": { + "count": 284 + }, + "1": { + "count": 650 + } + } + }, + "ja": { + "num_samples": 934, + "number_of_characters": 39642, + "number_texts_intersect_with_train": 27, + "min_text_length": 6, + "average_text_length": 42.44325481798715, + "max_text_length": 165, + "unique_text": 934, + "unique_labels": 2, + "labels": { + "0": { + "count": 838 + }, + "1": { + "count": 96 + } + } + } + } + }, + "train": { + "num_samples": 23218, + "number_of_characters": 2161346, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 93.08924110603841, + "max_text_length": 572, + "unique_text": 19945, + "unique_labels": 2, + "labels": { + "0": { + "count": 17239 + }, + "1": { + "count": 5979 + } + }, + "hf_subset_descriptive_stats": { + "en-ext": { + "num_samples": 8000, + "number_of_characters": 816814, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 102.10175, + "max_text_length": 541, + "unique_text": 7998, + "unique_labels": 2, + "labels": { + "0": { + "count": 7176 + }, + "1": { + "count": 824 + } + } + }, + "en": { + "num_samples": 4018, + "number_of_characters": 431133, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 107.30039820806371, + "max_text_length": 514, + 
"unique_text": 4018, + "unique_labels": 2, + "labels": { + "1": { + "count": 765 + }, + "0": { + "count": 3253 + } + } + }, + "de": { + "num_samples": 5600, + "number_of_characters": 674491, + "number_texts_intersect_with_train": null, + "min_text_length": 19, + "average_text_length": 120.44482142857143, + "max_text_length": 572, + "unique_text": 5587, + "unique_labels": 2, + "labels": { + "1": { + "count": 3865 + }, + "0": { + "count": 1735 + } + } + }, + "ja": { + "num_samples": 5600, + "number_of_characters": 238908, + "number_texts_intersect_with_train": null, + "min_text_length": 8, + "average_text_length": 42.662142857142854, + "max_text_length": 190, + "unique_text": 5530, + "unique_labels": 2, + "labels": { + "0": { + "count": 5075 + }, + "1": { + "count": 525 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/AmazonPolarityClassification.json b/mteb/descriptive_stats/Classification/AmazonPolarityClassification.json new file mode 100644 index 0000000000..4aa26d00bd --- /dev/null +++ b/mteb/descriptive_stats/Classification/AmazonPolarityClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 400000, + "number_of_characters": 172571852, + "number_texts_intersect_with_train": 0, + "min_text_length": 99, + "average_text_length": 431.42963, + "max_text_length": 1015, + "unique_text": 400000, + "unique_labels": 2, + "labels": { + "1": { + "count": 200000 + }, + "0": { + "count": 200000 + } + } + }, + "train": { + "num_samples": 3600000, + "number_of_characters": 1553926733, + "number_texts_intersect_with_train": null, + "min_text_length": 72, + "average_text_length": 431.6463147222222, + "max_text_length": 1015, + "unique_text": 3600000, + "unique_labels": 2, + "labels": { + "1": { + "count": 1800000 + }, + "0": { + "count": 1800000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/AmazonReviewsClassification.json 
b/mteb/descriptive_stats/Classification/AmazonReviewsClassification.json new file mode 100644 index 0000000000..ab3c5718b1 --- /dev/null +++ b/mteb/descriptive_stats/Classification/AmazonReviewsClassification.json @@ -0,0 +1,575 @@ +{ + "validation": { + "num_samples": 30000, + "number_of_characters": 4776902, + "number_texts_intersect_with_train": 155, + "min_text_length": 20, + "average_text_length": 159.23006666666666, + "max_text_length": 3722, + "unique_text": 29997, + "unique_labels": 5, + "labels": { + "0": { + "count": 6000 + }, + "1": { + "count": 6000 + }, + "2": { + "count": 6000 + }, + "3": { + "count": 6000 + }, + "4": { + "count": 6000 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 5000, + "number_of_characters": 1034433, + "number_texts_intersect_with_train": 5, + "min_text_length": 24, + "average_text_length": 206.8866, + "max_text_length": 3722, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + }, + "de": { + "num_samples": 5000, + "number_of_characters": 1152232, + "number_texts_intersect_with_train": 8, + "min_text_length": 24, + "average_text_length": 230.4464, + "max_text_length": 1596, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + }, + "es": { + "num_samples": 5000, + "number_of_characters": 857010, + "number_texts_intersect_with_train": 10, + "min_text_length": 24, + "average_text_length": 171.402, + "max_text_length": 1618, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + }, + "fr": { + "num_samples": 5000, + 
"number_of_characters": 878607, + "number_texts_intersect_with_train": 13, + "min_text_length": 23, + "average_text_length": 175.7214, + "max_text_length": 1626, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + }, + "ja": { + "num_samples": 5000, + "number_of_characters": 555716, + "number_texts_intersect_with_train": 9, + "min_text_length": 20, + "average_text_length": 111.1432, + "max_text_length": 1233, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + }, + "zh": { + "num_samples": 5000, + "number_of_characters": 298904, + "number_texts_intersect_with_train": 110, + "min_text_length": 21, + "average_text_length": 59.7808, + "max_text_length": 1388, + "unique_text": 4997, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + } + } + }, + "test": { + "num_samples": 30000, + "number_of_characters": 4810650, + "number_texts_intersect_with_train": 134, + "min_text_length": 19, + "average_text_length": 160.355, + "max_text_length": 3814, + "unique_text": 30000, + "unique_labels": 5, + "labels": { + "0": { + "count": 6000 + }, + "1": { + "count": 6000 + }, + "2": { + "count": 6000 + }, + "3": { + "count": 6000 + }, + "4": { + "count": 6000 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 5000, + "number_of_characters": 1020794, + "number_texts_intersect_with_train": 2, + "min_text_length": 24, + "average_text_length": 204.1588, + "max_text_length": 2397, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 
1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + }, + "de": { + "num_samples": 5000, + "number_of_characters": 1157422, + "number_texts_intersect_with_train": 7, + "min_text_length": 25, + "average_text_length": 231.4844, + "max_text_length": 3814, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + }, + "es": { + "num_samples": 5000, + "number_of_characters": 865393, + "number_texts_intersect_with_train": 12, + "min_text_length": 24, + "average_text_length": 173.0786, + "max_text_length": 1818, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + }, + "fr": { + "num_samples": 5000, + "number_of_characters": 902601, + "number_texts_intersect_with_train": 14, + "min_text_length": 22, + "average_text_length": 180.5202, + "max_text_length": 3800, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + }, + "ja": { + "num_samples": 5000, + "number_of_characters": 560191, + "number_texts_intersect_with_train": 5, + "min_text_length": 22, + "average_text_length": 112.0382, + "max_text_length": 1054, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + }, + "zh": { + "num_samples": 5000, + "number_of_characters": 304249, + "number_texts_intersect_with_train": 94, + "min_text_length": 19, + "average_text_length": 60.8498, + "max_text_length": 837, + "unique_text": 5000, + "unique_labels": 5, + "labels": { + "0": { + 
"count": 1000 + }, + "1": { + "count": 1000 + }, + "2": { + "count": 1000 + }, + "3": { + "count": 1000 + }, + "4": { + "count": 1000 + } + } + } + } + }, + "train": { + "num_samples": 1200000, + "number_of_characters": 192628074, + "number_texts_intersect_with_train": null, + "min_text_length": 15, + "average_text_length": 160.523395, + "max_text_length": 3931, + "unique_text": 1196369, + "unique_labels": 5, + "labels": { + "0": { + "count": 240000 + }, + "1": { + "count": 240000 + }, + "2": { + "count": 240000 + }, + "3": { + "count": 240000 + }, + "4": { + "count": 240000 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 200000, + "number_of_characters": 41010127, + "number_texts_intersect_with_train": null, + "min_text_length": 18, + "average_text_length": 205.050635, + "max_text_length": 3931, + "unique_text": 199891, + "unique_labels": 5, + "labels": { + "0": { + "count": 40000 + }, + "1": { + "count": 40000 + }, + "2": { + "count": 40000 + }, + "3": { + "count": 40000 + }, + "4": { + "count": 40000 + } + } + }, + "de": { + "num_samples": 200000, + "number_of_characters": 46342647, + "number_texts_intersect_with_train": null, + "min_text_length": 21, + "average_text_length": 231.713235, + "max_text_length": 3874, + "unique_text": 199877, + "unique_labels": 5, + "labels": { + "0": { + "count": 40000 + }, + "1": { + "count": 40000 + }, + "2": { + "count": 40000 + }, + "3": { + "count": 40000 + }, + "4": { + "count": 40000 + } + } + }, + "es": { + "num_samples": 200000, + "number_of_characters": 34494548, + "number_texts_intersect_with_train": null, + "min_text_length": 22, + "average_text_length": 172.47274, + "max_text_length": 3162, + "unique_text": 199726, + "unique_labels": 5, + "labels": { + "0": { + "count": 40000 + }, + "1": { + "count": 40000 + }, + "2": { + "count": 40000 + }, + "3": { + "count": 40000 + }, + "4": { + "count": 40000 + } + } + }, + "fr": { + "num_samples": 200000, + "number_of_characters": 36091159, + 
"number_texts_intersect_with_train": null, + "min_text_length": 19, + "average_text_length": 180.455795, + "max_text_length": 3917, + "unique_text": 199612, + "unique_labels": 5, + "labels": { + "0": { + "count": 40000 + }, + "1": { + "count": 40000 + }, + "2": { + "count": 40000 + }, + "3": { + "count": 40000 + }, + "4": { + "count": 40000 + } + } + }, + "ja": { + "num_samples": 200000, + "number_of_characters": 22575204, + "number_texts_intersect_with_train": null, + "min_text_length": 16, + "average_text_length": 112.87602, + "max_text_length": 1466, + "unique_text": 199845, + "unique_labels": 5, + "labels": { + "0": { + "count": 40000 + }, + "1": { + "count": 40000 + }, + "2": { + "count": 40000 + }, + "3": { + "count": 40000 + }, + "4": { + "count": 40000 + } + } + }, + "zh": { + "num_samples": 200000, + "number_of_characters": 12114389, + "number_texts_intersect_with_train": null, + "min_text_length": 15, + "average_text_length": 60.571945, + "max_text_length": 2423, + "unique_text": 197418, + "unique_labels": 5, + "labels": { + "0": { + "count": 40000 + }, + "1": { + "count": 40000 + }, + "2": { + "count": 40000 + }, + "3": { + "count": 40000 + }, + "4": { + "count": 40000 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/AngryTweetsClassification.json b/mteb/descriptive_stats/Classification/AngryTweetsClassification.json new file mode 100644 index 0000000000..4910de774e --- /dev/null +++ b/mteb/descriptive_stats/Classification/AngryTweetsClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 1047, + "number_of_characters": 163484, + "number_texts_intersect_with_train": 0, + "min_text_length": 9, + "average_text_length": 156.14517669531998, + "max_text_length": 327, + "unique_text": 1044, + "unique_labels": 3, + "labels": { + "neutral": { + "count": 363 + }, + "positiv": { + "count": 282 + }, + "negativ": { + "count": 402 + } + } + }, + "train": { + "num_samples": 2411, + 
"number_of_characters": 368784, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 152.95893819991704, + "max_text_length": 338, + "unique_text": 2410, + "unique_labels": 3, + "labels": { + "positiv": { + "count": 648 + }, + "neutral": { + "count": 852 + }, + "negativ": { + "count": 911 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ArxivClassification.json b/mteb/descriptive_stats/Classification/ArxivClassification.json new file mode 100644 index 0000000000..f991b00053 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ArxivClassification.json @@ -0,0 +1,92 @@ +{ + "test": { + "num_samples": 2500, + "number_of_characters": 137209409, + "number_texts_intersect_with_train": 159, + "min_text_length": 3895, + "average_text_length": 54883.7636, + "max_text_length": 559979, + "unique_text": 2495, + "unique_labels": 11, + "labels": { + "4": { + "count": 234 + }, + "1": { + "count": 194 + }, + "7": { + "count": 236 + }, + "3": { + "count": 233 + }, + "9": { + "count": 219 + }, + "5": { + "count": 196 + }, + "2": { + "count": 205 + }, + "10": { + "count": 212 + }, + "8": { + "count": 318 + }, + "0": { + "count": 212 + }, + "6": { + "count": 241 + } + } + }, + "train": { + "num_samples": 28388, + "number_of_characters": 1602729054, + "number_texts_intersect_with_train": null, + "min_text_length": 2852, + "average_text_length": 56457.97710300127, + "max_text_length": 2553775, + "unique_text": 27321, + "unique_labels": 11, + "labels": { + "8": { + "count": 3527 + }, + "9": { + "count": 2560 + }, + "3": { + "count": 2631 + }, + "5": { + "count": 2117 + }, + "1": { + "count": 2137 + }, + "6": { + "count": 2443 + }, + "0": { + "count": 2456 + }, + "10": { + "count": 2581 + }, + "7": { + "count": 2768 + }, + "2": { + "count": 2569 + }, + "4": { + "count": 2599 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/Banking77Classification.json 
b/mteb/descriptive_stats/Classification/Banking77Classification.json new file mode 100644 index 0000000000..59a8c466b6 --- /dev/null +++ b/mteb/descriptive_stats/Classification/Banking77Classification.json @@ -0,0 +1,488 @@ +{ + "test": { + "num_samples": 3080, + "number_of_characters": 167036, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 54.23246753246753, + "max_text_length": 368, + "unique_text": 3080, + "unique_labels": 77, + "labels": { + "11": { + "count": 40 + }, + "13": { + "count": 40 + }, + "32": { + "count": 40 + }, + "17": { + "count": 40 + }, + "34": { + "count": 40 + }, + "46": { + "count": 40 + }, + "36": { + "count": 40 + }, + "12": { + "count": 40 + }, + "4": { + "count": 40 + }, + "14": { + "count": 40 + }, + "33": { + "count": 40 + }, + "41": { + "count": 40 + }, + "1": { + "count": 40 + }, + "49": { + "count": 40 + }, + "23": { + "count": 40 + }, + "56": { + "count": 40 + }, + "47": { + "count": 40 + }, + "8": { + "count": 40 + }, + "60": { + "count": 40 + }, + "75": { + "count": 40 + }, + "15": { + "count": 40 + }, + "66": { + "count": 40 + }, + "54": { + "count": 40 + }, + "40": { + "count": 40 + }, + "10": { + "count": 40 + }, + "61": { + "count": 40 + }, + "6": { + "count": 40 + }, + "16": { + "count": 40 + }, + "30": { + "count": 40 + }, + "74": { + "count": 40 + }, + "68": { + "count": 40 + }, + "38": { + "count": 40 + }, + "73": { + "count": 40 + }, + "62": { + "count": 40 + }, + "29": { + "count": 40 + }, + "22": { + "count": 40 + }, + "3": { + "count": 40 + }, + "28": { + "count": 40 + }, + "44": { + "count": 40 + }, + "26": { + "count": 40 + }, + "45": { + "count": 40 + }, + "42": { + "count": 40 + }, + "52": { + "count": 40 + }, + "27": { + "count": 40 + }, + "51": { + "count": 40 + }, + "25": { + "count": 40 + }, + "48": { + "count": 40 + }, + "55": { + "count": 40 + }, + "18": { + "count": 40 + }, + "63": { + "count": 40 + }, + "70": { + "count": 40 + }, + "67": { + "count": 40 + }, + 
"53": { + "count": 40 + }, + "21": { + "count": 40 + }, + "7": { + "count": 40 + }, + "64": { + "count": 40 + }, + "50": { + "count": 40 + }, + "35": { + "count": 40 + }, + "65": { + "count": 40 + }, + "71": { + "count": 40 + }, + "39": { + "count": 40 + }, + "58": { + "count": 40 + }, + "43": { + "count": 40 + }, + "72": { + "count": 40 + }, + "76": { + "count": 40 + }, + "37": { + "count": 40 + }, + "59": { + "count": 40 + }, + "5": { + "count": 40 + }, + "20": { + "count": 40 + }, + "31": { + "count": 40 + }, + "57": { + "count": 40 + }, + "0": { + "count": 40 + }, + "19": { + "count": 40 + }, + "9": { + "count": 40 + }, + "2": { + "count": 40 + }, + "69": { + "count": 40 + }, + "24": { + "count": 40 + } + } + }, + "train": { + "num_samples": 10003, + "number_of_characters": 594916, + "number_texts_intersect_with_train": null, + "min_text_length": 13, + "average_text_length": 59.47375787263821, + "max_text_length": 433, + "unique_text": 10003, + "unique_labels": 77, + "labels": { + "11": { + "count": 153 + }, + "13": { + "count": 139 + }, + "32": { + "count": 112 + }, + "17": { + "count": 167 + }, + "34": { + "count": 166 + }, + "46": { + "count": 143 + }, + "36": { + "count": 126 + }, + "12": { + "count": 112 + }, + "4": { + "count": 127 + }, + "14": { + "count": 112 + }, + "33": { + "count": 118 + }, + "41": { + "count": 82 + }, + "1": { + "count": 110 + }, + "49": { + "count": 115 + }, + "23": { + "count": 35 + }, + "56": { + "count": 111 + }, + "47": { + "count": 149 + }, + "8": { + "count": 157 + }, + "60": { + "count": 97 + }, + "75": { + "count": 180 + }, + "15": { + "count": 187 + }, + "66": { + "count": 171 + }, + "54": { + "count": 129 + }, + "40": { + "count": 98 + }, + "10": { + "count": 59 + }, + "61": { + "count": 146 + }, + "6": { + "count": 181 + }, + "16": { + "count": 168 + }, + "30": { + "count": 121 + }, + "74": { + "count": 121 + }, + "68": { + "count": 102 + }, + "38": { + "count": 106 + }, + "73": { + "count": 135 + }, + "62": { + "count": 
103 + }, + "29": { + "count": 121 + }, + "22": { + "count": 86 + }, + "3": { + "count": 87 + }, + "28": { + "count": 182 + }, + "44": { + "count": 105 + }, + "26": { + "count": 173 + }, + "45": { + "count": 159 + }, + "42": { + "count": 121 + }, + "52": { + "count": 169 + }, + "27": { + "count": 133 + }, + "51": { + "count": 162 + }, + "25": { + "count": 153 + }, + "48": { + "count": 148 + }, + "55": { + "count": 108 + }, + "18": { + "count": 61 + }, + "63": { + "count": 175 + }, + "70": { + "count": 113 + }, + "67": { + "count": 128 + }, + "53": { + "count": 161 + }, + "21": { + "count": 122 + }, + "7": { + "count": 156 + }, + "64": { + "count": 172 + }, + "50": { + "count": 95 + }, + "35": { + "count": 137 + }, + "65": { + "count": 113 + }, + "71": { + "count": 126 + }, + "39": { + "count": 129 + }, + "58": { + "count": 114 + }, + "43": { + "count": 120 + }, + "72": { + "count": 41 + }, + "76": { + "count": 163 + }, + "37": { + "count": 97 + }, + "59": { + "count": 145 + }, + "5": { + "count": 171 + }, + "20": { + "count": 160 + }, + "31": { + "count": 121 + }, + "57": { + "count": 114 + }, + "0": { + "count": 159 + }, + "19": { + "count": 177 + }, + "9": { + "count": 129 + }, + "2": { + "count": 126 + }, + "69": { + "count": 104 + }, + "24": { + "count": 129 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/BengaliDocumentClassification.json b/mteb/descriptive_stats/Classification/BengaliDocumentClassification.json new file mode 100644 index 0000000000..fc91785ffc --- /dev/null +++ b/mteb/descriptive_stats/Classification/BengaliDocumentClassification.json @@ -0,0 +1,104 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 3404227, + "number_texts_intersect_with_train": 4, + "min_text_length": 161, + "average_text_length": 1662.22021484375, + "max_text_length": 14619, + "unique_text": 2048, + "unique_labels": 13, + "labels": { + "6": { + "count": 1091 + }, + "3": { + "count": 284 + }, + "9": { + "count": 
139 + }, + "12": { + "count": 164 + }, + "1": { + "count": 37 + }, + "0": { + "count": 60 + }, + "8": { + "count": 33 + }, + "4": { + "count": 59 + }, + "2": { + "count": 149 + }, + "10": { + "count": 3 + }, + "11": { + "count": 10 + }, + "5": { + "count": 15 + }, + "7": { + "count": 4 + } + } + }, + "train": { + "num_samples": 220574, + "number_of_characters": 364935023, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 1654.4788733032906, + "max_text_length": 58089, + "unique_text": 220301, + "unique_labels": 13, + "labels": { + "6": { + "count": 117566 + }, + "9": { + "count": 15018 + }, + "2": { + "count": 16104 + }, + "1": { + "count": 4008 + }, + "4": { + "count": 6356 + }, + "8": { + "count": 3501 + }, + "0": { + "count": 6447 + }, + "3": { + "count": 30558 + }, + "12": { + "count": 17635 + }, + "10": { + "count": 269 + }, + "5": { + "count": 1599 + }, + "11": { + "count": 1046 + }, + "7": { + "count": 467 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/BengaliHateSpeechClassification.json b/mteb/descriptive_stats/Classification/BengaliHateSpeechClassification.json new file mode 100644 index 0000000000..003dbf5e04 --- /dev/null +++ b/mteb/descriptive_stats/Classification/BengaliHateSpeechClassification.json @@ -0,0 +1,29 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 209493, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 102.29150390625, + "max_text_length": 502, + "unique_text": 1957, + "unique_labels": 5, + "labels": { + "3": { + "count": 826 + }, + "2": { + "count": 301 + }, + "1": { + "count": 355 + }, + "0": { + "count": 377 + }, + "4": { + "count": 189 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/BengaliSentimentAnalysis.json b/mteb/descriptive_stats/Classification/BengaliSentimentAnalysis.json new file mode 100644 index 0000000000..1bdb8ee05b --- 
/dev/null +++ b/mteb/descriptive_stats/Classification/BengaliSentimentAnalysis.json @@ -0,0 +1,20 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 144078, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 70.3505859375, + "max_text_length": 1450, + "unique_text": 1935, + "unique_labels": 2, + "labels": { + "1": { + "count": 1474 + }, + "0": { + "count": 574 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/BulgarianStoreReviewSentimentClassfication.json b/mteb/descriptive_stats/Classification/BulgarianStoreReviewSentimentClassfication.json new file mode 100644 index 0000000000..73464d1499 --- /dev/null +++ b/mteb/descriptive_stats/Classification/BulgarianStoreReviewSentimentClassfication.json @@ -0,0 +1,50 @@ +{ + "test": { + "num_samples": 182, + "number_of_characters": 57648, + "number_texts_intersect_with_train": 0, + "min_text_length": 8, + "average_text_length": 316.74725274725273, + "max_text_length": 2560, + "unique_text": 182, + "unique_labels": 4, + "labels": { + "1": { + "count": 118 + }, + "0": { + "count": 33 + }, + "2": { + "count": 25 + }, + "3": { + "count": 6 + } + } + }, + "train": { + "num_samples": 724, + "number_of_characters": 227740, + "number_texts_intersect_with_train": null, + "min_text_length": 10, + "average_text_length": 314.55801104972375, + "max_text_length": 1916, + "unique_text": 724, + "unique_labels": 4, + "labels": { + "1": { + "count": 422 + }, + "2": { + "count": 113 + }, + "0": { + "count": 151 + }, + "3": { + "count": 38 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CBD.json b/mteb/descriptive_stats/Classification/CBD.json new file mode 100644 index 0000000000..663b5629c6 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CBD.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1000, + "number_of_characters": 93183, + "number_texts_intersect_with_train": 1, + 
"min_text_length": 25, + "average_text_length": 93.183, + "max_text_length": 182, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 866 + }, + "1": { + "count": 134 + } + } + }, + "train": { + "num_samples": 10041, + "number_of_characters": 940306, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 93.64664874016532, + "max_text_length": 214, + "unique_text": 10027, + "unique_labels": 2, + "labels": { + "0": { + "count": 9190 + }, + "1": { + "count": 851 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CSFDCZMovieReviewSentimentClassification.json b/mteb/descriptive_stats/Classification/CSFDCZMovieReviewSentimentClassification.json new file mode 100644 index 0000000000..b664ef7f96 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CSFDCZMovieReviewSentimentClassification.json @@ -0,0 +1,62 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 784470, + "number_texts_intersect_with_train": 3, + "min_text_length": 6, + "average_text_length": 383.0419921875, + "max_text_length": 6162, + "unique_text": 2048, + "unique_labels": 6, + "labels": { + "5": { + "count": 364 + }, + "3": { + "count": 340 + }, + "1": { + "count": 346 + }, + "0": { + "count": 337 + }, + "2": { + "count": 333 + }, + "4": { + "count": 328 + } + } + }, + "train": { + "num_samples": 25000, + "number_of_characters": 9467738, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 378.70952, + "max_text_length": 9981, + "unique_text": 24990, + "unique_labels": 6, + "labels": { + "2": { + "count": 4208 + }, + "5": { + "count": 4166 + }, + "1": { + "count": 4125 + }, + "4": { + "count": 4163 + }, + "3": { + "count": 4168 + }, + "0": { + "count": 4170 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CSFDSKMovieReviewSentimentClassification.json 
b/mteb/descriptive_stats/Classification/CSFDSKMovieReviewSentimentClassification.json new file mode 100644 index 0000000000..fed427e49c --- /dev/null +++ b/mteb/descriptive_stats/Classification/CSFDSKMovieReviewSentimentClassification.json @@ -0,0 +1,62 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 763379, + "number_texts_intersect_with_train": 13, + "min_text_length": 3, + "average_text_length": 372.74365234375, + "max_text_length": 3640, + "unique_text": 2045, + "unique_labels": 6, + "labels": { + "5": { + "count": 364 + }, + "3": { + "count": 340 + }, + "1": { + "count": 346 + }, + "0": { + "count": 337 + }, + "2": { + "count": 333 + }, + "4": { + "count": 328 + } + } + }, + "train": { + "num_samples": 25000, + "number_of_characters": 9188752, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 367.55008, + "max_text_length": 11278, + "unique_text": 24905, + "unique_labels": 6, + "labels": { + "2": { + "count": 4208 + }, + "5": { + "count": 4166 + }, + "1": { + "count": 4125 + }, + "4": { + "count": 4163 + }, + "3": { + "count": 4168 + }, + "0": { + "count": 4170 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADAffiliateLicenseLicenseeLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADAffiliateLicenseLicenseeLegalBenchClassification.json new file mode 100644 index 0000000000..3cb275af13 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADAffiliateLicenseLicenseeLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 198, + "number_of_characters": 95853, + "number_texts_intersect_with_train": 0, + "min_text_length": 62, + "average_text_length": 484.1060606060606, + "max_text_length": 3074, + "unique_text": 198, + "unique_labels": 2, + "labels": { + "1": { + "count": 99 + }, + "0": { + "count": 99 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 3479, + 
"number_texts_intersect_with_train": null, + "min_text_length": 81, + "average_text_length": 579.8333333333334, + "max_text_length": 1638, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADAffiliateLicenseLicensorLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADAffiliateLicenseLicensorLegalBenchClassification.json new file mode 100644 index 0000000000..04b8b155d3 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADAffiliateLicenseLicensorLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 88, + "number_of_characters": 55739, + "number_texts_intersect_with_train": 0, + "min_text_length": 73, + "average_text_length": 633.3977272727273, + "max_text_length": 3074, + "unique_text": 88, + "unique_labels": 2, + "labels": { + "1": { + "count": 44 + }, + "0": { + "count": 44 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 3577, + "number_texts_intersect_with_train": null, + "min_text_length": 161, + "average_text_length": 596.1666666666666, + "max_text_length": 1609, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADAntiAssignmentLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADAntiAssignmentLegalBenchClassification.json new file mode 100644 index 0000000000..5315f92a9d --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADAntiAssignmentLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1172, + "number_of_characters": 399431, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 340.811433447099, + "max_text_length": 4220, + "unique_text": 1172, + "unique_labels": 2, + "labels": { + "1": { + 
"count": 586 + }, + "0": { + "count": 586 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1535, + "number_texts_intersect_with_train": null, + "min_text_length": 153, + "average_text_length": 255.83333333333334, + "max_text_length": 334, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADAuditRightsLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADAuditRightsLegalBenchClassification.json new file mode 100644 index 0000000000..6ee0c5b477 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADAuditRightsLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1216, + "number_of_characters": 409959, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 337.1373355263158, + "max_text_length": 2363, + "unique_text": 1216, + "unique_labels": 2, + "labels": { + "1": { + "count": 608 + }, + "0": { + "count": 608 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1990, + "number_texts_intersect_with_train": null, + "min_text_length": 183, + "average_text_length": 331.6666666666667, + "max_text_length": 419, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADCapOnLiabilityLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADCapOnLiabilityLegalBenchClassification.json new file mode 100644 index 0000000000..3c8a4a339d --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADCapOnLiabilityLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1246, + "number_of_characters": 468177, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 375.7439807383628, + 
"max_text_length": 1921, + "unique_text": 1246, + "unique_labels": 2, + "labels": { + "1": { + "count": 623 + }, + "0": { + "count": 623 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 2064, + "number_texts_intersect_with_train": null, + "min_text_length": 187, + "average_text_length": 344.0, + "max_text_length": 513, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADChangeOfControlLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADChangeOfControlLegalBenchClassification.json new file mode 100644 index 0000000000..10e695f43b --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADChangeOfControlLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 416, + "number_of_characters": 163059, + "number_texts_intersect_with_train": 0, + "min_text_length": 76, + "average_text_length": 391.96875, + "max_text_length": 2908, + "unique_text": 416, + "unique_labels": 2, + "labels": { + "1": { + "count": 208 + }, + "0": { + "count": 208 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1637, + "number_texts_intersect_with_train": null, + "min_text_length": 90, + "average_text_length": 272.8333333333333, + "max_text_length": 419, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADCompetitiveRestrictionExceptionLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADCompetitiveRestrictionExceptionLegalBenchClassification.json new file mode 100644 index 0000000000..fdbab5cb38 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADCompetitiveRestrictionExceptionLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 220, + 
"number_of_characters": 95269, + "number_texts_intersect_with_train": 0, + "min_text_length": 76, + "average_text_length": 433.0409090909091, + "max_text_length": 2145, + "unique_text": 220, + "unique_labels": 2, + "labels": { + "1": { + "count": 110 + }, + "0": { + "count": 110 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 2173, + "number_texts_intersect_with_train": null, + "min_text_length": 196, + "average_text_length": 362.1666666666667, + "max_text_length": 847, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADCovenantNotToSueLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADCovenantNotToSueLegalBenchClassification.json new file mode 100644 index 0000000000..d1c70acd51 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADCovenantNotToSueLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 308, + "number_of_characters": 124116, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 402.97402597402595, + "max_text_length": 2263, + "unique_text": 308, + "unique_labels": 2, + "labels": { + "1": { + "count": 154 + }, + "0": { + "count": 154 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1967, + "number_texts_intersect_with_train": null, + "min_text_length": 201, + "average_text_length": 327.8333333333333, + "max_text_length": 448, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADEffectiveDateLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADEffectiveDateLegalBenchClassification.json new file mode 100644 index 0000000000..246bd75b44 --- /dev/null +++ 
b/mteb/descriptive_stats/Classification/CUADEffectiveDateLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 236, + "number_of_characters": 65520, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 277.6271186440678, + "max_text_length": 2925, + "unique_text": 236, + "unique_labels": 2, + "labels": { + "1": { + "count": 118 + }, + "0": { + "count": 118 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1601, + "number_texts_intersect_with_train": null, + "min_text_length": 129, + "average_text_length": 266.8333333333333, + "max_text_length": 697, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADExclusivityLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADExclusivityLegalBenchClassification.json new file mode 100644 index 0000000000..5fde3738a4 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADExclusivityLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 762, + "number_of_characters": 281314, + "number_texts_intersect_with_train": 0, + "min_text_length": 64, + "average_text_length": 369.17847769028873, + "max_text_length": 2908, + "unique_text": 762, + "unique_labels": 2, + "labels": { + "1": { + "count": 381 + }, + "0": { + "count": 381 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1306, + "number_texts_intersect_with_train": null, + "min_text_length": 112, + "average_text_length": 217.66666666666666, + "max_text_length": 372, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADExpirationDateLegalBenchClassification.json 
b/mteb/descriptive_stats/Classification/CUADExpirationDateLegalBenchClassification.json new file mode 100644 index 0000000000..8fd37e82c2 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADExpirationDateLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 876, + "number_of_characters": 270929, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 309.2796803652968, + "max_text_length": 2471, + "unique_text": 876, + "unique_labels": 2, + "labels": { + "1": { + "count": 438 + }, + "0": { + "count": 438 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1100, + "number_texts_intersect_with_train": null, + "min_text_length": 79, + "average_text_length": 183.33333333333334, + "max_text_length": 345, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADGoverningLawLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADGoverningLawLegalBenchClassification.json new file mode 100644 index 0000000000..c6b4de29b5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADGoverningLawLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 876, + "number_of_characters": 253930, + "number_texts_intersect_with_train": 0, + "min_text_length": 60, + "average_text_length": 289.8744292237443, + "max_text_length": 2402, + "unique_text": 876, + "unique_labels": 2, + "labels": { + "1": { + "count": 438 + }, + "0": { + "count": 438 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1845, + "number_texts_intersect_with_train": null, + "min_text_length": 97, + "average_text_length": 307.5, + "max_text_length": 838, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Classification/CUADIPOwnershipAssignmentLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADIPOwnershipAssignmentLegalBenchClassification.json new file mode 100644 index 0000000000..b252c31ea6 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADIPOwnershipAssignmentLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 576, + "number_of_characters": 238457, + "number_texts_intersect_with_train": 0, + "min_text_length": 59, + "average_text_length": 413.98784722222223, + "max_text_length": 3074, + "unique_text": 576, + "unique_labels": 2, + "labels": { + "1": { + "count": 288 + }, + "0": { + "count": 288 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 4180, + "number_texts_intersect_with_train": null, + "min_text_length": 207, + "average_text_length": 696.6666666666666, + "max_text_length": 1959, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADInsuranceLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADInsuranceLegalBenchClassification.json new file mode 100644 index 0000000000..cd832160b4 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADInsuranceLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1030, + "number_of_characters": 376504, + "number_texts_intersect_with_train": 0, + "min_text_length": 63, + "average_text_length": 365.5378640776699, + "max_text_length": 3032, + "unique_text": 1030, + "unique_labels": 2, + "labels": { + "1": { + "count": 515 + }, + "0": { + "count": 515 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1925, + "number_texts_intersect_with_train": null, + "min_text_length": 126, + "average_text_length": 320.8333333333333, + "max_text_length": 499, + "unique_text": 6, + "unique_labels": 2, + "labels": 
{ + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADIrrevocableOrPerpetualLicenseLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADIrrevocableOrPerpetualLicenseLegalBenchClassification.json new file mode 100644 index 0000000000..b5a03056de --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADIrrevocableOrPerpetualLicenseLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 280, + "number_of_characters": 132553, + "number_texts_intersect_with_train": 0, + "min_text_length": 79, + "average_text_length": 473.4035714285714, + "max_text_length": 3017, + "unique_text": 280, + "unique_labels": 2, + "labels": { + "1": { + "count": 140 + }, + "0": { + "count": 140 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 4704, + "number_texts_intersect_with_train": null, + "min_text_length": 377, + "average_text_length": 784.0, + "max_text_length": 1638, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADJointIPOwnershipLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADJointIPOwnershipLegalBenchClassification.json new file mode 100644 index 0000000000..de988733c1 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADJointIPOwnershipLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 192, + "number_of_characters": 71841, + "number_texts_intersect_with_train": 0, + "min_text_length": 68, + "average_text_length": 374.171875, + "max_text_length": 1415, + "unique_text": 192, + "unique_labels": 2, + "labels": { + "1": { + "count": 96 + }, + "0": { + "count": 96 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 4359, + "number_texts_intersect_with_train": null, + "min_text_length": 190, 
+ "average_text_length": 726.5, + "max_text_length": 1761, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADLicenseGrantLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADLicenseGrantLegalBenchClassification.json new file mode 100644 index 0000000000..4c5beda58a --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADLicenseGrantLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1396, + "number_of_characters": 572215, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 409.8961318051576, + "max_text_length": 3400, + "unique_text": 1396, + "unique_labels": 2, + "labels": { + "1": { + "count": 698 + }, + "0": { + "count": 698 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 3179, + "number_texts_intersect_with_train": null, + "min_text_length": 142, + "average_text_length": 529.8333333333334, + "max_text_length": 1028, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADLiquidatedDamagesLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADLiquidatedDamagesLegalBenchClassification.json new file mode 100644 index 0000000000..a87f65a53e --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADLiquidatedDamagesLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 220, + "number_of_characters": 77388, + "number_texts_intersect_with_train": 0, + "min_text_length": 70, + "average_text_length": 351.76363636363635, + "max_text_length": 2526, + "unique_text": 220, + "unique_labels": 2, + "labels": { + "1": { + "count": 110 + }, + "0": { + "count": 110 + } + } + }, + "train": { + "num_samples": 6, + 
"number_of_characters": 3039, + "number_texts_intersect_with_train": null, + "min_text_length": 163, + "average_text_length": 506.5, + "max_text_length": 681, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADMinimumCommitmentLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADMinimumCommitmentLegalBenchClassification.json new file mode 100644 index 0000000000..d101990ba9 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADMinimumCommitmentLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 772, + "number_of_characters": 281132, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 364.16062176165804, + "max_text_length": 2771, + "unique_text": 772, + "unique_labels": 2, + "labels": { + "1": { + "count": 386 + }, + "0": { + "count": 386 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 2194, + "number_texts_intersect_with_train": null, + "min_text_length": 76, + "average_text_length": 365.6666666666667, + "max_text_length": 682, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADMostFavoredNationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADMostFavoredNationLegalBenchClassification.json new file mode 100644 index 0000000000..68e28b2e6c --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADMostFavoredNationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 64, + "number_of_characters": 26800, + "number_texts_intersect_with_train": 0, + "min_text_length": 95, + "average_text_length": 418.75, + "max_text_length": 1459, + "unique_text": 64, + "unique_labels": 2, + "labels": { + "1": { + 
"count": 32 + }, + "0": { + "count": 32 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1489, + "number_texts_intersect_with_train": null, + "min_text_length": 131, + "average_text_length": 248.16666666666666, + "max_text_length": 459, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADNoSolicitOfCustomersLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADNoSolicitOfCustomersLegalBenchClassification.json new file mode 100644 index 0000000000..b1d3d5b36e --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADNoSolicitOfCustomersLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 84, + "number_of_characters": 33003, + "number_texts_intersect_with_train": 0, + "min_text_length": 84, + "average_text_length": 392.89285714285717, + "max_text_length": 1314, + "unique_text": 84, + "unique_labels": 2, + "labels": { + "1": { + "count": 42 + }, + "0": { + "count": 42 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 2774, + "number_texts_intersect_with_train": null, + "min_text_length": 128, + "average_text_length": 462.3333333333333, + "max_text_length": 829, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADNoSolicitOfEmployeesLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADNoSolicitOfEmployeesLegalBenchClassification.json new file mode 100644 index 0000000000..89a5ad32dd --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADNoSolicitOfEmployeesLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 142, + "number_of_characters": 59348, + "number_texts_intersect_with_train": 0, + "min_text_length": 68, + 
"average_text_length": 417.943661971831, + "max_text_length": 1881, + "unique_text": 142, + "unique_labels": 2, + "labels": { + "1": { + "count": 71 + }, + "0": { + "count": 71 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 3039, + "number_texts_intersect_with_train": null, + "min_text_length": 109, + "average_text_length": 506.5, + "max_text_length": 974, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADNonCompeteLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADNonCompeteLegalBenchClassification.json new file mode 100644 index 0000000000..297e702282 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADNonCompeteLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 442, + "number_of_characters": 169376, + "number_texts_intersect_with_train": 0, + "min_text_length": 60, + "average_text_length": 383.20361990950227, + "max_text_length": 2925, + "unique_text": 442, + "unique_labels": 2, + "labels": { + "1": { + "count": 221 + }, + "0": { + "count": 221 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 3084, + "number_texts_intersect_with_train": null, + "min_text_length": 95, + "average_text_length": 514.0, + "max_text_length": 1451, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADNonDisparagementLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADNonDisparagementLegalBenchClassification.json new file mode 100644 index 0000000000..5d5ed335e3 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADNonDisparagementLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 100, + "number_of_characters": 40308, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 77, + "average_text_length": 403.08, + "max_text_length": 2030, + "unique_text": 100, + "unique_labels": 2, + "labels": { + "1": { + "count": 50 + }, + "0": { + "count": 50 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1568, + "number_texts_intersect_with_train": null, + "min_text_length": 124, + "average_text_length": 261.3333333333333, + "max_text_length": 480, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADNonTransferableLicenseLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADNonTransferableLicenseLegalBenchClassification.json new file mode 100644 index 0000000000..6549a5748a --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADNonTransferableLicenseLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 542, + "number_of_characters": 216344, + "number_texts_intersect_with_train": 0, + "min_text_length": 69, + "average_text_length": 399.15867158671585, + "max_text_length": 2263, + "unique_text": 542, + "unique_labels": 2, + "labels": { + "1": { + "count": 271 + }, + "0": { + "count": 271 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 3061, + "number_texts_intersect_with_train": null, + "min_text_length": 200, + "average_text_length": 510.1666666666667, + "max_text_length": 947, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADNoticePeriodToTerminateRenewalLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADNoticePeriodToTerminateRenewalLegalBenchClassification.json new file mode 100644 index 0000000000..54051b2701 --- /dev/null +++ 
b/mteb/descriptive_stats/Classification/CUADNoticePeriodToTerminateRenewalLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 222, + "number_of_characters": 78777, + "number_texts_intersect_with_train": 0, + "min_text_length": 75, + "average_text_length": 354.85135135135135, + "max_text_length": 2145, + "unique_text": 222, + "unique_labels": 2, + "labels": { + "1": { + "count": 111 + }, + "0": { + "count": 111 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 3634, + "number_texts_intersect_with_train": null, + "min_text_length": 216, + "average_text_length": 605.6666666666666, + "max_text_length": 1285, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADPostTerminationServicesLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADPostTerminationServicesLegalBenchClassification.json new file mode 100644 index 0000000000..16f69592dc --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADPostTerminationServicesLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 808, + "number_of_characters": 341405, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 422.5309405940594, + "max_text_length": 2402, + "unique_text": 808, + "unique_labels": 2, + "labels": { + "1": { + "count": 404 + }, + "0": { + "count": 404 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 2783, + "number_texts_intersect_with_train": null, + "min_text_length": 335, + "average_text_length": 463.8333333333333, + "max_text_length": 665, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADPriceRestrictionsLegalBenchClassification.json 
b/mteb/descriptive_stats/Classification/CUADPriceRestrictionsLegalBenchClassification.json new file mode 100644 index 0000000000..312ac9ccf2 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADPriceRestrictionsLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 46, + "number_of_characters": 14937, + "number_texts_intersect_with_train": 0, + "min_text_length": 87, + "average_text_length": 324.7173913043478, + "max_text_length": 1095, + "unique_text": 46, + "unique_labels": 2, + "labels": { + "1": { + "count": 23 + }, + "0": { + "count": 23 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 2356, + "number_texts_intersect_with_train": null, + "min_text_length": 153, + "average_text_length": 392.6666666666667, + "max_text_length": 993, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADRenewalTermLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADRenewalTermLegalBenchClassification.json new file mode 100644 index 0000000000..81db76b7e8 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADRenewalTermLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 386, + "number_of_characters": 131578, + "number_texts_intersect_with_train": 0, + "min_text_length": 63, + "average_text_length": 340.8756476683938, + "max_text_length": 2771, + "unique_text": 386, + "unique_labels": 2, + "labels": { + "1": { + "count": 193 + }, + "0": { + "count": 193 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1655, + "number_texts_intersect_with_train": null, + "min_text_length": 167, + "average_text_length": 275.8333333333333, + "max_text_length": 431, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Classification/CUADRevenueProfitSharingLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADRevenueProfitSharingLegalBenchClassification.json new file mode 100644 index 0000000000..c5b8e0ea63 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADRevenueProfitSharingLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 774, + "number_of_characters": 287579, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 371.54909560723513, + "max_text_length": 3169, + "unique_text": 774, + "unique_labels": 2, + "labels": { + "1": { + "count": 387 + }, + "0": { + "count": 387 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1972, + "number_texts_intersect_with_train": null, + "min_text_length": 76, + "average_text_length": 328.6666666666667, + "max_text_length": 518, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADRofrRofoRofnLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADRofrRofoRofnLegalBenchClassification.json new file mode 100644 index 0000000000..f49bee8b23 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADRofrRofoRofnLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 690, + "number_of_characters": 272872, + "number_texts_intersect_with_train": 0, + "min_text_length": 69, + "average_text_length": 395.46666666666664, + "max_text_length": 4220, + "unique_text": 690, + "unique_labels": 2, + "labels": { + "1": { + "count": 345 + }, + "0": { + "count": 345 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 2312, + "number_texts_intersect_with_train": null, + "min_text_length": 202, + "average_text_length": 385.3333333333333, + "max_text_length": 665, + "unique_text": 6, + "unique_labels": 2, + 
"labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADSourceCodeEscrowLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADSourceCodeEscrowLegalBenchClassification.json new file mode 100644 index 0000000000..367508104d --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADSourceCodeEscrowLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 118, + "number_of_characters": 47104, + "number_texts_intersect_with_train": 0, + "min_text_length": 77, + "average_text_length": 399.1864406779661, + "max_text_length": 3169, + "unique_text": 118, + "unique_labels": 2, + "labels": { + "1": { + "count": 59 + }, + "0": { + "count": 59 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 2078, + "number_texts_intersect_with_train": null, + "min_text_length": 102, + "average_text_length": 346.3333333333333, + "max_text_length": 799, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADTerminationForConvenienceLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADTerminationForConvenienceLegalBenchClassification.json new file mode 100644 index 0000000000..80704b3da9 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADTerminationForConvenienceLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 430, + "number_of_characters": 140313, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 326.30930232558137, + "max_text_length": 2489, + "unique_text": 430, + "unique_labels": 2, + "labels": { + "1": { + "count": 215 + }, + "0": { + "count": 215 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 914, + "number_texts_intersect_with_train": null, + 
"min_text_length": 100, + "average_text_length": 152.33333333333334, + "max_text_length": 215, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADThirdPartyBeneficiaryLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADThirdPartyBeneficiaryLegalBenchClassification.json new file mode 100644 index 0000000000..ce255c77f4 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADThirdPartyBeneficiaryLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 68, + "number_of_characters": 17751, + "number_texts_intersect_with_train": 0, + "min_text_length": 75, + "average_text_length": 261.04411764705884, + "max_text_length": 760, + "unique_text": 68, + "unique_labels": 2, + "labels": { + "1": { + "count": 34 + }, + "0": { + "count": 34 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1803, + "number_texts_intersect_with_train": null, + "min_text_length": 90, + "average_text_length": 300.5, + "max_text_length": 665, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADUncappedLiabilityLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADUncappedLiabilityLegalBenchClassification.json new file mode 100644 index 0000000000..e6fba51730 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADUncappedLiabilityLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 294, + "number_of_characters": 129668, + "number_texts_intersect_with_train": 0, + "min_text_length": 77, + "average_text_length": 441.04761904761904, + "max_text_length": 2063, + "unique_text": 294, + "unique_labels": 2, + "labels": { + "1": { + "count": 147 + }, + "0": { + "count": 147 + } + } + }, + 
"train": { + "num_samples": 6, + "number_of_characters": 1999, + "number_texts_intersect_with_train": null, + "min_text_length": 142, + "average_text_length": 333.1666666666667, + "max_text_length": 622, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification.json new file mode 100644 index 0000000000..83c89d7efe --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 48, + "number_of_characters": 17668, + "number_texts_intersect_with_train": 0, + "min_text_length": 66, + "average_text_length": 368.0833333333333, + "max_text_length": 1094, + "unique_text": 48, + "unique_labels": 2, + "labels": { + "1": { + "count": 24 + }, + "0": { + "count": 24 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1909, + "number_texts_intersect_with_train": null, + "min_text_length": 105, + "average_text_length": 318.1666666666667, + "max_text_length": 689, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADVolumeRestrictionLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADVolumeRestrictionLegalBenchClassification.json new file mode 100644 index 0000000000..1891f735ad --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADVolumeRestrictionLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 322, + "number_of_characters": 98621, + "number_texts_intersect_with_train": 0, + "min_text_length": 63, + "average_text_length": 306.27639751552795, + 
"max_text_length": 1563, + "unique_text": 322, + "unique_labels": 2, + "labels": { + "1": { + "count": 161 + }, + "0": { + "count": 161 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 821, + "number_texts_intersect_with_train": null, + "min_text_length": 88, + "average_text_length": 136.83333333333334, + "max_text_length": 215, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CUADWarrantyDurationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CUADWarrantyDurationLegalBenchClassification.json new file mode 100644 index 0000000000..8e2bb60c64 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CUADWarrantyDurationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 320, + "number_of_characters": 112727, + "number_texts_intersect_with_train": 0, + "min_text_length": 59, + "average_text_length": 352.271875, + "max_text_length": 2471, + "unique_text": 320, + "unique_labels": 2, + "labels": { + "1": { + "count": 160 + }, + "0": { + "count": 160 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1222, + "number_texts_intersect_with_train": null, + "min_text_length": 81, + "average_text_length": 203.66666666666666, + "max_text_length": 385, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CanadaTaxCourtOutcomesLegalBenchClassification.json b/mteb/descriptive_stats/Classification/CanadaTaxCourtOutcomesLegalBenchClassification.json new file mode 100644 index 0000000000..6c3ebdfa88 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CanadaTaxCourtOutcomesLegalBenchClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 244, + "number_of_characters": 151915, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 184, + "average_text_length": 622.6024590163935, + "max_text_length": 3427, + "unique_text": 244, + "unique_labels": 3, + "labels": { + "allowed": { + "count": 101 + }, + "dismissed": { + "count": 131 + }, + "other": { + "count": 12 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 2855, + "number_texts_intersect_with_train": null, + "min_text_length": 284, + "average_text_length": 475.8333333333333, + "max_text_length": 678, + "unique_text": 6, + "unique_labels": 3, + "labels": { + "allowed": { + "count": 2 + }, + "dismissed": { + "count": 2 + }, + "other": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CataloniaTweetClassification.json b/mteb/descriptive_stats/Classification/CataloniaTweetClassification.json new file mode 100644 index 0000000000..9e0485a61f --- /dev/null +++ b/mteb/descriptive_stats/Classification/CataloniaTweetClassification.json @@ -0,0 +1,197 @@ +{ + "validation": { + "num_samples": 4025, + "number_of_characters": 814740, + "number_texts_intersect_with_train": 5, + "min_text_length": 17, + "average_text_length": 202.4198757763975, + "max_text_length": 956, + "unique_text": 4025, + "unique_labels": 3, + "labels": { + "1": { + "count": 1545 + }, + "0": { + "count": 1676 + }, + "2": { + "count": 804 + } + }, + "hf_subset_descriptive_stats": { + "spanish": { + "num_samples": 2015, + "number_of_characters": 424553, + "number_texts_intersect_with_train": 5, + "min_text_length": 17, + "average_text_length": 210.69627791563275, + "max_text_length": 956, + "unique_text": 2015, + "unique_labels": 3, + "labels": { + "1": { + "count": 782 + }, + "0": { + "count": 856 + }, + "2": { + "count": 377 + } + } + }, + "catalan": { + "num_samples": 2010, + "number_of_characters": 390187, + "number_texts_intersect_with_train": 0, + "min_text_length": 26, + "average_text_length": 194.1228855721393, + "max_text_length": 753, 
+ "unique_text": 2010, + "unique_labels": 3, + "labels": { + "1": { + "count": 763 + }, + "2": { + "count": 427 + }, + "0": { + "count": 820 + } + } + } + } + }, + "test": { + "num_samples": 4026, + "number_of_characters": 807122, + "number_texts_intersect_with_train": 4, + "min_text_length": 21, + "average_text_length": 200.47739692001988, + "max_text_length": 911, + "unique_text": 4026, + "unique_labels": 3, + "labels": { + "0": { + "count": 1581 + }, + "1": { + "count": 1611 + }, + "2": { + "count": 834 + } + }, + "hf_subset_descriptive_stats": { + "spanish": { + "num_samples": 2016, + "number_of_characters": 421522, + "number_texts_intersect_with_train": 1, + "min_text_length": 21, + "average_text_length": 209.08829365079364, + "max_text_length": 911, + "unique_text": 2016, + "unique_labels": 3, + "labels": { + "0": { + "count": 829 + }, + "1": { + "count": 807 + }, + "2": { + "count": 380 + } + } + }, + "catalan": { + "num_samples": 2010, + "number_of_characters": 385600, + "number_texts_intersect_with_train": 0, + "min_text_length": 26, + "average_text_length": 191.8407960199005, + "max_text_length": 781, + "unique_text": 2010, + "unique_labels": 3, + "labels": { + "1": { + "count": 804 + }, + "2": { + "count": 454 + }, + "0": { + "count": 752 + } + } + } + } + }, + "train": { + "num_samples": 12074, + "number_of_characters": 2421991, + "number_texts_intersect_with_train": null, + "min_text_length": 16, + "average_text_length": 200.59557727348022, + "max_text_length": 938, + "unique_text": 12070, + "unique_labels": 3, + "labels": { + "0": { + "count": 4836 + }, + "2": { + "count": 2388 + }, + "1": { + "count": 4850 + } + }, + "hf_subset_descriptive_stats": { + "spanish": { + "num_samples": 6046, + "number_of_characters": 1266286, + "number_texts_intersect_with_train": null, + "min_text_length": 16, + "average_text_length": 209.44194508766125, + "max_text_length": 938, + "unique_text": 6043, + "unique_labels": 3, + "labels": { + "0": { + "count": 2420 + }, + 
"2": { + "count": 1111 + }, + "1": { + "count": 2515 + } + } + }, + "catalan": { + "num_samples": 6028, + "number_of_characters": 1155705, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 191.72279362972793, + "max_text_length": 828, + "unique_text": 6028, + "unique_labels": 3, + "labels": { + "0": { + "count": 2416 + }, + "1": { + "count": 2335 + }, + "2": { + "count": 1277 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLIConfidentialityOfAgreementLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLIConfidentialityOfAgreementLegalBenchClassification.json new file mode 100644 index 0000000000..2bc034c34a --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLIConfidentialityOfAgreementLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 82, + "number_of_characters": 38800, + "number_texts_intersect_with_train": 0, + "min_text_length": 100, + "average_text_length": 473.1707317073171, + "max_text_length": 2493, + "unique_text": 82, + "unique_labels": 2, + "labels": { + "1": { + "count": 41 + }, + "0": { + "count": 41 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 3569, + "number_texts_intersect_with_train": null, + "min_text_length": 282, + "average_text_length": 446.125, + "max_text_length": 761, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLIExplicitIdentificationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLIExplicitIdentificationLegalBenchClassification.json new file mode 100644 index 0000000000..1cd4560671 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLIExplicitIdentificationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + 
"num_samples": 109, + "number_of_characters": 55167, + "number_texts_intersect_with_train": 0, + "min_text_length": 87, + "average_text_length": 506.1192660550459, + "max_text_length": 1897, + "unique_text": 109, + "unique_labels": 2, + "labels": { + "1": { + "count": 20 + }, + "0": { + "count": 89 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 3097, + "number_texts_intersect_with_train": null, + "min_text_length": 215, + "average_text_length": 387.125, + "max_text_length": 610, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification.json new file mode 100644 index 0000000000..38551cdc98 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 139, + "number_of_characters": 73080, + "number_texts_intersect_with_train": 0, + "min_text_length": 76, + "average_text_length": 525.7553956834532, + "max_text_length": 1931, + "unique_text": 139, + "unique_labels": 2, + "labels": { + "1": { + "count": 68 + }, + "0": { + "count": 71 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 3300, + "number_texts_intersect_with_train": null, + "min_text_length": 92, + "average_text_length": 412.5, + "max_text_length": 1120, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLILimitedUseLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLILimitedUseLegalBenchClassification.json new file mode 100644 
index 0000000000..2cad63ae20 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLILimitedUseLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 208, + "number_of_characters": 84762, + "number_texts_intersect_with_train": 0, + "min_text_length": 80, + "average_text_length": 407.50961538461536, + "max_text_length": 1672, + "unique_text": 208, + "unique_labels": 2, + "labels": { + "1": { + "count": 97 + }, + "0": { + "count": 111 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 4349, + "number_texts_intersect_with_train": null, + "min_text_length": 154, + "average_text_length": 543.625, + "max_text_length": 1104, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLINoLicensingLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLINoLicensingLegalBenchClassification.json new file mode 100644 index 0000000000..2b8e3d90f4 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLINoLicensingLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 162, + "number_of_characters": 67946, + "number_texts_intersect_with_train": 0, + "min_text_length": 72, + "average_text_length": 419.41975308641975, + "max_text_length": 1976, + "unique_text": 162, + "unique_labels": 2, + "labels": { + "1": { + "count": 80 + }, + "0": { + "count": 82 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 2011, + "number_texts_intersect_with_train": null, + "min_text_length": 127, + "average_text_length": 251.375, + "max_text_length": 367, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLINoticeOnCompelledDisclosureLegalBenchClassification.json 
b/mteb/descriptive_stats/Classification/ContractNLINoticeOnCompelledDisclosureLegalBenchClassification.json new file mode 100644 index 0000000000..c6087deeb5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLINoticeOnCompelledDisclosureLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 142, + "number_of_characters": 71490, + "number_texts_intersect_with_train": 0, + "min_text_length": 65, + "average_text_length": 503.4507042253521, + "max_text_length": 1976, + "unique_text": 142, + "unique_labels": 2, + "labels": { + "1": { + "count": 71 + }, + "0": { + "count": 71 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 3417, + "number_texts_intersect_with_train": null, + "min_text_length": 181, + "average_text_length": 427.125, + "max_text_length": 816, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification.json new file mode 100644 index 0000000000..0ddfe91f81 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 178, + "number_of_characters": 76077, + "number_texts_intersect_with_train": 0, + "min_text_length": 87, + "average_text_length": 427.3988764044944, + "max_text_length": 1903, + "unique_text": 178, + "unique_labels": 2, + "labels": { + "1": { + "count": 89 + }, + "0": { + "count": 89 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 2241, + "number_texts_intersect_with_train": null, + "min_text_length": 152, + "average_text_length": 280.125, + "max_text_length": 435, + "unique_text": 8, + 
"unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLIPermissibleCopyLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLIPermissibleCopyLegalBenchClassification.json new file mode 100644 index 0000000000..cf09d8dab4 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLIPermissibleCopyLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 87, + "number_of_characters": 33655, + "number_texts_intersect_with_train": 0, + "min_text_length": 87, + "average_text_length": 386.8390804597701, + "max_text_length": 1897, + "unique_text": 87, + "unique_labels": 2, + "labels": { + "1": { + "count": 18 + }, + "0": { + "count": 69 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 3060, + "number_texts_intersect_with_train": null, + "min_text_length": 117, + "average_text_length": 382.5, + "max_text_length": 824, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification.json new file mode 100644 index 0000000000..37c70ae25a --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 136, + "number_of_characters": 53910, + "number_texts_intersect_with_train": 0, + "min_text_length": 109, + "average_text_length": 396.3970588235294, + "max_text_length": 1897, + "unique_text": 136, + "unique_labels": 2, + "labels": { + "1": { + "count": 68 + }, + "0": { + "count": 68 + } + } + }, + "train": { + 
"num_samples": 8, + "number_of_characters": 3344, + "number_texts_intersect_with_train": null, + "min_text_length": 169, + "average_text_length": 418.0, + "max_text_length": 1015, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification.json new file mode 100644 index 0000000000..3c8e618bad --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 111, + "number_of_characters": 58729, + "number_texts_intersect_with_train": 0, + "min_text_length": 76, + "average_text_length": 529.0900900900901, + "max_text_length": 1903, + "unique_text": 111, + "unique_labels": 2, + "labels": { + "1": { + "count": 28 + }, + "0": { + "count": 83 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 4055, + "number_texts_intersect_with_train": null, + "min_text_length": 208, + "average_text_length": 506.875, + "max_text_length": 1087, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLIReturnOfConfidentialInformationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLIReturnOfConfidentialInformationLegalBenchClassification.json new file mode 100644 index 0000000000..b69f701865 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLIReturnOfConfidentialInformationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 66, + "number_of_characters": 31567, + "number_texts_intersect_with_train": 0, + 
"min_text_length": 177, + "average_text_length": 478.2878787878788, + "max_text_length": 1469, + "unique_text": 66, + "unique_labels": 2, + "labels": { + "1": { + "count": 32 + }, + "0": { + "count": 34 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 3035, + "number_texts_intersect_with_train": null, + "min_text_length": 128, + "average_text_length": 379.375, + "max_text_length": 824, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLISharingWithEmployeesLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLISharingWithEmployeesLegalBenchClassification.json new file mode 100644 index 0000000000..be7bafb3bc --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLISharingWithEmployeesLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 170, + "number_of_characters": 93267, + "number_texts_intersect_with_train": 0, + "min_text_length": 87, + "average_text_length": 548.6294117647059, + "max_text_length": 2493, + "unique_text": 170, + "unique_labels": 2, + "labels": { + "1": { + "count": 88 + }, + "0": { + "count": 82 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 2680, + "number_texts_intersect_with_train": null, + "min_text_length": 126, + "average_text_length": 335.0, + "max_text_length": 706, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLISharingWithThirdPartiesLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLISharingWithThirdPartiesLegalBenchClassification.json new file mode 100644 index 0000000000..2b857aee1b --- /dev/null +++ 
b/mteb/descriptive_stats/Classification/ContractNLISharingWithThirdPartiesLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 180, + "number_of_characters": 93112, + "number_texts_intersect_with_train": 0, + "min_text_length": 65, + "average_text_length": 517.2888888888889, + "max_text_length": 1976, + "unique_text": 180, + "unique_labels": 2, + "labels": { + "1": { + "count": 71 + }, + "0": { + "count": 109 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 2763, + "number_texts_intersect_with_train": null, + "min_text_length": 186, + "average_text_length": 345.375, + "max_text_length": 713, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ContractNLISurvivalOfObligationsLegalBenchClassification.json b/mteb/descriptive_stats/Classification/ContractNLISurvivalOfObligationsLegalBenchClassification.json new file mode 100644 index 0000000000..7bfc33c2a5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ContractNLISurvivalOfObligationsLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 157, + "number_of_characters": 65569, + "number_texts_intersect_with_train": 0, + "min_text_length": 94, + "average_text_length": 417.6369426751592, + "max_text_length": 1616, + "unique_text": 157, + "unique_labels": 2, + "labels": { + "1": { + "count": 74 + }, + "0": { + "count": 83 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 1767, + "number_texts_intersect_with_train": null, + "min_text_length": 76, + "average_text_length": 220.875, + "max_text_length": 468, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CorporateLobbyingLegalBenchClassification.json 
b/mteb/descriptive_stats/Classification/CorporateLobbyingLegalBenchClassification.json new file mode 100644 index 0000000000..d14b194e45 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CorporateLobbyingLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 490, + "number_of_characters": 2959526, + "number_texts_intersect_with_train": 0, + "min_text_length": 1241, + "average_text_length": 6039.848979591837, + "max_text_length": 16232, + "unique_text": 490, + "unique_labels": 2, + "labels": { + "0": { + "count": 345 + }, + "1": { + "count": 145 + } + } + }, + "train": { + "num_samples": 10, + "number_of_characters": 54082, + "number_texts_intersect_with_train": null, + "min_text_length": 4210, + "average_text_length": 5408.2, + "max_text_length": 6424, + "unique_text": 10, + "unique_labels": 2, + "labels": { + "0": { + "count": 7 + }, + "1": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CyrillicTurkicLangClassification.json b/mteb/descriptive_stats/Classification/CyrillicTurkicLangClassification.json new file mode 100644 index 0000000000..7c3f1b4419 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CyrillicTurkicLangClassification.json @@ -0,0 +1,80 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 191378, + "number_texts_intersect_with_train": 0, + "min_text_length": 15, + "average_text_length": 93.4462890625, + "max_text_length": 253, + "unique_text": 2048, + "unique_labels": 9, + "labels": { + "2": { + "count": 228 + }, + "3": { + "count": 227 + }, + "8": { + "count": 228 + }, + "5": { + "count": 227 + }, + "6": { + "count": 228 + }, + "0": { + "count": 227 + }, + "7": { + "count": 227 + }, + "1": { + "count": 228 + }, + "4": { + "count": 228 + } + } + }, + "train": { + "num_samples": 72000, + "number_of_characters": 6640175, + "number_texts_intersect_with_train": null, + "min_text_length": 15, + "average_text_length": 92.22465277777778, + 
"max_text_length": 255, + "unique_text": 72000, + "unique_labels": 9, + "labels": { + "8": { + "count": 8000 + }, + "3": { + "count": 8000 + }, + "7": { + "count": 8000 + }, + "5": { + "count": 8000 + }, + "2": { + "count": 8000 + }, + "1": { + "count": 8000 + }, + "6": { + "count": 8000 + }, + "4": { + "count": 8000 + }, + "0": { + "count": 8000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CzechProductReviewSentimentClassification.json b/mteb/descriptive_stats/Classification/CzechProductReviewSentimentClassification.json new file mode 100644 index 0000000000..1840c40ace --- /dev/null +++ b/mteb/descriptive_stats/Classification/CzechProductReviewSentimentClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 314089, + "number_texts_intersect_with_train": 506, + "min_text_length": 1, + "average_text_length": 153.36376953125, + "max_text_length": 2859, + "unique_text": 2003, + "unique_labels": 3, + "labels": { + "1": { + "count": 683 + }, + "0": { + "count": 682 + }, + "2": { + "count": 683 + } + } + }, + "train": { + "num_samples": 24000, + "number_of_characters": 3660165, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 152.506875, + "max_text_length": 2603, + "unique_text": 20409, + "unique_labels": 3, + "labels": { + "1": { + "count": 8000 + }, + "0": { + "count": 8000 + }, + "2": { + "count": 8000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CzechSoMeSentimentClassification.json b/mteb/descriptive_stats/Classification/CzechSoMeSentimentClassification.json new file mode 100644 index 0000000000..638cf011a9 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CzechSoMeSentimentClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 1000, + "number_of_characters": 59888, + "number_texts_intersect_with_train": 1, + "min_text_length": 1, + "average_text_length": 
59.888, + "max_text_length": 140, + "unique_text": 968, + "unique_labels": 3, + "labels": { + "0": { + "count": 333 + }, + "1": { + "count": 334 + }, + "-1": { + "count": 333 + } + } + }, + "train": { + "num_samples": 5000, + "number_of_characters": 287057, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 57.4114, + "max_text_length": 140, + "unique_text": 4810, + "unique_labels": 3, + "labels": { + "0": { + "count": 1667 + }, + "1": { + "count": 1666 + }, + "-1": { + "count": 1667 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/CzechSubjectivityClassification.json b/mteb/descriptive_stats/Classification/CzechSubjectivityClassification.json new file mode 100644 index 0000000000..08238e6466 --- /dev/null +++ b/mteb/descriptive_stats/Classification/CzechSubjectivityClassification.json @@ -0,0 +1,56 @@ +{ + "validation": { + "num_samples": 500, + "number_of_characters": 54082, + "number_texts_intersect_with_train": 0, + "min_text_length": 28, + "average_text_length": 108.164, + "max_text_length": 443, + "unique_text": 500, + "unique_labels": 2, + "labels": { + "0": { + "count": 250 + }, + "1": { + "count": 250 + } + } + }, + "test": { + "num_samples": 2000, + "number_of_characters": 216612, + "number_texts_intersect_with_train": 0, + "min_text_length": 25, + "average_text_length": 108.306, + "max_text_length": 689, + "unique_text": 2000, + "unique_labels": 2, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + } + } + }, + "train": { + "num_samples": 7443, + "number_of_characters": 816035, + "number_texts_intersect_with_train": null, + "min_text_length": 24, + "average_text_length": 109.6379148192933, + "max_text_length": 5399, + "unique_text": 7443, + "unique_labels": 2, + "labels": { + "0": { + "count": 3750 + }, + "1": { + "count": 3693 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/DBpediaClassification.json 
b/mteb/descriptive_stats/Classification/DBpediaClassification.json new file mode 100644 index 0000000000..28ba58730a --- /dev/null +++ b/mteb/descriptive_stats/Classification/DBpediaClassification.json @@ -0,0 +1,110 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 568368, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 277.5234375, + "max_text_length": 1045, + "unique_text": 2048, + "unique_labels": 14, + "labels": { + "7": { + "count": 147 + }, + "0": { + "count": 146 + }, + "10": { + "count": 146 + }, + "3": { + "count": 146 + }, + "13": { + "count": 147 + }, + "2": { + "count": 146 + }, + "12": { + "count": 147 + }, + "1": { + "count": 146 + }, + "6": { + "count": 146 + }, + "11": { + "count": 146 + }, + "8": { + "count": 146 + }, + "5": { + "count": 147 + }, + "4": { + "count": 146 + }, + "9": { + "count": 146 + } + } + }, + "train": { + "num_samples": 2048, + "number_of_characters": 578420, + "number_texts_intersect_with_train": null, + "min_text_length": 22, + "average_text_length": 282.431640625, + "max_text_length": 777, + "unique_text": 2048, + "unique_labels": 14, + "labels": { + "12": { + "count": 147 + }, + "10": { + "count": 146 + }, + "2": { + "count": 146 + }, + "5": { + "count": 147 + }, + "13": { + "count": 147 + }, + "9": { + "count": 146 + }, + "6": { + "count": 146 + }, + "4": { + "count": 146 + }, + "3": { + "count": 146 + }, + "1": { + "count": 146 + }, + "0": { + "count": 146 + }, + "8": { + "count": 146 + }, + "11": { + "count": 146 + }, + "7": { + "count": 147 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/DKHateClassification.json b/mteb/descriptive_stats/Classification/DKHateClassification.json new file mode 100644 index 0000000000..214fb7925f --- /dev/null +++ b/mteb/descriptive_stats/Classification/DKHateClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 329, + "number_of_characters": 29011, + 
"number_texts_intersect_with_train": 4, + "min_text_length": 1, + "average_text_length": 88.17933130699087, + "max_text_length": 2434, + "unique_text": 326, + "unique_labels": 2, + "labels": { + "0": { + "count": 288 + }, + "1": { + "count": 41 + } + } + }, + "train": { + "num_samples": 2960, + "number_of_characters": 307722, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 103.96013513513513, + "max_text_length": 5403, + "unique_text": 2902, + "unique_labels": 2, + "labels": { + "0": { + "count": 2576 + }, + "1": { + "count": 384 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/DalajClassification.json b/mteb/descriptive_stats/Classification/DalajClassification.json new file mode 100644 index 0000000000..58c11ce0d3 --- /dev/null +++ b/mteb/descriptive_stats/Classification/DalajClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 888, + "number_of_characters": 107246, + "number_texts_intersect_with_train": 2, + "min_text_length": 12, + "average_text_length": 120.77252252252252, + "max_text_length": 342, + "unique_text": 748, + "unique_labels": 2, + "labels": { + "1": { + "count": 444 + }, + "0": { + "count": 444 + } + } + }, + "train": { + "num_samples": 7682, + "number_of_characters": 932450, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 121.38115074199428, + "max_text_length": 424, + "unique_text": 6337, + "unique_labels": 2, + "labels": { + "1": { + "count": 3841 + }, + "0": { + "count": 3841 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/DanishPoliticalCommentsClassification.json b/mteb/descriptive_stats/Classification/DanishPoliticalCommentsClassification.json new file mode 100644 index 0000000000..dffe202733 --- /dev/null +++ b/mteb/descriptive_stats/Classification/DanishPoliticalCommentsClassification.json @@ -0,0 +1,29 @@ +{ + "train": { + "num_samples": 
7206, + "number_of_characters": 501542, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 69.60061060227588, + "max_text_length": 2719, + "unique_text": 7129, + "unique_labels": 5, + "labels": { + "3": { + "count": 2094 + }, + "4": { + "count": 670 + }, + "2": { + "count": 3288 + }, + "1": { + "count": 802 + }, + "0": { + "count": 352 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/DefinitionClassificationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/DefinitionClassificationLegalBenchClassification.json new file mode 100644 index 0000000000..fe009f1127 --- /dev/null +++ b/mteb/descriptive_stats/Classification/DefinitionClassificationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1337, + "number_of_characters": 339233, + "number_texts_intersect_with_train": 0, + "min_text_length": 21, + "average_text_length": 253.72700074794315, + "max_text_length": 1504, + "unique_text": 1337, + "unique_labels": 2, + "labels": { + "1": { + "count": 691 + }, + "0": { + "count": 646 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 1656, + "number_texts_intersect_with_train": null, + "min_text_length": 57, + "average_text_length": 207.0, + "max_text_length": 355, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/Diversity1LegalBenchClassification.json b/mteb/descriptive_stats/Classification/Diversity1LegalBenchClassification.json new file mode 100644 index 0000000000..d347491118 --- /dev/null +++ b/mteb/descriptive_stats/Classification/Diversity1LegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 300, + "number_of_characters": 50207, + "number_texts_intersect_with_train": 0, + "min_text_length": 147, + "average_text_length": 167.35666666666665, + 
"max_text_length": 188, + "unique_text": 300, + "unique_labels": 2, + "labels": { + "0": { + "count": 229 + }, + "1": { + "count": 71 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 997, + "number_texts_intersect_with_train": null, + "min_text_length": 153, + "average_text_length": 166.16666666666666, + "max_text_length": 176, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "0": { + "count": 5 + }, + "1": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/Diversity2LegalBenchClassification.json b/mteb/descriptive_stats/Classification/Diversity2LegalBenchClassification.json new file mode 100644 index 0000000000..24c66358b9 --- /dev/null +++ b/mteb/descriptive_stats/Classification/Diversity2LegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 300, + "number_of_characters": 62556, + "number_texts_intersect_with_train": 0, + "min_text_length": 183, + "average_text_length": 208.52, + "max_text_length": 232, + "unique_text": 300, + "unique_labels": 2, + "labels": { + "0": { + "count": 224 + }, + "1": { + "count": 76 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1229, + "number_texts_intersect_with_train": null, + "min_text_length": 200, + "average_text_length": 204.83333333333334, + "max_text_length": 216, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "0": { + "count": 5 + }, + "1": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/Diversity3LegalBenchClassification.json b/mteb/descriptive_stats/Classification/Diversity3LegalBenchClassification.json new file mode 100644 index 0000000000..6a1c2c26d2 --- /dev/null +++ b/mteb/descriptive_stats/Classification/Diversity3LegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 300, + "number_of_characters": 59198, + "number_texts_intersect_with_train": 0, + "min_text_length": 172, + 
"average_text_length": 197.32666666666665, + "max_text_length": 225, + "unique_text": 300, + "unique_labels": 2, + "labels": { + "0": { + "count": 124 + }, + "1": { + "count": 176 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1168, + "number_texts_intersect_with_train": null, + "min_text_length": 184, + "average_text_length": 194.66666666666666, + "max_text_length": 205, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/Diversity4LegalBenchClassification.json b/mteb/descriptive_stats/Classification/Diversity4LegalBenchClassification.json new file mode 100644 index 0000000000..d3eeba68cf --- /dev/null +++ b/mteb/descriptive_stats/Classification/Diversity4LegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 300, + "number_of_characters": 61985, + "number_texts_intersect_with_train": 0, + "min_text_length": 181, + "average_text_length": 206.61666666666667, + "max_text_length": 226, + "unique_text": 300, + "unique_labels": 2, + "labels": { + "1": { + "count": 160 + }, + "0": { + "count": 140 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1246, + "number_texts_intersect_with_train": null, + "min_text_length": 201, + "average_text_length": 207.66666666666666, + "max_text_length": 215, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/Diversity5LegalBenchClassification.json b/mteb/descriptive_stats/Classification/Diversity5LegalBenchClassification.json new file mode 100644 index 0000000000..c2f46616cc --- /dev/null +++ b/mteb/descriptive_stats/Classification/Diversity5LegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 300, + "number_of_characters": 71056, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 208, + "average_text_length": 236.85333333333332, + "max_text_length": 264, + "unique_text": 300, + "unique_labels": 2, + "labels": { + "1": { + "count": 172 + }, + "0": { + "count": 128 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 1408, + "number_texts_intersect_with_train": null, + "min_text_length": 221, + "average_text_length": 234.66666666666666, + "max_text_length": 245, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "0": { + "count": 2 + }, + "1": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/Diversity6LegalBenchClassification.json b/mteb/descriptive_stats/Classification/Diversity6LegalBenchClassification.json new file mode 100644 index 0000000000..0b7aae1185 --- /dev/null +++ b/mteb/descriptive_stats/Classification/Diversity6LegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 300, + "number_of_characters": 108936, + "number_texts_intersect_with_train": 0, + "min_text_length": 330, + "average_text_length": 363.12, + "max_text_length": 399, + "unique_text": 300, + "unique_labels": 2, + "labels": { + "1": { + "count": 159 + }, + "0": { + "count": 141 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 2199, + "number_texts_intersect_with_train": null, + "min_text_length": 355, + "average_text_length": 366.5, + "max_text_length": 383, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "0": { + "count": 3 + }, + "1": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/DutchBookReviewSentimentClassification.json b/mteb/descriptive_stats/Classification/DutchBookReviewSentimentClassification.json new file mode 100644 index 0000000000..320e37e192 --- /dev/null +++ b/mteb/descriptive_stats/Classification/DutchBookReviewSentimentClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 
2224, + "number_of_characters": 3209177, + "number_texts_intersect_with_train": 0, + "min_text_length": 4, + "average_text_length": 1442.9752697841727, + "max_text_length": 11140, + "unique_text": 2224, + "unique_labels": 2, + "labels": { + "1": { + "count": 1112 + }, + "0": { + "count": 1112 + } + } + }, + "train": { + "num_samples": 20028, + "number_of_characters": 29162515, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 1456.0872278809666, + "max_text_length": 22676, + "unique_text": 20028, + "unique_labels": 2, + "labels": { + "1": { + "count": 10014 + }, + "0": { + "count": 10014 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/EmotionClassification.json b/mteb/descriptive_stats/Classification/EmotionClassification.json new file mode 100644 index 0000000000..7c87fbac5b --- /dev/null +++ b/mteb/descriptive_stats/Classification/EmotionClassification.json @@ -0,0 +1,92 @@ +{ + "validation": { + "num_samples": 2000, + "number_of_characters": 190695, + "number_texts_intersect_with_train": 5, + "min_text_length": 11, + "average_text_length": 95.3475, + "max_text_length": 295, + "unique_text": 1998, + "unique_labels": 6, + "labels": { + "0": { + "count": 550 + }, + "2": { + "count": 178 + }, + "3": { + "count": 275 + }, + "1": { + "count": 704 + }, + "4": { + "count": 212 + }, + "5": { + "count": 81 + } + } + }, + "test": { + "num_samples": 2000, + "number_of_characters": 193173, + "number_texts_intersect_with_train": 11, + "min_text_length": 14, + "average_text_length": 96.5865, + "max_text_length": 296, + "unique_text": 2000, + "unique_labels": 6, + "labels": { + "0": { + "count": 581 + }, + "1": { + "count": 695 + }, + "4": { + "count": 224 + }, + "3": { + "count": 275 + }, + "2": { + "count": 159 + }, + "5": { + "count": 66 + } + } + }, + "train": { + "num_samples": 16000, + "number_of_characters": 1549533, + "number_texts_intersect_with_train": null, + "min_text_length": 
7, + "average_text_length": 96.8458125, + "max_text_length": 300, + "unique_text": 15969, + "unique_labels": 6, + "labels": { + "0": { + "count": 4666 + }, + "3": { + "count": 2159 + }, + "2": { + "count": 1304 + }, + "5": { + "count": 572 + }, + "4": { + "count": 1937 + }, + "1": { + "count": 5362 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/EstonianValenceClassification.json b/mteb/descriptive_stats/Classification/EstonianValenceClassification.json new file mode 100644 index 0000000000..b28f65fb32 --- /dev/null +++ b/mteb/descriptive_stats/Classification/EstonianValenceClassification.json @@ -0,0 +1,50 @@ +{ + "test": { + "num_samples": 818, + "number_of_characters": 189374, + "number_texts_intersect_with_train": 12, + "min_text_length": 4, + "average_text_length": 231.5085574572127, + "max_text_length": 1416, + "unique_text": 818, + "unique_labels": 4, + "labels": { + "2": { + "count": 139 + }, + "1": { + "count": 393 + }, + "3": { + "count": 103 + }, + "0": { + "count": 183 + } + } + }, + "train": { + "num_samples": 3270, + "number_of_characters": 741330, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 226.70642201834863, + "max_text_length": 1258, + "unique_text": 3265, + "unique_labels": 4, + "labels": { + "1": { + "count": 1534 + }, + "2": { + "count": 588 + }, + "0": { + "count": 699 + }, + "3": { + "count": 449 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/FilipinoShopeeReviewsClassification.json b/mteb/descriptive_stats/Classification/FilipinoShopeeReviewsClassification.json new file mode 100644 index 0000000000..a6fa121dc3 --- /dev/null +++ b/mteb/descriptive_stats/Classification/FilipinoShopeeReviewsClassification.json @@ -0,0 +1,83 @@ +{ + "validation": { + "num_samples": 2048, + "number_of_characters": 295480, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 
144.27734375, + "max_text_length": 1057, + "unique_text": 2048, + "unique_labels": 5, + "labels": { + "3": { + "count": 409 + }, + "4": { + "count": 410 + }, + "2": { + "count": 410 + }, + "1": { + "count": 410 + }, + "0": { + "count": 409 + } + } + }, + "test": { + "num_samples": 2048, + "number_of_characters": 297163, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 145.09912109375, + "max_text_length": 1074, + "unique_text": 2048, + "unique_labels": 5, + "labels": { + "3": { + "count": 409 + }, + "4": { + "count": 410 + }, + "2": { + "count": 410 + }, + "1": { + "count": 410 + }, + "0": { + "count": 409 + } + } + }, + "train": { + "num_samples": 10500, + "number_of_characters": 1535817, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 146.26828571428572, + "max_text_length": 4716, + "unique_text": 10500, + "unique_labels": 5, + "labels": { + "4": { + "count": 2100 + }, + "2": { + "count": 2100 + }, + "0": { + "count": 2100 + }, + "1": { + "count": 2100 + }, + "3": { + "count": 2100 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/FinToxicityClassification.json b/mteb/descriptive_stats/Classification/FinToxicityClassification.json new file mode 100644 index 0000000000..9f434c1a1a --- /dev/null +++ b/mteb/descriptive_stats/Classification/FinToxicityClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 821308, + "number_texts_intersect_with_train": 0, + "min_text_length": 9, + "average_text_length": 401.029296875, + "max_text_length": 6480, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1853 + }, + "1": { + "count": 195 + } + } + }, + "train": { + "num_samples": 2048, + "number_of_characters": 886035, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 432.63427734375, + "max_text_length": 5372, + "unique_text": 
2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1852 + }, + "1": { + "count": 196 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json b/mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json new file mode 100644 index 0000000000..e9e05850a6 --- /dev/null +++ b/mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json @@ -0,0 +1,23 @@ +{ + "train": { + "num_samples": 2264, + "number_of_characters": 276123, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 121.96245583038869, + "max_text_length": 315, + "unique_text": 2259, + "unique_labels": 3, + "labels": { + "1": { + "count": 1391 + }, + "2": { + "count": 570 + }, + "0": { + "count": 303 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/FrenchBookReviews.json b/mteb/descriptive_stats/Classification/FrenchBookReviews.json new file mode 100644 index 0000000000..5f273534c9 --- /dev/null +++ b/mteb/descriptive_stats/Classification/FrenchBookReviews.json @@ -0,0 +1,23 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 648941, + "number_texts_intersect_with_train": null, + "min_text_length": 0, + "average_text_length": 316.86572265625, + "max_text_length": 3666, + "unique_text": 2025, + "unique_labels": 3, + "labels": { + "2": { + "count": 1414 + }, + "1": { + "count": 452 + }, + "0": { + "count": 182 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/FrenkEnClassification.json b/mteb/descriptive_stats/Classification/FrenkEnClassification.json new file mode 100644 index 0000000000..e4a5f24123 --- /dev/null +++ b/mteb/descriptive_stats/Classification/FrenkEnClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2301, + "number_of_characters": 434318, + "number_texts_intersect_with_train": 23, + "min_text_length": 1, + 
"average_text_length": 188.75184702303346, + "max_text_length": 7322, + "unique_text": 2282, + "unique_labels": 2, + "labels": { + "0": { + "count": 1426 + }, + "1": { + "count": 875 + } + } + }, + "train": { + "num_samples": 8404, + "number_of_characters": 1216080, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 144.70252260828178, + "max_text_length": 5449, + "unique_text": 8275, + "unique_labels": 2, + "labels": { + "0": { + "count": 5379 + }, + "1": { + "count": 3025 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/FrenkHrClassification.json b/mteb/descriptive_stats/Classification/FrenkHrClassification.json new file mode 100644 index 0000000000..1846f6e493 --- /dev/null +++ b/mteb/descriptive_stats/Classification/FrenkHrClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2119, + "number_of_characters": 190420, + "number_texts_intersect_with_train": 29, + "min_text_length": 1, + "average_text_length": 89.86314299197734, + "max_text_length": 2438, + "unique_text": 2100, + "unique_labels": 2, + "labels": { + "1": { + "count": 1191 + }, + "0": { + "count": 928 + } + } + }, + "train": { + "num_samples": 7964, + "number_of_characters": 931021, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 116.90369161225514, + "max_text_length": 4559, + "unique_text": 7829, + "unique_labels": 2, + "labels": { + "1": { + "count": 4372 + }, + "0": { + "count": 3592 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/FrenkSlClassification.json b/mteb/descriptive_stats/Classification/FrenkSlClassification.json new file mode 100644 index 0000000000..7ab7eccff0 --- /dev/null +++ b/mteb/descriptive_stats/Classification/FrenkSlClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 277147, + "number_texts_intersect_with_train": 28, + "min_text_length": 
1, + "average_text_length": 135.32568359375, + "max_text_length": 4678, + "unique_text": 2030, + "unique_labels": 2, + "labels": { + "1": { + "count": 1048 + }, + "0": { + "count": 1000 + } + } + }, + "train": { + "num_samples": 7189, + "number_of_characters": 958807, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 133.37140075114758, + "max_text_length": 7688, + "unique_text": 7124, + "unique_labels": 2, + "labels": { + "1": { + "count": 3387 + }, + "0": { + "count": 3802 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/FunctionOfDecisionSectionLegalBenchClassification.json b/mteb/descriptive_stats/Classification/FunctionOfDecisionSectionLegalBenchClassification.json new file mode 100644 index 0000000000..a81aa4f3ae --- /dev/null +++ b/mteb/descriptive_stats/Classification/FunctionOfDecisionSectionLegalBenchClassification.json @@ -0,0 +1,68 @@ +{ + "test": { + "num_samples": 367, + "number_of_characters": 202245, + "number_texts_intersect_with_train": 0, + "min_text_length": 31, + "average_text_length": 551.0762942779292, + "max_text_length": 2437, + "unique_text": 367, + "unique_labels": 7, + "labels": { + "Facts": { + "count": 49 + }, + "Procedural History": { + "count": 58 + }, + "Issue": { + "count": 51 + }, + "Rule": { + "count": 56 + }, + "Analysis": { + "count": 56 + }, + "Conclusion": { + "count": 50 + }, + "Decree": { + "count": 47 + } + } + }, + "train": { + "num_samples": 7, + "number_of_characters": 1447, + "number_texts_intersect_with_train": null, + "min_text_length": 52, + "average_text_length": 206.71428571428572, + "max_text_length": 302, + "unique_text": 7, + "unique_labels": 7, + "labels": { + "Facts": { + "count": 1 + }, + "Procedural History": { + "count": 1 + }, + "Issue": { + "count": 1 + }, + "Rule": { + "count": 1 + }, + "Analysis": { + "count": 1 + }, + "Conclusion": { + "count": 1 + }, + "Decree": { + "count": 1 + } + } + } +} \ No newline at end 
of file diff --git a/mteb/descriptive_stats/Classification/GeoreviewClassification.json b/mteb/descriptive_stats/Classification/GeoreviewClassification.json new file mode 100644 index 0000000000..47847e95c9 --- /dev/null +++ b/mteb/descriptive_stats/Classification/GeoreviewClassification.json @@ -0,0 +1,56 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 842648, + "number_texts_intersect_with_train": 0, + "min_text_length": 17, + "average_text_length": 411.44921875, + "max_text_length": 4124, + "unique_text": 2048, + "unique_labels": 5, + "labels": { + "2": { + "count": 410 + }, + "3": { + "count": 409 + }, + "1": { + "count": 410 + }, + "0": { + "count": 409 + }, + "4": { + "count": 410 + } + } + }, + "train": { + "num_samples": 50000, + "number_of_characters": 20786997, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 415.73994, + "max_text_length": 20162, + "unique_text": 50000, + "unique_labels": 5, + "labels": { + "2": { + "count": 10000 + }, + "4": { + "count": 10000 + }, + "1": { + "count": 10000 + }, + "0": { + "count": 10000 + }, + "3": { + "count": 10000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/GermanPoliticiansTwitterSentimentClassification.json b/mteb/descriptive_stats/Classification/GermanPoliticiansTwitterSentimentClassification.json new file mode 100644 index 0000000000..07c5f0980c --- /dev/null +++ b/mteb/descriptive_stats/Classification/GermanPoliticiansTwitterSentimentClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 357, + "number_of_characters": 107986, + "number_texts_intersect_with_train": 0, + "min_text_length": 3, + "average_text_length": 302.4817927170868, + "max_text_length": 652, + "unique_text": 357, + "unique_labels": 3, + "labels": { + "3": { + "count": 152 + }, + "2": { + "count": 108 + }, + "1": { + "count": 97 + } + } + }, + "train": { + "num_samples": 1428, + "number_of_characters": 443140, + 
"number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 310.32212885154064, + "max_text_length": 762, + "unique_text": 1428, + "unique_labels": 3, + "labels": { + "2": { + "count": 428 + }, + "3": { + "count": 611 + }, + "1": { + "count": 389 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/GreekLegalCodeClassification.json b/mteb/descriptive_stats/Classification/GreekLegalCodeClassification.json new file mode 100644 index 0000000000..072f1822eb --- /dev/null +++ b/mteb/descriptive_stats/Classification/GreekLegalCodeClassification.json @@ -0,0 +1,3146 @@ +{ + "validation": { + "num_samples": 2048, + "number_of_characters": 8347624, + "number_texts_intersect_with_train": 0, + "min_text_length": 60, + "average_text_length": 4075.98828125, + "max_text_length": 222630, + "unique_text": 2048, + "unique_labels": 321, + "labels": { + "318": { + "count": 20 + }, + "250": { + "count": 4 + }, + "150": { + "count": 4 + }, + "55": { + "count": 4 + }, + "243": { + "count": 1 + }, + "265": { + "count": 17 + }, + "118": { + "count": 15 + }, + "124": { + "count": 16 + }, + "295": { + "count": 10 + }, + "345": { + "count": 9 + }, + "222": { + "count": 3 + }, + "236": { + "count": 4 + }, + "248": { + "count": 17 + }, + "161": { + "count": 22 + }, + "128": { + "count": 3 + }, + "140": { + "count": 5 + }, + "177": { + "count": 5 + }, + "380": { + "count": 6 + }, + "173": { + "count": 29 + }, + "65": { + "count": 16 + }, + "244": { + "count": 3 + }, + "275": { + "count": 9 + }, + "108": { + "count": 9 + }, + "2": { + "count": 3 + }, + "99": { + "count": 8 + }, + "95": { + "count": 6 + }, + "332": { + "count": 13 + }, + "263": { + "count": 18 + }, + "180": { + "count": 34 + }, + "31": { + "count": 6 + }, + "44": { + "count": 15 + }, + "167": { + "count": 11 + }, + "142": { + "count": 21 + }, + "151": { + "count": 11 + }, + "183": { + "count": 6 + }, + "36": { + "count": 11 + }, + "383": { + "count": 5 + }, 
+ "112": { + "count": 12 + }, + "168": { + "count": 9 + }, + "119": { + "count": 5 + }, + "365": { + "count": 4 + }, + "147": { + "count": 18 + }, + "331": { + "count": 58 + }, + "81": { + "count": 7 + }, + "301": { + "count": 51 + }, + "224": { + "count": 13 + }, + "132": { + "count": 4 + }, + "136": { + "count": 4 + }, + "19": { + "count": 5 + }, + "203": { + "count": 7 + }, + "41": { + "count": 22 + }, + "219": { + "count": 4 + }, + "94": { + "count": 2 + }, + "267": { + "count": 6 + }, + "53": { + "count": 6 + }, + "23": { + "count": 14 + }, + "323": { + "count": 4 + }, + "361": { + "count": 2 + }, + "197": { + "count": 22 + }, + "382": { + "count": 10 + }, + "127": { + "count": 5 + }, + "379": { + "count": 9 + }, + "30": { + "count": 17 + }, + "334": { + "count": 11 + }, + "200": { + "count": 1 + }, + "80": { + "count": 6 + }, + "238": { + "count": 3 + }, + "74": { + "count": 14 + }, + "251": { + "count": 7 + }, + "199": { + "count": 16 + }, + "357": { + "count": 14 + }, + "27": { + "count": 5 + }, + "258": { + "count": 6 + }, + "152": { + "count": 8 + }, + "368": { + "count": 13 + }, + "230": { + "count": 23 + }, + "8": { + "count": 21 + }, + "6": { + "count": 11 + }, + "360": { + "count": 26 + }, + "273": { + "count": 7 + }, + "381": { + "count": 11 + }, + "232": { + "count": 1 + }, + "375": { + "count": 1 + }, + "62": { + "count": 1 + }, + "292": { + "count": 9 + }, + "192": { + "count": 9 + }, + "176": { + "count": 4 + }, + "311": { + "count": 2 + }, + "11": { + "count": 7 + }, + "305": { + "count": 7 + }, + "384": { + "count": 19 + }, + "3": { + "count": 4 + }, + "388": { + "count": 6 + }, + "59": { + "count": 12 + }, + "79": { + "count": 8 + }, + "109": { + "count": 2 + }, + "122": { + "count": 11 + }, + "355": { + "count": 6 + }, + "327": { + "count": 6 + }, + "47": { + "count": 9 + }, + "342": { + "count": 2 + }, + "337": { + "count": 12 + }, + "87": { + "count": 4 + }, + "98": { + "count": 7 + }, + "92": { + "count": 4 + }, + "15": { + "count": 2 + }, 
+ "96": { + "count": 1 + }, + "319": { + "count": 8 + }, + "335": { + "count": 3 + }, + "280": { + "count": 11 + }, + "155": { + "count": 6 + }, + "68": { + "count": 12 + }, + "350": { + "count": 8 + }, + "133": { + "count": 4 + }, + "297": { + "count": 1 + }, + "134": { + "count": 17 + }, + "207": { + "count": 4 + }, + "272": { + "count": 5 + }, + "287": { + "count": 14 + }, + "5": { + "count": 7 + }, + "284": { + "count": 8 + }, + "175": { + "count": 11 + }, + "336": { + "count": 6 + }, + "314": { + "count": 4 + }, + "181": { + "count": 10 + }, + "145": { + "count": 13 + }, + "114": { + "count": 6 + }, + "324": { + "count": 14 + }, + "162": { + "count": 6 + }, + "104": { + "count": 7 + }, + "182": { + "count": 1 + }, + "285": { + "count": 1 + }, + "371": { + "count": 12 + }, + "97": { + "count": 7 + }, + "386": { + "count": 5 + }, + "253": { + "count": 5 + }, + "290": { + "count": 10 + }, + "137": { + "count": 2 + }, + "14": { + "count": 6 + }, + "125": { + "count": 2 + }, + "259": { + "count": 3 + }, + "377": { + "count": 7 + }, + "333": { + "count": 7 + }, + "25": { + "count": 4 + }, + "356": { + "count": 6 + }, + "201": { + "count": 14 + }, + "190": { + "count": 3 + }, + "18": { + "count": 11 + }, + "363": { + "count": 9 + }, + "158": { + "count": 7 + }, + "194": { + "count": 1 + }, + "191": { + "count": 8 + }, + "76": { + "count": 7 + }, + "93": { + "count": 10 + }, + "245": { + "count": 7 + }, + "387": { + "count": 5 + }, + "218": { + "count": 15 + }, + "84": { + "count": 7 + }, + "196": { + "count": 1 + }, + "67": { + "count": 9 + }, + "255": { + "count": 1 + }, + "325": { + "count": 9 + }, + "91": { + "count": 2 + }, + "237": { + "count": 2 + }, + "294": { + "count": 1 + }, + "281": { + "count": 7 + }, + "385": { + "count": 1 + }, + "349": { + "count": 7 + }, + "121": { + "count": 13 + }, + "42": { + "count": 2 + }, + "240": { + "count": 10 + }, + "264": { + "count": 12 + }, + "123": { + "count": 5 + }, + "169": { + "count": 1 + }, + "1": { + "count": 3 + 
}, + "228": { + "count": 4 + }, + "310": { + "count": 1 + }, + "20": { + "count": 11 + }, + "326": { + "count": 7 + }, + "156": { + "count": 8 + }, + "52": { + "count": 2 + }, + "17": { + "count": 8 + }, + "312": { + "count": 8 + }, + "270": { + "count": 4 + }, + "322": { + "count": 5 + }, + "82": { + "count": 3 + }, + "56": { + "count": 3 + }, + "339": { + "count": 6 + }, + "321": { + "count": 9 + }, + "366": { + "count": 11 + }, + "198": { + "count": 6 + }, + "105": { + "count": 2 + }, + "289": { + "count": 3 + }, + "77": { + "count": 1 + }, + "279": { + "count": 6 + }, + "211": { + "count": 3 + }, + "204": { + "count": 4 + }, + "43": { + "count": 2 + }, + "309": { + "count": 4 + }, + "179": { + "count": 7 + }, + "353": { + "count": 10 + }, + "286": { + "count": 5 + }, + "358": { + "count": 4 + }, + "51": { + "count": 2 + }, + "143": { + "count": 7 + }, + "296": { + "count": 3 + }, + "126": { + "count": 7 + }, + "306": { + "count": 4 + }, + "138": { + "count": 7 + }, + "329": { + "count": 9 + }, + "239": { + "count": 1 + }, + "341": { + "count": 2 + }, + "100": { + "count": 3 + }, + "0": { + "count": 8 + }, + "70": { + "count": 10 + }, + "110": { + "count": 6 + }, + "193": { + "count": 1 + }, + "89": { + "count": 1 + }, + "234": { + "count": 3 + }, + "195": { + "count": 12 + }, + "38": { + "count": 2 + }, + "113": { + "count": 2 + }, + "148": { + "count": 4 + }, + "367": { + "count": 7 + }, + "75": { + "count": 8 + }, + "146": { + "count": 3 + }, + "298": { + "count": 2 + }, + "160": { + "count": 3 + }, + "254": { + "count": 2 + }, + "187": { + "count": 1 + }, + "268": { + "count": 10 + }, + "165": { + "count": 3 + }, + "225": { + "count": 4 + }, + "283": { + "count": 6 + }, + "163": { + "count": 1 + }, + "135": { + "count": 2 + }, + "86": { + "count": 4 + }, + "35": { + "count": 3 + }, + "217": { + "count": 6 + }, + "54": { + "count": 3 + }, + "202": { + "count": 3 + }, + "229": { + "count": 5 + }, + "338": { + "count": 3 + }, + "304": { + "count": 1 + }, + 
"212": { + "count": 3 + }, + "354": { + "count": 5 + }, + "262": { + "count": 1 + }, + "174": { + "count": 7 + }, + "210": { + "count": 1 + }, + "271": { + "count": 5 + }, + "220": { + "count": 5 + }, + "111": { + "count": 4 + }, + "28": { + "count": 2 + }, + "106": { + "count": 2 + }, + "149": { + "count": 2 + }, + "101": { + "count": 1 + }, + "48": { + "count": 1 + }, + "242": { + "count": 7 + }, + "178": { + "count": 5 + }, + "39": { + "count": 8 + }, + "64": { + "count": 1 + }, + "276": { + "count": 3 + }, + "369": { + "count": 1 + }, + "247": { + "count": 3 + }, + "34": { + "count": 2 + }, + "288": { + "count": 8 + }, + "373": { + "count": 8 + }, + "107": { + "count": 6 + }, + "351": { + "count": 1 + }, + "85": { + "count": 4 + }, + "291": { + "count": 3 + }, + "257": { + "count": 1 + }, + "266": { + "count": 3 + }, + "208": { + "count": 4 + }, + "32": { + "count": 2 + }, + "277": { + "count": 2 + }, + "29": { + "count": 2 + }, + "21": { + "count": 1 + }, + "7": { + "count": 6 + }, + "249": { + "count": 1 + }, + "141": { + "count": 1 + }, + "166": { + "count": 5 + }, + "316": { + "count": 2 + }, + "269": { + "count": 2 + }, + "206": { + "count": 3 + }, + "60": { + "count": 1 + }, + "184": { + "count": 1 + }, + "72": { + "count": 3 + }, + "88": { + "count": 3 + }, + "24": { + "count": 2 + }, + "241": { + "count": 1 + }, + "340": { + "count": 4 + }, + "303": { + "count": 3 + }, + "346": { + "count": 1 + }, + "347": { + "count": 2 + }, + "352": { + "count": 4 + }, + "227": { + "count": 5 + }, + "33": { + "count": 1 + }, + "102": { + "count": 1 + }, + "26": { + "count": 2 + }, + "131": { + "count": 1 + }, + "37": { + "count": 2 + }, + "246": { + "count": 1 + }, + "45": { + "count": 1 + }, + "205": { + "count": 2 + }, + "117": { + "count": 1 + }, + "78": { + "count": 2 + }, + "260": { + "count": 1 + }, + "103": { + "count": 1 + }, + "40": { + "count": 1 + }, + "115": { + "count": 2 + }, + "348": { + "count": 3 + }, + "69": { + "count": 1 + }, + "22": { + "count": 1 
+ }, + "49": { + "count": 2 + }, + "188": { + "count": 1 + }, + "372": { + "count": 1 + }, + "320": { + "count": 1 + }, + "71": { + "count": 1 + }, + "83": { + "count": 1 + }, + "307": { + "count": 1 + } + } + }, + "test": { + "num_samples": 2048, + "number_of_characters": 9738607, + "number_texts_intersect_with_train": 0, + "min_text_length": 66, + "average_text_length": 4755.17919921875, + "max_text_length": 729483, + "unique_text": 2048, + "unique_labels": 329, + "labels": { + "45": { + "count": 1 + }, + "124": { + "count": 22 + }, + "201": { + "count": 12 + }, + "332": { + "count": 15 + }, + "331": { + "count": 69 + }, + "208": { + "count": 3 + }, + "108": { + "count": 6 + }, + "188": { + "count": 1 + }, + "173": { + "count": 29 + }, + "44": { + "count": 21 + }, + "235": { + "count": 1 + }, + "287": { + "count": 15 + }, + "224": { + "count": 22 + }, + "301": { + "count": 36 + }, + "128": { + "count": 7 + }, + "336": { + "count": 6 + }, + "119": { + "count": 3 + }, + "333": { + "count": 6 + }, + "248": { + "count": 16 + }, + "378": { + "count": 3 + }, + "360": { + "count": 22 + }, + "166": { + "count": 14 + }, + "155": { + "count": 10 + }, + "65": { + "count": 10 + }, + "197": { + "count": 16 + }, + "180": { + "count": 28 + }, + "104": { + "count": 10 + }, + "158": { + "count": 9 + }, + "327": { + "count": 6 + }, + "107": { + "count": 5 + }, + "338": { + "count": 1 + }, + "372": { + "count": 2 + }, + "305": { + "count": 5 + }, + "350": { + "count": 5 + }, + "68": { + "count": 17 + }, + "174": { + "count": 2 + }, + "170": { + "count": 2 + }, + "230": { + "count": 18 + }, + "30": { + "count": 21 + }, + "74": { + "count": 8 + }, + "250": { + "count": 3 + }, + "47": { + "count": 15 + }, + "184": { + "count": 3 + }, + "175": { + "count": 12 + }, + "285": { + "count": 4 + }, + "42": { + "count": 6 + }, + "311": { + "count": 5 + }, + "179": { + "count": 15 + }, + "292": { + "count": 10 + }, + "147": { + "count": 16 + }, + "353": { + "count": 2 + }, + "318": { + 
"count": 11 + }, + "122": { + "count": 10 + }, + "41": { + "count": 29 + }, + "22": { + "count": 1 + }, + "310": { + "count": 4 + }, + "290": { + "count": 6 + }, + "134": { + "count": 21 + }, + "8": { + "count": 14 + }, + "228": { + "count": 3 + }, + "217": { + "count": 3 + }, + "379": { + "count": 12 + }, + "357": { + "count": 11 + }, + "56": { + "count": 6 + }, + "321": { + "count": 2 + }, + "38": { + "count": 2 + }, + "39": { + "count": 8 + }, + "308": { + "count": 3 + }, + "161": { + "count": 19 + }, + "337": { + "count": 4 + }, + "218": { + "count": 17 + }, + "263": { + "count": 21 + }, + "354": { + "count": 11 + }, + "358": { + "count": 5 + }, + "19": { + "count": 10 + }, + "295": { + "count": 9 + }, + "279": { + "count": 8 + }, + "95": { + "count": 6 + }, + "1": { + "count": 6 + }, + "118": { + "count": 15 + }, + "356": { + "count": 8 + }, + "160": { + "count": 4 + }, + "156": { + "count": 11 + }, + "264": { + "count": 15 + }, + "110": { + "count": 5 + }, + "243": { + "count": 3 + }, + "319": { + "count": 3 + }, + "11": { + "count": 11 + }, + "200": { + "count": 2 + }, + "349": { + "count": 14 + }, + "167": { + "count": 6 + }, + "334": { + "count": 10 + }, + "363": { + "count": 16 + }, + "219": { + "count": 3 + }, + "23": { + "count": 7 + }, + "202": { + "count": 4 + }, + "94": { + "count": 4 + }, + "385": { + "count": 3 + }, + "84": { + "count": 6 + }, + "123": { + "count": 2 + }, + "145": { + "count": 9 + }, + "72": { + "count": 4 + }, + "148": { + "count": 7 + }, + "204": { + "count": 2 + }, + "283": { + "count": 12 + }, + "324": { + "count": 12 + }, + "17": { + "count": 9 + }, + "205": { + "count": 5 + }, + "386": { + "count": 6 + }, + "91": { + "count": 2 + }, + "265": { + "count": 29 + }, + "114": { + "count": 13 + }, + "162": { + "count": 9 + }, + "152": { + "count": 5 + }, + "312": { + "count": 10 + }, + "70": { + "count": 9 + }, + "61": { + "count": 4 + }, + "51": { + "count": 1 + }, + "172": { + "count": 3 + }, + "211": { + "count": 4 + }, + "112": 
{ + "count": 11 + }, + "81": { + "count": 8 + }, + "294": { + "count": 4 + }, + "168": { + "count": 8 + }, + "6": { + "count": 4 + }, + "195": { + "count": 19 + }, + "212": { + "count": 1 + }, + "2": { + "count": 1 + }, + "314": { + "count": 2 + }, + "277": { + "count": 1 + }, + "207": { + "count": 3 + }, + "206": { + "count": 3 + }, + "140": { + "count": 6 + }, + "316": { + "count": 7 + }, + "366": { + "count": 19 + }, + "199": { + "count": 17 + }, + "54": { + "count": 2 + }, + "282": { + "count": 1 + }, + "7": { + "count": 8 + }, + "151": { + "count": 9 + }, + "280": { + "count": 6 + }, + "345": { + "count": 8 + }, + "335": { + "count": 8 + }, + "177": { + "count": 3 + }, + "270": { + "count": 8 + }, + "149": { + "count": 1 + }, + "288": { + "count": 7 + }, + "258": { + "count": 4 + }, + "142": { + "count": 17 + }, + "18": { + "count": 13 + }, + "20": { + "count": 6 + }, + "163": { + "count": 1 + }, + "28": { + "count": 2 + }, + "384": { + "count": 21 + }, + "249": { + "count": 2 + }, + "237": { + "count": 5 + }, + "227": { + "count": 3 + }, + "27": { + "count": 5 + }, + "267": { + "count": 3 + }, + "382": { + "count": 10 + }, + "97": { + "count": 4 + }, + "53": { + "count": 5 + }, + "37": { + "count": 3 + }, + "67": { + "count": 11 + }, + "325": { + "count": 4 + }, + "98": { + "count": 8 + }, + "190": { + "count": 2 + }, + "93": { + "count": 11 + }, + "284": { + "count": 4 + }, + "340": { + "count": 6 + }, + "86": { + "count": 3 + }, + "146": { + "count": 2 + }, + "0": { + "count": 8 + }, + "31": { + "count": 9 + }, + "377": { + "count": 3 + }, + "79": { + "count": 5 + }, + "60": { + "count": 5 + }, + "229": { + "count": 3 + }, + "259": { + "count": 1 + }, + "116": { + "count": 2 + }, + "106": { + "count": 3 + }, + "176": { + "count": 4 + }, + "367": { + "count": 8 + }, + "222": { + "count": 3 + }, + "368": { + "count": 9 + }, + "32": { + "count": 5 + }, + "99": { + "count": 5 + }, + "236": { + "count": 5 + }, + "121": { + "count": 8 + }, + "303": { + "count": 5 
+ }, + "143": { + "count": 6 + }, + "326": { + "count": 5 + }, + "64": { + "count": 5 + }, + "251": { + "count": 10 + }, + "182": { + "count": 1 + }, + "183": { + "count": 7 + }, + "352": { + "count": 2 + }, + "307": { + "count": 1 + }, + "342": { + "count": 3 + }, + "281": { + "count": 6 + }, + "266": { + "count": 11 + }, + "49": { + "count": 6 + }, + "157": { + "count": 4 + }, + "191": { + "count": 6 + }, + "26": { + "count": 5 + }, + "323": { + "count": 1 + }, + "365": { + "count": 3 + }, + "50": { + "count": 1 + }, + "127": { + "count": 13 + }, + "198": { + "count": 5 + }, + "196": { + "count": 3 + }, + "178": { + "count": 1 + }, + "59": { + "count": 7 + }, + "373": { + "count": 5 + }, + "137": { + "count": 2 + }, + "133": { + "count": 7 + }, + "240": { + "count": 9 + }, + "388": { + "count": 4 + }, + "241": { + "count": 1 + }, + "115": { + "count": 1 + }, + "14": { + "count": 6 + }, + "87": { + "count": 7 + }, + "52": { + "count": 4 + }, + "271": { + "count": 3 + }, + "347": { + "count": 1 + }, + "209": { + "count": 4 + }, + "329": { + "count": 5 + }, + "76": { + "count": 4 + }, + "3": { + "count": 4 + }, + "268": { + "count": 3 + }, + "355": { + "count": 4 + }, + "247": { + "count": 3 + }, + "181": { + "count": 7 + }, + "238": { + "count": 3 + }, + "296": { + "count": 3 + }, + "85": { + "count": 4 + }, + "381": { + "count": 10 + }, + "131": { + "count": 4 + }, + "313": { + "count": 1 + }, + "371": { + "count": 13 + }, + "29": { + "count": 3 + }, + "88": { + "count": 4 + }, + "138": { + "count": 5 + }, + "203": { + "count": 9 + }, + "387": { + "count": 8 + }, + "374": { + "count": 1 + }, + "351": { + "count": 3 + }, + "80": { + "count": 5 + }, + "105": { + "count": 2 + }, + "232": { + "count": 2 + }, + "369": { + "count": 4 + }, + "192": { + "count": 5 + }, + "12": { + "count": 1 + }, + "245": { + "count": 4 + }, + "75": { + "count": 8 + }, + "100": { + "count": 5 + }, + "90": { + "count": 1 + }, + "136": { + "count": 2 + }, + "63": { + "count": 2 + }, + "34": 
{ + "count": 3 + }, + "306": { + "count": 4 + }, + "242": { + "count": 5 + }, + "253": { + "count": 1 + }, + "78": { + "count": 4 + }, + "291": { + "count": 4 + }, + "286": { + "count": 3 + }, + "244": { + "count": 3 + }, + "132": { + "count": 9 + }, + "309": { + "count": 3 + }, + "43": { + "count": 1 + }, + "234": { + "count": 5 + }, + "348": { + "count": 4 + }, + "55": { + "count": 2 + }, + "257": { + "count": 1 + }, + "223": { + "count": 2 + }, + "153": { + "count": 1 + }, + "120": { + "count": 2 + }, + "220": { + "count": 4 + }, + "83": { + "count": 2 + }, + "225": { + "count": 2 + }, + "346": { + "count": 2 + }, + "383": { + "count": 5 + }, + "150": { + "count": 3 + }, + "276": { + "count": 3 + }, + "36": { + "count": 4 + }, + "187": { + "count": 3 + }, + "96": { + "count": 1 + }, + "289": { + "count": 4 + }, + "239": { + "count": 2 + }, + "57": { + "count": 1 + }, + "189": { + "count": 2 + }, + "25": { + "count": 1 + }, + "58": { + "count": 2 + }, + "126": { + "count": 2 + }, + "135": { + "count": 1 + }, + "341": { + "count": 2 + }, + "141": { + "count": 1 + }, + "125": { + "count": 3 + }, + "24": { + "count": 5 + }, + "298": { + "count": 2 + }, + "273": { + "count": 3 + }, + "262": { + "count": 2 + }, + "226": { + "count": 1 + }, + "9": { + "count": 1 + }, + "92": { + "count": 1 + }, + "275": { + "count": 3 + }, + "322": { + "count": 1 + }, + "77": { + "count": 2 + }, + "255": { + "count": 1 + }, + "35": { + "count": 3 + }, + "370": { + "count": 2 + }, + "165": { + "count": 1 + }, + "278": { + "count": 1 + }, + "269": { + "count": 1 + }, + "102": { + "count": 1 + }, + "274": { + "count": 1 + }, + "69": { + "count": 1 + }, + "214": { + "count": 1 + }, + "339": { + "count": 1 + }, + "33": { + "count": 1 + }, + "293": { + "count": 1 + }, + "304": { + "count": 2 + }, + "361": { + "count": 2 + }, + "216": { + "count": 1 + }, + "16": { + "count": 1 + }, + "101": { + "count": 1 + }, + "254": { + "count": 1 + }, + "272": { + "count": 1 + }, + "103": { + "count": 1 + 
} + } + }, + "train": { + "num_samples": 28536, + "number_of_characters": 121117865, + "number_texts_intersect_with_train": null, + "min_text_length": 17, + "average_text_length": 4244.388316512475, + "max_text_length": 931736, + "unique_text": 28536, + "unique_labels": 386, + "labels": { + "239": { + "count": 42 + }, + "183": { + "count": 121 + }, + "203": { + "count": 73 + }, + "332": { + "count": 201 + }, + "110": { + "count": 79 + }, + "41": { + "count": 285 + }, + "346": { + "count": 12 + }, + "104": { + "count": 90 + }, + "187": { + "count": 13 + }, + "161": { + "count": 284 + }, + "366": { + "count": 250 + }, + "263": { + "count": 241 + }, + "112": { + "count": 203 + }, + "114": { + "count": 119 + }, + "67": { + "count": 149 + }, + "77": { + "count": 51 + }, + "64": { + "count": 39 + }, + "283": { + "count": 135 + }, + "93": { + "count": 105 + }, + "248": { + "count": 275 + }, + "119": { + "count": 26 + }, + "220": { + "count": 46 + }, + "319": { + "count": 70 + }, + "60": { + "count": 35 + }, + "25": { + "count": 31 + }, + "99": { + "count": 108 + }, + "345": { + "count": 104 + }, + "36": { + "count": 70 + }, + "127": { + "count": 143 + }, + "301": { + "count": 762 + }, + "145": { + "count": 138 + }, + "218": { + "count": 234 + }, + "259": { + "count": 33 + }, + "295": { + "count": 94 + }, + "337": { + "count": 108 + }, + "305": { + "count": 91 + }, + "35": { + "count": 35 + }, + "288": { + "count": 108 + }, + "37": { + "count": 41 + }, + "142": { + "count": 254 + }, + "224": { + "count": 267 + }, + "240": { + "count": 105 + }, + "281": { + "count": 80 + }, + "195": { + "count": 181 + }, + "173": { + "count": 355 + }, + "245": { + "count": 120 + }, + "26": { + "count": 70 + }, + "18": { + "count": 206 + }, + "324": { + "count": 276 + }, + "212": { + "count": 18 + }, + "121": { + "count": 172 + }, + "207": { + "count": 25 + }, + "386": { + "count": 60 + }, + "78": { + "count": 55 + }, + "175": { + "count": 183 + }, + "363": { + "count": 191 + }, + "62": { + 
"count": 24 + }, + "228": { + "count": 59 + }, + "383": { + "count": 35 + }, + "356": { + "count": 113 + }, + "17": { + "count": 141 + }, + "74": { + "count": 199 + }, + "6": { + "count": 87 + }, + "333": { + "count": 68 + }, + "327": { + "count": 137 + }, + "349": { + "count": 120 + }, + "30": { + "count": 278 + }, + "265": { + "count": 314 + }, + "247": { + "count": 35 + }, + "230": { + "count": 315 + }, + "372": { + "count": 34 + }, + "201": { + "count": 109 + }, + "357": { + "count": 129 + }, + "219": { + "count": 50 + }, + "96": { + "count": 4 + }, + "117": { + "count": 27 + }, + "134": { + "count": 250 + }, + "75": { + "count": 134 + }, + "336": { + "count": 102 + }, + "371": { + "count": 190 + }, + "125": { + "count": 32 + }, + "192": { + "count": 124 + }, + "79": { + "count": 91 + }, + "379": { + "count": 134 + }, + "360": { + "count": 358 + }, + "197": { + "count": 311 + }, + "132": { + "count": 85 + }, + "250": { + "count": 55 + }, + "289": { + "count": 56 + }, + "268": { + "count": 46 + }, + "166": { + "count": 169 + }, + "94": { + "count": 46 + }, + "39": { + "count": 58 + }, + "340": { + "count": 59 + }, + "148": { + "count": 75 + }, + "95": { + "count": 90 + }, + "381": { + "count": 126 + }, + "315": { + "count": 15 + }, + "8": { + "count": 185 + }, + "118": { + "count": 144 + }, + "292": { + "count": 149 + }, + "275": { + "count": 71 + }, + "191": { + "count": 78 + }, + "211": { + "count": 74 + }, + "59": { + "count": 141 + }, + "304": { + "count": 34 + }, + "42": { + "count": 45 + }, + "63": { + "count": 26 + }, + "23": { + "count": 180 + }, + "27": { + "count": 43 + }, + "226": { + "count": 40 + }, + "123": { + "count": 47 + }, + "255": { + "count": 22 + }, + "140": { + "count": 90 + }, + "196": { + "count": 70 + }, + "318": { + "count": 217 + }, + "65": { + "count": 130 + }, + "34": { + "count": 58 + }, + "377": { + "count": 43 + }, + "180": { + "count": 422 + }, + "0": { + "count": 137 + }, + "167": { + "count": 133 + }, + "326": { + "count": 80 
+ }, + "331": { + "count": 788 + }, + "300": { + "count": 10 + }, + "267": { + "count": 90 + }, + "156": { + "count": 135 + }, + "298": { + "count": 21 + }, + "323": { + "count": 33 + }, + "122": { + "count": 86 + }, + "116": { + "count": 21 + }, + "348": { + "count": 36 + }, + "341": { + "count": 47 + }, + "380": { + "count": 52 + }, + "151": { + "count": 92 + }, + "205": { + "count": 53 + }, + "290": { + "count": 115 + }, + "15": { + "count": 22 + }, + "33": { + "count": 11 + }, + "147": { + "count": 247 + }, + "251": { + "count": 169 + }, + "271": { + "count": 68 + }, + "229": { + "count": 90 + }, + "367": { + "count": 112 + }, + "311": { + "count": 83 + }, + "287": { + "count": 156 + }, + "181": { + "count": 113 + }, + "339": { + "count": 46 + }, + "258": { + "count": 105 + }, + "334": { + "count": 99 + }, + "285": { + "count": 36 + }, + "373": { + "count": 63 + }, + "208": { + "count": 44 + }, + "143": { + "count": 51 + }, + "1": { + "count": 80 + }, + "280": { + "count": 120 + }, + "382": { + "count": 96 + }, + "128": { + "count": 86 + }, + "306": { + "count": 58 + }, + "124": { + "count": 252 + }, + "91": { + "count": 17 + }, + "106": { + "count": 55 + }, + "277": { + "count": 22 + }, + "152": { + "count": 82 + }, + "241": { + "count": 10 + }, + "14": { + "count": 103 + }, + "236": { + "count": 47 + }, + "325": { + "count": 56 + }, + "222": { + "count": 31 + }, + "358": { + "count": 49 + }, + "80": { + "count": 35 + }, + "70": { + "count": 108 + }, + "266": { + "count": 99 + }, + "353": { + "count": 63 + }, + "44": { + "count": 220 + }, + "234": { + "count": 54 + }, + "160": { + "count": 73 + }, + "158": { + "count": 129 + }, + "138": { + "count": 115 + }, + "81": { + "count": 164 + }, + "5": { + "count": 70 + }, + "162": { + "count": 89 + }, + "133": { + "count": 56 + }, + "303": { + "count": 50 + }, + "53": { + "count": 100 + }, + "61": { + "count": 5 + }, + "350": { + "count": 70 + }, + "378": { + "count": 21 + }, + "312": { + "count": 74 + }, + "387": { 
+ "count": 146 + }, + "384": { + "count": 248 + }, + "329": { + "count": 82 + }, + "264": { + "count": 156 + }, + "31": { + "count": 64 + }, + "85": { + "count": 51 + }, + "11": { + "count": 109 + }, + "48": { + "count": 14 + }, + "279": { + "count": 63 + }, + "246": { + "count": 17 + }, + "131": { + "count": 67 + }, + "286": { + "count": 73 + }, + "92": { + "count": 29 + }, + "9": { + "count": 16 + }, + "210": { + "count": 15 + }, + "385": { + "count": 33 + }, + "321": { + "count": 82 + }, + "364": { + "count": 24 + }, + "270": { + "count": 92 + }, + "87": { + "count": 67 + }, + "126": { + "count": 32 + }, + "206": { + "count": 61 + }, + "260": { + "count": 6 + }, + "335": { + "count": 101 + }, + "257": { + "count": 28 + }, + "232": { + "count": 50 + }, + "108": { + "count": 113 + }, + "238": { + "count": 22 + }, + "20": { + "count": 168 + }, + "68": { + "count": 189 + }, + "153": { + "count": 14 + }, + "86": { + "count": 60 + }, + "98": { + "count": 76 + }, + "227": { + "count": 26 + }, + "157": { + "count": 33 + }, + "38": { + "count": 38 + }, + "316": { + "count": 51 + }, + "252": { + "count": 20 + }, + "199": { + "count": 158 + }, + "322": { + "count": 28 + }, + "150": { + "count": 56 + }, + "165": { + "count": 46 + }, + "43": { + "count": 37 + }, + "354": { + "count": 107 + }, + "29": { + "count": 41 + }, + "365": { + "count": 44 + }, + "19": { + "count": 91 + }, + "244": { + "count": 45 + }, + "198": { + "count": 97 + }, + "179": { + "count": 157 + }, + "202": { + "count": 50 + }, + "273": { + "count": 69 + }, + "217": { + "count": 47 + }, + "47": { + "count": 156 + }, + "243": { + "count": 54 + }, + "186": { + "count": 3 + }, + "242": { + "count": 89 + }, + "362": { + "count": 12 + }, + "249": { + "count": 41 + }, + "176": { + "count": 64 + }, + "55": { + "count": 56 + }, + "225": { + "count": 77 + }, + "100": { + "count": 43 + }, + "355": { + "count": 86 + }, + "209": { + "count": 29 + }, + "52": { + "count": 28 + }, + "155": { + "count": 97 + }, + "16": { 
+ "count": 21 + }, + "368": { + "count": 96 + }, + "56": { + "count": 53 + }, + "3": { + "count": 64 + }, + "28": { + "count": 28 + }, + "204": { + "count": 52 + }, + "223": { + "count": 15 + }, + "216": { + "count": 14 + }, + "310": { + "count": 31 + }, + "296": { + "count": 27 + }, + "58": { + "count": 12 + }, + "111": { + "count": 30 + }, + "22": { + "count": 25 + }, + "284": { + "count": 103 + }, + "149": { + "count": 34 + }, + "50": { + "count": 9 + }, + "136": { + "count": 38 + }, + "7": { + "count": 60 + }, + "89": { + "count": 26 + }, + "54": { + "count": 37 + }, + "168": { + "count": 102 + }, + "293": { + "count": 35 + }, + "254": { + "count": 17 + }, + "170": { + "count": 17 + }, + "308": { + "count": 20 + }, + "369": { + "count": 13 + }, + "294": { + "count": 32 + }, + "282": { + "count": 5 + }, + "253": { + "count": 19 + }, + "76": { + "count": 79 + }, + "272": { + "count": 17 + }, + "351": { + "count": 32 + }, + "237": { + "count": 41 + }, + "84": { + "count": 108 + }, + "137": { + "count": 16 + }, + "82": { + "count": 23 + }, + "188": { + "count": 8 + }, + "107": { + "count": 53 + }, + "261": { + "count": 3 + }, + "291": { + "count": 42 + }, + "276": { + "count": 18 + }, + "4": { + "count": 5 + }, + "103": { + "count": 20 + }, + "352": { + "count": 29 + }, + "342": { + "count": 28 + }, + "115": { + "count": 30 + }, + "49": { + "count": 39 + }, + "57": { + "count": 11 + }, + "109": { + "count": 28 + }, + "90": { + "count": 8 + }, + "105": { + "count": 50 + }, + "338": { + "count": 12 + }, + "178": { + "count": 28 + }, + "314": { + "count": 60 + }, + "120": { + "count": 27 + }, + "97": { + "count": 85 + }, + "2": { + "count": 24 + }, + "274": { + "count": 46 + }, + "174": { + "count": 59 + }, + "72": { + "count": 55 + }, + "135": { + "count": 8 + }, + "83": { + "count": 14 + }, + "102": { + "count": 15 + }, + "88": { + "count": 39 + }, + "307": { + "count": 7 + }, + "177": { + "count": 54 + }, + "130": { + "count": 4 + }, + "146": { + "count": 24 + }, + 
"361": { + "count": 34 + }, + "32": { + "count": 64 + }, + "374": { + "count": 15 + }, + "388": { + "count": 59 + }, + "235": { + "count": 8 + }, + "309": { + "count": 39 + }, + "189": { + "count": 13 + }, + "347": { + "count": 12 + }, + "375": { + "count": 27 + }, + "190": { + "count": 40 + }, + "51": { + "count": 23 + }, + "313": { + "count": 21 + }, + "159": { + "count": 7 + }, + "171": { + "count": 2 + }, + "45": { + "count": 6 + }, + "12": { + "count": 1 + }, + "13": { + "count": 5 + }, + "172": { + "count": 16 + }, + "278": { + "count": 12 + }, + "213": { + "count": 6 + }, + "71": { + "count": 21 + }, + "169": { + "count": 25 + }, + "163": { + "count": 18 + }, + "262": { + "count": 22 + }, + "299": { + "count": 22 + }, + "330": { + "count": 14 + }, + "10": { + "count": 18 + }, + "139": { + "count": 12 + }, + "231": { + "count": 5 + }, + "101": { + "count": 7 + }, + "317": { + "count": 2 + }, + "184": { + "count": 22 + }, + "200": { + "count": 8 + }, + "256": { + "count": 9 + }, + "370": { + "count": 8 + }, + "215": { + "count": 1 + }, + "154": { + "count": 3 + }, + "24": { + "count": 21 + }, + "185": { + "count": 7 + }, + "328": { + "count": 6 + }, + "320": { + "count": 3 + }, + "69": { + "count": 1 + }, + "46": { + "count": 8 + }, + "164": { + "count": 5 + }, + "129": { + "count": 9 + }, + "297": { + "count": 3 + }, + "344": { + "count": 2 + }, + "221": { + "count": 4 + }, + "214": { + "count": 9 + }, + "269": { + "count": 4 + }, + "73": { + "count": 1 + }, + "343": { + "count": 3 + }, + "141": { + "count": 14 + }, + "21": { + "count": 5 + }, + "194": { + "count": 3 + }, + "359": { + "count": 1 + }, + "113": { + "count": 9 + }, + "144": { + "count": 1 + }, + "66": { + "count": 1 + }, + "233": { + "count": 1 + }, + "182": { + "count": 1 + }, + "376": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/GujaratiNewsClassification.json b/mteb/descriptive_stats/Classification/GujaratiNewsClassification.json 
new file mode 100644 index 0000000000..13f29dfad9 --- /dev/null +++ b/mteb/descriptive_stats/Classification/GujaratiNewsClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 1318, + "number_of_characters": 81593, + "number_texts_intersect_with_train": 5, + "min_text_length": 31, + "average_text_length": 61.90667678300455, + "max_text_length": 102, + "unique_text": 1317, + "unique_labels": 3, + "labels": { + "business": { + "count": 491 + }, + "entertainment": { + "count": 584 + }, + "tech": { + "count": 243 + } + } + }, + "train": { + "num_samples": 5269, + "number_of_characters": 326423, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 61.951603719870945, + "max_text_length": 108, + "unique_text": 5264, + "unique_labels": 3, + "labels": { + "business": { + "count": 1840 + }, + "entertainment": { + "count": 2321 + }, + "tech": { + "count": 1108 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/HateSpeechPortugueseClassification.json b/mteb/descriptive_stats/Classification/HateSpeechPortugueseClassification.json new file mode 100644 index 0000000000..973dac74c4 --- /dev/null +++ b/mteb/descriptive_stats/Classification/HateSpeechPortugueseClassification.json @@ -0,0 +1,20 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 208616, + "number_texts_intersect_with_train": null, + "min_text_length": 15, + "average_text_length": 101.86328125, + "max_text_length": 143, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1402 + }, + "1": { + "count": 646 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/HeadlineClassification.json b/mteb/descriptive_stats/Classification/HeadlineClassification.json new file mode 100644 index 0000000000..946aa533f4 --- /dev/null +++ b/mteb/descriptive_stats/Classification/HeadlineClassification.json @@ -0,0 +1,62 @@ +{ + "test": { + "num_samples": 2048, + 
"number_of_characters": 127763, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 62.38427734375, + "max_text_length": 173, + "unique_text": 2048, + "unique_labels": 6, + "labels": { + "1": { + "count": 342 + }, + "0": { + "count": 341 + }, + "4": { + "count": 341 + }, + "2": { + "count": 341 + }, + "3": { + "count": 341 + }, + "5": { + "count": 342 + } + } + }, + "train": { + "num_samples": 36000, + "number_of_characters": 2232586, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 62.01627777777778, + "max_text_length": 501, + "unique_text": 36000, + "unique_labels": 6, + "labels": { + "0": { + "count": 6000 + }, + "2": { + "count": 6000 + }, + "1": { + "count": 6000 + }, + "4": { + "count": 6000 + }, + "5": { + "count": 6000 + }, + "3": { + "count": 6000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.json b/mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.json new file mode 100644 index 0000000000..3dfef6480d --- /dev/null +++ b/mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 256478, + "number_texts_intersect_with_train": 987, + "min_text_length": 3, + "average_text_length": 125.2333984375, + "max_text_length": 2776, + "unique_text": 1918, + "unique_labels": 3, + "labels": { + "0": { + "count": 1359 + }, + "1": { + "count": 631 + }, + "2": { + "count": 58 + } + } + }, + "train": { + "num_samples": 10221, + "number_of_characters": 1251661, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 122.45973975149202, + "max_text_length": 3345, + "unique_text": 7481, + "unique_labels": 3, + "labels": { + "0": { + "count": 6802 + }, + "1": { + "count": 3123 + }, + "2": { + "count": 296 + } + } + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Classification/HinDialectClassification.json b/mteb/descriptive_stats/Classification/HinDialectClassification.json new file mode 100644 index 0000000000..fa2c0c0f4f --- /dev/null +++ b/mteb/descriptive_stats/Classification/HinDialectClassification.json @@ -0,0 +1,152 @@ +{ + "test": { + "num_samples": 1152, + "number_of_characters": 672566, + "number_texts_intersect_with_train": 23, + "min_text_length": 28, + "average_text_length": 583.8246527777778, + "max_text_length": 5970, + "unique_text": 1144, + "unique_labels": 21, + "labels": { + "bundeli-bns": { + "count": 114 + }, + "bhojpuri-bho": { + "count": 46 + }, + "panjabi-pan": { + "count": 264 + }, + "bhadrawahi-bhd": { + "count": 3 + }, + "korku-kfq": { + "count": 62 + }, + "awadhi-awa": { + "count": 16 + }, + "nimadi-noe": { + "count": 55 + }, + "haryanvi-bgc": { + "count": 194 + }, + "chhattisgarhi-hne": { + "count": 32 + }, + "rajasthani-raj": { + "count": 24 + }, + "magahi-mag": { + "count": 119 + }, + "malvi-mup": { + "count": 45 + }, + "angika-anp": { + "count": 34 + }, + "braj-bra": { + "count": 29 + }, + "bhili-bhb": { + "count": 54 + }, + "kumaoni-kfy": { + "count": 3 + }, + "garhwali-gbm": { + "count": 45 + }, + "gujarati-guj": { + "count": 5 + }, + "bengali-ben": { + "count": 4 + }, + "kanauji-bjj": { + "count": 2 + }, + "marathi-mar": { + "count": 2 + } + } + }, + "train": { + "num_samples": 2138, + "number_of_characters": 1289774, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 603.2619270346117, + "max_text_length": 15395, + "unique_text": 2108, + "unique_labels": 21, + "labels": { + "haryanvi-bgc": { + "count": 360 + }, + "angika-anp": { + "count": 62 + }, + "garhwali-gbm": { + "count": 83 + }, + "malvi-mup": { + "count": 84 + }, + "bhili-bhb": { + "count": 101 + }, + "magahi-mag": { + "count": 221 + }, + "braj-bra": { + "count": 54 + }, + "panjabi-pan": { + "count": 490 + }, + "nimadi-noe": { + "count": 102 + }, + 
"bundeli-bns": { + "count": 212 + }, + "awadhi-awa": { + "count": 31 + }, + "korku-kfq": { + "count": 115 + }, + "bhojpuri-bho": { + "count": 85 + }, + "chhattisgarhi-hne": { + "count": 60 + }, + "rajasthani-raj": { + "count": 43 + }, + "bhadrawahi-bhd": { + "count": 5 + }, + "gujarati-guj": { + "count": 9 + }, + "bengali-ben": { + "count": 8 + }, + "kumaoni-kfy": { + "count": 6 + }, + "marathi-mar": { + "count": 3 + }, + "kanauji-bjj": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/HindiDiscourseClassification.json b/mteb/descriptive_stats/Classification/HindiDiscourseClassification.json new file mode 100644 index 0000000000..1bb45dfcfa --- /dev/null +++ b/mteb/descriptive_stats/Classification/HindiDiscourseClassification.json @@ -0,0 +1,32 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 146219, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 71.39599609375, + "max_text_length": 579, + "unique_text": 2044, + "unique_labels": 6, + "labels": { + "4": { + "count": 645 + }, + "1": { + "count": 749 + }, + "2": { + "count": 600 + }, + "3": { + "count": 17 + }, + "0": { + "count": 26 + }, + "5": { + "count": 11 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/HotelReviewSentimentClassification.json b/mteb/descriptive_stats/Classification/HotelReviewSentimentClassification.json new file mode 100644 index 0000000000..638006cb0d --- /dev/null +++ b/mteb/descriptive_stats/Classification/HotelReviewSentimentClassification.json @@ -0,0 +1,26 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 282368, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 137.875, + "max_text_length": 2698, + "unique_text": 2044, + "unique_labels": 4, + "labels": { + "4": { + "count": 512 + }, + "3": { + "count": 512 + }, + "0": { + "count": 279 + }, + "1": { + "count": 
745 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/IFlyTek.json b/mteb/descriptive_stats/Classification/IFlyTek.json new file mode 100644 index 0000000000..e3125581ec --- /dev/null +++ b/mteb/descriptive_stats/Classification/IFlyTek.json @@ -0,0 +1,740 @@ +{ + "validation": { + "num_samples": 2599, + "number_of_characters": 753272, + "number_texts_intersect_with_train": 270, + "min_text_length": 11, + "average_text_length": 289.8314736437091, + "max_text_length": 1755, + "unique_text": 2549, + "unique_labels": 119, + "labels": { + "110": { + "count": 3 + }, + "70": { + "count": 388 + }, + "10": { + "count": 22 + }, + "18": { + "count": 79 + }, + "17": { + "count": 192 + }, + "34": { + "count": 36 + }, + "71": { + "count": 123 + }, + "104": { + "count": 4 + }, + "49": { + "count": 38 + }, + "20": { + "count": 53 + }, + "44": { + "count": 7 + }, + "24": { + "count": 27 + }, + "95": { + "count": 79 + }, + "21": { + "count": 56 + }, + "66": { + "count": 2 + }, + "83": { + "count": 7 + }, + "94": { + "count": 25 + }, + "19": { + "count": 36 + }, + "46": { + "count": 52 + }, + "96": { + "count": 51 + }, + "113": { + "count": 32 + }, + "36": { + "count": 54 + }, + "87": { + "count": 6 + }, + "106": { + "count": 68 + }, + "62": { + "count": 9 + }, + "98": { + "count": 8 + }, + "22": { + "count": 35 + }, + "45": { + "count": 15 + }, + "13": { + "count": 24 + }, + "28": { + "count": 49 + }, + "15": { + "count": 9 + }, + "82": { + "count": 19 + }, + "4": { + "count": 37 + }, + "102": { + "count": 14 + }, + "88": { + "count": 4 + }, + "25": { + "count": 36 + }, + "91": { + "count": 23 + }, + "48": { + "count": 36 + }, + "74": { + "count": 6 + }, + "53": { + "count": 97 + }, + "57": { + "count": 7 + }, + "11": { + "count": 21 + }, + "103": { + "count": 16 + }, + "111": { + "count": 35 + }, + "56": { + "count": 40 + }, + "58": { + "count": 14 + }, + "27": { + "count": 4 + }, + "1": { + "count": 10 + }, + "16": { + "count": 42 + }, + 
"9": { + "count": 29 + }, + "99": { + "count": 20 + }, + "47": { + "count": 8 + }, + "35": { + "count": 14 + }, + "61": { + "count": 9 + }, + "101": { + "count": 14 + }, + "72": { + "count": 6 + }, + "41": { + "count": 5 + }, + "8": { + "count": 29 + }, + "84": { + "count": 8 + }, + "69": { + "count": 3 + }, + "114": { + "count": 4 + }, + "12": { + "count": 17 + }, + "54": { + "count": 23 + }, + "92": { + "count": 8 + }, + "118": { + "count": 18 + }, + "42": { + "count": 6 + }, + "97": { + "count": 24 + }, + "100": { + "count": 9 + }, + "29": { + "count": 9 + }, + "117": { + "count": 2 + }, + "23": { + "count": 11 + }, + "59": { + "count": 16 + }, + "81": { + "count": 6 + }, + "14": { + "count": 5 + }, + "116": { + "count": 22 + }, + "52": { + "count": 1 + }, + "63": { + "count": 6 + }, + "43": { + "count": 3 + }, + "85": { + "count": 15 + }, + "80": { + "count": 5 + }, + "79": { + "count": 1 + }, + "77": { + "count": 8 + }, + "93": { + "count": 8 + }, + "65": { + "count": 3 + }, + "7": { + "count": 6 + }, + "75": { + "count": 10 + }, + "78": { + "count": 9 + }, + "55": { + "count": 5 + }, + "3": { + "count": 4 + }, + "26": { + "count": 17 + }, + "67": { + "count": 3 + }, + "115": { + "count": 6 + }, + "112": { + "count": 4 + }, + "89": { + "count": 2 + }, + "90": { + "count": 3 + }, + "33": { + "count": 8 + }, + "60": { + "count": 9 + }, + "50": { + "count": 5 + }, + "37": { + "count": 3 + }, + "73": { + "count": 6 + }, + "68": { + "count": 2 + }, + "39": { + "count": 5 + }, + "51": { + "count": 4 + }, + "76": { + "count": 5 + }, + "32": { + "count": 4 + }, + "64": { + "count": 6 + }, + "107": { + "count": 3 + }, + "30": { + "count": 5 + }, + "31": { + "count": 4 + }, + "108": { + "count": 4 + }, + "40": { + "count": 2 + }, + "5": { + "count": 4 + }, + "109": { + "count": 1 + }, + "86": { + "count": 3 + }, + "38": { + "count": 6 + }, + "2": { + "count": 5 + }, + "105": { + "count": 4 + }, + "0": { + "count": 5 + }, + "6": { + "count": 2 + } + } + }, + "train": { + 
"num_samples": 12133, + "number_of_characters": 3506882, + "number_texts_intersect_with_train": null, + "min_text_length": 10, + "average_text_length": 289.0366768317811, + "max_text_length": 4282, + "unique_text": 11425, + "unique_labels": 119, + "labels": { + "11": { + "count": 76 + }, + "95": { + "count": 375 + }, + "74": { + "count": 22 + }, + "70": { + "count": 1980 + }, + "58": { + "count": 58 + }, + "25": { + "count": 135 + }, + "54": { + "count": 121 + }, + "34": { + "count": 240 + }, + "71": { + "count": 506 + }, + "12": { + "count": 102 + }, + "49": { + "count": 138 + }, + "24": { + "count": 163 + }, + "19": { + "count": 169 + }, + "18": { + "count": 364 + }, + "17": { + "count": 952 + }, + "53": { + "count": 369 + }, + "4": { + "count": 129 + }, + "99": { + "count": 116 + }, + "20": { + "count": 264 + }, + "118": { + "count": 111 + }, + "108": { + "count": 10 + }, + "113": { + "count": 135 + }, + "94": { + "count": 108 + }, + "28": { + "count": 204 + }, + "48": { + "count": 143 + }, + "96": { + "count": 210 + }, + "116": { + "count": 114 + }, + "23": { + "count": 25 + }, + "22": { + "count": 173 + }, + "21": { + "count": 280 + }, + "102": { + "count": 112 + }, + "13": { + "count": 142 + }, + "97": { + "count": 115 + }, + "56": { + "count": 149 + }, + "1": { + "count": 37 + }, + "46": { + "count": 237 + }, + "36": { + "count": 253 + }, + "83": { + "count": 36 + }, + "111": { + "count": 156 + }, + "30": { + "count": 11 + }, + "82": { + "count": 86 + }, + "42": { + "count": 15 + }, + "16": { + "count": 180 + }, + "117": { + "count": 23 + }, + "0": { + "count": 20 + }, + "72": { + "count": 34 + }, + "90": { + "count": 39 + }, + "47": { + "count": 46 + }, + "35": { + "count": 42 + }, + "98": { + "count": 37 + }, + "81": { + "count": 26 + }, + "9": { + "count": 111 + }, + "59": { + "count": 82 + }, + "92": { + "count": 54 + }, + "91": { + "count": 99 + }, + "100": { + "count": 28 + }, + "79": { + "count": 23 + }, + "10": { + "count": 74 + }, + "29": { + 
"count": 27 + }, + "8": { + "count": 125 + }, + "110": { + "count": 27 + }, + "45": { + "count": 46 + }, + "103": { + "count": 71 + }, + "5": { + "count": 44 + }, + "88": { + "count": 44 + }, + "66": { + "count": 11 + }, + "101": { + "count": 69 + }, + "3": { + "count": 20 + }, + "43": { + "count": 13 + }, + "39": { + "count": 17 + }, + "60": { + "count": 40 + }, + "14": { + "count": 53 + }, + "62": { + "count": 42 + }, + "89": { + "count": 5 + }, + "106": { + "count": 263 + }, + "41": { + "count": 21 + }, + "85": { + "count": 84 + }, + "105": { + "count": 24 + }, + "38": { + "count": 40 + }, + "31": { + "count": 43 + }, + "107": { + "count": 22 + }, + "78": { + "count": 52 + }, + "76": { + "count": 31 + }, + "104": { + "count": 31 + }, + "26": { + "count": 58 + }, + "73": { + "count": 42 + }, + "84": { + "count": 43 + }, + "50": { + "count": 30 + }, + "44": { + "count": 44 + }, + "65": { + "count": 19 + }, + "114": { + "count": 13 + }, + "40": { + "count": 20 + }, + "61": { + "count": 29 + }, + "7": { + "count": 14 + }, + "112": { + "count": 27 + }, + "2": { + "count": 33 + }, + "115": { + "count": 32 + }, + "75": { + "count": 35 + }, + "33": { + "count": 18 + }, + "37": { + "count": 21 + }, + "52": { + "count": 11 + }, + "93": { + "count": 26 + }, + "80": { + "count": 28 + }, + "87": { + "count": 23 + }, + "51": { + "count": 15 + }, + "77": { + "count": 36 + }, + "27": { + "count": 22 + }, + "15": { + "count": 30 + }, + "109": { + "count": 20 + }, + "64": { + "count": 24 + }, + "63": { + "count": 26 + }, + "55": { + "count": 14 + }, + "32": { + "count": 17 + }, + "86": { + "count": 9 + }, + "67": { + "count": 7 + }, + "57": { + "count": 16 + }, + "6": { + "count": 3 + }, + "69": { + "count": 3 + }, + "68": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ImdbClassification.json b/mteb/descriptive_stats/Classification/ImdbClassification.json new file mode 100644 index 0000000000..869c2aa156 --- /dev/null 
+++ b/mteb/descriptive_stats/Classification/ImdbClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 25000, + "number_of_characters": 32344810, + "number_texts_intersect_with_train": 123, + "min_text_length": 32, + "average_text_length": 1293.7924, + "max_text_length": 12988, + "unique_text": 24801, + "unique_labels": 2, + "labels": { + "0": { + "count": 12500 + }, + "1": { + "count": 12500 + } + } + }, + "train": { + "num_samples": 25000, + "number_of_characters": 33126741, + "number_texts_intersect_with_train": null, + "min_text_length": 52, + "average_text_length": 1325.06964, + "max_text_length": 13704, + "unique_text": 24904, + "unique_labels": 2, + "labels": { + "0": { + "count": 12500 + }, + "1": { + "count": 12500 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/InappropriatenessClassification.json b/mteb/descriptive_stats/Classification/InappropriatenessClassification.json new file mode 100644 index 0000000000..6258cc919b --- /dev/null +++ b/mteb/descriptive_stats/Classification/InappropriatenessClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 198775, + "number_texts_intersect_with_train": 0, + "min_text_length": 8, + "average_text_length": 97.05810546875, + "max_text_length": 1168, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "1": { + "count": 1024 + }, + "0": { + "count": 1024 + } + } + }, + "train": { + "num_samples": 40000, + "number_of_characters": 3878368, + "number_texts_intersect_with_train": null, + "min_text_length": 8, + "average_text_length": 96.9592, + "max_text_length": 1938, + "unique_text": 40000, + "unique_labels": 2, + "labels": { + "1": { + "count": 20000 + }, + "0": { + "count": 20000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/IndicLangClassification.json b/mteb/descriptive_stats/Classification/IndicLangClassification.json new file mode 100644 index 
0000000000..4d8d12020a --- /dev/null +++ b/mteb/descriptive_stats/Classification/IndicLangClassification.json @@ -0,0 +1,140 @@ +{ + "test": { + "num_samples": 30418, + "number_of_characters": 3240093, + "number_texts_intersect_with_train": 43, + "min_text_length": 2, + "average_text_length": 106.51893615622329, + "max_text_length": 850, + "unique_text": 30402, + "unique_labels": 19, + "labels": { + "0": { + "count": 1066 + }, + "2": { + "count": 1051 + }, + "1": { + "count": 2048 + }, + "4": { + "count": 1050 + }, + "5": { + "count": 2048 + }, + "6": { + "count": 2048 + }, + "7": { + "count": 2048 + }, + "10": { + "count": 1760 + }, + "11": { + "count": 2048 + }, + "12": { + "count": 2048 + }, + "15": { + "count": 1759 + }, + "16": { + "count": 1066 + }, + "17": { + "count": 2048 + }, + "18": { + "count": 1768 + }, + "21": { + "count": 2048 + }, + "22": { + "count": 2048 + }, + "9": { + "count": 708 + }, + "13": { + "count": 708 + }, + "3": { + "count": 1050 + } + } + }, + "train": { + "num_samples": 38256, + "number_of_characters": 3847653, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 100.57645859473024, + "max_text_length": 1544, + "unique_text": 38191, + "unique_labels": 19, + "labels": { + "0": { + "count": 458 + }, + "2": { + "count": 451 + }, + "1": { + "count": 3564 + }, + "4": { + "count": 450 + }, + "5": { + "count": 3753 + }, + "6": { + "count": 3580 + }, + "7": { + "count": 3813 + }, + "10": { + "count": 755 + }, + "11": { + "count": 3591 + }, + "12": { + "count": 3581 + }, + "15": { + "count": 755 + }, + "16": { + "count": 458 + }, + "17": { + "count": 3746 + }, + "18": { + "count": 759 + }, + "21": { + "count": 3766 + }, + "22": { + "count": 3718 + }, + "9": { + "count": 304 + }, + "13": { + "count": 304 + }, + "3": { + "count": 450 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/IndicNLPNewsClassification.json 
b/mteb/descriptive_stats/Classification/IndicNLPNewsClassification.json new file mode 100644 index 0000000000..eaa092d2aa --- /dev/null +++ b/mteb/descriptive_stats/Classification/IndicNLPNewsClassification.json @@ -0,0 +1,408 @@ +{ + "test": { + "num_samples": 14960, + "number_of_characters": 23509208, + "number_texts_intersect_with_train": 298, + "min_text_length": 0, + "average_text_length": 1571.4711229946524, + "max_text_length": 25461, + "unique_text": 13944, + "unique_labels": 4, + "labels": { + "1": { + "count": 4723 + }, + "0": { + "count": 4509 + }, + "2": { + "count": 4490 + }, + "3": { + "count": 1238 + } + }, + "hf_subset_descriptive_stats": { + "gu": { + "num_samples": 2048, + "number_of_characters": 3016704, + "number_texts_intersect_with_train": 171, + "min_text_length": 307, + "average_text_length": 1473.0, + "max_text_length": 11982, + "unique_text": 1983, + "unique_labels": 3, + "labels": { + "1": { + "count": 687 + }, + "0": { + "count": 705 + }, + "2": { + "count": 656 + } + } + }, + "kn": { + "num_samples": 2048, + "number_of_characters": 4129549, + "number_texts_intersect_with_train": 7, + "min_text_length": 303, + "average_text_length": 2016.38134765625, + "max_text_length": 24145, + "unique_text": 2027, + "unique_labels": 3, + "labels": { + "0": { + "count": 657 + }, + "1": { + "count": 677 + }, + "2": { + "count": 714 + } + } + }, + "mal": { + "num_samples": 2048, + "number_of_characters": 2518131, + "number_texts_intersect_with_train": 11, + "min_text_length": 188, + "average_text_length": 1229.55615234375, + "max_text_length": 13458, + "unique_text": 2038, + "unique_labels": 4, + "labels": { + "0": { + "count": 496 + }, + "3": { + "count": 515 + }, + "2": { + "count": 501 + }, + "1": { + "count": 536 + } + } + }, + "mr": { + "num_samples": 2048, + "number_of_characters": 3951866, + "number_texts_intersect_with_train": 11, + "min_text_length": 428, + "average_text_length": 1929.6220703125, + "max_text_length": 25461, + "unique_text": 
2043, + "unique_labels": 3, + "labels": { + "2": { + "count": 666 + }, + "1": { + "count": 701 + }, + "0": { + "count": 681 + } + } + }, + "tel": { + "num_samples": 2048, + "number_of_characters": 3037579, + "number_texts_intersect_with_train": 30, + "min_text_length": 301, + "average_text_length": 1483.19287109375, + "max_text_length": 11974, + "unique_text": 1991, + "unique_labels": 3, + "labels": { + "0": { + "count": 678 + }, + "1": { + "count": 715 + }, + "2": { + "count": 655 + } + } + }, + "ori": { + "num_samples": 2048, + "number_of_characters": 2471544, + "number_texts_intersect_with_train": 0, + "min_text_length": 323, + "average_text_length": 1206.80859375, + "max_text_length": 8448, + "unique_text": 2048, + "unique_labels": 4, + "labels": { + "0": { + "count": 490 + }, + "3": { + "count": 560 + }, + "1": { + "count": 523 + }, + "2": { + "count": 475 + } + } + }, + "pa": { + "num_samples": 624, + "number_of_characters": 998432, + "number_texts_intersect_with_train": 0, + "min_text_length": 0, + "average_text_length": 1600.051282051282, + "max_text_length": 8727, + "unique_text": 623, + "unique_labels": 4, + "labels": { + "2": { + "count": 144 + }, + "1": { + "count": 169 + }, + "0": { + "count": 148 + }, + "3": { + "count": 163 + } + } + }, + "ta": { + "num_samples": 2048, + "number_of_characters": 3385403, + "number_texts_intersect_with_train": 3, + "min_text_length": 595, + "average_text_length": 1653.02880859375, + "max_text_length": 8002, + "unique_text": 2046, + "unique_labels": 3, + "labels": { + "1": { + "count": 715 + }, + "0": { + "count": 654 + }, + "2": { + "count": 679 + } + } + } + } + }, + "train": { + "num_samples": 33533, + "number_of_characters": 50589676, + "number_texts_intersect_with_train": null, + "min_text_length": 245, + "average_text_length": 1508.6534458593028, + "max_text_length": 19536, + "unique_text": 29951, + "unique_labels": 4, + "labels": { + "0": { + "count": 10600 + }, + "1": { + "count": 10583 + }, + "2": { + "count": 
10842 + }, + "3": { + "count": 1508 + } + }, + "hf_subset_descriptive_stats": { + "gu": { + "num_samples": 19197, + "number_of_characters": 28120011, + "number_texts_intersect_with_train": null, + "min_text_length": 299, + "average_text_length": 1464.8127832473824, + "max_text_length": 12354, + "unique_text": 17695, + "unique_labels": 3, + "labels": { + "0": { + "count": 6345 + }, + "1": { + "count": 6390 + }, + "2": { + "count": 6462 + } + } + }, + "kn": { + "num_samples": 2048, + "number_of_characters": 4059197, + "number_texts_intersect_with_train": null, + "min_text_length": 297, + "average_text_length": 1982.02978515625, + "max_text_length": 16016, + "unique_text": 2029, + "unique_labels": 3, + "labels": { + "2": { + "count": 709 + }, + "1": { + "count": 682 + }, + "0": { + "count": 657 + } + } + }, + "mal": { + "num_samples": 2048, + "number_of_characters": 2490600, + "number_texts_intersect_with_train": null, + "min_text_length": 336, + "average_text_length": 1216.11328125, + "max_text_length": 9018, + "unique_text": 2039, + "unique_labels": 4, + "labels": { + "0": { + "count": 519 + }, + "3": { + "count": 531 + }, + "2": { + "count": 531 + }, + "1": { + "count": 467 + } + } + }, + "mr": { + "num_samples": 2048, + "number_of_characters": 3872775, + "number_texts_intersect_with_train": null, + "min_text_length": 479, + "average_text_length": 1891.00341796875, + "max_text_length": 19536, + "unique_text": 2046, + "unique_labels": 3, + "labels": { + "1": { + "count": 645 + }, + "2": { + "count": 701 + }, + "0": { + "count": 702 + } + } + }, + "tel": { + "num_samples": 2048, + "number_of_characters": 2953411, + "number_texts_intersect_with_train": null, + "min_text_length": 309, + "average_text_length": 1442.09521484375, + "max_text_length": 12083, + "unique_text": 1983, + "unique_labels": 3, + "labels": { + "0": { + "count": 679 + }, + "1": { + "count": 706 + }, + "2": { + "count": 663 + } + } + }, + "ori": { + "num_samples": 2048, + "number_of_characters": 
2467807, + "number_texts_intersect_with_train": null, + "min_text_length": 303, + "average_text_length": 1204.98388671875, + "max_text_length": 8417, + "unique_text": 2048, + "unique_labels": 4, + "labels": { + "2": { + "count": 549 + }, + "1": { + "count": 500 + }, + "3": { + "count": 476 + }, + "0": { + "count": 523 + } + } + }, + "pa": { + "num_samples": 2048, + "number_of_characters": 3248339, + "number_texts_intersect_with_train": null, + "min_text_length": 245, + "average_text_length": 1586.10302734375, + "max_text_length": 17538, + "unique_text": 2047, + "unique_labels": 4, + "labels": { + "1": { + "count": 515 + }, + "3": { + "count": 501 + }, + "0": { + "count": 515 + }, + "2": { + "count": 517 + } + } + }, + "ta": { + "num_samples": 2048, + "number_of_characters": 3377536, + "number_texts_intersect_with_train": null, + "min_text_length": 434, + "average_text_length": 1649.1875, + "max_text_length": 10438, + "unique_text": 2047, + "unique_labels": 3, + "labels": { + "2": { + "count": 710 + }, + "1": { + "count": 678 + }, + "0": { + "count": 660 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/IndicSentimentClassification.json b/mteb/descriptive_stats/Classification/IndicSentimentClassification.json new file mode 100644 index 0000000000..b4d8865b12 --- /dev/null +++ b/mteb/descriptive_stats/Classification/IndicSentimentClassification.json @@ -0,0 +1,510 @@ +{ + "test": { + "num_samples": 12974, + "number_of_characters": 1785780, + "number_texts_intersect_with_train": 0, + "min_text_length": 10, + "average_text_length": 137.64297826422074, + "max_text_length": 762, + "unique_text": 12946, + "unique_labels": 2, + "labels": { + "0": { + "count": 6396 + }, + "1": { + "count": 6578 + } + }, + "hf_subset_descriptive_stats": { + "as": { + "num_samples": 998, + "number_of_characters": 128830, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 129.0881763527054, + 
"max_text_length": 529, + "unique_text": 995, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "bd": { + "num_samples": 998, + "number_of_characters": 133575, + "number_texts_intersect_with_train": 0, + "min_text_length": 16, + "average_text_length": 133.8426853707415, + "max_text_length": 580, + "unique_text": 996, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "bn": { + "num_samples": 998, + "number_of_characters": 128047, + "number_texts_intersect_with_train": 0, + "min_text_length": 12, + "average_text_length": 128.30360721442887, + "max_text_length": 598, + "unique_text": 996, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "gu": { + "num_samples": 998, + "number_of_characters": 129333, + "number_texts_intersect_with_train": 0, + "min_text_length": 10, + "average_text_length": 129.59218436873746, + "max_text_length": 551, + "unique_text": 995, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "hi": { + "num_samples": 998, + "number_of_characters": 133889, + "number_texts_intersect_with_train": 0, + "min_text_length": 14, + "average_text_length": 134.1573146292585, + "max_text_length": 516, + "unique_text": 996, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "kn": { + "num_samples": 998, + "number_of_characters": 145308, + "number_texts_intersect_with_train": 0, + "min_text_length": 12, + "average_text_length": 145.59919839679358, + "max_text_length": 597, + "unique_text": 996, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "ml": { + "num_samples": 998, + "number_of_characters": 152836, + "number_texts_intersect_with_train": 0, + "min_text_length": 15, + "average_text_length": 153.14228456913827, + "max_text_length": 651, + 
"unique_text": 996, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "mr": { + "num_samples": 998, + "number_of_characters": 133200, + "number_texts_intersect_with_train": 0, + "min_text_length": 10, + "average_text_length": 133.46693386773546, + "max_text_length": 575, + "unique_text": 996, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "or": { + "num_samples": 998, + "number_of_characters": 133897, + "number_texts_intersect_with_train": 0, + "min_text_length": 11, + "average_text_length": 134.16533066132266, + "max_text_length": 557, + "unique_text": 996, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "pa": { + "num_samples": 998, + "number_of_characters": 133927, + "number_texts_intersect_with_train": 0, + "min_text_length": 11, + "average_text_length": 134.19539078156313, + "max_text_length": 596, + "unique_text": 996, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "ta": { + "num_samples": 998, + "number_of_characters": 155517, + "number_texts_intersect_with_train": 0, + "min_text_length": 17, + "average_text_length": 155.82865731462925, + "max_text_length": 621, + "unique_text": 996, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "te": { + "num_samples": 998, + "number_of_characters": 143687, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 143.9749498997996, + "max_text_length": 762, + "unique_text": 996, + "unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + }, + "ur": { + "num_samples": 998, + "number_of_characters": 133734, + "number_texts_intersect_with_train": 0, + "min_text_length": 12, + "average_text_length": 134.00200400801603, + "max_text_length": 573, + "unique_text": 996, + 
"unique_labels": 2, + "labels": { + "0": { + "count": 492 + }, + "1": { + "count": 506 + } + } + } + } + }, + "train": { + "num_samples": 2028, + "number_of_characters": 282333, + "number_texts_intersect_with_train": null, + "min_text_length": 14, + "average_text_length": 139.21745562130178, + "max_text_length": 480, + "unique_text": 2028, + "unique_labels": 2, + "labels": { + "0": { + "count": 1053 + }, + "1": { + "count": 975 + } + }, + "hf_subset_descriptive_stats": { + "as": { + "num_samples": 156, + "number_of_characters": 20520, + "number_texts_intersect_with_train": null, + "min_text_length": 15, + "average_text_length": 131.53846153846155, + "max_text_length": 357, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "bd": { + "num_samples": 156, + "number_of_characters": 20789, + "number_texts_intersect_with_train": null, + "min_text_length": 21, + "average_text_length": 133.26282051282053, + "max_text_length": 396, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "bn": { + "num_samples": 156, + "number_of_characters": 20201, + "number_texts_intersect_with_train": null, + "min_text_length": 14, + "average_text_length": 129.49358974358975, + "max_text_length": 366, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "gu": { + "num_samples": 156, + "number_of_characters": 20641, + "number_texts_intersect_with_train": null, + "min_text_length": 19, + "average_text_length": 132.31410256410257, + "max_text_length": 377, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "hi": { + "num_samples": 156, + "number_of_characters": 21139, + "number_texts_intersect_with_train": null, + "min_text_length": 17, + "average_text_length": 135.50641025641025, + "max_text_length": 403, + 
"unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "kn": { + "num_samples": 156, + "number_of_characters": 23042, + "number_texts_intersect_with_train": null, + "min_text_length": 21, + "average_text_length": 147.7051282051282, + "max_text_length": 442, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "ml": { + "num_samples": 156, + "number_of_characters": 24061, + "number_texts_intersect_with_train": null, + "min_text_length": 22, + "average_text_length": 154.23717948717947, + "max_text_length": 425, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "mr": { + "num_samples": 156, + "number_of_characters": 21210, + "number_texts_intersect_with_train": null, + "min_text_length": 16, + "average_text_length": 135.96153846153845, + "max_text_length": 379, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "or": { + "num_samples": 156, + "number_of_characters": 21301, + "number_texts_intersect_with_train": null, + "min_text_length": 21, + "average_text_length": 136.5448717948718, + "max_text_length": 376, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "pa": { + "num_samples": 156, + "number_of_characters": 21238, + "number_texts_intersect_with_train": null, + "min_text_length": 18, + "average_text_length": 136.14102564102564, + "max_text_length": 375, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "ta": { + "num_samples": 156, + "number_of_characters": 24386, + "number_texts_intersect_with_train": null, + "min_text_length": 21, + "average_text_length": 156.32051282051282, + "max_text_length": 480, + "unique_text": 156, + 
"unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "te": { + "num_samples": 156, + "number_of_characters": 22504, + "number_texts_intersect_with_train": null, + "min_text_length": 18, + "average_text_length": 144.25641025641025, + "max_text_length": 412, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + }, + "ur": { + "num_samples": 156, + "number_of_characters": 21301, + "number_texts_intersect_with_train": null, + "min_text_length": 16, + "average_text_length": 136.5448717948718, + "max_text_length": 403, + "unique_text": 156, + "unique_labels": 2, + "labels": { + "0": { + "count": 81 + }, + "1": { + "count": 75 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/IndonesianIdClickbaitClassification.json b/mteb/descriptive_stats/Classification/IndonesianIdClickbaitClassification.json new file mode 100644 index 0000000000..9bb09074bc --- /dev/null +++ b/mteb/descriptive_stats/Classification/IndonesianIdClickbaitClassification.json @@ -0,0 +1,20 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 131508, + "number_texts_intersect_with_train": null, + "min_text_length": 19, + "average_text_length": 64.212890625, + "max_text_length": 118, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1189 + }, + "1": { + "count": 859 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/IndonesianMongabayConservationClassification.json b/mteb/descriptive_stats/Classification/IndonesianMongabayConservationClassification.json new file mode 100644 index 0000000000..7e79f285b5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/IndonesianMongabayConservationClassification.json @@ -0,0 +1,65 @@ +{ + "validation": { + "num_samples": 492, + "number_of_characters": 824508, + "number_texts_intersect_with_train": 0, + "min_text_length": 
244, + "average_text_length": 1675.8292682926829, + "max_text_length": 2004, + "unique_text": 492, + "unique_labels": 3, + "labels": { + "2": { + "count": 202 + }, + "0": { + "count": 112 + }, + "1": { + "count": 178 + } + } + }, + "test": { + "num_samples": 977, + "number_of_characters": 1637116, + "number_texts_intersect_with_train": 0, + "min_text_length": 91, + "average_text_length": 1675.656090071648, + "max_text_length": 2004, + "unique_text": 977, + "unique_labels": 3, + "labels": { + "2": { + "count": 427 + }, + "0": { + "count": 241 + }, + "1": { + "count": 309 + } + } + }, + "train": { + "num_samples": 11757, + "number_of_characters": 19730571, + "number_texts_intersect_with_train": null, + "min_text_length": 132, + "average_text_length": 1678.1977545292166, + "max_text_length": 2007, + "unique_text": 3919, + "unique_labels": 3, + "labels": { + "0": { + "count": 4895 + }, + "1": { + "count": 4924 + }, + "2": { + "count": 1938 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/InsurancePolicyInterpretationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/InsurancePolicyInterpretationLegalBenchClassification.json new file mode 100644 index 0000000000..c62aa3fe59 --- /dev/null +++ b/mteb/descriptive_stats/Classification/InsurancePolicyInterpretationLegalBenchClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 133, + "number_of_characters": 69411, + "number_texts_intersect_with_train": 0, + "min_text_length": 337, + "average_text_length": 521.8872180451128, + "max_text_length": 784, + "unique_text": 133, + "unique_labels": 3, + "labels": { + "A": { + "count": 47 + }, + "C": { + "count": 59 + }, + "B": { + "count": 27 + } + } + }, + "train": { + "num_samples": 5, + "number_of_characters": 3074, + "number_texts_intersect_with_train": null, + "min_text_length": 528, + "average_text_length": 614.8, + "max_text_length": 813, + "unique_text": 5, + "unique_labels": 3, + "labels": { + 
"B": { + "count": 3 + }, + "A": { + "count": 1 + }, + "C": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/InternationalCitizenshipQuestionsLegalBenchClassification.json b/mteb/descriptive_stats/Classification/InternationalCitizenshipQuestionsLegalBenchClassification.json new file mode 100644 index 0000000000..6d03cd9774 --- /dev/null +++ b/mteb/descriptive_stats/Classification/InternationalCitizenshipQuestionsLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 425078, + "number_texts_intersect_with_train": 0, + "min_text_length": 113, + "average_text_length": 207.5576171875, + "max_text_length": 338, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "1": { + "count": 864 + }, + "0": { + "count": 1184 + } + } + }, + "train": { + "num_samples": 4, + "number_of_characters": 770, + "number_texts_intersect_with_train": null, + "min_text_length": 115, + "average_text_length": 192.5, + "max_text_length": 237, + "unique_text": 4, + "unique_labels": 2, + "labels": { + "1": { + "count": 2 + }, + "0": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/IsiZuluNewsClassification.json b/mteb/descriptive_stats/Classification/IsiZuluNewsClassification.json new file mode 100644 index 0000000000..8884289feb --- /dev/null +++ b/mteb/descriptive_stats/Classification/IsiZuluNewsClassification.json @@ -0,0 +1,62 @@ +{ + "train": { + "num_samples": 752, + "number_of_characters": 32402, + "number_texts_intersect_with_train": null, + "min_text_length": 21, + "average_text_length": 43.087765957446805, + "max_text_length": 98, + "unique_text": 752, + "unique_labels": 16, + "labels": { + "economy, business and finance": { + "count": 46 + }, + "politics": { + "count": 118 + }, + "lifestyle and leisure": { + "count": 1 + }, + "crime, law and justice": { + "count": 292 + }, + "arts, culture, entertainment and 
media": { + "count": 26 + }, + "religion and belief": { + "count": 23 + }, + "sport": { + "count": 22 + }, + "disaster, accident and emergency incident": { + "count": 32 + }, + "society": { + "count": 68 + }, + "health": { + "count": 33 + }, + "education": { + "count": 39 + }, + "conflict, war and peace": { + "count": 6 + }, + "human interest": { + "count": 23 + }, + "weather": { + "count": 6 + }, + "labour": { + "count": 15 + }, + "environment": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ItaCaseholdClassification.json b/mteb/descriptive_stats/Classification/ItaCaseholdClassification.json new file mode 100644 index 0000000000..4b12dc51f2 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ItaCaseholdClassification.json @@ -0,0 +1,431 @@ +{ + "test": { + "num_samples": 221, + "number_of_characters": 929965, + "number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 4207.986425339366, + "max_text_length": 17878, + "unique_text": 221, + "unique_labels": 64, + "labels": { + "Processo amministrativo": { + "count": 39 + }, + "Militari, forze armate e di polizia": { + "count": 9 + }, + "Urbanistica": { + "count": 3 + }, + "Edilizia": { + "count": 10 + }, + "Professioni e mestieri": { + "count": 3 + }, + "Farmaci": { + "count": 1 + }, + "Contratti della Pubblica amministrazione": { + "count": 28 + }, + "Circolazione stradale": { + "count": 1 + }, + "Risarcimento danni": { + "count": 4 + }, + "Enti locali": { + "count": 2 + }, + "Covid-19": { + "count": 29 + }, + "Concessione amministrativa": { + "count": 1 + }, + "Beni culturali": { + "count": 2 + }, + "Autorit\u00e0 amministrative indipendenti": { + "count": 3 + }, + "Magistrati": { + "count": 3 + }, + "Energia elettrica": { + "count": 2 + }, + "Contributi e finanziamenti": { + "count": 2 + }, + "Atto amministrativo": { + "count": 2 + }, + "Sicurezza pubblica": { + "count": 1 + }, + "Agricoltura": { + "count": 2 + }, 
+ "Farmacia": { + "count": 1 + }, + "Giurisdizione": { + "count": 7 + }, + "Silenzio della P.A.": { + "count": 1 + }, + "Accesso ai documenti": { + "count": 4 + }, + "Ambiente": { + "count": 2 + }, + "Universit\u00e0 degli studi": { + "count": 2 + }, + "Amministrazione dello Stato": { + "count": 1 + }, + "Pesca": { + "count": 1 + }, + "Ricorso straordinario al Capo dello Stato": { + "count": 2 + }, + "Cittadinanza": { + "count": 1 + }, + "Pubblico impiego privatizzato": { + "count": 1 + }, + "Ordinanza contingibile ed urgente": { + "count": 1 + }, + "Caccia": { + "count": 1 + }, + "Animali": { + "count": 2 + }, + "Sport": { + "count": 1 + }, + "Ricorso straordinario al Presidente della Regione Siciliana": { + "count": 1 + }, + "Sanit\u00e0 pubblica": { + "count": 5 + }, + "Rifiuti": { + "count": 1 + }, + "Societ\u00e0 in house": { + "count": 1 + }, + "Paesaggio": { + "count": 2 + }, + "Lavoro": { + "count": 1 + }, + "Economia": { + "count": 1 + }, + "Informativa antimafia": { + "count": 6 + }, + "Consiglio di Stato e Consiglio di Giustizia per la Regione Siciliana": { + "count": 1 + }, + "Elezioni": { + "count": 1 + }, + "Procedimento amministrativo": { + "count": 2 + }, + "Pubblica istruzione": { + "count": 3 + }, + "Inquinamento": { + "count": 1 + }, + "Pubblica amministrazione": { + "count": 1 + }, + "Straniero": { + "count": 3 + }, + "Contratti pubblici": { + "count": 1 + }, + "Telecomunicazione": { + "count": 1 + }, + "Concorso": { + "count": 1 + }, + "Commercio": { + "count": 1 + }, + "Espropriazione per pubblica utilit\u00e0": { + "count": 2 + }, + "Giustizia amministrativa": { + "count": 2 + }, + "Imposte e tasse": { + "count": 1 + }, + "Alimenti": { + "count": 1 + }, + "Autorizzazione amministrativa": { + "count": 1 + }, + "Aeroporti": { + "count": 1 + }, + "Concorrenza": { + "count": 1 + }, + "Leggi e decreti": { + "count": 1 + }, + "Giochi": { + "count": 1 + }, + "Annullamento d\u2019ufficio e revoca": { + "count": 1 + } + } + }, + "train": { + 
"num_samples": 792, + "number_of_characters": 3651636, + "number_texts_intersect_with_train": null, + "min_text_length": 322, + "average_text_length": 4610.651515151515, + "max_text_length": 19037, + "unique_text": 792, + "unique_labels": 71, + "labels": { + "Procedimento amministrativo": { + "count": 7 + }, + "Edilizia": { + "count": 36 + }, + "Contratti della Pubblica amministrazione": { + "count": 102 + }, + "Giochi": { + "count": 4 + }, + "Espropriazione per pubblica utilit\u00e0": { + "count": 5 + }, + "Covid-19": { + "count": 104 + }, + "Militari, forze armate e di polizia": { + "count": 31 + }, + "Processo amministrativo": { + "count": 138 + }, + "Energia elettrica": { + "count": 9 + }, + "Alimenti": { + "count": 4 + }, + "Autorit\u00e0 amministrative indipendenti": { + "count": 13 + }, + "Ambiente": { + "count": 8 + }, + "Consiglio di Stato e Consiglio di Giustizia per la Regione Siciliana": { + "count": 4 + }, + "Magistrati": { + "count": 9 + }, + "Concorrenza": { + "count": 4 + }, + "Agricoltura": { + "count": 5 + }, + "Pubblica istruzione": { + "count": 13 + }, + "Animali": { + "count": 7 + }, + "Rifiuti": { + "count": 5 + }, + "Beni culturali": { + "count": 5 + }, + "Giurisdizione": { + "count": 25 + }, + "Societ\u00e0 in house": { + "count": 2 + }, + "Enti locali": { + "count": 8 + }, + "Paesaggio": { + "count": 6 + }, + "Concorso": { + "count": 5 + }, + "Farmaci": { + "count": 4 + }, + "Sport": { + "count": 5 + }, + "Elezioni": { + "count": 5 + }, + "Sicurezza pubblica": { + "count": 2 + }, + "Concessione amministrativa": { + "count": 4 + }, + "Silenzio della P.A.": { + "count": 2 + }, + "Straniero": { + "count": 9 + }, + "Informativa antimafia": { + "count": 22 + }, + "Contributi e finanziamenti": { + "count": 8 + }, + "Farmacia": { + "count": 5 + }, + "Risarcimento danni": { + "count": 13 + }, + "Giustizia amministrativa": { + "count": 6 + }, + "Ricorso straordinario al Presidente della Regione Siciliana": { + "count": 2 + }, + "Atto 
amministrativo": { + "count": 9 + }, + "Amministrazione dello Stato": { + "count": 3 + }, + "Ricorso straordinario al Capo dello Stato": { + "count": 8 + }, + "Urbanistica": { + "count": 11 + }, + "Inquinamento": { + "count": 3 + }, + "Cave": { + "count": 2 + }, + "Piano nazionale di ripresa e resilienza": { + "count": 2 + }, + "Sanit\u00e0 pubblica": { + "count": 20 + }, + "Autorizzazione amministrativa": { + "count": 2 + }, + "Lavoro": { + "count": 2 + }, + "Imposte e tasse": { + "count": 3 + }, + "Universit\u00e0 degli studi": { + "count": 7 + }, + "Societ\u00e0": { + "count": 2 + }, + "Pubblica amministrazione": { + "count": 5 + }, + "Ordinanza contingibile ed urgente": { + "count": 3 + }, + "Annullamento d\u2019ufficio e revoca": { + "count": 2 + }, + "Criminalit\u00e0 organizzata": { + "count": 2 + }, + "Mare": { + "count": 2 + }, + "Circolazione stradale": { + "count": 4 + }, + "Aeroporti": { + "count": 2 + }, + "Professioni e mestieri": { + "count": 9 + }, + "Cittadinanza": { + "count": 4 + }, + "Accesso ai documenti": { + "count": 14 + }, + "Pesca": { + "count": 2 + }, + "Telecomunicazione": { + "count": 2 + }, + "Unione Europea": { + "count": 2 + }, + "Caccia": { + "count": 2 + }, + "Contratti pubblici": { + "count": 4 + }, + "Economia": { + "count": 2 + }, + "Commercio": { + "count": 3 + }, + "Demanio": { + "count": 2 + }, + "Leggi e decreti": { + "count": 2 + }, + "Pubblico impiego privatizzato": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/Itacola.json b/mteb/descriptive_stats/Classification/Itacola.json new file mode 100644 index 0000000000..652e7cb045 --- /dev/null +++ b/mteb/descriptive_stats/Classification/Itacola.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 975, + "number_of_characters": 35758, + "number_texts_intersect_with_train": 0, + "min_text_length": 7, + "average_text_length": 36.6748717948718, + "max_text_length": 146, + "unique_text": 975, + "unique_labels": 2, + 
"labels": { + "1": { + "count": 821 + }, + "0": { + "count": 154 + } + } + }, + "train": { + "num_samples": 7801, + "number_of_characters": 280462, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 35.9520574285348, + "max_text_length": 134, + "unique_text": 7801, + "unique_labels": 2, + "labels": { + "1": { + "count": 6583 + }, + "0": { + "count": 1218 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/JCrewBlockerLegalBenchClassification.json b/mteb/descriptive_stats/Classification/JCrewBlockerLegalBenchClassification.json new file mode 100644 index 0000000000..8749ddff7e --- /dev/null +++ b/mteb/descriptive_stats/Classification/JCrewBlockerLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 54, + "number_of_characters": 58980, + "number_texts_intersect_with_train": 0, + "min_text_length": 193, + "average_text_length": 1092.2222222222222, + "max_text_length": 4099, + "unique_text": 54, + "unique_labels": 2, + "labels": { + "1": { + "count": 45 + }, + "0": { + "count": 9 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 7259, + "number_texts_intersect_with_train": null, + "min_text_length": 458, + "average_text_length": 1209.8333333333333, + "max_text_length": 2331, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/JDReview.json b/mteb/descriptive_stats/Classification/JDReview.json new file mode 100644 index 0000000000..c0a95d467e --- /dev/null +++ b/mteb/descriptive_stats/Classification/JDReview.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 533, + "number_of_characters": 39063, + "number_texts_intersect_with_train": 108, + "min_text_length": 1, + "average_text_length": 73.28893058161351, + "max_text_length": 815, + "unique_text": 496, + "unique_labels": 2, + "labels": { + 
"0": { + "count": 434 + }, + "1": { + "count": 99 + } + } + }, + "train": { + "num_samples": 3729, + "number_of_characters": 282541, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 75.76857066237598, + "max_text_length": 895, + "unique_text": 3064, + "unique_labels": 2, + "labels": { + "0": { + "count": 3100 + }, + "1": { + "count": 629 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/JavaneseIMDBClassification.json b/mteb/descriptive_stats/Classification/JavaneseIMDBClassification.json new file mode 100644 index 0000000000..4fd4559637 --- /dev/null +++ b/mteb/descriptive_stats/Classification/JavaneseIMDBClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 978954, + "number_texts_intersect_with_train": 10, + "min_text_length": 6, + "average_text_length": 478.0048828125, + "max_text_length": 976, + "unique_text": 2047, + "unique_labels": 2, + "labels": { + "0": { + "count": 1024 + }, + "1": { + "count": 1024 + } + } + }, + "train": { + "num_samples": 25000, + "number_of_characters": 12098675, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 483.947, + "max_text_length": 1860, + "unique_text": 24902, + "unique_labels": 2, + "labels": { + "1": { + "count": 12500 + }, + "0": { + "count": 12500 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/KLUE-TC.json b/mteb/descriptive_stats/Classification/KLUE-TC.json new file mode 100644 index 0000000000..ecb2c25cec --- /dev/null +++ b/mteb/descriptive_stats/Classification/KLUE-TC.json @@ -0,0 +1,68 @@ +{ + "validation": { + "num_samples": 2048, + "number_of_characters": 55699, + "number_texts_intersect_with_train": 0, + "min_text_length": 8, + "average_text_length": 27.19677734375, + "max_text_length": 41, + "unique_text": 2048, + "unique_labels": 7, + "labels": { + "\uc0ac\ud68c": { + "count": 832 + }, 
+ "\uc815\uce58": { + "count": 162 + }, + "\uacbd\uc81c": { + "count": 303 + }, + "\uc0dd\ud65c\ubb38\ud654": { + "count": 308 + }, + "IT\uacfc\ud559": { + "count": 125 + }, + "\uc2a4\ud3ec\uce20": { + "count": 130 + }, + "\uc138\uacc4": { + "count": 188 + } + } + }, + "train": { + "num_samples": 45678, + "number_of_characters": 1250049, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 27.36654406935505, + "max_text_length": 44, + "unique_text": 45678, + "unique_labels": 7, + "labels": { + "\uc0dd\ud65c\ubb38\ud654": { + "count": 5751 + }, + "\uc0ac\ud68c": { + "count": 5133 + }, + "IT\uacfc\ud559": { + "count": 5235 + }, + "\uc2a4\ud3ec\uce20": { + "count": 7742 + }, + "\uc138\uacc4": { + "count": 8320 + }, + "\uc815\uce58": { + "count": 7379 + }, + "\uacbd\uc81c": { + "count": 6118 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/KannadaNewsClassification.json b/mteb/descriptive_stats/Classification/KannadaNewsClassification.json new file mode 100644 index 0000000000..5b5bf2d8e3 --- /dev/null +++ b/mteb/descriptive_stats/Classification/KannadaNewsClassification.json @@ -0,0 +1,23 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 134012, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 65.435546875, + "max_text_length": 141, + "unique_text": 2038, + "unique_labels": 3, + "labels": { + "0": { + "count": 1069 + }, + "1": { + "count": 737 + }, + "2": { + "count": 242 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/KinopoiskClassification.json b/mteb/descriptive_stats/Classification/KinopoiskClassification.json new file mode 100644 index 0000000000..0af62d519a --- /dev/null +++ b/mteb/descriptive_stats/Classification/KinopoiskClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 1500, + "number_of_characters": 2845895, + "number_texts_intersect_with_train": 
0, + "min_text_length": 96, + "average_text_length": 1897.2633333333333, + "max_text_length": 8147, + "unique_text": 1500, + "unique_labels": 3, + "labels": { + "1": { + "count": 500 + }, + "0": { + "count": 500 + }, + "2": { + "count": 500 + } + } + }, + "train": { + "num_samples": 10500, + "number_of_characters": 20033375, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 1907.9404761904761, + "max_text_length": 18363, + "unique_text": 10500, + "unique_labels": 3, + "labels": { + "2": { + "count": 3500 + }, + "0": { + "count": 3500 + }, + "1": { + "count": 3500 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/KorFin.json b/mteb/descriptive_stats/Classification/KorFin.json new file mode 100644 index 0000000000..0074c48873 --- /dev/null +++ b/mteb/descriptive_stats/Classification/KorFin.json @@ -0,0 +1,23 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 154385, + "number_texts_intersect_with_train": null, + "min_text_length": 12, + "average_text_length": 75.38330078125, + "max_text_length": 216, + "unique_text": 1869, + "unique_labels": 3, + "labels": { + "1": { + "count": 602 + }, + "0": { + "count": 689 + }, + "2": { + "count": 757 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/KorHateClassification.json b/mteb/descriptive_stats/Classification/KorHateClassification.json new file mode 100644 index 0000000000..287c0e12a6 --- /dev/null +++ b/mteb/descriptive_stats/Classification/KorHateClassification.json @@ -0,0 +1,23 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 79006, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 38.5771484375, + "max_text_length": 130, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "1": { + "count": 648 + }, + "2": { + "count": 904 + }, + "0": { + "count": 496 + } + } + } +} \ No newline at end of file diff 
--git a/mteb/descriptive_stats/Classification/KorSarcasmClassification.json b/mteb/descriptive_stats/Classification/KorSarcasmClassification.json new file mode 100644 index 0000000000..855ccc386d --- /dev/null +++ b/mteb/descriptive_stats/Classification/KorSarcasmClassification.json @@ -0,0 +1,20 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 99234, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 48.4541015625, + "max_text_length": 260, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "1": { + "count": 1022 + }, + "0": { + "count": 1026 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/KurdishSentimentClassification.json b/mteb/descriptive_stats/Classification/KurdishSentimentClassification.json new file mode 100644 index 0000000000..101a2af910 --- /dev/null +++ b/mteb/descriptive_stats/Classification/KurdishSentimentClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1987, + "number_of_characters": 111504, + "number_texts_intersect_with_train": 5, + "min_text_length": 9, + "average_text_length": 56.11675893306492, + "max_text_length": 282, + "unique_text": 1987, + "unique_labels": 2, + "labels": { + "1": { + "count": 1065 + }, + "0": { + "count": 922 + } + } + }, + "train": { + "num_samples": 6000, + "number_of_characters": 356322, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 59.387, + "max_text_length": 7639, + "unique_text": 5753, + "unique_labels": 2, + "labels": { + "1": { + "count": 3000 + }, + "0": { + "count": 3000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LccSentimentClassification.json b/mteb/descriptive_stats/Classification/LccSentimentClassification.json new file mode 100644 index 0000000000..a83aea259e --- /dev/null +++ b/mteb/descriptive_stats/Classification/LccSentimentClassification.json @@ -0,0 +1,44 @@ +{ + 
"test": { + "num_samples": 150, + "number_of_characters": 17809, + "number_texts_intersect_with_train": 0, + "min_text_length": 15, + "average_text_length": 118.72666666666667, + "max_text_length": 255, + "unique_text": 150, + "unique_labels": 3, + "labels": { + "neutral": { + "count": 83 + }, + "positiv": { + "count": 39 + }, + "negativ": { + "count": 28 + } + } + }, + "train": { + "num_samples": 349, + "number_of_characters": 39601, + "number_texts_intersect_with_train": null, + "min_text_length": 16, + "average_text_length": 113.46991404011462, + "max_text_length": 253, + "unique_text": 349, + "unique_labels": 3, + "labels": { + "negativ": { + "count": 66 + }, + "positiv": { + "count": 90 + }, + "neutral": { + "count": 193 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsBenefitsLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsBenefitsLegalBenchClassification.json new file mode 100644 index 0000000000..2e1c6ffd72 --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsBenefitsLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 66, + "number_of_characters": 86357, + "number_texts_intersect_with_train": 0, + "min_text_length": 252, + "average_text_length": 1308.439393939394, + "max_text_length": 4659, + "unique_text": 66, + "unique_labels": 2, + "labels": { + "1": { + "count": 33 + }, + "0": { + "count": 33 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 8152, + "number_texts_intersect_with_train": null, + "min_text_length": 229, + "average_text_length": 1358.6666666666667, + "max_text_length": 4207, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsBusinessLegalBenchClassification.json 
b/mteb/descriptive_stats/Classification/LearnedHandsBusinessLegalBenchClassification.json new file mode 100644 index 0000000000..a4d0dc368f --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsBusinessLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 174, + "number_of_characters": 199145, + "number_texts_intersect_with_train": 0, + "min_text_length": 202, + "average_text_length": 1144.5114942528735, + "max_text_length": 5557, + "unique_text": 174, + "unique_labels": 2, + "labels": { + "1": { + "count": 87 + }, + "0": { + "count": 87 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 5976, + "number_texts_intersect_with_train": null, + "min_text_length": 365, + "average_text_length": 996.0, + "max_text_length": 2974, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsConsumerLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsConsumerLegalBenchClassification.json new file mode 100644 index 0000000000..4f573ec3c5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsConsumerLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 614, + "number_of_characters": 784358, + "number_texts_intersect_with_train": 0, + "min_text_length": 152, + "average_text_length": 1277.456026058632, + "max_text_length": 12224, + "unique_text": 614, + "unique_labels": 2, + "labels": { + "1": { + "count": 307 + }, + "0": { + "count": 307 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 6262, + "number_texts_intersect_with_train": null, + "min_text_length": 519, + "average_text_length": 1043.6666666666667, + "max_text_length": 1872, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file 
diff --git a/mteb/descriptive_stats/Classification/LearnedHandsCourtsLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsCourtsLegalBenchClassification.json new file mode 100644 index 0000000000..75c3b6182a --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsCourtsLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 192, + "number_of_characters": 224837, + "number_texts_intersect_with_train": 0, + "min_text_length": 195, + "average_text_length": 1171.0260416666667, + "max_text_length": 5233, + "unique_text": 192, + "unique_labels": 2, + "labels": { + "1": { + "count": 96 + }, + "0": { + "count": 96 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 10600, + "number_texts_intersect_with_train": null, + "min_text_length": 545, + "average_text_length": 1766.6666666666667, + "max_text_length": 3743, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsCrimeLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsCrimeLegalBenchClassification.json new file mode 100644 index 0000000000..f5d798531a --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsCrimeLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 688, + "number_of_characters": 834476, + "number_texts_intersect_with_train": 0, + "min_text_length": 113, + "average_text_length": 1212.9011627906978, + "max_text_length": 8361, + "unique_text": 688, + "unique_labels": 2, + "labels": { + "1": { + "count": 344 + }, + "0": { + "count": 344 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 7229, + "number_texts_intersect_with_train": null, + "min_text_length": 440, + "average_text_length": 1204.8333333333333, + "max_text_length": 1969, + "unique_text": 6, + "unique_labels": 2, + 
"labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsDivorceLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsDivorceLegalBenchClassification.json new file mode 100644 index 0000000000..60fe57d36e --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsDivorceLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 150, + "number_of_characters": 186365, + "number_texts_intersect_with_train": 0, + "min_text_length": 122, + "average_text_length": 1242.4333333333334, + "max_text_length": 14062, + "unique_text": 150, + "unique_labels": 2, + "labels": { + "1": { + "count": 75 + }, + "0": { + "count": 75 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 10558, + "number_texts_intersect_with_train": null, + "min_text_length": 455, + "average_text_length": 1759.6666666666667, + "max_text_length": 3812, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsDomesticViolenceLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsDomesticViolenceLegalBenchClassification.json new file mode 100644 index 0000000000..9effb7189e --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsDomesticViolenceLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 174, + "number_of_characters": 236785, + "number_texts_intersect_with_train": 0, + "min_text_length": 81, + "average_text_length": 1360.8333333333333, + "max_text_length": 8979, + "unique_text": 174, + "unique_labels": 2, + "labels": { + "1": { + "count": 87 + }, + "0": { + "count": 87 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 11077, + "number_texts_intersect_with_train": null, + 
"min_text_length": 807, + "average_text_length": 1846.1666666666667, + "max_text_length": 3011, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsEducationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsEducationLegalBenchClassification.json new file mode 100644 index 0000000000..f3ba909d80 --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsEducationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 56, + "number_of_characters": 78257, + "number_texts_intersect_with_train": 0, + "min_text_length": 214, + "average_text_length": 1397.4464285714287, + "max_text_length": 4864, + "unique_text": 56, + "unique_labels": 2, + "labels": { + "1": { + "count": 28 + }, + "0": { + "count": 28 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 6899, + "number_texts_intersect_with_train": null, + "min_text_length": 822, + "average_text_length": 1149.8333333333333, + "max_text_length": 1637, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsEmploymentLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsEmploymentLegalBenchClassification.json new file mode 100644 index 0000000000..d926ebf5b7 --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsEmploymentLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 710, + "number_of_characters": 896545, + "number_texts_intersect_with_train": 0, + "min_text_length": 68, + "average_text_length": 1262.7394366197184, + "max_text_length": 9180, + "unique_text": 710, + "unique_labels": 2, + "labels": { + "1": { + "count": 355 + }, + "0": { + "count": 355 + } + } 
+ }, + "train": { + "num_samples": 6, + "number_of_characters": 11130, + "number_texts_intersect_with_train": null, + "min_text_length": 985, + "average_text_length": 1855.0, + "max_text_length": 4502, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsEstatesLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsEstatesLegalBenchClassification.json new file mode 100644 index 0000000000..3e28211ee0 --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsEstatesLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 178, + "number_of_characters": 213726, + "number_texts_intersect_with_train": 0, + "min_text_length": 142, + "average_text_length": 1200.7078651685392, + "max_text_length": 9402, + "unique_text": 178, + "unique_labels": 2, + "labels": { + "1": { + "count": 89 + }, + "0": { + "count": 89 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 5869, + "number_texts_intersect_with_train": null, + "min_text_length": 414, + "average_text_length": 978.1666666666666, + "max_text_length": 1634, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsFamilyLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsFamilyLegalBenchClassification.json new file mode 100644 index 0000000000..45df58cb2f --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsFamilyLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 2750841, + "number_texts_intersect_with_train": 0, + "min_text_length": 68, + "average_text_length": 1343.18408203125, + "max_text_length": 18318, + "unique_text": 2048, + 
"unique_labels": 2, + "labels": { + "1": { + "count": 1024 + }, + "0": { + "count": 1024 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 8615, + "number_texts_intersect_with_train": null, + "min_text_length": 687, + "average_text_length": 1435.8333333333333, + "max_text_length": 2549, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsHealthLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsHealthLegalBenchClassification.json new file mode 100644 index 0000000000..ae3ffbe6ef --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsHealthLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 226, + "number_of_characters": 332806, + "number_texts_intersect_with_train": 0, + "min_text_length": 176, + "average_text_length": 1472.5929203539822, + "max_text_length": 7803, + "unique_text": 226, + "unique_labels": 2, + "labels": { + "1": { + "count": 113 + }, + "0": { + "count": 113 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 6062, + "number_texts_intersect_with_train": null, + "min_text_length": 444, + "average_text_length": 1010.3333333333334, + "max_text_length": 1691, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsHousingLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsHousingLegalBenchClassification.json new file mode 100644 index 0000000000..f41f902bc4 --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsHousingLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 2652837, + "number_texts_intersect_with_train": 0, + 
"min_text_length": 60, + "average_text_length": 1295.33056640625, + "max_text_length": 15545, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1024 + }, + "1": { + "count": 1024 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 9631, + "number_texts_intersect_with_train": null, + "min_text_length": 557, + "average_text_length": 1605.1666666666667, + "max_text_length": 2736, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsImmigrationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsImmigrationLegalBenchClassification.json new file mode 100644 index 0000000000..604e3d72bd --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsImmigrationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 134, + "number_of_characters": 162986, + "number_texts_intersect_with_train": 0, + "min_text_length": 176, + "average_text_length": 1216.313432835821, + "max_text_length": 5870, + "unique_text": 134, + "unique_labels": 2, + "labels": { + "1": { + "count": 67 + }, + "0": { + "count": 67 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 3862, + "number_texts_intersect_with_train": null, + "min_text_length": 457, + "average_text_length": 643.6666666666666, + "max_text_length": 791, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsTortsLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsTortsLegalBenchClassification.json new file mode 100644 index 0000000000..eaf71c9022 --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsTortsLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + 
"test": { + "num_samples": 432, + "number_of_characters": 607812, + "number_texts_intersect_with_train": 0, + "min_text_length": 80, + "average_text_length": 1406.9722222222222, + "max_text_length": 13071, + "unique_text": 432, + "unique_labels": 2, + "labels": { + "1": { + "count": 216 + }, + "0": { + "count": 216 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 4391, + "number_texts_intersect_with_train": null, + "min_text_length": 329, + "average_text_length": 731.8333333333334, + "max_text_length": 1129, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LearnedHandsTrafficLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LearnedHandsTrafficLegalBenchClassification.json new file mode 100644 index 0000000000..cbebc20172 --- /dev/null +++ b/mteb/descriptive_stats/Classification/LearnedHandsTrafficLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 556, + "number_of_characters": 657702, + "number_texts_intersect_with_train": 0, + "min_text_length": 133, + "average_text_length": 1182.9172661870505, + "max_text_length": 10168, + "unique_text": 556, + "unique_labels": 2, + "labels": { + "1": { + "count": 278 + }, + "0": { + "count": 278 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 6157, + "number_texts_intersect_with_train": null, + "min_text_length": 250, + "average_text_length": 1026.1666666666667, + "max_text_length": 1872, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LegalReasoningCausalityLegalBenchClassification.json b/mteb/descriptive_stats/Classification/LegalReasoningCausalityLegalBenchClassification.json new file mode 100644 index 0000000000..5d43411816 --- 
/dev/null +++ b/mteb/descriptive_stats/Classification/LegalReasoningCausalityLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 55, + "number_of_characters": 86007, + "number_texts_intersect_with_train": 0, + "min_text_length": 550, + "average_text_length": 1563.7636363636364, + "max_text_length": 3370, + "unique_text": 55, + "unique_labels": 2, + "labels": { + "1": { + "count": 31 + }, + "0": { + "count": 24 + } + } + }, + "train": { + "num_samples": 4, + "number_of_characters": 4616, + "number_texts_intersect_with_train": null, + "min_text_length": 780, + "average_text_length": 1154.0, + "max_text_length": 1809, + "unique_text": 4, + "unique_labels": 2, + "labels": { + "1": { + "count": 2 + }, + "0": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MAUDLegalBenchClassification.json b/mteb/descriptive_stats/Classification/MAUDLegalBenchClassification.json new file mode 100644 index 0000000000..c2f8a502ca --- /dev/null +++ b/mteb/descriptive_stats/Classification/MAUDLegalBenchClassification.json @@ -0,0 +1,86 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 3624527, + "number_texts_intersect_with_train": 387, + "min_text_length": 44, + "average_text_length": 1769.78857421875, + "max_text_length": 7610, + "unique_text": 1309, + "unique_labels": 10, + "labels": { + "0": { + "count": 571 + }, + "1": { + "count": 941 + }, + "4": { + "count": 21 + }, + "2": { + "count": 229 + }, + "3": { + "count": 195 + }, + "7": { + "count": 39 + }, + "8": { + "count": 15 + }, + "5": { + "count": 27 + }, + "9": { + "count": 6 + }, + "6": { + "count": 4 + } + } + }, + "train": { + "num_samples": 941, + "number_of_characters": 1650228, + "number_texts_intersect_with_train": null, + "min_text_length": 86, + "average_text_length": 1753.6960680127524, + "max_text_length": 7610, + "unique_text": 751, + "unique_labels": 10, + "labels": { + "1": { + "count": 433 + }, + "0": { + "count": 
262 + }, + "3": { + "count": 89 + }, + "2": { + "count": 106 + }, + "7": { + "count": 18 + }, + "5": { + "count": 12 + }, + "8": { + "count": 7 + }, + "9": { + "count": 2 + }, + "4": { + "count": 10 + }, + "6": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MTOPDomainClassification.json b/mteb/descriptive_stats/Classification/MTOPDomainClassification.json new file mode 100644 index 0000000000..81d71589ba --- /dev/null +++ b/mteb/descriptive_stats/Classification/MTOPDomainClassification.json @@ -0,0 +1,953 @@ +{ + "validation": { + "num_samples": 10837, + "number_of_characters": 431895, + "number_texts_intersect_with_train": 127, + "min_text_length": 5, + "average_text_length": 39.85374181046415, + "max_text_length": 154, + "unique_text": 10830, + "unique_labels": 11, + "labels": { + "1": { + "count": 1688 + }, + "10": { + "count": 754 + }, + "7": { + "count": 849 + }, + "3": { + "count": 681 + }, + "6": { + "count": 985 + }, + "2": { + "count": 647 + }, + "9": { + "count": 872 + }, + "0": { + "count": 833 + }, + "5": { + "count": 1182 + }, + "4": { + "count": 982 + }, + "8": { + "count": 1364 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 2235, + "number_of_characters": 81663, + "number_texts_intersect_with_train": 7, + "min_text_length": 8, + "average_text_length": 36.53825503355705, + "max_text_length": 125, + "unique_text": 2235, + "unique_labels": 11, + "labels": { + "1": { + "count": 329 + }, + "10": { + "count": 185 + }, + "7": { + "count": 183 + }, + "3": { + "count": 134 + }, + "6": { + "count": 186 + }, + "2": { + "count": 123 + }, + "9": { + "count": 196 + }, + "0": { + "count": 176 + }, + "5": { + "count": 228 + }, + "4": { + "count": 207 + }, + "8": { + "count": 288 + } + } + }, + "de": { + "num_samples": 1815, + "number_of_characters": 77727, + "number_texts_intersect_with_train": 23, + "min_text_length": 10, + "average_text_length": 42.824793388429754, + 
"max_text_length": 154, + "unique_text": 1814, + "unique_labels": 11, + "labels": { + "0": { + "count": 99 + }, + "1": { + "count": 303 + }, + "2": { + "count": 104 + }, + "3": { + "count": 122 + }, + "6": { + "count": 165 + }, + "4": { + "count": 157 + }, + "7": { + "count": 141 + }, + "5": { + "count": 203 + }, + "8": { + "count": 220 + }, + "10": { + "count": 133 + }, + "9": { + "count": 168 + } + } + }, + "es": { + "num_samples": 1527, + "number_of_characters": 67720, + "number_texts_intersect_with_train": 41, + "min_text_length": 11, + "average_text_length": 44.34839554682384, + "max_text_length": 134, + "unique_text": 1525, + "unique_labels": 11, + "labels": { + "1": { + "count": 197 + }, + "6": { + "count": 166 + }, + "4": { + "count": 138 + }, + "10": { + "count": 103 + }, + "3": { + "count": 104 + }, + "5": { + "count": 190 + }, + "2": { + "count": 115 + }, + "8": { + "count": 212 + }, + "7": { + "count": 82 + }, + "9": { + "count": 76 + }, + "0": { + "count": 144 + } + } + }, + "fr": { + "num_samples": 1577, + "number_of_characters": 68008, + "number_texts_intersect_with_train": 12, + "min_text_length": 11, + "average_text_length": 43.12492073557387, + "max_text_length": 141, + "unique_text": 1575, + "unique_labels": 11, + "labels": { + "0": { + "count": 125 + }, + "1": { + "count": 278 + }, + "2": { + "count": 92 + }, + "3": { + "count": 89 + }, + "4": { + "count": 137 + }, + "7": { + "count": 145 + }, + "6": { + "count": 138 + }, + "5": { + "count": 168 + }, + "8": { + "count": 203 + }, + "9": { + "count": 124 + }, + "10": { + "count": 78 + } + } + }, + "hi": { + "num_samples": 2012, + "number_of_characters": 78749, + "number_texts_intersect_with_train": 16, + "min_text_length": 7, + "average_text_length": 39.139662027833005, + "max_text_length": 131, + "unique_text": 2011, + "unique_labels": 11, + "labels": { + "0": { + "count": 161 + }, + "1": { + "count": 304 + }, + "3": { + "count": 126 + }, + "4": { + "count": 193 + }, + "2": { + "count": 109 + }, 
+ "10": { + "count": 154 + }, + "5": { + "count": 208 + }, + "6": { + "count": 167 + }, + "7": { + "count": 172 + }, + "8": { + "count": 235 + }, + "9": { + "count": 183 + } + } + }, + "th": { + "num_samples": 1671, + "number_of_characters": 58028, + "number_texts_intersect_with_train": 28, + "min_text_length": 5, + "average_text_length": 34.726511071214844, + "max_text_length": 105, + "unique_text": 1670, + "unique_labels": 11, + "labels": { + "0": { + "count": 128 + }, + "1": { + "count": 277 + }, + "2": { + "count": 104 + }, + "3": { + "count": 106 + }, + "4": { + "count": 150 + }, + "5": { + "count": 185 + }, + "6": { + "count": 163 + }, + "7": { + "count": 126 + }, + "8": { + "count": 206 + }, + "9": { + "count": 125 + }, + "10": { + "count": 101 + } + } + } + } + }, + "test": { + "num_samples": 19680, + "number_of_characters": 781580, + "number_texts_intersect_with_train": 332, + "min_text_length": 3, + "average_text_length": 39.71443089430894, + "max_text_length": 168, + "unique_text": 19627, + "unique_labels": 11, + "labels": { + "2": { + "count": 977 + }, + "5": { + "count": 2372 + }, + "6": { + "count": 2014 + }, + "8": { + "count": 2572 + }, + "9": { + "count": 1317 + }, + "1": { + "count": 3065 + }, + "10": { + "count": 1330 + }, + "3": { + "count": 1351 + }, + "0": { + "count": 1459 + }, + "7": { + "count": 1535 + }, + "4": { + "count": 1688 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 4386, + "number_of_characters": 161376, + "number_texts_intersect_with_train": 15, + "min_text_length": 3, + "average_text_length": 36.79343365253078, + "max_text_length": 132, + "unique_text": 4384, + "unique_labels": 11, + "labels": { + "2": { + "count": 197 + }, + "5": { + "count": 487 + }, + "6": { + "count": 418 + }, + "8": { + "count": 613 + }, + "9": { + "count": 346 + }, + "1": { + "count": 613 + }, + "10": { + "count": 358 + }, + "3": { + "count": 290 + }, + "0": { + "count": 341 + }, + "7": { + "count": 354 + }, + "4": { + "count": 369 
+ } + } + }, + "de": { + "num_samples": 3549, + "number_of_characters": 151445, + "number_texts_intersect_with_train": 69, + "min_text_length": 7, + "average_text_length": 42.67258382642998, + "max_text_length": 162, + "unique_text": 3536, + "unique_labels": 11, + "labels": { + "0": { + "count": 193 + }, + "10": { + "count": 264 + }, + "1": { + "count": 553 + }, + "2": { + "count": 163 + }, + "3": { + "count": 256 + }, + "5": { + "count": 439 + }, + "4": { + "count": 306 + }, + "6": { + "count": 353 + }, + "7": { + "count": 279 + }, + "8": { + "count": 452 + }, + "9": { + "count": 291 + } + } + }, + "es": { + "num_samples": 2998, + "number_of_characters": 130569, + "number_texts_intersect_with_train": 97, + "min_text_length": 6, + "average_text_length": 43.552034689793196, + "max_text_length": 168, + "unique_text": 2983, + "unique_labels": 11, + "labels": { + "1": { + "count": 401 + }, + "6": { + "count": 352 + }, + "4": { + "count": 246 + }, + "10": { + "count": 206 + }, + "3": { + "count": 231 + }, + "5": { + "count": 404 + }, + "2": { + "count": 177 + }, + "8": { + "count": 435 + }, + "7": { + "count": 156 + }, + "9": { + "count": 126 + }, + "0": { + "count": 264 + } + } + }, + "fr": { + "num_samples": 3193, + "number_of_characters": 140029, + "number_texts_intersect_with_train": 45, + "min_text_length": 6, + "average_text_length": 43.854995302223614, + "max_text_length": 143, + "unique_text": 3187, + "unique_labels": 11, + "labels": { + "0": { + "count": 253 + }, + "1": { + "count": 551 + }, + "2": { + "count": 159 + }, + "3": { + "count": 190 + }, + "4": { + "count": 280 + }, + "6": { + "count": 330 + }, + "5": { + "count": 356 + }, + "7": { + "count": 272 + }, + "8": { + "count": 462 + }, + "10": { + "count": 159 + }, + "9": { + "count": 181 + } + } + }, + "hi": { + "num_samples": 2789, + "number_of_characters": 104295, + "number_texts_intersect_with_train": 32, + "min_text_length": 7, + "average_text_length": 37.395123700250984, + "max_text_length": 148, + 
"unique_text": 2785, + "unique_labels": 11, + "labels": { + "0": { + "count": 208 + }, + "1": { + "count": 470 + }, + "5": { + "count": 335 + }, + "3": { + "count": 195 + }, + "4": { + "count": 242 + }, + "2": { + "count": 132 + }, + "6": { + "count": 267 + }, + "7": { + "count": 262 + }, + "8": { + "count": 265 + }, + "10": { + "count": 186 + }, + "9": { + "count": 227 + } + } + }, + "th": { + "num_samples": 2765, + "number_of_characters": 93866, + "number_texts_intersect_with_train": 74, + "min_text_length": 6, + "average_text_length": 33.94792043399638, + "max_text_length": 117, + "unique_text": 2754, + "unique_labels": 11, + "labels": { + "0": { + "count": 200 + }, + "1": { + "count": 477 + }, + "2": { + "count": 149 + }, + "3": { + "count": 189 + }, + "4": { + "count": 245 + }, + "6": { + "count": 294 + }, + "5": { + "count": 351 + }, + "7": { + "count": 212 + }, + "8": { + "count": 345 + }, + "9": { + "count": 146 + }, + "10": { + "count": 157 + } + } + } + } + }, + "train": { + "num_samples": 73928, + "number_of_characters": 2937230, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 39.73095444215994, + "max_text_length": 216, + "unique_text": 73219, + "unique_labels": 11, + "labels": { + "0": { + "count": 5262 + }, + "5": { + "count": 8334 + }, + "6": { + "count": 6961 + }, + "9": { + "count": 5313 + }, + "1": { + "count": 11107 + }, + "8": { + "count": 9698 + }, + "10": { + "count": 5084 + }, + "2": { + "count": 4770 + }, + "4": { + "count": 6644 + }, + "3": { + "count": 5191 + }, + "7": { + "count": 5564 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 15667, + "number_of_characters": 572977, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 36.57222186761984, + "max_text_length": 148, + "unique_text": 15634, + "unique_labels": 11, + "labels": { + "0": { + "count": 1165 + }, + "5": { + "count": 1657 + }, + "6": { + "count": 1402 + }, + "9": { + 
"count": 1303 + }, + "1": { + "count": 2187 + }, + "8": { + "count": 2157 + }, + "10": { + "count": 1219 + }, + "2": { + "count": 929 + }, + "4": { + "count": 1353 + }, + "3": { + "count": 1064 + }, + "7": { + "count": 1231 + } + } + }, + "de": { + "num_samples": 13424, + "number_of_characters": 580266, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 43.226013110846246, + "max_text_length": 174, + "unique_text": 13264, + "unique_labels": 11, + "labels": { + "0": { + "count": 761 + }, + "10": { + "count": 996 + }, + "4": { + "count": 1185 + }, + "1": { + "count": 2016 + }, + "7": { + "count": 1029 + }, + "5": { + "count": 1484 + }, + "2": { + "count": 814 + }, + "3": { + "count": 980 + }, + "6": { + "count": 1265 + }, + "8": { + "count": 1767 + }, + "9": { + "count": 1127 + } + } + }, + "es": { + "num_samples": 10934, + "number_of_characters": 476798, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 43.60691421254801, + "max_text_length": 186, + "unique_text": 10740, + "unique_labels": 11, + "labels": { + "1": { + "count": 1459 + }, + "6": { + "count": 1188 + }, + "4": { + "count": 928 + }, + "10": { + "count": 743 + }, + "3": { + "count": 830 + }, + "5": { + "count": 1396 + }, + "2": { + "count": 823 + }, + "8": { + "count": 1555 + }, + "7": { + "count": 525 + }, + "9": { + "count": 560 + }, + "0": { + "count": 927 + } + } + }, + "fr": { + "num_samples": 11814, + "number_of_characters": 515029, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 43.594802776367025, + "max_text_length": 184, + "unique_text": 11727, + "unique_labels": 11, + "labels": { + "0": { + "count": 861 + }, + "10": { + "count": 668 + }, + "1": { + "count": 1968 + }, + "7": { + "count": 975 + }, + "5": { + "count": 1261 + }, + "2": { + "count": 799 + }, + "3": { + "count": 734 + }, + "4": { + "count": 1082 + }, + "6": { + "count": 1113 + }, + "8": { + "count": 
1656 + }, + "9": { + "count": 697 + } + } + }, + "hi": { + "num_samples": 11330, + "number_of_characters": 425919, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 37.592144748455425, + "max_text_length": 216, + "unique_text": 11251, + "unique_labels": 11, + "labels": { + "0": { + "count": 794 + }, + "1": { + "count": 1741 + }, + "7": { + "count": 974 + }, + "2": { + "count": 670 + }, + "3": { + "count": 831 + }, + "5": { + "count": 1272 + }, + "6": { + "count": 940 + }, + "4": { + "count": 1073 + }, + "10": { + "count": 786 + }, + "8": { + "count": 1281 + }, + "9": { + "count": 968 + } + } + }, + "th": { + "num_samples": 10759, + "number_of_characters": 366241, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 34.04043126684636, + "max_text_length": 135, + "unique_text": 10622, + "unique_labels": 11, + "labels": { + "0": { + "count": 754 + }, + "10": { + "count": 672 + }, + "1": { + "count": 1736 + }, + "7": { + "count": 830 + }, + "2": { + "count": 735 + }, + "3": { + "count": 752 + }, + "5": { + "count": 1264 + }, + "6": { + "count": 1053 + }, + "4": { + "count": 1023 + }, + "8": { + "count": 1282 + }, + "9": { + "count": 658 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MTOPIntentClassification.json b/mteb/descriptive_stats/Classification/MTOPIntentClassification.json new file mode 100644 index 0000000000..68fe450868 --- /dev/null +++ b/mteb/descriptive_stats/Classification/MTOPIntentClassification.json @@ -0,0 +1,6398 @@ +{ + "validation": { + "num_samples": 10837, + "number_of_characters": 431895, + "number_texts_intersect_with_train": 127, + "min_text_length": 5, + "average_text_length": 39.85374181046415, + "max_text_length": 154, + "unique_text": 10830, + "unique_labels": 88, + "labels": { + "3": { + "count": 919 + }, + "18": { + "count": 645 + }, + "27": { + "count": 117 + }, + "15": { + "count": 322 + }, + "24": 
{ + "count": 81 + }, + "29": { + "count": 487 + }, + "14": { + "count": 713 + }, + "60": { + "count": 320 + }, + "84": { + "count": 69 + }, + "0": { + "count": 628 + }, + "25": { + "count": 1156 + }, + "1": { + "count": 188 + }, + "30": { + "count": 11 + }, + "16": { + "count": 692 + }, + "54": { + "count": 41 + }, + "59": { + "count": 464 + }, + "22": { + "count": 171 + }, + "9": { + "count": 49 + }, + "43": { + "count": 716 + }, + "63": { + "count": 103 + }, + "48": { + "count": 269 + }, + "11": { + "count": 114 + }, + "32": { + "count": 80 + }, + "4": { + "count": 453 + }, + "53": { + "count": 88 + }, + "65": { + "count": 92 + }, + "42": { + "count": 218 + }, + "5": { + "count": 139 + }, + "47": { + "count": 93 + }, + "35": { + "count": 41 + }, + "96": { + "count": 11 + }, + "33": { + "count": 42 + }, + "94": { + "count": 4 + }, + "13": { + "count": 23 + }, + "75": { + "count": 23 + }, + "34": { + "count": 43 + }, + "61": { + "count": 88 + }, + "52": { + "count": 65 + }, + "101": { + "count": 6 + }, + "49": { + "count": 29 + }, + "38": { + "count": 54 + }, + "17": { + "count": 40 + }, + "69": { + "count": 38 + }, + "45": { + "count": 58 + }, + "40": { + "count": 56 + }, + "51": { + "count": 39 + }, + "92": { + "count": 12 + }, + "77": { + "count": 11 + }, + "46": { + "count": 31 + }, + "7": { + "count": 22 + }, + "55": { + "count": 26 + }, + "87": { + "count": 6 + }, + "41": { + "count": 31 + }, + "36": { + "count": 45 + }, + "56": { + "count": 10 + }, + "37": { + "count": 36 + }, + "68": { + "count": 35 + }, + "90": { + "count": 6 + }, + "20": { + "count": 8 + }, + "85": { + "count": 8 + }, + "86": { + "count": 20 + }, + "44": { + "count": 39 + }, + "2": { + "count": 17 + }, + "76": { + "count": 5 + }, + "80": { + "count": 17 + }, + "72": { + "count": 34 + }, + "64": { + "count": 6 + }, + "19": { + "count": 49 + }, + "102": { + "count": 6 + }, + "89": { + "count": 20 + }, + "31": { + "count": 24 + }, + "79": { + "count": 33 + }, + "57": { + "count": 44 + }, + 
"104": { + "count": 18 + }, + "110": { + "count": 12 + }, + "62": { + "count": 24 + }, + "6": { + "count": 24 + }, + "21": { + "count": 3 + }, + "74": { + "count": 6 + }, + "73": { + "count": 7 + }, + "26": { + "count": 3 + }, + "66": { + "count": 6 + }, + "88": { + "count": 6 + }, + "10": { + "count": 6 + }, + "12": { + "count": 3 + }, + "93": { + "count": 6 + }, + "50": { + "count": 11 + }, + "107": { + "count": 3 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 2235, + "number_of_characters": 81663, + "number_texts_intersect_with_train": 7, + "min_text_length": 8, + "average_text_length": 36.53825503355705, + "max_text_length": 125, + "unique_text": 2235, + "unique_labels": 88, + "labels": { + "3": { + "count": 189 + }, + "18": { + "count": 163 + }, + "27": { + "count": 29 + }, + "15": { + "count": 65 + }, + "24": { + "count": 15 + }, + "29": { + "count": 93 + }, + "14": { + "count": 136 + }, + "60": { + "count": 59 + }, + "84": { + "count": 12 + }, + "0": { + "count": 131 + }, + "25": { + "count": 223 + }, + "1": { + "count": 41 + }, + "30": { + "count": 2 + }, + "16": { + "count": 149 + }, + "54": { + "count": 8 + }, + "59": { + "count": 120 + }, + "22": { + "count": 30 + }, + "9": { + "count": 9 + }, + "43": { + "count": 150 + }, + "63": { + "count": 21 + }, + "48": { + "count": 58 + }, + "11": { + "count": 22 + }, + "32": { + "count": 14 + }, + "4": { + "count": 94 + }, + "53": { + "count": 16 + }, + "65": { + "count": 18 + }, + "42": { + "count": 44 + }, + "5": { + "count": 24 + }, + "47": { + "count": 22 + }, + "35": { + "count": 8 + }, + "96": { + "count": 2 + }, + "33": { + "count": 8 + }, + "94": { + "count": 1 + }, + "13": { + "count": 4 + }, + "75": { + "count": 5 + }, + "34": { + "count": 8 + }, + "61": { + "count": 17 + }, + "52": { + "count": 15 + }, + "101": { + "count": 1 + }, + "49": { + "count": 6 + }, + "38": { + "count": 11 + }, + "17": { + "count": 7 + }, + "69": { + "count": 8 + }, + "45": { + "count": 11 + }, + "40": 
{ + "count": 10 + }, + "51": { + "count": 7 + }, + "92": { + "count": 2 + }, + "77": { + "count": 2 + }, + "46": { + "count": 7 + }, + "7": { + "count": 4 + }, + "55": { + "count": 6 + }, + "87": { + "count": 2 + }, + "41": { + "count": 7 + }, + "36": { + "count": 8 + }, + "56": { + "count": 2 + }, + "37": { + "count": 6 + }, + "68": { + "count": 8 + }, + "90": { + "count": 2 + }, + "20": { + "count": 2 + }, + "85": { + "count": 2 + }, + "86": { + "count": 4 + }, + "44": { + "count": 7 + }, + "2": { + "count": 4 + }, + "76": { + "count": 1 + }, + "80": { + "count": 3 + }, + "72": { + "count": 6 + }, + "64": { + "count": 1 + }, + "19": { + "count": 9 + }, + "102": { + "count": 1 + }, + "89": { + "count": 4 + }, + "31": { + "count": 4 + }, + "79": { + "count": 6 + }, + "57": { + "count": 12 + }, + "104": { + "count": 3 + }, + "110": { + "count": 2 + }, + "62": { + "count": 5 + }, + "6": { + "count": 4 + }, + "21": { + "count": 1 + }, + "74": { + "count": 1 + }, + "73": { + "count": 2 + }, + "26": { + "count": 1 + }, + "66": { + "count": 1 + }, + "88": { + "count": 1 + }, + "10": { + "count": 1 + }, + "12": { + "count": 1 + }, + "93": { + "count": 1 + }, + "50": { + "count": 2 + }, + "107": { + "count": 1 + } + } + }, + "de": { + "num_samples": 1815, + "number_of_characters": 77727, + "number_texts_intersect_with_train": 23, + "min_text_length": 10, + "average_text_length": 42.824793388429754, + "max_text_length": 154, + "unique_text": 1814, + "unique_labels": 87, + "labels": { + "1": { + "count": 32 + }, + "0": { + "count": 65 + }, + "3": { + "count": 174 + }, + "6": { + "count": 4 + }, + "84": { + "count": 12 + }, + "15": { + "count": 57 + }, + "11": { + "count": 20 + }, + "14": { + "count": 120 + }, + "16": { + "count": 107 + }, + "17": { + "count": 6 + }, + "19": { + "count": 9 + }, + "24": { + "count": 13 + }, + "29": { + "count": 78 + }, + "30": { + "count": 2 + }, + "4": { + "count": 78 + }, + "88": { + "count": 1 + }, + "51": { + "count": 7 + }, + "25": { + 
"count": 199 + }, + "40": { + "count": 9 + }, + "38": { + "count": 10 + }, + "22": { + "count": 29 + }, + "41": { + "count": 6 + }, + "43": { + "count": 106 + }, + "5": { + "count": 23 + }, + "45": { + "count": 9 + }, + "46": { + "count": 6 + }, + "32": { + "count": 13 + }, + "48": { + "count": 50 + }, + "42": { + "count": 34 + }, + "49": { + "count": 5 + }, + "92": { + "count": 2 + }, + "47": { + "count": 17 + }, + "57": { + "count": 9 + }, + "52": { + "count": 11 + }, + "54": { + "count": 8 + }, + "55": { + "count": 5 + }, + "18": { + "count": 115 + }, + "59": { + "count": 107 + }, + "60": { + "count": 46 + }, + "61": { + "count": 15 + }, + "9": { + "count": 9 + }, + "63": { + "count": 17 + }, + "64": { + "count": 1 + }, + "77": { + "count": 2 + }, + "86": { + "count": 3 + }, + "65": { + "count": 16 + }, + "72": { + "count": 6 + }, + "66": { + "count": 1 + }, + "79": { + "count": 6 + }, + "12": { + "count": 1 + }, + "102": { + "count": 1 + }, + "75": { + "count": 5 + }, + "7": { + "count": 2 + }, + "37": { + "count": 6 + }, + "36": { + "count": 7 + }, + "35": { + "count": 7 + }, + "87": { + "count": 2 + }, + "31": { + "count": 4 + }, + "33": { + "count": 7 + }, + "94": { + "count": 1 + }, + "73": { + "count": 1 + }, + "53": { + "count": 12 + }, + "27": { + "count": 15 + }, + "69": { + "count": 8 + }, + "13": { + "count": 4 + }, + "68": { + "count": 5 + }, + "74": { + "count": 1 + }, + "85": { + "count": 1 + }, + "110": { + "count": 2 + }, + "34": { + "count": 7 + }, + "80": { + "count": 3 + }, + "62": { + "count": 4 + }, + "44": { + "count": 5 + }, + "90": { + "count": 2 + }, + "50": { + "count": 2 + }, + "89": { + "count": 4 + }, + "56": { + "count": 2 + }, + "101": { + "count": 1 + }, + "2": { + "count": 2 + }, + "20": { + "count": 1 + }, + "26": { + "count": 1 + }, + "104": { + "count": 3 + }, + "93": { + "count": 1 + }, + "21": { + "count": 1 + }, + "107": { + "count": 1 + }, + "10": { + "count": 1 + }, + "96": { + "count": 2 + } + } + }, + "es": { + 
"num_samples": 1527, + "number_of_characters": 67720, + "number_texts_intersect_with_train": 41, + "min_text_length": 11, + "average_text_length": 44.34839554682384, + "max_text_length": 134, + "unique_text": 1525, + "unique_labels": 79, + "labels": { + "3": { + "count": 79 + }, + "22": { + "count": 28 + }, + "5": { + "count": 23 + }, + "33": { + "count": 5 + }, + "45": { + "count": 8 + }, + "34": { + "count": 6 + }, + "35": { + "count": 6 + }, + "89": { + "count": 4 + }, + "32": { + "count": 13 + }, + "51": { + "count": 5 + }, + "104": { + "count": 3 + }, + "31": { + "count": 4 + }, + "44": { + "count": 6 + }, + "110": { + "count": 2 + }, + "46": { + "count": 4 + }, + "14": { + "count": 122 + }, + "37": { + "count": 6 + }, + "36": { + "count": 8 + }, + "38": { + "count": 7 + }, + "40": { + "count": 10 + }, + "24": { + "count": 13 + }, + "16": { + "count": 97 + }, + "17": { + "count": 7 + }, + "80": { + "count": 3 + }, + "41": { + "count": 7 + }, + "62": { + "count": 4 + }, + "69": { + "count": 4 + }, + "76": { + "count": 1 + }, + "68": { + "count": 3 + }, + "72": { + "count": 4 + }, + "18": { + "count": 86 + }, + "63": { + "count": 16 + }, + "65": { + "count": 12 + }, + "15": { + "count": 52 + }, + "11": { + "count": 19 + }, + "19": { + "count": 8 + }, + "9": { + "count": 7 + }, + "13": { + "count": 3 + }, + "75": { + "count": 2 + }, + "25": { + "count": 185 + }, + "10": { + "count": 1 + }, + "88": { + "count": 1 + }, + "73": { + "count": 1 + }, + "77": { + "count": 1 + }, + "79": { + "count": 4 + }, + "66": { + "count": 1 + }, + "102": { + "count": 1 + }, + "64": { + "count": 1 + }, + "74": { + "count": 1 + }, + "86": { + "count": 4 + }, + "6": { + "count": 4 + }, + "84": { + "count": 12 + }, + "7": { + "count": 4 + }, + "29": { + "count": 87 + }, + "20": { + "count": 2 + }, + "85": { + "count": 2 + }, + "27": { + "count": 3 + }, + "93": { + "count": 1 + }, + "43": { + "count": 125 + }, + "48": { + "count": 28 + }, + "42": { + "count": 34 + }, + "90": { + 
"count": 1 + }, + "50": { + "count": 2 + }, + "49": { + "count": 5 + }, + "47": { + "count": 13 + }, + "4": { + "count": 62 + }, + "96": { + "count": 1 + }, + "53": { + "count": 16 + }, + "101": { + "count": 1 + }, + "59": { + "count": 21 + }, + "60": { + "count": 49 + }, + "61": { + "count": 6 + }, + "0": { + "count": 112 + }, + "1": { + "count": 29 + }, + "2": { + "count": 3 + }, + "30": { + "count": 2 + }, + "92": { + "count": 2 + }, + "94": { + "count": 1 + }, + "54": { + "count": 1 + } + } + }, + "fr": { + "num_samples": 1577, + "number_of_characters": 68008, + "number_texts_intersect_with_train": 12, + "min_text_length": 11, + "average_text_length": 43.12492073557387, + "max_text_length": 141, + "unique_text": 1575, + "unique_labels": 78, + "labels": { + "0": { + "count": 105 + }, + "1": { + "count": 19 + }, + "3": { + "count": 154 + }, + "6": { + "count": 4 + }, + "75": { + "count": 3 + }, + "16": { + "count": 96 + }, + "29": { + "count": 69 + }, + "4": { + "count": 67 + }, + "34": { + "count": 7 + }, + "32": { + "count": 14 + }, + "14": { + "count": 98 + }, + "40": { + "count": 8 + }, + "25": { + "count": 164 + }, + "22": { + "count": 28 + }, + "45": { + "count": 10 + }, + "5": { + "count": 23 + }, + "46": { + "count": 3 + }, + "33": { + "count": 7 + }, + "51": { + "count": 6 + }, + "48": { + "count": 46 + }, + "43": { + "count": 104 + }, + "42": { + "count": 32 + }, + "49": { + "count": 5 + }, + "47": { + "count": 12 + }, + "52": { + "count": 13 + }, + "57": { + "count": 6 + }, + "53": { + "count": 14 + }, + "27": { + "count": 29 + }, + "61": { + "count": 17 + }, + "60": { + "count": 56 + }, + "59": { + "count": 51 + }, + "15": { + "count": 44 + }, + "18": { + "count": 62 + }, + "64": { + "count": 1 + }, + "65": { + "count": 14 + }, + "77": { + "count": 2 + }, + "86": { + "count": 3 + }, + "9": { + "count": 7 + }, + "19": { + "count": 5 + }, + "72": { + "count": 6 + }, + "66": { + "count": 1 + }, + "63": { + "count": 15 + }, + "11": { + "count": 11 + }, + 
"79": { + "count": 5 + }, + "24": { + "count": 13 + }, + "2": { + "count": 1 + }, + "84": { + "count": 11 + }, + "7": { + "count": 4 + }, + "17": { + "count": 6 + }, + "30": { + "count": 1 + }, + "88": { + "count": 1 + }, + "110": { + "count": 2 + }, + "31": { + "count": 4 + }, + "35": { + "count": 6 + }, + "38": { + "count": 7 + }, + "37": { + "count": 6 + }, + "36": { + "count": 6 + }, + "44": { + "count": 7 + }, + "92": { + "count": 2 + }, + "54": { + "count": 8 + }, + "55": { + "count": 5 + }, + "68": { + "count": 7 + }, + "69": { + "count": 6 + }, + "80": { + "count": 2 + }, + "13": { + "count": 4 + }, + "74": { + "count": 1 + }, + "62": { + "count": 4 + }, + "102": { + "count": 1 + }, + "85": { + "count": 1 + }, + "50": { + "count": 1 + }, + "89": { + "count": 3 + }, + "56": { + "count": 2 + }, + "101": { + "count": 1 + }, + "104": { + "count": 3 + }, + "93": { + "count": 1 + }, + "10": { + "count": 1 + }, + "96": { + "count": 2 + }, + "76": { + "count": 1 + } + } + }, + "hi": { + "num_samples": 2012, + "number_of_characters": 78749, + "number_texts_intersect_with_train": 16, + "min_text_length": 7, + "average_text_length": 39.139662027833005, + "max_text_length": 131, + "unique_text": 2011, + "unique_labels": 88, + "labels": { + "1": { + "count": 39 + }, + "3": { + "count": 171 + }, + "15": { + "count": 57 + }, + "16": { + "count": 140 + }, + "29": { + "count": 82 + }, + "9": { + "count": 9 + }, + "18": { + "count": 134 + }, + "22": { + "count": 28 + }, + "5": { + "count": 23 + }, + "35": { + "count": 7 + }, + "32": { + "count": 13 + }, + "51": { + "count": 7 + }, + "25": { + "count": 204 + }, + "14": { + "count": 120 + }, + "38": { + "count": 10 + }, + "24": { + "count": 13 + }, + "4": { + "count": 84 + }, + "0": { + "count": 118 + }, + "44": { + "count": 7 + }, + "48": { + "count": 49 + }, + "42": { + "count": 39 + }, + "43": { + "count": 120 + }, + "27": { + "count": 28 + }, + "53": { + "count": 16 + }, + "55": { + "count": 6 + }, + "101": { + "count": 1 
+ }, + "59": { + "count": 108 + }, + "60": { + "count": 58 + }, + "63": { + "count": 19 + }, + "61": { + "count": 17 + }, + "65": { + "count": 18 + }, + "69": { + "count": 6 + }, + "68": { + "count": 7 + }, + "79": { + "count": 6 + }, + "13": { + "count": 4 + }, + "11": { + "count": 22 + }, + "2": { + "count": 4 + }, + "6": { + "count": 4 + }, + "7": { + "count": 4 + }, + "84": { + "count": 10 + }, + "19": { + "count": 9 + }, + "17": { + "count": 7 + }, + "20": { + "count": 2 + }, + "26": { + "count": 1 + }, + "85": { + "count": 1 + }, + "33": { + "count": 8 + }, + "45": { + "count": 11 + }, + "34": { + "count": 8 + }, + "80": { + "count": 3 + }, + "37": { + "count": 6 + }, + "36": { + "count": 8 + }, + "40": { + "count": 10 + }, + "104": { + "count": 3 + }, + "87": { + "count": 2 + }, + "31": { + "count": 4 + }, + "41": { + "count": 6 + }, + "93": { + "count": 1 + }, + "46": { + "count": 7 + }, + "47": { + "count": 16 + }, + "90": { + "count": 1 + }, + "49": { + "count": 4 + }, + "50": { + "count": 2 + }, + "89": { + "count": 2 + }, + "57": { + "count": 12 + }, + "56": { + "count": 2 + }, + "52": { + "count": 15 + }, + "54": { + "count": 8 + }, + "62": { + "count": 4 + }, + "75": { + "count": 5 + }, + "72": { + "count": 6 + }, + "73": { + "count": 2 + }, + "110": { + "count": 2 + }, + "10": { + "count": 1 + }, + "96": { + "count": 2 + }, + "76": { + "count": 1 + }, + "88": { + "count": 1 + }, + "92": { + "count": 2 + }, + "77": { + "count": 2 + }, + "102": { + "count": 1 + }, + "21": { + "count": 1 + }, + "107": { + "count": 1 + }, + "30": { + "count": 2 + }, + "86": { + "count": 3 + }, + "12": { + "count": 1 + }, + "94": { + "count": 1 + }, + "64": { + "count": 1 + }, + "66": { + "count": 1 + }, + "74": { + "count": 1 + } + } + }, + "th": { + "num_samples": 1671, + "number_of_characters": 58028, + "number_texts_intersect_with_train": 28, + "min_text_length": 5, + "average_text_length": 34.726511071214844, + "max_text_length": 105, + "unique_text": 1670, + 
"unique_labels": 81, + "labels": { + "0": { + "count": 97 + }, + "1": { + "count": 28 + }, + "3": { + "count": 152 + }, + "6": { + "count": 4 + }, + "84": { + "count": 12 + }, + "2": { + "count": 3 + }, + "15": { + "count": 47 + }, + "19": { + "count": 9 + }, + "17": { + "count": 7 + }, + "29": { + "count": 78 + }, + "25": { + "count": 181 + }, + "14": { + "count": 117 + }, + "85": { + "count": 1 + }, + "33": { + "count": 7 + }, + "34": { + "count": 7 + }, + "37": { + "count": 6 + }, + "35": { + "count": 7 + }, + "38": { + "count": 9 + }, + "16": { + "count": 103 + }, + "32": { + "count": 13 + }, + "41": { + "count": 5 + }, + "4": { + "count": 68 + }, + "93": { + "count": 1 + }, + "44": { + "count": 7 + }, + "5": { + "count": 23 + }, + "22": { + "count": 28 + }, + "47": { + "count": 13 + }, + "48": { + "count": 38 + }, + "42": { + "count": 35 + }, + "43": { + "count": 111 + }, + "49": { + "count": 4 + }, + "50": { + "count": 2 + }, + "89": { + "count": 3 + }, + "27": { + "count": 13 + }, + "56": { + "count": 2 + }, + "52": { + "count": 11 + }, + "54": { + "count": 8 + }, + "55": { + "count": 4 + }, + "59": { + "count": 57 + }, + "60": { + "count": 52 + }, + "61": { + "count": 16 + }, + "24": { + "count": 14 + }, + "9": { + "count": 8 + }, + "36": { + "count": 8 + }, + "65": { + "count": 14 + }, + "63": { + "count": 15 + }, + "18": { + "count": 85 + }, + "69": { + "count": 6 + }, + "11": { + "count": 20 + }, + "62": { + "count": 3 + }, + "79": { + "count": 6 + }, + "72": { + "count": 6 + }, + "53": { + "count": 14 + }, + "68": { + "count": 5 + }, + "40": { + "count": 9 + }, + "13": { + "count": 4 + }, + "75": { + "count": 3 + }, + "7": { + "count": 4 + }, + "45": { + "count": 9 + }, + "51": { + "count": 7 + }, + "46": { + "count": 4 + }, + "20": { + "count": 1 + }, + "80": { + "count": 3 + }, + "104": { + "count": 3 + }, + "31": { + "count": 4 + }, + "110": { + "count": 2 + }, + "96": { + "count": 2 + }, + "10": { + "count": 1 + }, + "57": { + "count": 5 + }, + 
"76": { + "count": 1 + }, + "30": { + "count": 2 + }, + "88": { + "count": 1 + }, + "92": { + "count": 2 + }, + "73": { + "count": 1 + }, + "77": { + "count": 2 + }, + "86": { + "count": 3 + }, + "66": { + "count": 1 + }, + "102": { + "count": 1 + }, + "64": { + "count": 1 + }, + "74": { + "count": 1 + }, + "101": { + "count": 1 + } + } + } + } + }, + "test": { + "num_samples": 19680, + "number_of_characters": 781580, + "number_texts_intersect_with_train": 332, + "min_text_length": 3, + "average_text_length": 39.71443089430894, + "max_text_length": 168, + "unique_text": 19627, + "unique_labels": 102, + "labels": { + "29": { + "count": 794 + }, + "25": { + "count": 2294 + }, + "14": { + "count": 1429 + }, + "43": { + "count": 1487 + }, + "59": { + "count": 692 + }, + "37": { + "count": 88 + }, + "3": { + "count": 1687 + }, + "31": { + "count": 62 + }, + "46": { + "count": 82 + }, + "18": { + "count": 1146 + }, + "11": { + "count": 284 + }, + "0": { + "count": 1188 + }, + "4": { + "count": 906 + }, + "48": { + "count": 392 + }, + "5": { + "count": 295 + }, + "63": { + "count": 176 + }, + "16": { + "count": 1036 + }, + "22": { + "count": 331 + }, + "72": { + "count": 112 + }, + "53": { + "count": 178 + }, + "20": { + "count": 6 + }, + "60": { + "count": 545 + }, + "55": { + "count": 61 + }, + "80": { + "count": 50 + }, + "38": { + "count": 90 + }, + "51": { + "count": 60 + }, + "1": { + "count": 243 + }, + "17": { + "count": 86 + }, + "12": { + "count": 71 + }, + "35": { + "count": 87 + }, + "65": { + "count": 176 + }, + "61": { + "count": 80 + }, + "15": { + "count": 508 + }, + "9": { + "count": 85 + }, + "27": { + "count": 151 + }, + "90": { + "count": 23 + }, + "6": { + "count": 44 + }, + "45": { + "count": 68 + }, + "57": { + "count": 46 + }, + "75": { + "count": 74 + }, + "47": { + "count": 157 + }, + "74": { + "count": 57 + }, + "42": { + "count": 420 + }, + "103": { + "count": 16 + }, + "88": { + "count": 29 + }, + "19": { + "count": 40 + }, + "52": { + 
"count": 84 + }, + "26": { + "count": 3 + }, + "32": { + "count": 183 + }, + "24": { + "count": 190 + }, + "33": { + "count": 49 + }, + "85": { + "count": 18 + }, + "30": { + "count": 6 + }, + "44": { + "count": 53 + }, + "73": { + "count": 17 + }, + "62": { + "count": 34 + }, + "36": { + "count": 144 + }, + "54": { + "count": 49 + }, + "79": { + "count": 73 + }, + "49": { + "count": 35 + }, + "34": { + "count": 60 + }, + "84": { + "count": 41 + }, + "89": { + "count": 17 + }, + "69": { + "count": 62 + }, + "96": { + "count": 14 + }, + "41": { + "count": 39 + }, + "81": { + "count": 29 + }, + "40": { + "count": 73 + }, + "10": { + "count": 67 + }, + "7": { + "count": 43 + }, + "13": { + "count": 56 + }, + "2": { + "count": 12 + }, + "58": { + "count": 5 + }, + "77": { + "count": 25 + }, + "109": { + "count": 5 + }, + "56": { + "count": 6 + }, + "50": { + "count": 15 + }, + "101": { + "count": 18 + }, + "67": { + "count": 11 + }, + "68": { + "count": 23 + }, + "28": { + "count": 16 + }, + "64": { + "count": 8 + }, + "93": { + "count": 21 + }, + "92": { + "count": 6 + }, + "21": { + "count": 5 + }, + "108": { + "count": 11 + }, + "78": { + "count": 18 + }, + "86": { + "count": 11 + }, + "97": { + "count": 2 + }, + "91": { + "count": 8 + }, + "105": { + "count": 3 + }, + "106": { + "count": 12 + }, + "102": { + "count": 3 + }, + "94": { + "count": 8 + }, + "76": { + "count": 6 + }, + "8": { + "count": 2 + }, + "66": { + "count": 8 + }, + "82": { + "count": 5 + }, + "71": { + "count": 15 + }, + "98": { + "count": 5 + }, + "70": { + "count": 10 + }, + "95": { + "count": 6 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 4386, + "number_of_characters": 161376, + "number_texts_intersect_with_train": 15, + "min_text_length": 3, + "average_text_length": 36.79343365253078, + "max_text_length": 132, + "unique_text": 4384, + "unique_labels": 102, + "labels": { + "29": { + "count": 159 + }, + "25": { + "count": 473 + }, + "14": { + "count": 303 + }, + 
"43": { + "count": 358 + }, + "59": { + "count": 215 + }, + "37": { + "count": 16 + }, + "3": { + "count": 350 + }, + "31": { + "count": 12 + }, + "46": { + "count": 17 + }, + "18": { + "count": 316 + }, + "11": { + "count": 59 + }, + "0": { + "count": 274 + }, + "4": { + "count": 202 + }, + "48": { + "count": 95 + }, + "5": { + "count": 56 + }, + "63": { + "count": 40 + }, + "16": { + "count": 235 + }, + "22": { + "count": 63 + }, + "72": { + "count": 23 + }, + "53": { + "count": 34 + }, + "20": { + "count": 1 + }, + "60": { + "count": 112 + }, + "55": { + "count": 18 + }, + "80": { + "count": 12 + }, + "38": { + "count": 19 + }, + "51": { + "count": 13 + }, + "1": { + "count": 60 + }, + "17": { + "count": 17 + }, + "12": { + "count": 14 + }, + "35": { + "count": 17 + }, + "65": { + "count": 37 + }, + "61": { + "count": 19 + }, + "15": { + "count": 113 + }, + "9": { + "count": 20 + }, + "27": { + "count": 39 + }, + "90": { + "count": 5 + }, + "6": { + "count": 10 + }, + "45": { + "count": 13 + }, + "57": { + "count": 11 + }, + "75": { + "count": 15 + }, + "47": { + "count": 37 + }, + "74": { + "count": 11 + }, + "42": { + "count": 96 + }, + "103": { + "count": 3 + }, + "88": { + "count": 5 + }, + "19": { + "count": 9 + }, + "52": { + "count": 23 + }, + "26": { + "count": 1 + }, + "32": { + "count": 33 + }, + "24": { + "count": 38 + }, + "33": { + "count": 9 + }, + "85": { + "count": 4 + }, + "30": { + "count": 1 + }, + "44": { + "count": 10 + }, + "73": { + "count": 3 + }, + "62": { + "count": 7 + }, + "36": { + "count": 26 + }, + "54": { + "count": 11 + }, + "79": { + "count": 15 + }, + "49": { + "count": 8 + }, + "34": { + "count": 11 + }, + "84": { + "count": 9 + }, + "89": { + "count": 3 + }, + "69": { + "count": 13 + }, + "96": { + "count": 5 + }, + "41": { + "count": 9 + }, + "81": { + "count": 6 + }, + "40": { + "count": 16 + }, + "10": { + "count": 12 + }, + "7": { + "count": 8 + }, + "13": { + "count": 12 + }, + "2": { + "count": 4 + }, + "58": { + 
"count": 1 + }, + "77": { + "count": 5 + }, + "109": { + "count": 1 + }, + "56": { + "count": 1 + }, + "50": { + "count": 4 + }, + "101": { + "count": 4 + }, + "67": { + "count": 2 + }, + "68": { + "count": 4 + }, + "28": { + "count": 3 + }, + "64": { + "count": 2 + }, + "93": { + "count": 4 + }, + "92": { + "count": 2 + }, + "21": { + "count": 1 + }, + "108": { + "count": 3 + }, + "78": { + "count": 3 + }, + "86": { + "count": 2 + }, + "97": { + "count": 1 + }, + "91": { + "count": 2 + }, + "105": { + "count": 1 + }, + "106": { + "count": 2 + }, + "102": { + "count": 1 + }, + "94": { + "count": 2 + }, + "76": { + "count": 1 + }, + "8": { + "count": 1 + }, + "66": { + "count": 2 + }, + "82": { + "count": 1 + }, + "71": { + "count": 3 + }, + "98": { + "count": 1 + }, + "70": { + "count": 2 + }, + "95": { + "count": 1 + } + } + }, + "de": { + "num_samples": 3549, + "number_of_characters": 151445, + "number_texts_intersect_with_train": 69, + "min_text_length": 7, + "average_text_length": 42.67258382642998, + "max_text_length": 162, + "unique_text": 3536, + "unique_labels": 99, + "labels": { + "0": { + "count": 145 + }, + "1": { + "count": 45 + }, + "18": { + "count": 237 + }, + "3": { + "count": 312 + }, + "6": { + "count": 7 + }, + "31": { + "count": 10 + }, + "65": { + "count": 31 + }, + "10": { + "count": 12 + }, + "15": { + "count": 99 + }, + "16": { + "count": 195 + }, + "17": { + "count": 15 + }, + "14": { + "count": 256 + }, + "29": { + "count": 133 + }, + "7": { + "count": 7 + }, + "21": { + "count": 1 + }, + "36": { + "count": 24 + }, + "22": { + "count": 55 + }, + "27": { + "count": 27 + }, + "44": { + "count": 8 + }, + "86": { + "count": 2 + }, + "88": { + "count": 5 + }, + "35": { + "count": 17 + }, + "34": { + "count": 11 + }, + "89": { + "count": 3 + }, + "32": { + "count": 32 + }, + "25": { + "count": 425 + }, + "24": { + "count": 30 + }, + "13": { + "count": 12 + }, + "5": { + "count": 53 + }, + "4": { + "count": 160 + }, + "33": { + "count": 9 + }, + 
"46": { + "count": 16 + }, + "45": { + "count": 12 + }, + "43": { + "count": 245 + }, + "47": { + "count": 31 + }, + "48": { + "count": 72 + }, + "50": { + "count": 2 + }, + "49": { + "count": 7 + }, + "42": { + "count": 80 + }, + "92": { + "count": 2 + }, + "93": { + "count": 4 + }, + "51": { + "count": 10 + }, + "98": { + "count": 1 + }, + "52": { + "count": 13 + }, + "54": { + "count": 11 + }, + "53": { + "count": 32 + }, + "57": { + "count": 11 + }, + "101": { + "count": 4 + }, + "59": { + "count": 184 + }, + "60": { + "count": 90 + }, + "37": { + "count": 15 + }, + "19": { + "count": 9 + }, + "61": { + "count": 17 + }, + "12": { + "count": 13 + }, + "81": { + "count": 5 + }, + "80": { + "count": 6 + }, + "69": { + "count": 11 + }, + "70": { + "count": 1 + }, + "72": { + "count": 19 + }, + "79": { + "count": 14 + }, + "11": { + "count": 51 + }, + "75": { + "count": 14 + }, + "77": { + "count": 5 + }, + "74": { + "count": 10 + }, + "41": { + "count": 7 + }, + "9": { + "count": 17 + }, + "38": { + "count": 17 + }, + "30": { + "count": 1 + }, + "73": { + "count": 3 + }, + "96": { + "count": 2 + }, + "55": { + "count": 12 + }, + "64": { + "count": 1 + }, + "63": { + "count": 26 + }, + "62": { + "count": 7 + }, + "71": { + "count": 3 + }, + "68": { + "count": 4 + }, + "84": { + "count": 8 + }, + "28": { + "count": 2 + }, + "40": { + "count": 11 + }, + "90": { + "count": 5 + }, + "95": { + "count": 1 + }, + "58": { + "count": 1 + }, + "76": { + "count": 1 + }, + "103": { + "count": 1 + }, + "20": { + "count": 1 + }, + "94": { + "count": 2 + }, + "109": { + "count": 1 + }, + "108": { + "count": 3 + }, + "67": { + "count": 1 + }, + "78": { + "count": 3 + }, + "66": { + "count": 1 + }, + "2": { + "count": 2 + }, + "85": { + "count": 2 + }, + "97": { + "count": 1 + }, + "106": { + "count": 2 + }, + "26": { + "count": 1 + }, + "91": { + "count": 2 + }, + "56": { + "count": 1 + }, + "82": { + "count": 1 + } + } + }, + "es": { + "num_samples": 2998, + 
"number_of_characters": 130569, + "number_texts_intersect_with_train": 97, + "min_text_length": 6, + "average_text_length": 43.552034689793196, + "max_text_length": 168, + "unique_text": 2983, + "unique_labels": 90, + "labels": { + "3": { + "count": 179 + }, + "44": { + "count": 8 + }, + "22": { + "count": 55 + }, + "32": { + "count": 31 + }, + "88": { + "count": 5 + }, + "31": { + "count": 8 + }, + "45": { + "count": 13 + }, + "5": { + "count": 45 + }, + "35": { + "count": 16 + }, + "34": { + "count": 8 + }, + "51": { + "count": 8 + }, + "46": { + "count": 14 + }, + "33": { + "count": 8 + }, + "24": { + "count": 32 + }, + "14": { + "count": 248 + }, + "40": { + "count": 15 + }, + "36": { + "count": 25 + }, + "37": { + "count": 15 + }, + "38": { + "count": 17 + }, + "16": { + "count": 141 + }, + "17": { + "count": 14 + }, + "80": { + "count": 10 + }, + "41": { + "count": 8 + }, + "102": { + "count": 1 + }, + "72": { + "count": 19 + }, + "68": { + "count": 4 + }, + "69": { + "count": 8 + }, + "62": { + "count": 5 + }, + "79": { + "count": 11 + }, + "77": { + "count": 2 + }, + "81": { + "count": 4 + }, + "67": { + "count": 2 + }, + "78": { + "count": 3 + }, + "106": { + "count": 2 + }, + "66": { + "count": 1 + }, + "73": { + "count": 2 + }, + "71": { + "count": 3 + }, + "18": { + "count": 177 + }, + "63": { + "count": 28 + }, + "15": { + "count": 89 + }, + "65": { + "count": 29 + }, + "12": { + "count": 13 + }, + "13": { + "count": 7 + }, + "19": { + "count": 7 + }, + "11": { + "count": 50 + }, + "74": { + "count": 9 + }, + "9": { + "count": 15 + }, + "75": { + "count": 12 + }, + "10": { + "count": 10 + }, + "25": { + "count": 392 + }, + "86": { + "count": 2 + }, + "89": { + "count": 3 + }, + "21": { + "count": 1 + }, + "95": { + "count": 1 + }, + "105": { + "count": 1 + }, + "70": { + "count": 2 + }, + "76": { + "count": 1 + }, + "64": { + "count": 1 + }, + "6": { + "count": 9 + }, + "84": { + "count": 4 + }, + "7": { + "count": 8 + }, + "29": { + "count": 146 + }, 
+ "20": { + "count": 1 + }, + "85": { + "count": 4 + }, + "28": { + "count": 3 + }, + "43": { + "count": 274 + }, + "42": { + "count": 62 + }, + "93": { + "count": 4 + }, + "48": { + "count": 48 + }, + "91": { + "count": 2 + }, + "49": { + "count": 6 + }, + "47": { + "count": 31 + }, + "94": { + "count": 2 + }, + "90": { + "count": 2 + }, + "50": { + "count": 4 + }, + "4": { + "count": 119 + }, + "53": { + "count": 30 + }, + "55": { + "count": 1 + }, + "101": { + "count": 2 + }, + "60": { + "count": 90 + }, + "61": { + "count": 7 + }, + "59": { + "count": 29 + }, + "0": { + "count": 225 + }, + "1": { + "count": 34 + }, + "2": { + "count": 2 + }, + "103": { + "count": 3 + }, + "27": { + "count": 2 + }, + "30": { + "count": 1 + }, + "52": { + "count": 2 + }, + "56": { + "count": 1 + } + } + }, + "fr": { + "num_samples": 3193, + "number_of_characters": 140029, + "number_texts_intersect_with_train": 45, + "min_text_length": 6, + "average_text_length": 43.854995302223614, + "max_text_length": 143, + "unique_text": 3187, + "unique_labels": 99, + "labels": { + "0": { + "count": 209 + }, + "1": { + "count": 40 + }, + "3": { + "count": 310 + }, + "6": { + "count": 10 + }, + "84": { + "count": 9 + }, + "103": { + "count": 3 + }, + "31": { + "count": 12 + }, + "65": { + "count": 21 + }, + "11": { + "count": 41 + }, + "32": { + "count": 31 + }, + "16": { + "count": 169 + }, + "14": { + "count": 236 + }, + "15": { + "count": 73 + }, + "22": { + "count": 57 + }, + "29": { + "count": 125 + }, + "27": { + "count": 36 + }, + "35": { + "count": 14 + }, + "89": { + "count": 3 + }, + "25": { + "count": 344 + }, + "24": { + "count": 32 + }, + "13": { + "count": 10 + }, + "36": { + "count": 23 + }, + "5": { + "count": 52 + }, + "34": { + "count": 10 + }, + "4": { + "count": 152 + }, + "43": { + "count": 261 + }, + "33": { + "count": 9 + }, + "45": { + "count": 13 + }, + "46": { + "count": 15 + }, + "42": { + "count": 80 + }, + "48": { + "count": 76 + }, + "92": { + "count": 2 + }, + 
"49": { + "count": 5 + }, + "18": { + "count": 126 + }, + "51": { + "count": 9 + }, + "52": { + "count": 14 + }, + "53": { + "count": 29 + }, + "57": { + "count": 8 + }, + "54": { + "count": 10 + }, + "56": { + "count": 1 + }, + "101": { + "count": 3 + }, + "61": { + "count": 16 + }, + "60": { + "count": 96 + }, + "37": { + "count": 14 + }, + "74": { + "count": 8 + }, + "40": { + "count": 12 + }, + "75": { + "count": 12 + }, + "12": { + "count": 11 + }, + "59": { + "count": 69 + }, + "71": { + "count": 3 + }, + "80": { + "count": 12 + }, + "68": { + "count": 4 + }, + "82": { + "count": 1 + }, + "72": { + "count": 17 + }, + "77": { + "count": 5 + }, + "63": { + "count": 31 + }, + "38": { + "count": 13 + }, + "79": { + "count": 12 + }, + "106": { + "count": 2 + }, + "7": { + "count": 6 + }, + "8": { + "count": 1 + }, + "10": { + "count": 11 + }, + "19": { + "count": 5 + }, + "17": { + "count": 16 + }, + "28": { + "count": 3 + }, + "85": { + "count": 3 + }, + "30": { + "count": 1 + }, + "44": { + "count": 10 + }, + "86": { + "count": 1 + }, + "88": { + "count": 5 + }, + "47": { + "count": 25 + }, + "90": { + "count": 5 + }, + "93": { + "count": 4 + }, + "50": { + "count": 2 + }, + "95": { + "count": 1 + }, + "105": { + "count": 1 + }, + "73": { + "count": 3 + }, + "98": { + "count": 1 + }, + "58": { + "count": 1 + }, + "55": { + "count": 12 + }, + "64": { + "count": 2 + }, + "9": { + "count": 9 + }, + "81": { + "count": 4 + }, + "69": { + "count": 12 + }, + "70": { + "count": 2 + }, + "41": { + "count": 6 + }, + "62": { + "count": 2 + }, + "76": { + "count": 1 + }, + "20": { + "count": 1 + }, + "94": { + "count": 1 + }, + "96": { + "count": 3 + }, + "109": { + "count": 1 + }, + "108": { + "count": 2 + }, + "67": { + "count": 2 + }, + "78": { + "count": 3 + }, + "66": { + "count": 1 + }, + "2": { + "count": 1 + }, + "91": { + "count": 1 + }, + "102": { + "count": 1 + } + } + }, + "hi": { + "num_samples": 2789, + "number_of_characters": 104295, + 
"number_texts_intersect_with_train": 32, + "min_text_length": 7, + "average_text_length": 37.395123700250984, + "max_text_length": 148, + "unique_text": 2785, + "unique_labels": 95, + "labels": { + "0": { + "count": 170 + }, + "3": { + "count": 278 + }, + "44": { + "count": 8 + }, + "25": { + "count": 322 + }, + "15": { + "count": 66 + }, + "16": { + "count": 150 + }, + "17": { + "count": 11 + }, + "20": { + "count": 1 + }, + "22": { + "count": 49 + }, + "29": { + "count": 109 + }, + "88": { + "count": 5 + }, + "31": { + "count": 10 + }, + "45": { + "count": 5 + }, + "5": { + "count": 42 + }, + "34": { + "count": 10 + }, + "36": { + "count": 23 + }, + "14": { + "count": 181 + }, + "40": { + "count": 8 + }, + "24": { + "count": 30 + }, + "1": { + "count": 33 + }, + "38": { + "count": 12 + }, + "4": { + "count": 146 + }, + "42": { + "count": 38 + }, + "32": { + "count": 28 + }, + "33": { + "count": 5 + }, + "18": { + "count": 158 + }, + "43": { + "count": 158 + }, + "48": { + "count": 49 + }, + "51": { + "count": 12 + }, + "96": { + "count": 4 + }, + "108": { + "count": 2 + }, + "52": { + "count": 19 + }, + "27": { + "count": 29 + }, + "53": { + "count": 25 + }, + "55": { + "count": 12 + }, + "57": { + "count": 10 + }, + "60": { + "count": 77 + }, + "61": { + "count": 10 + }, + "59": { + "count": 140 + }, + "63": { + "count": 26 + }, + "65": { + "count": 32 + }, + "13": { + "count": 7 + }, + "9": { + "count": 14 + }, + "11": { + "count": 43 + }, + "12": { + "count": 10 + }, + "79": { + "count": 10 + }, + "62": { + "count": 7 + }, + "72": { + "count": 18 + }, + "77": { + "count": 4 + }, + "6": { + "count": 3 + }, + "7": { + "count": 6 + }, + "103": { + "count": 3 + }, + "10": { + "count": 11 + }, + "19": { + "count": 4 + }, + "85": { + "count": 3 + }, + "35": { + "count": 8 + }, + "37": { + "count": 13 + }, + "46": { + "count": 8 + }, + "93": { + "count": 2 + }, + "90": { + "count": 2 + }, + "49": { + "count": 4 + }, + "47": { + "count": 11 + }, + "109": { + "count": 
1 + }, + "54": { + "count": 9 + }, + "75": { + "count": 10 + }, + "86": { + "count": 2 + }, + "70": { + "count": 2 + }, + "81": { + "count": 6 + }, + "67": { + "count": 2 + }, + "78": { + "count": 3 + }, + "69": { + "count": 8 + }, + "106": { + "count": 2 + }, + "66": { + "count": 1 + }, + "41": { + "count": 3 + }, + "73": { + "count": 3 + }, + "84": { + "count": 5 + }, + "28": { + "count": 2 + }, + "80": { + "count": 4 + }, + "101": { + "count": 3 + }, + "74": { + "count": 9 + }, + "68": { + "count": 3 + }, + "64": { + "count": 2 + }, + "2": { + "count": 2 + }, + "89": { + "count": 2 + }, + "26": { + "count": 1 + }, + "56": { + "count": 1 + }, + "71": { + "count": 1 + }, + "82": { + "count": 1 + }, + "21": { + "count": 1 + }, + "30": { + "count": 1 + }, + "50": { + "count": 1 + }, + "95": { + "count": 1 + }, + "98": { + "count": 1 + }, + "58": { + "count": 1 + }, + "76": { + "count": 1 + } + } + }, + "th": { + "num_samples": 2765, + "number_of_characters": 93866, + "number_texts_intersect_with_train": 74, + "min_text_length": 6, + "average_text_length": 33.94792043399638, + "max_text_length": 117, + "unique_text": 2754, + "unique_labels": 94, + "labels": { + "0": { + "count": 165 + }, + "1": { + "count": 31 + }, + "3": { + "count": 258 + }, + "84": { + "count": 6 + }, + "7": { + "count": 8 + }, + "103": { + "count": 3 + }, + "15": { + "count": 68 + }, + "16": { + "count": 146 + }, + "17": { + "count": 13 + }, + "19": { + "count": 6 + }, + "29": { + "count": 122 + }, + "20": { + "count": 1 + }, + "22": { + "count": 52 + }, + "12": { + "count": 10 + }, + "14": { + "count": 205 + }, + "31": { + "count": 10 + }, + "45": { + "count": 12 + }, + "34": { + "count": 10 + }, + "35": { + "count": 15 + }, + "25": { + "count": 338 + }, + "36": { + "count": 23 + }, + "37": { + "count": 15 + }, + "24": { + "count": 28 + }, + "38": { + "count": 12 + }, + "32": { + "count": 28 + }, + "5": { + "count": 47 + }, + "4": { + "count": 127 + }, + "46": { + "count": 12 + }, + "33": { + 
"count": 9 + }, + "42": { + "count": 64 + }, + "43": { + "count": 191 + }, + "90": { + "count": 4 + }, + "47": { + "count": 22 + }, + "48": { + "count": 52 + }, + "94": { + "count": 1 + }, + "109": { + "count": 1 + }, + "52": { + "count": 13 + }, + "53": { + "count": 28 + }, + "55": { + "count": 6 + }, + "60": { + "count": 80 + }, + "59": { + "count": 55 + }, + "63": { + "count": 25 + }, + "65": { + "count": 26 + }, + "18": { + "count": 132 + }, + "86": { + "count": 2 + }, + "79": { + "count": 11 + }, + "11": { + "count": 40 + }, + "72": { + "count": 16 + }, + "67": { + "count": 2 + }, + "78": { + "count": 3 + }, + "9": { + "count": 10 + }, + "106": { + "count": 2 + }, + "66": { + "count": 2 + }, + "62": { + "count": 6 + }, + "75": { + "count": 11 + }, + "40": { + "count": 11 + }, + "61": { + "count": 11 + }, + "10": { + "count": 11 + }, + "13": { + "count": 8 + }, + "93": { + "count": 3 + }, + "77": { + "count": 4 + }, + "69": { + "count": 10 + }, + "2": { + "count": 1 + }, + "44": { + "count": 9 + }, + "6": { + "count": 5 + }, + "88": { + "count": 4 + }, + "51": { + "count": 8 + }, + "108": { + "count": 1 + }, + "54": { + "count": 8 + }, + "27": { + "count": 18 + }, + "74": { + "count": 10 + }, + "41": { + "count": 6 + }, + "49": { + "count": 5 + }, + "57": { + "count": 6 + }, + "68": { + "count": 4 + }, + "73": { + "count": 3 + }, + "28": { + "count": 3 + }, + "81": { + "count": 4 + }, + "80": { + "count": 6 + }, + "21": { + "count": 1 + }, + "85": { + "count": 2 + }, + "50": { + "count": 2 + }, + "89": { + "count": 3 + }, + "98": { + "count": 1 + }, + "30": { + "count": 1 + }, + "91": { + "count": 1 + }, + "95": { + "count": 1 + }, + "58": { + "count": 1 + }, + "56": { + "count": 1 + }, + "101": { + "count": 2 + }, + "71": { + "count": 2 + }, + "70": { + "count": 1 + }, + "82": { + "count": 1 + }, + "76": { + "count": 1 + } + } + } + } + }, + "train": { + "num_samples": 73928, + "number_of_characters": 2937230, + "number_texts_intersect_with_train": null, + 
"min_text_length": 3, + "average_text_length": 39.73095444215994, + "max_text_length": 216, + "unique_text": 73219, + "unique_labels": 113, + "labels": { + "1": { + "count": 906 + }, + "25": { + "count": 8111 + }, + "24": { + "count": 554 + }, + "0": { + "count": 4245 + }, + "59": { + "count": 2613 + }, + "33": { + "count": 453 + }, + "42": { + "count": 1674 + }, + "18": { + "count": 4386 + }, + "14": { + "count": 5093 + }, + "48": { + "count": 1422 + }, + "43": { + "count": 5613 + }, + "60": { + "count": 2307 + }, + "63": { + "count": 636 + }, + "29": { + "count": 3691 + }, + "16": { + "count": 4131 + }, + "34": { + "count": 188 + }, + "3": { + "count": 5708 + }, + "32": { + "count": 676 + }, + "41": { + "count": 116 + }, + "15": { + "count": 1979 + }, + "51": { + "count": 265 + }, + "98": { + "count": 28 + }, + "52": { + "count": 460 + }, + "22": { + "count": 1190 + }, + "47": { + "count": 517 + }, + "4": { + "count": 2900 + }, + "11": { + "count": 1001 + }, + "49": { + "count": 177 + }, + "38": { + "count": 300 + }, + "71": { + "count": 93 + }, + "57": { + "count": 223 + }, + "86": { + "count": 103 + }, + "53": { + "count": 538 + }, + "55": { + "count": 161 + }, + "44": { + "count": 239 + }, + "7": { + "count": 177 + }, + "36": { + "count": 420 + }, + "97": { + "count": 37 + }, + "50": { + "count": 95 + }, + "61": { + "count": 393 + }, + "77": { + "count": 93 + }, + "5": { + "count": 1155 + }, + "30": { + "count": 86 + }, + "21": { + "count": 44 + }, + "65": { + "count": 551 + }, + "35": { + "count": 288 + }, + "45": { + "count": 264 + }, + "67": { + "count": 60 + }, + "54": { + "count": 239 + }, + "20": { + "count": 100 + }, + "26": { + "count": 16 + }, + "80": { + "count": 161 + }, + "89": { + "count": 84 + }, + "27": { + "count": 785 + }, + "37": { + "count": 351 + }, + "46": { + "count": 246 + }, + "72": { + "count": 422 + }, + "12": { + "count": 255 + }, + "10": { + "count": 109 + }, + "75": { + "count": 290 + }, + "19": { + "count": 234 + }, + "62": { + 
"count": 197 + }, + "17": { + "count": 227 + }, + "88": { + "count": 50 + }, + "101": { + "count": 55 + }, + "69": { + "count": 242 + }, + "31": { + "count": 226 + }, + "6": { + "count": 228 + }, + "68": { + "count": 133 + }, + "56": { + "count": 63 + }, + "9": { + "count": 425 + }, + "40": { + "count": 243 + }, + "92": { + "count": 58 + }, + "85": { + "count": 89 + }, + "66": { + "count": 50 + }, + "74": { + "count": 240 + }, + "13": { + "count": 216 + }, + "110": { + "count": 6 + }, + "2": { + "count": 74 + }, + "79": { + "count": 280 + }, + "70": { + "count": 62 + }, + "90": { + "count": 51 + }, + "28": { + "count": 80 + }, + "81": { + "count": 71 + }, + "64": { + "count": 62 + }, + "58": { + "count": 53 + }, + "8": { + "count": 44 + }, + "100": { + "count": 25 + }, + "39": { + "count": 31 + }, + "96": { + "count": 46 + }, + "84": { + "count": 222 + }, + "94": { + "count": 10 + }, + "104": { + "count": 22 + }, + "103": { + "count": 34 + }, + "106": { + "count": 23 + }, + "93": { + "count": 59 + }, + "76": { + "count": 31 + }, + "78": { + "count": 42 + }, + "105": { + "count": 6 + }, + "82": { + "count": 42 + }, + "73": { + "count": 39 + }, + "108": { + "count": 17 + }, + "107": { + "count": 5 + }, + "95": { + "count": 20 + }, + "91": { + "count": 22 + }, + "102": { + "count": 16 + }, + "23": { + "count": 6 + }, + "112": { + "count": 11 + }, + "109": { + "count": 5 + }, + "83": { + "count": 6 + }, + "99": { + "count": 5 + }, + "87": { + "count": 3 + }, + "111": { + "count": 3 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 15667, + "number_of_characters": 572977, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 36.57222186761984, + "max_text_length": 148, + "unique_text": 15634, + "unique_labels": 113, + "labels": { + "1": { + "count": 220 + }, + "25": { + "count": 1616 + }, + "24": { + "count": 101 + }, + "0": { + "count": 921 + }, + "59": { + "count": 776 + }, + "33": { + "count": 83 + }, + 
"42": { + "count": 373 + }, + "18": { + "count": 1075 + }, + "14": { + "count": 1045 + }, + "48": { + "count": 335 + }, + "43": { + "count": 1208 + }, + "60": { + "count": 442 + }, + "63": { + "count": 131 + }, + "29": { + "count": 720 + }, + "16": { + "count": 868 + }, + "34": { + "count": 37 + }, + "3": { + "count": 1177 + }, + "32": { + "count": 121 + }, + "41": { + "count": 24 + }, + "15": { + "count": 424 + }, + "51": { + "count": 53 + }, + "98": { + "count": 7 + }, + "52": { + "count": 109 + }, + "22": { + "count": 218 + }, + "47": { + "count": 132 + }, + "4": { + "count": 618 + }, + "11": { + "count": 201 + }, + "49": { + "count": 42 + }, + "38": { + "count": 59 + }, + "71": { + "count": 16 + }, + "57": { + "count": 51 + }, + "86": { + "count": 18 + }, + "53": { + "count": 99 + }, + "55": { + "count": 46 + }, + "44": { + "count": 45 + }, + "7": { + "count": 33 + }, + "36": { + "count": 84 + }, + "97": { + "count": 7 + }, + "50": { + "count": 23 + }, + "61": { + "count": 85 + }, + "77": { + "count": 20 + }, + "5": { + "count": 214 + }, + "30": { + "count": 16 + }, + "21": { + "count": 10 + }, + "65": { + "count": 114 + }, + "35": { + "count": 54 + }, + "45": { + "count": 52 + }, + "67": { + "count": 12 + }, + "54": { + "count": 56 + }, + "20": { + "count": 21 + }, + "26": { + "count": 4 + }, + "80": { + "count": 33 + }, + "89": { + "count": 16 + }, + "27": { + "count": 183 + }, + "37": { + "count": 64 + }, + "46": { + "count": 49 + }, + "72": { + "count": 78 + }, + "12": { + "count": 47 + }, + "10": { + "count": 20 + }, + "75": { + "count": 60 + }, + "19": { + "count": 50 + }, + "62": { + "count": 36 + }, + "17": { + "count": 42 + }, + "88": { + "count": 10 + }, + "101": { + "count": 12 + }, + "69": { + "count": 49 + }, + "31": { + "count": 44 + }, + "6": { + "count": 44 + }, + "68": { + "count": 28 + }, + "56": { + "count": 14 + }, + "9": { + "count": 82 + }, + "40": { + "count": 49 + }, + "92": { + "count": 13 + }, + "85": { + "count": 18 + }, + "66": { + 
"count": 9 + }, + "74": { + "count": 43 + }, + "13": { + "count": 43 + }, + "110": { + "count": 1 + }, + "2": { + "count": 17 + }, + "79": { + "count": 51 + }, + "70": { + "count": 12 + }, + "90": { + "count": 11 + }, + "28": { + "count": 17 + }, + "81": { + "count": 14 + }, + "64": { + "count": 13 + }, + "58": { + "count": 14 + }, + "8": { + "count": 8 + }, + "100": { + "count": 8 + }, + "39": { + "count": 6 + }, + "96": { + "count": 13 + }, + "84": { + "count": 41 + }, + "94": { + "count": 2 + }, + "104": { + "count": 4 + }, + "103": { + "count": 6 + }, + "106": { + "count": 4 + }, + "93": { + "count": 14 + }, + "76": { + "count": 6 + }, + "78": { + "count": 9 + }, + "105": { + "count": 1 + }, + "82": { + "count": 9 + }, + "73": { + "count": 7 + }, + "108": { + "count": 5 + }, + "107": { + "count": 2 + }, + "95": { + "count": 4 + }, + "91": { + "count": 4 + }, + "102": { + "count": 3 + }, + "23": { + "count": 1 + }, + "112": { + "count": 3 + }, + "109": { + "count": 1 + }, + "83": { + "count": 1 + }, + "99": { + "count": 1 + }, + "87": { + "count": 1 + }, + "111": { + "count": 1 + } + } + }, + "de": { + "num_samples": 13424, + "number_of_characters": 580266, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 43.226013110846246, + "max_text_length": 174, + "unique_text": 13264, + "unique_labels": 113, + "labels": { + "0": { + "count": 592 + }, + "1": { + "count": 150 + }, + "2": { + "count": 13 + }, + "18": { + "count": 873 + }, + "83": { + "count": 1 + }, + "3": { + "count": 1067 + }, + "4": { + "count": 514 + }, + "25": { + "count": 1449 + }, + "6": { + "count": 41 + }, + "7": { + "count": 23 + }, + "84": { + "count": 39 + }, + "15": { + "count": 390 + }, + "31": { + "count": 39 + }, + "65": { + "count": 107 + }, + "9": { + "count": 78 + }, + "14": { + "count": 939 + }, + "12": { + "count": 43 + }, + "11": { + "count": 188 + }, + "13": { + "count": 39 + }, + "16": { + "count": 736 + }, + "17": { + "count": 41 + }, + 
"75": { + "count": 48 + }, + "29": { + "count": 634 + }, + "20": { + "count": 16 + }, + "21": { + "count": 9 + }, + "22": { + "count": 205 + }, + "19": { + "count": 45 + }, + "107": { + "count": 1 + }, + "85": { + "count": 16 + }, + "28": { + "count": 14 + }, + "27": { + "count": 141 + }, + "86": { + "count": 18 + }, + "32": { + "count": 111 + }, + "33": { + "count": 80 + }, + "5": { + "count": 201 + }, + "35": { + "count": 54 + }, + "34": { + "count": 36 + }, + "36": { + "count": 76 + }, + "37": { + "count": 59 + }, + "40": { + "count": 38 + }, + "24": { + "count": 96 + }, + "38": { + "count": 57 + }, + "80": { + "count": 25 + }, + "45": { + "count": 49 + }, + "89": { + "count": 15 + }, + "41": { + "count": 20 + }, + "10": { + "count": 15 + }, + "42": { + "count": 323 + }, + "43": { + "count": 988 + }, + "44": { + "count": 41 + }, + "51": { + "count": 49 + }, + "46": { + "count": 45 + }, + "110": { + "count": 1 + }, + "88": { + "count": 10 + }, + "47": { + "count": 98 + }, + "48": { + "count": 270 + }, + "49": { + "count": 36 + }, + "90": { + "count": 11 + }, + "92": { + "count": 10 + }, + "91": { + "count": 4 + }, + "50": { + "count": 16 + }, + "93": { + "count": 10 + }, + "72": { + "count": 74 + }, + "96": { + "count": 7 + }, + "98": { + "count": 7 + }, + "52": { + "count": 94 + }, + "108": { + "count": 5 + }, + "109": { + "count": 1 + }, + "57": { + "count": 46 + }, + "56": { + "count": 12 + }, + "101": { + "count": 12 + }, + "53": { + "count": 91 + }, + "97": { + "count": 5 + }, + "54": { + "count": 54 + }, + "55": { + "count": 38 + }, + "58": { + "count": 9 + }, + "100": { + "count": 4 + }, + "59": { + "count": 663 + }, + "61": { + "count": 77 + }, + "60": { + "count": 387 + }, + "63": { + "count": 113 + }, + "64": { + "count": 10 + }, + "69": { + "count": 47 + }, + "71": { + "count": 16 + }, + "79": { + "count": 50 + }, + "81": { + "count": 14 + }, + "68": { + "count": 23 + }, + "82": { + "count": 9 + }, + "73": { + "count": 6 + }, + "74": { + "count": 42 + 
}, + "106": { + "count": 4 + }, + "62": { + "count": 36 + }, + "8": { + "count": 8 + }, + "103": { + "count": 5 + }, + "30": { + "count": 15 + }, + "39": { + "count": 5 + }, + "104": { + "count": 4 + }, + "70": { + "count": 11 + }, + "76": { + "count": 4 + }, + "78": { + "count": 9 + }, + "67": { + "count": 12 + }, + "77": { + "count": 16 + }, + "111": { + "count": 1 + }, + "95": { + "count": 3 + }, + "99": { + "count": 1 + }, + "66": { + "count": 9 + }, + "112": { + "count": 2 + }, + "23": { + "count": 1 + }, + "26": { + "count": 3 + }, + "87": { + "count": 1 + }, + "102": { + "count": 3 + }, + "94": { + "count": 1 + }, + "105": { + "count": 1 + } + } + }, + "es": { + "num_samples": 10934, + "number_of_characters": 476798, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 43.60691421254801, + "max_text_length": 186, + "unique_text": 10740, + "unique_labels": 104, + "labels": { + "3": { + "count": 625 + }, + "44": { + "count": 38 + }, + "5": { + "count": 183 + }, + "22": { + "count": 198 + }, + "8": { + "count": 4 + }, + "31": { + "count": 29 + }, + "32": { + "count": 108 + }, + "104": { + "count": 3 + }, + "88": { + "count": 8 + }, + "33": { + "count": 69 + }, + "45": { + "count": 37 + }, + "35": { + "count": 48 + }, + "34": { + "count": 24 + }, + "89": { + "count": 14 + }, + "51": { + "count": 36 + }, + "46": { + "count": 34 + }, + "14": { + "count": 867 + }, + "36": { + "count": 72 + }, + "24": { + "count": 96 + }, + "37": { + "count": 59 + }, + "40": { + "count": 44 + }, + "38": { + "count": 50 + }, + "76": { + "count": 5 + }, + "16": { + "count": 549 + }, + "17": { + "count": 38 + }, + "21": { + "count": 7 + }, + "23": { + "count": 1 + }, + "39": { + "count": 6 + }, + "73": { + "count": 6 + }, + "80": { + "count": 28 + }, + "41": { + "count": 20 + }, + "81": { + "count": 11 + }, + "105": { + "count": 1 + }, + "95": { + "count": 4 + }, + "102": { + "count": 2 + }, + "62": { + "count": 32 + }, + "72": { + "count": 65 + 
}, + "68": { + "count": 16 + }, + "66": { + "count": 9 + }, + "69": { + "count": 32 + }, + "70": { + "count": 9 + }, + "78": { + "count": 5 + }, + "79": { + "count": 39 + }, + "71": { + "count": 16 + }, + "77": { + "count": 10 + }, + "67": { + "count": 7 + }, + "106": { + "count": 4 + }, + "82": { + "count": 5 + }, + "18": { + "count": 632 + }, + "64": { + "count": 9 + }, + "63": { + "count": 102 + }, + "15": { + "count": 324 + }, + "65": { + "count": 90 + }, + "9": { + "count": 67 + }, + "13": { + "count": 34 + }, + "11": { + "count": 152 + }, + "75": { + "count": 46 + }, + "12": { + "count": 40 + }, + "19": { + "count": 36 + }, + "74": { + "count": 41 + }, + "25": { + "count": 1356 + }, + "10": { + "count": 19 + }, + "86": { + "count": 18 + }, + "112": { + "count": 3 + }, + "110": { + "count": 1 + }, + "83": { + "count": 1 + }, + "84": { + "count": 38 + }, + "7": { + "count": 31 + }, + "6": { + "count": 40 + }, + "29": { + "count": 638 + }, + "20": { + "count": 16 + }, + "26": { + "count": 2 + }, + "85": { + "count": 15 + }, + "28": { + "count": 15 + }, + "27": { + "count": 20 + }, + "30": { + "count": 15 + }, + "43": { + "count": 942 + }, + "42": { + "count": 277 + }, + "50": { + "count": 21 + }, + "93": { + "count": 11 + }, + "47": { + "count": 94 + }, + "48": { + "count": 166 + }, + "49": { + "count": 24 + }, + "92": { + "count": 10 + }, + "90": { + "count": 4 + }, + "91": { + "count": 4 + }, + "94": { + "count": 2 + }, + "4": { + "count": 398 + }, + "96": { + "count": 1 + }, + "97": { + "count": 6 + }, + "52": { + "count": 7 + }, + "53": { + "count": 90 + }, + "54": { + "count": 6 + }, + "56": { + "count": 1 + }, + "55": { + "count": 3 + }, + "101": { + "count": 5 + }, + "59": { + "count": 125 + }, + "60": { + "count": 394 + }, + "61": { + "count": 41 + }, + "0": { + "count": 778 + }, + "1": { + "count": 130 + }, + "2": { + "count": 13 + }, + "103": { + "count": 6 + }, + "57": { + "count": 1 + } + } + }, + "fr": { + "num_samples": 11814, + 
"number_of_characters": 515029, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 43.594802776367025, + "max_text_length": 184, + "unique_text": 11727, + "unique_labels": 110, + "labels": { + "0": { + "count": 719 + }, + "1": { + "count": 130 + }, + "18": { + "count": 554 + }, + "3": { + "count": 1049 + }, + "4": { + "count": 480 + }, + "25": { + "count": 1226 + }, + "6": { + "count": 42 + }, + "84": { + "count": 40 + }, + "7": { + "count": 33 + }, + "8": { + "count": 8 + }, + "103": { + "count": 6 + }, + "9": { + "count": 61 + }, + "15": { + "count": 285 + }, + "13": { + "count": 33 + }, + "75": { + "count": 40 + }, + "65": { + "count": 73 + }, + "16": { + "count": 671 + }, + "17": { + "count": 37 + }, + "14": { + "count": 822 + }, + "19": { + "count": 29 + }, + "29": { + "count": 614 + }, + "20": { + "count": 17 + }, + "21": { + "count": 2 + }, + "22": { + "count": 196 + }, + "36": { + "count": 66 + }, + "85": { + "count": 14 + }, + "28": { + "count": 12 + }, + "30": { + "count": 13 + }, + "86": { + "count": 16 + }, + "31": { + "count": 39 + }, + "33": { + "count": 82 + }, + "35": { + "count": 45 + }, + "34": { + "count": 30 + }, + "40": { + "count": 40 + }, + "24": { + "count": 85 + }, + "38": { + "count": 42 + }, + "32": { + "count": 115 + }, + "80": { + "count": 28 + }, + "89": { + "count": 13 + }, + "5": { + "count": 202 + }, + "42": { + "count": 271 + }, + "43": { + "count": 965 + }, + "44": { + "count": 38 + }, + "51": { + "count": 48 + }, + "46": { + "count": 42 + }, + "45": { + "count": 47 + }, + "110": { + "count": 1 + }, + "93": { + "count": 8 + }, + "47": { + "count": 73 + }, + "48": { + "count": 265 + }, + "49": { + "count": 32 + }, + "50": { + "count": 15 + }, + "90": { + "count": 11 + }, + "94": { + "count": 2 + }, + "92": { + "count": 11 + }, + "91": { + "count": 3 + }, + "10": { + "count": 18 + }, + "72": { + "count": 68 + }, + "52": { + "count": 80 + }, + "27": { + "count": 161 + }, + "56": { + "count": 
13 + }, + "53": { + "count": 88 + }, + "54": { + "count": 43 + }, + "97": { + "count": 7 + }, + "98": { + "count": 6 + }, + "58": { + "count": 12 + }, + "55": { + "count": 24 + }, + "100": { + "count": 6 + }, + "57": { + "count": 43 + }, + "59": { + "count": 258 + }, + "11": { + "count": 135 + }, + "60": { + "count": 376 + }, + "63": { + "count": 104 + }, + "12": { + "count": 41 + }, + "41": { + "count": 11 + }, + "81": { + "count": 13 + }, + "62": { + "count": 29 + }, + "77": { + "count": 18 + }, + "68": { + "count": 23 + }, + "79": { + "count": 48 + }, + "37": { + "count": 58 + }, + "69": { + "count": 41 + }, + "82": { + "count": 8 + }, + "74": { + "count": 37 + }, + "106": { + "count": 4 + }, + "70": { + "count": 10 + }, + "71": { + "count": 16 + }, + "61": { + "count": 63 + }, + "2": { + "count": 6 + }, + "83": { + "count": 1 + }, + "26": { + "count": 1 + }, + "73": { + "count": 7 + }, + "88": { + "count": 9 + }, + "39": { + "count": 5 + }, + "104": { + "count": 4 + }, + "95": { + "count": 4 + }, + "96": { + "count": 10 + }, + "108": { + "count": 4 + }, + "109": { + "count": 1 + }, + "101": { + "count": 9 + }, + "99": { + "count": 1 + }, + "64": { + "count": 10 + }, + "78": { + "count": 9 + }, + "76": { + "count": 6 + }, + "66": { + "count": 8 + }, + "67": { + "count": 10 + }, + "23": { + "count": 1 + }, + "102": { + "count": 3 + }, + "112": { + "count": 1 + }, + "105": { + "count": 1 + } + } + }, + "hi": { + "num_samples": 11330, + "number_of_characters": 425919, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 37.592144748455425, + "max_text_length": 216, + "unique_text": 11251, + "unique_labels": 113, + "labels": { + "0": { + "count": 615 + }, + "1": { + "count": 159 + }, + "2": { + "count": 14 + }, + "3": { + "count": 903 + }, + "4": { + "count": 452 + }, + "5": { + "count": 174 + }, + "6": { + "count": 25 + }, + "7": { + "count": 27 + }, + "8": { + "count": 8 + }, + "9": { + "count": 73 + }, + "10": { + "count": 
18 + }, + "11": { + "count": 178 + }, + "12": { + "count": 42 + }, + "13": { + "count": 33 + }, + "14": { + "count": 654 + }, + "15": { + "count": 290 + }, + "16": { + "count": 694 + }, + "17": { + "count": 30 + }, + "18": { + "count": 689 + }, + "19": { + "count": 37 + }, + "20": { + "count": 15 + }, + "21": { + "count": 9 + }, + "22": { + "count": 197 + }, + "23": { + "count": 1 + }, + "24": { + "count": 85 + }, + "25": { + "count": 1237 + }, + "26": { + "count": 4 + }, + "27": { + "count": 163 + }, + "28": { + "count": 13 + }, + "29": { + "count": 519 + }, + "30": { + "count": 14 + }, + "31": { + "count": 39 + }, + "32": { + "count": 109 + }, + "33": { + "count": 64 + }, + "34": { + "count": 33 + }, + "35": { + "count": 41 + }, + "36": { + "count": 63 + }, + "37": { + "count": 56 + }, + "38": { + "count": 46 + }, + "39": { + "count": 5 + }, + "40": { + "count": 36 + }, + "41": { + "count": 21 + }, + "42": { + "count": 227 + }, + "43": { + "count": 713 + }, + "44": { + "count": 36 + }, + "45": { + "count": 36 + }, + "46": { + "count": 39 + }, + "47": { + "count": 66 + }, + "48": { + "count": 212 + }, + "49": { + "count": 24 + }, + "50": { + "count": 10 + }, + "51": { + "count": 37 + }, + "52": { + "count": 103 + }, + "53": { + "count": 82 + }, + "54": { + "count": 46 + }, + "55": { + "count": 31 + }, + "56": { + "count": 13 + }, + "57": { + "count": 46 + }, + "58": { + "count": 10 + }, + "59": { + "count": 560 + }, + "60": { + "count": 340 + }, + "61": { + "count": 68 + }, + "62": { + "count": 31 + }, + "63": { + "count": 86 + }, + "64": { + "count": 11 + }, + "65": { + "count": 88 + }, + "66": { + "count": 6 + }, + "67": { + "count": 9 + }, + "68": { + "count": 23 + }, + "69": { + "count": 33 + }, + "70": { + "count": 10 + }, + "71": { + "count": 13 + }, + "72": { + "count": 68 + }, + "73": { + "count": 6 + }, + "74": { + "count": 37 + }, + "75": { + "count": 53 + }, + "76": { + "count": 5 + }, + "77": { + "count": 15 + }, + "78": { + "count": 4 + }, + "79": { + 
"count": 47 + }, + "80": { + "count": 22 + }, + "81": { + "count": 6 + }, + "82": { + "count": 6 + }, + "83": { + "count": 1 + }, + "84": { + "count": 28 + }, + "85": { + "count": 11 + }, + "86": { + "count": 16 + }, + "87": { + "count": 1 + }, + "88": { + "count": 6 + }, + "89": { + "count": 13 + }, + "90": { + "count": 8 + }, + "91": { + "count": 3 + }, + "92": { + "count": 8 + }, + "93": { + "count": 8 + }, + "94": { + "count": 2 + }, + "95": { + "count": 2 + }, + "96": { + "count": 11 + }, + "97": { + "count": 6 + }, + "98": { + "count": 5 + }, + "99": { + "count": 1 + }, + "100": { + "count": 4 + }, + "101": { + "count": 10 + }, + "102": { + "count": 2 + }, + "103": { + "count": 5 + }, + "104": { + "count": 4 + }, + "105": { + "count": 1 + }, + "106": { + "count": 3 + }, + "107": { + "count": 2 + }, + "108": { + "count": 2 + }, + "109": { + "count": 1 + }, + "110": { + "count": 1 + }, + "111": { + "count": 1 + }, + "112": { + "count": 1 + } + } + }, + "th": { + "num_samples": 10759, + "number_of_characters": 366241, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 34.04043126684636, + "max_text_length": 135, + "unique_text": 10622, + "unique_labels": 110, + "labels": { + "1": { + "count": 117 + }, + "0": { + "count": 620 + }, + "2": { + "count": 11 + }, + "18": { + "count": 563 + }, + "3": { + "count": 887 + }, + "4": { + "count": 438 + }, + "5": { + "count": 181 + }, + "22": { + "count": 176 + }, + "7": { + "count": 30 + }, + "84": { + "count": 36 + }, + "6": { + "count": 36 + }, + "8": { + "count": 8 + }, + "15": { + "count": 266 + }, + "65": { + "count": 79 + }, + "9": { + "count": 64 + }, + "25": { + "count": 1227 + }, + "14": { + "count": 766 + }, + "75": { + "count": 43 + }, + "11": { + "count": 147 + }, + "16": { + "count": 613 + }, + "17": { + "count": 39 + }, + "19": { + "count": 37 + }, + "29": { + "count": 566 + }, + "20": { + "count": 15 + }, + "24": { + "count": 91 + }, + "26": { + "count": 2 + }, + 
"85": { + "count": 15 + }, + "27": { + "count": 117 + }, + "30": { + "count": 13 + }, + "39": { + "count": 4 + }, + "86": { + "count": 17 + }, + "31": { + "count": 36 + }, + "88": { + "count": 7 + }, + "45": { + "count": 43 + }, + "33": { + "count": 75 + }, + "34": { + "count": 28 + }, + "35": { + "count": 46 + }, + "89": { + "count": 13 + }, + "32": { + "count": 112 + }, + "80": { + "count": 25 + }, + "36": { + "count": 59 + }, + "37": { + "count": 55 + }, + "40": { + "count": 36 + }, + "38": { + "count": 46 + }, + "41": { + "count": 20 + }, + "42": { + "count": 203 + }, + "51": { + "count": 42 + }, + "46": { + "count": 37 + }, + "44": { + "count": 41 + }, + "47": { + "count": 54 + }, + "43": { + "count": 797 + }, + "48": { + "count": 174 + }, + "49": { + "count": 19 + }, + "91": { + "count": 4 + }, + "50": { + "count": 10 + }, + "90": { + "count": 6 + }, + "81": { + "count": 13 + }, + "95": { + "count": 3 + }, + "72": { + "count": 69 + }, + "96": { + "count": 4 + }, + "98": { + "count": 3 + }, + "52": { + "count": 67 + }, + "57": { + "count": 36 + }, + "97": { + "count": 6 + }, + "54": { + "count": 34 + }, + "53": { + "count": 88 + }, + "55": { + "count": 19 + }, + "100": { + "count": 3 + }, + "56": { + "count": 10 + }, + "101": { + "count": 7 + }, + "60": { + "count": 368 + }, + "61": { + "count": 59 + }, + "59": { + "count": 231 + }, + "63": { + "count": 100 + }, + "13": { + "count": 34 + }, + "64": { + "count": 9 + }, + "66": { + "count": 9 + }, + "78": { + "count": 6 + }, + "68": { + "count": 20 + }, + "62": { + "count": 33 + }, + "10": { + "count": 19 + }, + "79": { + "count": 45 + }, + "71": { + "count": 16 + }, + "77": { + "count": 14 + }, + "69": { + "count": 40 + }, + "73": { + "count": 7 + }, + "74": { + "count": 40 + }, + "70": { + "count": 10 + }, + "12": { + "count": 42 + }, + "67": { + "count": 10 + }, + "93": { + "count": 8 + }, + "58": { + "count": 8 + }, + "102": { + "count": 3 + }, + "83": { + "count": 1 + }, + "23": { + "count": 1 + }, + "28": 
{ + "count": 9 + }, + "92": { + "count": 6 + }, + "99": { + "count": 1 + }, + "82": { + "count": 5 + }, + "76": { + "count": 5 + }, + "21": { + "count": 7 + }, + "103": { + "count": 6 + }, + "105": { + "count": 1 + }, + "106": { + "count": 4 + }, + "108": { + "count": 1 + }, + "104": { + "count": 3 + }, + "110": { + "count": 1 + }, + "94": { + "count": 1 + }, + "112": { + "count": 1 + }, + "109": { + "count": 1 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MacedonianTweetSentimentClassification.json b/mteb/descriptive_stats/Classification/MacedonianTweetSentimentClassification.json new file mode 100644 index 0000000000..07e63d6376 --- /dev/null +++ b/mteb/descriptive_stats/Classification/MacedonianTweetSentimentClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 1139, + "number_of_characters": 77028, + "number_texts_intersect_with_train": 5, + "min_text_length": 5, + "average_text_length": 67.62774363476734, + "max_text_length": 140, + "unique_text": 1139, + "unique_labels": 3, + "labels": { + "0": { + "count": 200 + }, + "-1": { + "count": 508 + }, + "1": { + "count": 431 + } + } + }, + "train": { + "num_samples": 8566, + "number_of_characters": 660795, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 77.14160635068878, + "max_text_length": 200, + "unique_text": 8566, + "unique_labels": 3, + "labels": { + "-1": { + "count": 4676 + }, + "1": { + "count": 2610 + }, + "0": { + "count": 1280 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MalayalamNewsClassification.json b/mteb/descriptive_stats/Classification/MalayalamNewsClassification.json new file mode 100644 index 0000000000..ec35560ac9 --- /dev/null +++ b/mteb/descriptive_stats/Classification/MalayalamNewsClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 1260, + "number_of_characters": 101349, + "number_texts_intersect_with_train": 
34, + "min_text_length": 14, + "average_text_length": 80.43571428571428, + "max_text_length": 375, + "unique_text": 1251, + "unique_labels": 3, + "labels": { + "business": { + "count": 383 + }, + "sports": { + "count": 446 + }, + "entertainment": { + "count": 431 + } + } + }, + "train": { + "num_samples": 5036, + "number_of_characters": 400263, + "number_texts_intersect_with_train": null, + "min_text_length": 10, + "average_text_length": 79.48034154090548, + "max_text_length": 399, + "unique_text": 4958, + "unique_labels": 3, + "labels": { + "business": { + "count": 1540 + }, + "sports": { + "count": 1743 + }, + "entertainment": { + "count": 1753 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MarathiNewsClassification.json b/mteb/descriptive_stats/Classification/MarathiNewsClassification.json new file mode 100644 index 0000000000..a801d9e7fb --- /dev/null +++ b/mteb/descriptive_stats/Classification/MarathiNewsClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 107545, + "number_texts_intersect_with_train": 3, + "min_text_length": 11, + "average_text_length": 52.51220703125, + "max_text_length": 122, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "0": { + "count": 575 + }, + "1": { + "count": 193 + }, + "2": { + "count": 1280 + } + } + }, + "train": { + "num_samples": 9673, + "number_of_characters": 505970, + "number_texts_intersect_with_train": null, + "min_text_length": 8, + "average_text_length": 52.30745373720666, + "max_text_length": 134, + "unique_text": 9658, + "unique_labels": 3, + "labels": { + "2": { + "count": 6035 + }, + "0": { + "count": 2644 + }, + "1": { + "count": 994 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MasakhaNEWSClassification.json b/mteb/descriptive_stats/Classification/MasakhaNEWSClassification.json new file mode 100644 index 0000000000..c3feaf2b3f --- /dev/null +++ 
b/mteb/descriptive_stats/Classification/MasakhaNEWSClassification.json @@ -0,0 +1,978 @@ +{ + "test": { + "num_samples": 6242, + "number_of_characters": 16946423, + "number_texts_intersect_with_train": 66, + "min_text_length": 1, + "average_text_length": 2714.9027555270745, + "max_text_length": 26369, + "unique_text": 6234, + "unique_labels": 7, + "labels": { + "business": { + "count": 785 + }, + "health": { + "count": 1258 + }, + "politics": { + "count": 1589 + }, + "sports": { + "count": 1265 + }, + "entertainment": { + "count": 762 + }, + "technology": { + "count": 297 + }, + "religion": { + "count": 286 + } + }, + "hf_subset_descriptive_stats": { + "amh": { + "num_samples": 376, + "number_of_characters": 832938, + "number_texts_intersect_with_train": 34, + "min_text_length": 118, + "average_text_length": 2215.2606382978724, + "max_text_length": 12042, + "unique_text": 373, + "unique_labels": 4, + "labels": { + "business": { + "count": 81 + }, + "health": { + "count": 100 + }, + "politics": { + "count": 100 + }, + "sports": { + "count": 95 + } + } + }, + "eng": { + "num_samples": 948, + "number_of_characters": 2910616, + "number_texts_intersect_with_train": 2, + "min_text_length": 81, + "average_text_length": 3070.2700421940926, + "max_text_length": 20178, + "unique_text": 947, + "unique_labels": 6, + "labels": { + "business": { + "count": 160 + }, + "entertainment": { + "count": 150 + }, + "health": { + "count": 150 + }, + "politics": { + "count": 165 + }, + "sports": { + "count": 200 + }, + "technology": { + "count": 123 + } + } + }, + "fra": { + "num_samples": 422, + "number_of_characters": 2159222, + "number_texts_intersect_with_train": 1, + "min_text_length": 206, + "average_text_length": 5116.63981042654, + "max_text_length": 25973, + "unique_text": 422, + "unique_labels": 5, + "labels": { + "business": { + "count": 100 + }, + "health": { + "count": 100 + }, + "politics": { + "count": 100 + }, + "sports": { + "count": 100 + }, + "technology": { + "count": 
22 + } + } + }, + "hau": { + "num_samples": 637, + "number_of_characters": 1625030, + "number_texts_intersect_with_train": 1, + "min_text_length": 14, + "average_text_length": 2551.0675039246466, + "max_text_length": 25355, + "unique_text": 637, + "unique_labels": 7, + "labels": { + "business": { + "count": 80 + }, + "entertainment": { + "count": 100 + }, + "health": { + "count": 99 + }, + "politics": { + "count": 100 + }, + "religion": { + "count": 99 + }, + "sports": { + "count": 100 + }, + "technology": { + "count": 59 + } + } + }, + "ibo": { + "num_samples": 390, + "number_of_characters": 741370, + "number_texts_intersect_with_train": 23, + "min_text_length": 1, + "average_text_length": 1900.948717948718, + "max_text_length": 8864, + "unique_text": 387, + "unique_labels": 6, + "labels": { + "business": { + "count": 59 + }, + "entertainment": { + "count": 74 + }, + "health": { + "count": 85 + }, + "politics": { + "count": 100 + }, + "religion": { + "count": 15 + }, + "sports": { + "count": 57 + } + } + }, + "lin": { + "num_samples": 175, + "number_of_characters": 185022, + "number_texts_intersect_with_train": 0, + "min_text_length": 207, + "average_text_length": 1057.2685714285715, + "max_text_length": 3962, + "unique_text": 175, + "unique_labels": 4, + "labels": { + "business": { + "count": 17 + }, + "health": { + "count": 39 + }, + "politics": { + "count": 100 + }, + "sports": { + "count": 19 + } + } + }, + "lug": { + "num_samples": 223, + "number_of_characters": 385757, + "number_texts_intersect_with_train": 0, + "min_text_length": 409, + "average_text_length": 1729.8520179372197, + "max_text_length": 3451, + "unique_text": 223, + "unique_labels": 5, + "labels": { + "business": { + "count": 34 + }, + "health": { + "count": 46 + }, + "politics": { + "count": 100 + }, + "religion": { + "count": 19 + }, + "sports": { + "count": 24 + } + } + }, + "orm": { + "num_samples": 325, + "number_of_characters": 1021567, + "number_texts_intersect_with_train": 0, + 
"min_text_length": 122, + "average_text_length": 3143.283076923077, + "max_text_length": 15516, + "unique_text": 325, + "unique_labels": 5, + "labels": { + "entertainment": { + "count": 32 + }, + "health": { + "count": 97 + }, + "politics": { + "count": 100 + }, + "sports": { + "count": 81 + }, + "technology": { + "count": 15 + } + } + }, + "pcm": { + "num_samples": 305, + "number_of_characters": 735570, + "number_texts_intersect_with_train": 5, + "min_text_length": 244, + "average_text_length": 2411.7049180327867, + "max_text_length": 14377, + "unique_text": 305, + "unique_labels": 5, + "labels": { + "business": { + "count": 20 + }, + "entertainment": { + "count": 92 + }, + "health": { + "count": 32 + }, + "politics": { + "count": 62 + }, + "sports": { + "count": 99 + } + } + }, + "run": { + "num_samples": 322, + "number_of_characters": 899422, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 2793.2360248447203, + "max_text_length": 12756, + "unique_text": 322, + "unique_labels": 6, + "labels": { + "business": { + "count": 16 + }, + "entertainment": { + "count": 32 + }, + "health": { + "count": 75 + }, + "politics": { + "count": 100 + }, + "religion": { + "count": 15 + }, + "sports": { + "count": 84 + } + } + }, + "sna": { + "num_samples": 369, + "number_of_characters": 740792, + "number_texts_intersect_with_train": 0, + "min_text_length": 495, + "average_text_length": 2007.5663956639567, + "max_text_length": 4861, + "unique_text": 369, + "unique_labels": 4, + "labels": { + "business": { + "count": 100 + }, + "health": { + "count": 85 + }, + "politics": { + "count": 100 + }, + "sports": { + "count": 84 + } + } + }, + "som": { + "num_samples": 294, + "number_of_characters": 1043387, + "number_texts_intersect_with_train": 0, + "min_text_length": 94, + "average_text_length": 3548.9353741496598, + "max_text_length": 26369, + "unique_text": 294, + "unique_labels": 7, + "labels": { + "business": { + "count": 23 + }, + 
"entertainment": { + "count": 28 + }, + "health": { + "count": 71 + }, + "politics": { + "count": 100 + }, + "religion": { + "count": 15 + }, + "sports": { + "count": 30 + }, + "technology": { + "count": 27 + } + } + }, + "swa": { + "num_samples": 476, + "number_of_characters": 1694087, + "number_texts_intersect_with_train": 0, + "min_text_length": 103, + "average_text_length": 3559.0063025210084, + "max_text_length": 12005, + "unique_text": 476, + "unique_labels": 7, + "labels": { + "business": { + "count": 64 + }, + "entertainment": { + "count": 20 + }, + "health": { + "count": 100 + }, + "politics": { + "count": 100 + }, + "religion": { + "count": 59 + }, + "sports": { + "count": 100 + }, + "technology": { + "count": 33 + } + } + }, + "tir": { + "num_samples": 272, + "number_of_characters": 568854, + "number_texts_intersect_with_train": 0, + "min_text_length": 93, + "average_text_length": 2091.375, + "max_text_length": 9587, + "unique_text": 272, + "unique_labels": 6, + "labels": { + "business": { + "count": 16 + }, + "entertainment": { + "count": 34 + }, + "health": { + "count": 79 + }, + "politics": { + "count": 100 + }, + "sports": { + "count": 25 + }, + "technology": { + "count": 18 + } + } + }, + "xho": { + "num_samples": 297, + "number_of_characters": 562970, + "number_texts_intersect_with_train": 0, + "min_text_length": 409, + "average_text_length": 1895.5218855218855, + "max_text_length": 5994, + "unique_text": 297, + "unique_labels": 5, + "labels": { + "business": { + "count": 15 + }, + "entertainment": { + "count": 100 + }, + "health": { + "count": 20 + }, + "politics": { + "count": 62 + }, + "sports": { + "count": 100 + } + } + }, + "yor": { + "num_samples": 411, + "number_of_characters": 839819, + "number_texts_intersect_with_train": 0, + "min_text_length": 1, + "average_text_length": 2043.3552311435524, + "max_text_length": 10464, + "unique_text": 411, + "unique_labels": 5, + "labels": { + "entertainment": { + "count": 100 + }, + "health": { + 
"count": 80 + }, + "politics": { + "count": 100 + }, + "religion": { + "count": 64 + }, + "sports": { + "count": 67 + } + } + } + } + }, + "train": { + "num_samples": 21734, + "number_of_characters": 58485151, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 2690.952010674519, + "max_text_length": 46502, + "unique_text": 21591, + "unique_labels": 7, + "labels": { + "sports": { + "count": 4401 + }, + "business": { + "count": 2725 + }, + "health": { + "count": 4384 + }, + "politics": { + "count": 5555 + }, + "entertainment": { + "count": 2654 + }, + "technology": { + "count": 1029 + }, + "religion": { + "count": 986 + } + }, + "hf_subset_descriptive_stats": { + "amh": { + "num_samples": 1311, + "number_of_characters": 2747332, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 2095.6003051106027, + "max_text_length": 16770, + "unique_text": 1243, + "unique_labels": 4, + "labels": { + "sports": { + "count": 329 + }, + "business": { + "count": 282 + }, + "health": { + "count": 350 + }, + "politics": { + "count": 350 + } + } + }, + "eng": { + "num_samples": 3309, + "number_of_characters": 9997822, + "number_texts_intersect_with_train": null, + "min_text_length": 25, + "average_text_length": 3021.402840737383, + "max_text_length": 28842, + "unique_text": 3305, + "unique_labels": 6, + "labels": { + "business": { + "count": 559 + }, + "entertainment": { + "count": 525 + }, + "health": { + "count": 522 + }, + "technology": { + "count": 429 + }, + "sports": { + "count": 700 + }, + "politics": { + "count": 574 + } + } + }, + "fra": { + "num_samples": 1476, + "number_of_characters": 7475668, + "number_texts_intersect_with_train": null, + "min_text_length": 67, + "average_text_length": 5064.815718157181, + "max_text_length": 46502, + "unique_text": 1474, + "unique_labels": 5, + "labels": { + "sports": { + "count": 350 + }, + "business": { + "count": 350 + }, + "politics": { + "count": 350 + 
}, + "health": { + "count": 350 + }, + "technology": { + "count": 76 + } + } + }, + "hau": { + "num_samples": 2219, + "number_of_characters": 5273176, + "number_texts_intersect_with_train": null, + "min_text_length": 12, + "average_text_length": 2376.3749436683192, + "max_text_length": 16709, + "unique_text": 2218, + "unique_labels": 7, + "labels": { + "religion": { + "count": 345 + }, + "entertainment": { + "count": 350 + }, + "business": { + "count": 279 + }, + "health": { + "count": 345 + }, + "politics": { + "count": 350 + }, + "sports": { + "count": 347 + }, + "technology": { + "count": 203 + } + } + }, + "ibo": { + "num_samples": 1356, + "number_of_characters": 2559120, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 1887.2566371681417, + "max_text_length": 15301, + "unique_text": 1295, + "unique_labels": 6, + "labels": { + "business": { + "count": 204 + }, + "politics": { + "count": 350 + }, + "sports": { + "count": 199 + }, + "entertainment": { + "count": 256 + }, + "health": { + "count": 296 + }, + "religion": { + "count": 51 + } + } + }, + "lin": { + "num_samples": 608, + "number_of_characters": 626299, + "number_texts_intersect_with_train": null, + "min_text_length": 241, + "average_text_length": 1030.0970394736842, + "max_text_length": 3094, + "unique_text": 608, + "unique_labels": 4, + "labels": { + "health": { + "count": 135 + }, + "business": { + "count": 57 + }, + "politics": { + "count": 350 + }, + "sports": { + "count": 66 + } + } + }, + "lug": { + "num_samples": 771, + "number_of_characters": 1340821, + "number_texts_intersect_with_train": null, + "min_text_length": 248, + "average_text_length": 1739.0674448767834, + "max_text_length": 6106, + "unique_text": 771, + "unique_labels": 5, + "labels": { + "religion": { + "count": 63 + }, + "politics": { + "count": 350 + }, + "sports": { + "count": 81 + }, + "health": { + "count": 159 + }, + "business": { + "count": 118 + } + } + }, + "orm": { + 
"num_samples": 1128, + "number_of_characters": 3711657, + "number_texts_intersect_with_train": null, + "min_text_length": 99, + "average_text_length": 3290.476063829787, + "max_text_length": 16922, + "unique_text": 1126, + "unique_labels": 5, + "labels": { + "technology": { + "count": 50 + }, + "sports": { + "count": 280 + }, + "politics": { + "count": 350 + }, + "health": { + "count": 338 + }, + "entertainment": { + "count": 110 + } + } + }, + "pcm": { + "num_samples": 1060, + "number_of_characters": 2328253, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 2196.4650943396227, + "max_text_length": 27843, + "unique_text": 1058, + "unique_labels": 5, + "labels": { + "sports": { + "count": 344 + }, + "entertainment": { + "count": 322 + }, + "politics": { + "count": 216 + }, + "health": { + "count": 111 + }, + "business": { + "count": 67 + } + } + }, + "run": { + "num_samples": 1117, + "number_of_characters": 3155241, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 2824.7457475380484, + "max_text_length": 17059, + "unique_text": 1117, + "unique_labels": 6, + "labels": { + "sports": { + "count": 293 + }, + "entertainment": { + "count": 110 + }, + "politics": { + "count": 350 + }, + "religion": { + "count": 51 + }, + "business": { + "count": 53 + }, + "health": { + "count": 260 + } + } + }, + "sna": { + "num_samples": 1288, + "number_of_characters": 2514372, + "number_texts_intersect_with_train": null, + "min_text_length": 8, + "average_text_length": 1952.1521739130435, + "max_text_length": 6889, + "unique_text": 1288, + "unique_labels": 4, + "labels": { + "sports": { + "count": 291 + }, + "health": { + "count": 297 + }, + "business": { + "count": 350 + }, + "politics": { + "count": 350 + } + } + }, + "som": { + "num_samples": 1021, + "number_of_characters": 3693825, + "number_texts_intersect_with_train": null, + "min_text_length": 72, + "average_text_length": 3617.8501469147895, 
+ "max_text_length": 16143, + "unique_text": 1021, + "unique_labels": 7, + "labels": { + "politics": { + "count": 350 + }, + "business": { + "count": 79 + }, + "health": { + "count": 247 + }, + "entertainment": { + "count": 97 + }, + "sports": { + "count": 103 + }, + "technology": { + "count": 94 + }, + "religion": { + "count": 51 + } + } + }, + "swa": { + "num_samples": 1658, + "number_of_characters": 6018165, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 3629.773823884198, + "max_text_length": 24751, + "unique_text": 1658, + "unique_labels": 7, + "labels": { + "sports": { + "count": 350 + }, + "health": { + "count": 350 + }, + "politics": { + "count": 350 + }, + "religion": { + "count": 204 + }, + "technology": { + "count": 115 + }, + "entertainment": { + "count": 68 + }, + "business": { + "count": 221 + } + } + }, + "tir": { + "num_samples": 947, + "number_of_characters": 2094882, + "number_texts_intersect_with_train": null, + "min_text_length": 41, + "average_text_length": 2212.1246040126716, + "max_text_length": 12624, + "unique_text": 946, + "unique_labels": 6, + "labels": { + "politics": { + "count": 350 + }, + "technology": { + "count": 62 + }, + "entertainment": { + "count": 116 + }, + "business": { + "count": 56 + }, + "health": { + "count": 276 + }, + "sports": { + "count": 87 + } + } + }, + "xho": { + "num_samples": 1032, + "number_of_characters": 1977998, + "number_texts_intersect_with_train": null, + "min_text_length": 77, + "average_text_length": 1916.6647286821706, + "max_text_length": 8675, + "unique_text": 1032, + "unique_labels": 5, + "labels": { + "entertainment": { + "count": 350 + }, + "sports": { + "count": 347 + }, + "politics": { + "count": 215 + }, + "health": { + "count": 70 + }, + "business": { + "count": 50 + } + } + }, + "yor": { + "num_samples": 1433, + "number_of_characters": 2970520, + "number_texts_intersect_with_train": null, + "min_text_length": 74, + "average_text_length": 
2072.937892533147, + "max_text_length": 42588, + "unique_text": 1433, + "unique_labels": 5, + "labels": { + "health": { + "count": 278 + }, + "religion": { + "count": 221 + }, + "entertainment": { + "count": 350 + }, + "politics": { + "count": 350 + }, + "sports": { + "count": 234 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MassiveIntentClassification.json b/mteb/descriptive_stats/Classification/MassiveIntentClassification.json new file mode 100644 index 0000000000..83ffedc6c2 --- /dev/null +++ b/mteb/descriptive_stats/Classification/MassiveIntentClassification.json @@ -0,0 +1,29648 @@ +{ + "validation": { + "num_samples": 103683, + "number_of_characters": 3583467, + "number_texts_intersect_with_train": 5457, + "min_text_length": 1, + "average_text_length": 34.56176036573016, + "max_text_length": 224, + "unique_text": 102325, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 867 + }, + "iot_hue_lightdim": { + "count": 867 + }, + "iot_cleaning": { + "count": 969 + }, + "general_quirky": { + "count": 5355 + }, + "takeaway_query": { + "count": 1224 + }, + "play_music": { + "count": 6273 + }, + "music_query": { + "count": 1530 + }, + "weather_query": { + "count": 6426 + }, + "music_settings": { + "count": 408 + }, + "audio_volume_down": { + "count": 408 + }, + "datetime_query": { + "count": 3264 + }, + "general_greet": { + "count": 102 + }, + "alarm_set": { + "count": 1581 + }, + "audio_volume_up": { + "count": 612 + }, + "alarm_query": { + "count": 969 + }, + "news_query": { + "count": 4182 + }, + "iot_hue_lighton": { + "count": 255 + }, + "iot_wemo_off": { + "count": 255 + }, + "iot_hue_lightchange": { + "count": 1122 + }, + "audio_volume_mute": { + "count": 765 + }, + "alarm_remove": { + "count": 714 + }, + "general_joke": { + "count": 765 + }, + "datetime_convert": { + "count": 459 + }, + "iot_wemo_on": { + "count": 357 + }, + "iot_hue_lightup": { + "count": 612 + }, + "iot_coffee": { + 
"count": 714 + }, + "social_post": { + "count": 2550 + }, + "music_dislikeness": { + "count": 102 + }, + "cooking_recipe": { + "count": 2091 + }, + "takeaway_order": { + "count": 1020 + }, + "music_likeness": { + "count": 816 + }, + "calendar_query": { + "count": 5202 + }, + "qa_stock": { + "count": 1224 + }, + "qa_factoid": { + "count": 4590 + }, + "calendar_set": { + "count": 6681 + }, + "recommendation_events": { + "count": 1326 + }, + "cooking_query": { + "count": 102 + }, + "calendar_remove": { + "count": 2397 + }, + "email_sendemail": { + "count": 3213 + }, + "play_radio": { + "count": 2346 + }, + "play_audiobook": { + "count": 1785 + }, + "play_game": { + "count": 1122 + }, + "lists_query": { + "count": 2550 + }, + "lists_remove": { + "count": 1887 + }, + "lists_createoradd": { + "count": 1275 + }, + "email_addcontact": { + "count": 255 + }, + "play_podcasts": { + "count": 1734 + }, + "recommendation_movies": { + "count": 612 + }, + "recommendation_locations": { + "count": 1581 + }, + "transport_ticket": { + "count": 1275 + }, + "transport_query": { + "count": 1836 + }, + "transport_taxi": { + "count": 1377 + }, + "transport_traffic": { + "count": 1122 + }, + "qa_definition": { + "count": 2805 + }, + "qa_currency": { + "count": 1632 + }, + "qa_maths": { + "count": 663 + }, + "social_query": { + "count": 918 + }, + "email_query": { + "count": 3723 + }, + "email_querycontact": { + "count": 816 + } + }, + "hf_subset_descriptive_stats": { + "af": { + "num_samples": 2033, + "number_of_characters": 75759, + "number_texts_intersect_with_train": 66, + "min_text_length": 2, + "average_text_length": 37.26463354648303, + "max_text_length": 161, + "unique_text": 2024, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + 
"count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 
13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "am": { + "num_samples": 2033, + "number_of_characters": 48678, + "number_texts_intersect_with_train": 68, + "min_text_length": 2, + "average_text_length": 23.94392523364486, + "max_text_length": 104, + "unique_text": 2023, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + 
"count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "ar": { + "num_samples": 2033, + "number_of_characters": 54567, + "number_texts_intersect_with_train": 129, + "min_text_length": 2, + "average_text_length": 26.840629611411707, + "max_text_length": 126, + "unique_text": 2007, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 
+ }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "az": { + "num_samples": 2033, + "number_of_characters": 73329, + "number_texts_intersect_with_train": 144, + "min_text_length": 3, + "average_text_length": 36.06935563207083, + "max_text_length": 158, + "unique_text": 2005, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + 
"music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + 
"qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "bn": { + "num_samples": 2033, + "number_of_characters": 69343, + "number_texts_intersect_with_train": 85, + "min_text_length": 2, + "average_text_length": 34.10870634530251, + "max_text_length": 153, + "unique_text": 2017, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + 
}, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "cy": { + "num_samples": 2033, + "number_of_characters": 78652, + "number_texts_intersect_with_train": 72, + "min_text_length": 3, + "average_text_length": 38.68765371372356, + "max_text_length": 166, + "unique_text": 2024, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + 
"datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "da": { + "num_samples": 2033, + "number_of_characters": 70289, + "number_texts_intersect_with_train": 91, + "min_text_length": 3, + "average_text_length": 34.574028529267096, + "max_text_length": 154, + "unique_text": 2020, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + 
"play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + 
"qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "de": { + "num_samples": 2033, + "number_of_characters": 79734, + "number_texts_intersect_with_train": 85, + "min_text_length": 4, + "average_text_length": 39.219872110181996, + "max_text_length": 156, + "unique_text": 2022, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + 
"email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "el": { + "num_samples": 2033, + "number_of_characters": 83301, + "number_texts_intersect_with_train": 89, + "min_text_length": 2, + "average_text_length": 40.97442203639941, + "max_text_length": 179, + "unique_text": 2020, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": 
{ + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "en": { + "num_samples": 2033, + "number_of_characters": 70729, + "number_texts_intersect_with_train": 11, + "min_text_length": 3, + "average_text_length": 34.790457452041316, + "max_text_length": 153, + "unique_text": 2031, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + 
"count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + 
"count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "es": { + "num_samples": 2033, + "number_of_characters": 79868, + "number_texts_intersect_with_train": 99, + "min_text_length": 3, + "average_text_length": 39.28578455484506, + "max_text_length": 145, + "unique_text": 2015, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + 
"count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "fa": { + "num_samples": 2033, + "number_of_characters": 67524, + "number_texts_intersect_with_train": 116, + "min_text_length": 2, + "average_text_length": 33.21396950319725, + "max_text_length": 147, + "unique_text": 2003, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + 
}, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "fi": { + "num_samples": 2033, + "number_of_characters": 74125, + "number_texts_intersect_with_train": 129, + "min_text_length": 3, + "average_text_length": 36.460895228726024, + "max_text_length": 166, + "unique_text": 2013, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + 
"takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + 
"qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "fr": { + "num_samples": 2033, + "number_of_characters": 85978, + "number_texts_intersect_with_train": 90, + "min_text_length": 2, + "average_text_length": 42.29119527791441, + "max_text_length": 170, + "unique_text": 2016, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + 
"calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "he": { + "num_samples": 2033, + "number_of_characters": 57225, + "number_texts_intersect_with_train": 91, + "min_text_length": 3, + "average_text_length": 28.148057058534185, + "max_text_length": 116, + "unique_text": 2017, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + 
"alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "hi": { + "num_samples": 2033, + "number_of_characters": 71207, + "number_texts_intersect_with_train": 101, + "min_text_length": 3, + "average_text_length": 35.02557796360059, + "max_text_length": 169, + "unique_text": 2013, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + 
"general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + 
"transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "hu": { + "num_samples": 2033, + "number_of_characters": 74798, + "number_texts_intersect_with_train": 112, + "min_text_length": 2, + "average_text_length": 36.79193310378751, + "max_text_length": 160, + "unique_text": 2018, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + 
}, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "hy": { + "num_samples": 2033, + "number_of_characters": 74198, + "number_texts_intersect_with_train": 90, + "min_text_length": 3, + "average_text_length": 36.496802754549925, + "max_text_length": 172, + "unique_text": 2016, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + 
"audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "id": { + "num_samples": 2033, + "number_of_characters": 77036, + "number_texts_intersect_with_train": 147, + "min_text_length": 3, + "average_text_length": 37.89276930644368, + "max_text_length": 172, + "unique_text": 2008, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + 
"iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + 
"transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "is": { + "num_samples": 2033, + "number_of_characters": 75521, + "number_texts_intersect_with_train": 99, + "min_text_length": 2, + "average_text_length": 37.14756517461879, + "max_text_length": 182, + "unique_text": 2014, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + 
"recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "it": { + "num_samples": 2033, + "number_of_characters": 77604, + "number_texts_intersect_with_train": 143, + "min_text_length": 3, + "average_text_length": 38.17215937038859, + "max_text_length": 173, + "unique_text": 2001, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + 
"iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "ja": { + "num_samples": 2033, + "number_of_characters": 31479, + "number_texts_intersect_with_train": 134, + "min_text_length": 1, + "average_text_length": 15.48401377274963, + "max_text_length": 63, + "unique_text": 2015, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + 
"iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + 
"transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "jv": { + "num_samples": 2033, + "number_of_characters": 69773, + "number_texts_intersect_with_train": 103, + "min_text_length": 3, + "average_text_length": 34.32021642892278, + "max_text_length": 169, + "unique_text": 2019, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + 
"calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "ka": { + "num_samples": 2033, + "number_of_characters": 63256, + "number_texts_intersect_with_train": 170, + "min_text_length": 2, + "average_text_length": 31.11460895228726, + "max_text_length": 147, + "unique_text": 1989, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + 
"iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "km": { + "num_samples": 2033, + "number_of_characters": 64843, + "number_texts_intersect_with_train": 196, + "min_text_length": 2, + "average_text_length": 31.895228726020658, + "max_text_length": 147, + "unique_text": 1932, + "unique_labels": 59, + "labels": { + 
"iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + 
"transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "kn": { + "num_samples": 2033, + "number_of_characters": 77652, + "number_texts_intersect_with_train": 91, + "min_text_length": 2, + "average_text_length": 38.195769798327596, + "max_text_length": 164, + "unique_text": 2025, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + 
}, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "ko": { + "num_samples": 2033, + "number_of_characters": 31722, + "number_texts_intersect_with_train": 116, + "min_text_length": 1, + "average_text_length": 15.60354156419085, + "max_text_length": 69, + "unique_text": 2014, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + 
"iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "lv": { + "num_samples": 2033, + "number_of_characters": 74498, + "number_texts_intersect_with_train": 110, + "min_text_length": 3, + "average_text_length": 36.644367929168716, + "max_text_length": 132, + "unique_text": 2021, + 
"unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + 
"recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "ml": { + "num_samples": 2033, + "number_of_characters": 87266, + "number_texts_intersect_with_train": 107, + "min_text_length": 4, + "average_text_length": 42.924741760944414, + "max_text_length": 196, + "unique_text": 2013, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { 
+ "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "mn": { + "num_samples": 2033, + "number_of_characters": 78032, + "number_texts_intersect_with_train": 110, + "min_text_length": 3, + "average_text_length": 38.382685686178064, + "max_text_length": 155, + "unique_text": 2010, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + 
}, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "ms": { + "num_samples": 2033, + "number_of_characters": 80301, + "number_texts_intersect_with_train": 83, + "min_text_length": 3, + "average_text_length": 39.49877029021151, + 
"max_text_length": 191, + "unique_text": 2014, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + 
"recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "my": { + "num_samples": 2033, + "number_of_characters": 82073, + "number_texts_intersect_with_train": 49, + "min_text_length": 3, + "average_text_length": 40.37038858829316, + "max_text_length": 167, + "unique_text": 2025, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + 
"music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "nb": { + "num_samples": 2033, + "number_of_characters": 70801, + "number_texts_intersect_with_train": 79, + "min_text_length": 3, + "average_text_length": 34.82587309394983, + "max_text_length": 161, + "unique_text": 2022, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + 
"audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "nl": { + "num_samples": 2033, + "number_of_characters": 80046, + "number_texts_intersect_with_train": 86, + 
"min_text_length": 3, + "average_text_length": 39.37333989178554, + "max_text_length": 165, + "unique_text": 2017, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 
+ }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "pl": { + "num_samples": 2033, + "number_of_characters": 73462, + "number_texts_intersect_with_train": 151, + "min_text_length": 4, + "average_text_length": 36.13477619281849, + "max_text_length": 156, + "unique_text": 2009, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + 
"takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "pt": { + "num_samples": 2033, + "number_of_characters": 77129, + "number_texts_intersect_with_train": 119, + "min_text_length": 3, + "average_text_length": 37.9385145105755, + "max_text_length": 160, + "unique_text": 2004, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": 
{ + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "ro": { + "num_samples": 2033, + "number_of_characters": 75176, + 
"number_texts_intersect_with_train": 88, + "min_text_length": 3, + "average_text_length": 36.97786522380718, + "max_text_length": 166, + "unique_text": 2027, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 
+ }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "ru": { + "num_samples": 2033, + "number_of_characters": 74623, + "number_texts_intersect_with_train": 128, + "min_text_length": 4, + "average_text_length": 36.705853418593215, + "max_text_length": 163, + "unique_text": 2002, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + 
"cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "sl": { + "num_samples": 2033, + "number_of_characters": 71648, + "number_texts_intersect_with_train": 134, + "min_text_length": 3, + "average_text_length": 35.24249877029021, + "max_text_length": 145, + "unique_text": 2005, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + 
"general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "sq": { + "num_samples": 2033, + 
"number_of_characters": 75812, + "number_texts_intersect_with_train": 121, + "min_text_length": 2, + "average_text_length": 37.29070339399902, + "max_text_length": 163, + "unique_text": 2004, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + 
"lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "sv": { + "num_samples": 2033, + "number_of_characters": 70697, + "number_texts_intersect_with_train": 110, + "min_text_length": 3, + "average_text_length": 34.774717166748644, + "max_text_length": 169, + "unique_text": 2015, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + 
"music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "sw": { + "num_samples": 2033, + "number_of_characters": 77779, + "number_texts_intersect_with_train": 97, + "min_text_length": 4, + "average_text_length": 38.258239055582884, + "max_text_length": 161, + "unique_text": 2011, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + 
"datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + 
"ta": { + "num_samples": 2033, + "number_of_characters": 85963, + "number_texts_intersect_with_train": 94, + "min_text_length": 1, + "average_text_length": 42.28381701918347, + "max_text_length": 171, + "unique_text": 2013, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + 
"lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "te": { + "num_samples": 2033, + "number_of_characters": 77244, + "number_texts_intersect_with_train": 55, + "min_text_length": 2, + "average_text_length": 37.99508116084604, + "max_text_length": 148, + "unique_text": 2022, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + 
"social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "th": { + "num_samples": 2033, + "number_of_characters": 63528, + "number_texts_intersect_with_train": 148, + "min_text_length": 3, + "average_text_length": 31.248401377274963, + "max_text_length": 146, + "unique_text": 2004, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + 
"audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + 
"email_querycontact": { + "count": 16 + } + } + }, + "tl": { + "num_samples": 2033, + "number_of_characters": 89203, + "number_texts_intersect_with_train": 100, + "min_text_length": 3, + "average_text_length": 43.877520905066405, + "max_text_length": 224, + "unique_text": 2014, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 
+ }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "tr": { + "num_samples": 2033, + "number_of_characters": 70353, + "number_texts_intersect_with_train": 113, + "min_text_length": 3, + "average_text_length": 34.60550909985243, + "max_text_length": 150, + "unique_text": 2018, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + 
"iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "ur": { + "num_samples": 2033, + "number_of_characters": 70462, + "number_texts_intersect_with_train": 92, + "min_text_length": 3, + "average_text_length": 34.65912444663059, + "max_text_length": 175, + "unique_text": 2019, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + 
"music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + 
"email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "vi": { + "num_samples": 2033, + "number_of_characters": 77911, + "number_texts_intersect_with_train": 103, + "min_text_length": 1, + "average_text_length": 38.32316773241515, + "max_text_length": 184, + "unique_text": 2020, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 
35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "zh-CN": { + "num_samples": 2033, + "number_of_characters": 21222, + "number_texts_intersect_with_train": 132, + "min_text_length": 2, + "average_text_length": 10.438760452533202, + "max_text_length": 45, + "unique_text": 2018, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + "weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + 
"iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + "social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + }, + "zh-TW": { + "num_samples": 2033, + "number_of_characters": 20048, + "number_texts_intersect_with_train": 177, + "min_text_length": 1, + "average_text_length": 9.861288735858338, + "max_text_length": 55, + "unique_text": 1989, + "unique_labels": 59, + "labels": { + "iot_hue_lightoff": { + "count": 17 + }, + "iot_hue_lightdim": { + "count": 17 + }, + "iot_cleaning": { + "count": 19 + }, + "general_quirky": { + "count": 105 + }, + "takeaway_query": { + "count": 24 + }, + "play_music": { + "count": 123 + }, + "music_query": { + "count": 30 + }, + 
"weather_query": { + "count": 126 + }, + "music_settings": { + "count": 8 + }, + "audio_volume_down": { + "count": 8 + }, + "datetime_query": { + "count": 64 + }, + "general_greet": { + "count": 2 + }, + "alarm_set": { + "count": 31 + }, + "audio_volume_up": { + "count": 12 + }, + "alarm_query": { + "count": 19 + }, + "news_query": { + "count": 82 + }, + "iot_hue_lighton": { + "count": 5 + }, + "iot_wemo_off": { + "count": 5 + }, + "iot_hue_lightchange": { + "count": 22 + }, + "audio_volume_mute": { + "count": 15 + }, + "alarm_remove": { + "count": 14 + }, + "general_joke": { + "count": 15 + }, + "datetime_convert": { + "count": 9 + }, + "iot_wemo_on": { + "count": 7 + }, + "iot_hue_lightup": { + "count": 12 + }, + "iot_coffee": { + "count": 14 + }, + "social_post": { + "count": 50 + }, + "music_dislikeness": { + "count": 2 + }, + "cooking_recipe": { + "count": 41 + }, + "takeaway_order": { + "count": 20 + }, + "music_likeness": { + "count": 16 + }, + "calendar_query": { + "count": 102 + }, + "qa_stock": { + "count": 24 + }, + "qa_factoid": { + "count": 90 + }, + "calendar_set": { + "count": 131 + }, + "recommendation_events": { + "count": 26 + }, + "cooking_query": { + "count": 2 + }, + "calendar_remove": { + "count": 47 + }, + "email_sendemail": { + "count": 63 + }, + "play_radio": { + "count": 46 + }, + "play_audiobook": { + "count": 35 + }, + "play_game": { + "count": 22 + }, + "lists_query": { + "count": 50 + }, + "lists_remove": { + "count": 37 + }, + "lists_createoradd": { + "count": 25 + }, + "email_addcontact": { + "count": 5 + }, + "play_podcasts": { + "count": 34 + }, + "recommendation_movies": { + "count": 12 + }, + "recommendation_locations": { + "count": 31 + }, + "transport_ticket": { + "count": 25 + }, + "transport_query": { + "count": 36 + }, + "transport_taxi": { + "count": 27 + }, + "transport_traffic": { + "count": 22 + }, + "qa_definition": { + "count": 55 + }, + "qa_currency": { + "count": 32 + }, + "qa_maths": { + "count": 13 + }, + 
"social_query": { + "count": 18 + }, + "email_query": { + "count": 73 + }, + "email_querycontact": { + "count": 16 + } + } + } + } + }, + "test": { + "num_samples": 151674, + "number_of_characters": 5230011, + "number_texts_intersect_with_train": 7273, + "min_text_length": 1, + "average_text_length": 34.48192175323391, + "max_text_length": 495, + "unique_text": 148972, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 2091 + }, + "audio_volume_mute": { + "count": 1632 + }, + "iot_hue_lightchange": { + "count": 1836 + }, + "iot_hue_lighton": { + "count": 153 + }, + "iot_hue_lightoff": { + "count": 2193 + }, + "iot_cleaning": { + "count": 1326 + }, + "general_quirky": { + "count": 8619 + }, + "general_greet": { + "count": 51 + }, + "datetime_query": { + "count": 4488 + }, + "datetime_convert": { + "count": 765 + }, + "alarm_remove": { + "count": 1071 + }, + "alarm_query": { + "count": 1734 + }, + "music_likeness": { + "count": 1836 + }, + "iot_hue_lightup": { + "count": 1377 + }, + "takeaway_order": { + "count": 1122 + }, + "weather_query": { + "count": 7956 + }, + "general_joke": { + "count": 969 + }, + "play_music": { + "count": 8976 + }, + "iot_hue_lightdim": { + "count": 1071 + }, + "takeaway_query": { + "count": 1785 + }, + "news_query": { + "count": 6324 + }, + "audio_volume_up": { + "count": 663 + }, + "iot_wemo_off": { + "count": 918 + }, + "iot_wemo_on": { + "count": 510 + }, + "iot_coffee": { + "count": 1836 + }, + "music_query": { + "count": 1785 + }, + "audio_volume_down": { + "count": 561 + }, + "audio_volume_other": { + "count": 306 + }, + "music_dislikeness": { + "count": 204 + }, + "music_settings": { + "count": 306 + }, + "recommendation_events": { + "count": 2193 + }, + "qa_stock": { + "count": 1326 + }, + "calendar_set": { + "count": 10659 + }, + "play_audiobook": { + "count": 2091 + }, + "social_query": { + "count": 1275 + }, + "qa_factoid": { + "count": 7191 + }, + "transport_ticket": { + "count": 1785 + }, + 
"recommendation_locations": { + "count": 1581 + }, + "calendar_query": { + "count": 6426 + }, + "recommendation_movies": { + "count": 1020 + }, + "transport_query": { + "count": 2601 + }, + "cooking_recipe": { + "count": 3672 + }, + "play_game": { + "count": 1785 + }, + "calendar_remove": { + "count": 3417 + }, + "email_query": { + "count": 6069 + }, + "email_sendemail": { + "count": 5814 + }, + "play_radio": { + "count": 3672 + }, + "play_podcasts": { + "count": 3213 + }, + "lists_query": { + "count": 2601 + }, + "lists_remove": { + "count": 2652 + }, + "lists_createoradd": { + "count": 1989 + }, + "transport_taxi": { + "count": 1173 + }, + "transport_traffic": { + "count": 765 + }, + "qa_definition": { + "count": 2907 + }, + "qa_maths": { + "count": 1275 + }, + "social_post": { + "count": 4131 + }, + "qa_currency": { + "count": 1989 + }, + "email_addcontact": { + "count": 612 + }, + "email_querycontact": { + "count": 1326 + } + }, + "hf_subset_descriptive_stats": { + "af": { + "num_samples": 2974, + "number_of_characters": 110262, + "number_texts_intersect_with_train": 80, + "min_text_length": 2, + "average_text_length": 37.075319435104234, + "max_text_length": 397, + "unique_text": 2961, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + 
"iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "am": { + "num_samples": 2974, + "number_of_characters": 70145, + "number_texts_intersect_with_train": 89, + "min_text_length": 2, + "average_text_length": 23.58607935440484, + "max_text_length": 209, + "unique_text": 2947, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + 
"audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 
+ }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "ar": { + "num_samples": 2974, + "number_of_characters": 79720, + "number_texts_intersect_with_train": 187, + "min_text_length": 1, + "average_text_length": 26.80564895763282, + "max_text_length": 247, + "unique_text": 2907, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + 
"play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "az": { + "num_samples": 2974, + "number_of_characters": 107751, + "number_texts_intersect_with_train": 182, + "min_text_length": 2, + "average_text_length": 36.23100201748487, + "max_text_length": 366, + "unique_text": 2907, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + 
"general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "bn": { + "num_samples": 2974, + "number_of_characters": 101528, + "number_texts_intersect_with_train": 130, + "min_text_length": 2, + "average_text_length": 34.13853396099529, + "max_text_length": 339, + "unique_text": 2943, + 
"unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, 
+ "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "cy": { + "num_samples": 2974, + "number_of_characters": 114977, + "number_texts_intersect_with_train": 125, + "min_text_length": 2, + "average_text_length": 38.66072629455279, + "max_text_length": 367, + "unique_text": 2950, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": 
{ + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "da": { + "num_samples": 2974, + "number_of_characters": 102537, + "number_texts_intersect_with_train": 125, + "min_text_length": 1, + "average_text_length": 34.4778076664425, + "max_text_length": 327, + "unique_text": 2941, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + 
"count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "de": { + "num_samples": 2974, + "number_of_characters": 117616, + "number_texts_intersect_with_train": 109, + "min_text_length": 3, + "average_text_length": 39.54808338937458, 
+ "max_text_length": 393, + "unique_text": 2955, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + 
"count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "el": { + "num_samples": 2974, + "number_of_characters": 119736, + "number_texts_intersect_with_train": 115, + "min_text_length": 2, + "average_text_length": 40.26092804303968, + "max_text_length": 409, + "unique_text": 2946, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + 
"recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "en": { + "num_samples": 2974, + "number_of_characters": 102785, + "number_texts_intersect_with_train": 21, + "min_text_length": 2, + "average_text_length": 34.56119704102219, + "max_text_length": 365, + "unique_text": 2970, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + 
"iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "es": { + "num_samples": 2974, + "number_of_characters": 116394, + "number_texts_intersect_with_train": 119, + 
"min_text_length": 1, + "average_text_length": 39.137188971082715, + "max_text_length": 457, + "unique_text": 2944, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + 
"email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "fa": { + "num_samples": 2974, + "number_of_characters": 98823, + "number_texts_intersect_with_train": 177, + "min_text_length": 2, + "average_text_length": 33.228984532616, + "max_text_length": 306, + "unique_text": 2921, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + 
"count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "fi": { + "num_samples": 2974, + "number_of_characters": 108496, + "number_texts_intersect_with_train": 161, + "min_text_length": 2, + "average_text_length": 36.48150638870209, + "max_text_length": 278, + "unique_text": 2930, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + 
"count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "fr": { + "num_samples": 2974, + 
"number_of_characters": 125811, + "number_texts_intersect_with_train": 132, + "min_text_length": 2, + "average_text_length": 42.30363147276395, + "max_text_length": 479, + "unique_text": 2943, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + 
"calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "he": { + "num_samples": 2974, + "number_of_characters": 83417, + "number_texts_intersect_with_train": 136, + "min_text_length": 1, + "average_text_length": 28.048755884330866, + "max_text_length": 283, + "unique_text": 2946, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + 
"count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "hi": { + "num_samples": 2974, + "number_of_characters": 105274, + "number_texts_intersect_with_train": 115, + "min_text_length": 2, + "average_text_length": 35.39811701412239, + "max_text_length": 405, + "unique_text": 2945, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": 
{ + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + 
"count": 26 + } + } + }, + "hu": { + "num_samples": 2974, + "number_of_characters": 108969, + "number_texts_intersect_with_train": 152, + "min_text_length": 2, + "average_text_length": 36.640551445864155, + "max_text_length": 426, + "unique_text": 2920, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { 
+ "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "hy": { + "num_samples": 2974, + "number_of_characters": 109134, + "number_texts_intersect_with_train": 104, + "min_text_length": 2, + "average_text_length": 36.6960322797579, + "max_text_length": 359, + "unique_text": 2938, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + 
"music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "id": { + "num_samples": 2974, + "number_of_characters": 112577, + "number_texts_intersect_with_train": 166, + "min_text_length": 2, + "average_text_length": 37.853732347007394, + "max_text_length": 437, + "unique_text": 2913, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + 
}, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + 
"email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "is": { + "num_samples": 2974, + "number_of_characters": 109732, + "number_texts_intersect_with_train": 129, + "min_text_length": 2, + "average_text_length": 36.89710827168796, + "max_text_length": 349, + "unique_text": 2940, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 
+ }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "it": { + "num_samples": 2974, + "number_of_characters": 113060, + "number_texts_intersect_with_train": 197, + "min_text_length": 3, + "average_text_length": 38.01613987895091, + "max_text_length": 445, + "unique_text": 2918, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + 
"count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "ja": { + "num_samples": 2974, + "number_of_characters": 46059, + "number_texts_intersect_with_train": 171, + "min_text_length": 1, + "average_text_length": 15.48722259583053, + "max_text_length": 132, + "unique_text": 2919, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + 
"count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + 
"count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "jv": { + "num_samples": 2974, + "number_of_characters": 101977, + "number_texts_intersect_with_train": 144, + "min_text_length": 1, + "average_text_length": 34.28950907868191, + "max_text_length": 361, + "unique_text": 2932, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + 
"count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "ka": { + "num_samples": 2974, + "number_of_characters": 93772, + "number_texts_intersect_with_train": 227, + "min_text_length": 2, + "average_text_length": 31.530598520511095, + "max_text_length": 327, + "unique_text": 2880, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + 
}, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "km": { + "num_samples": 2974, + "number_of_characters": 93992, + "number_texts_intersect_with_train": 267, + "min_text_length": 2, + "average_text_length": 31.604572965702758, + "max_text_length": 389, + "unique_text": 2786, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, 
+ "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + 
}, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "kn": { + "num_samples": 2974, + "number_of_characters": 114246, + "number_texts_intersect_with_train": 101, + "min_text_length": 2, + "average_text_length": 38.41492938802959, + "max_text_length": 337, + "unique_text": 2950, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + 
"recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "ko": { + "num_samples": 2974, + "number_of_characters": 45762, + "number_texts_intersect_with_train": 155, + "min_text_length": 1, + "average_text_length": 15.38735709482179, + "max_text_length": 157, + "unique_text": 2919, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + 
"news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "lv": { + "num_samples": 2974, + "number_of_characters": 108878, + "number_texts_intersect_with_train": 135, + "min_text_length": 2, + "average_text_length": 36.60995292535306, + "max_text_length": 317, + "unique_text": 2946, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + 
"iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, 
+ "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "ml": { + "num_samples": 2974, + "number_of_characters": 127789, + "number_texts_intersect_with_train": 133, + "min_text_length": 2, + "average_text_length": 42.96872898453262, + "max_text_length": 168, + "unique_text": 2937, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + 
"count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "mn": { + "num_samples": 2974, + "number_of_characters": 111998, + "number_texts_intersect_with_train": 163, + "min_text_length": 2, + "average_text_length": 37.65904505716207, + "max_text_length": 167, + "unique_text": 2931, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + 
"count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "ms": { + "num_samples": 2974, + "number_of_characters": 117326, + "number_texts_intersect_with_train": 136, + "min_text_length": 2, + "average_text_length": 39.45057162071284, + "max_text_length": 322, + "unique_text": 2930, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 
+ }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + 
"count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "my": { + "num_samples": 2974, + "number_of_characters": 120671, + "number_texts_intersect_with_train": 66, + "min_text_length": 3, + "average_text_length": 40.575319435104234, + "max_text_length": 191, + "unique_text": 2962, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + 
"social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "nb": { + "num_samples": 2974, + "number_of_characters": 102940, + "number_texts_intersect_with_train": 121, + "min_text_length": 2, + "average_text_length": 34.613315400134496, + "max_text_length": 317, + "unique_text": 2951, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + 
"play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "nl": { + "num_samples": 2974, + "number_of_characters": 117126, + "number_texts_intersect_with_train": 124, + "min_text_length": 1, + "average_text_length": 39.383322125084064, + "max_text_length": 406, + "unique_text": 2953, + "unique_labels": 59, + "labels": { + 
"alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + 
}, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "pl": { + "num_samples": 2974, + "number_of_characters": 106914, + "number_texts_intersect_with_train": 207, + "min_text_length": 2, + "average_text_length": 35.949562878278414, + "max_text_length": 389, + "unique_text": 2906, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + 
"calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "pt": { + "num_samples": 2974, + "number_of_characters": 113079, + "number_texts_intersect_with_train": 164, + "min_text_length": 2, + "average_text_length": 38.022528581035644, + "max_text_length": 439, + "unique_text": 2932, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + 
"weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "ro": { + "num_samples": 2974, + "number_of_characters": 109519, + "number_texts_intersect_with_train": 106, + "min_text_length": 3, + "average_text_length": 36.82548755884331, + 
"max_text_length": 403, + "unique_text": 2937, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + 
"count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "ru": { + "num_samples": 2974, + "number_of_characters": 108199, + "number_texts_intersect_with_train": 167, + "min_text_length": 3, + "average_text_length": 36.381640887693344, + "max_text_length": 416, + "unique_text": 2913, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + 
"recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "sl": { + "num_samples": 2974, + "number_of_characters": 104437, + "number_texts_intersect_with_train": 163, + "min_text_length": 2, + "average_text_length": 35.116677874915936, + "max_text_length": 316, + "unique_text": 2933, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + 
"iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "sq": { + "num_samples": 2974, + "number_of_characters": 110202, + "number_texts_intersect_with_train": 168, + 
"min_text_length": 1, + "average_text_length": 37.0551445864156, + "max_text_length": 406, + "unique_text": 2927, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + 
"email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "sv": { + "num_samples": 2974, + "number_of_characters": 103142, + "number_texts_intersect_with_train": 150, + "min_text_length": 2, + "average_text_length": 34.681237390719566, + "max_text_length": 352, + "unique_text": 2932, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { 
+ "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "sw": { + "num_samples": 2974, + "number_of_characters": 112209, + "number_texts_intersect_with_train": 143, + "min_text_length": 3, + "average_text_length": 37.72999327505044, + "max_text_length": 407, + "unique_text": 2935, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + 
"count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "ta": { + "num_samples": 2974, + 
"number_of_characters": 125008, + "number_texts_intersect_with_train": 129, + "min_text_length": 3, + "average_text_length": 42.03362474781439, + "max_text_length": 408, + "unique_text": 2946, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + 
"calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "te": { + "num_samples": 2974, + "number_of_characters": 113476, + "number_texts_intersect_with_train": 78, + "min_text_length": 2, + "average_text_length": 38.15601882985877, + "max_text_length": 390, + "unique_text": 2963, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + 
"count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "th": { + "num_samples": 2974, + "number_of_characters": 91732, + "number_texts_intersect_with_train": 190, + "min_text_length": 3, + "average_text_length": 30.84465366509751, + "max_text_length": 300, + "unique_text": 2906, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": 
{ + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + 
"count": 26 + } + } + }, + "tl": { + "num_samples": 2974, + "number_of_characters": 130918, + "number_texts_intersect_with_train": 131, + "min_text_length": 2, + "average_text_length": 44.020847343644924, + "max_text_length": 495, + "unique_text": 2943, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { 
+ "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "tr": { + "num_samples": 2974, + "number_of_characters": 103538, + "number_texts_intersect_with_train": 129, + "min_text_length": 2, + "average_text_length": 34.81439139206456, + "max_text_length": 366, + "unique_text": 2937, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + 
"music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "ur": { + "num_samples": 2974, + "number_of_characters": 103680, + "number_texts_intersect_with_train": 101, + "min_text_length": 1, + "average_text_length": 34.862138533960994, + "max_text_length": 137, + "unique_text": 2943, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + 
}, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + 
"email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "vi": { + "num_samples": 2974, + "number_of_characters": 112612, + "number_texts_intersect_with_train": 143, + "min_text_length": 1, + "average_text_length": 37.865501008742434, + "max_text_length": 426, + "unique_text": 2935, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 
+ }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "zh-CN": { + "num_samples": 2974, + "number_of_characters": 31013, + "number_texts_intersect_with_train": 187, + "min_text_length": 1, + "average_text_length": 10.428043039677203, + "max_text_length": 87, + "unique_text": 2921, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { + "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { 
+ "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + "count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + }, + "zh-TW": { + "num_samples": 2974, + "number_of_characters": 29053, + "number_texts_intersect_with_train": 225, + "min_text_length": 1, + "average_text_length": 9.768997982515131, + "max_text_length": 90, + "unique_text": 2880, + "unique_labels": 59, + "labels": { + "alarm_set": { + "count": 41 + }, + "audio_volume_mute": { + "count": 32 + }, + "iot_hue_lightchange": { + "count": 36 + }, + "iot_hue_lighton": { + "count": 3 + }, + "iot_hue_lightoff": { + "count": 43 + }, + "iot_cleaning": { + "count": 26 + }, + "general_quirky": { 
+ "count": 169 + }, + "general_greet": { + "count": 1 + }, + "datetime_query": { + "count": 88 + }, + "datetime_convert": { + "count": 15 + }, + "alarm_remove": { + "count": 21 + }, + "alarm_query": { + "count": 34 + }, + "music_likeness": { + "count": 36 + }, + "iot_hue_lightup": { + "count": 27 + }, + "takeaway_order": { + "count": 22 + }, + "weather_query": { + "count": 156 + }, + "general_joke": { + "count": 19 + }, + "play_music": { + "count": 176 + }, + "iot_hue_lightdim": { + "count": 21 + }, + "takeaway_query": { + "count": 35 + }, + "news_query": { + "count": 124 + }, + "audio_volume_up": { + "count": 13 + }, + "iot_wemo_off": { + "count": 18 + }, + "iot_wemo_on": { + "count": 10 + }, + "iot_coffee": { + "count": 36 + }, + "music_query": { + "count": 35 + }, + "audio_volume_down": { + "count": 11 + }, + "audio_volume_other": { + "count": 6 + }, + "music_dislikeness": { + "count": 4 + }, + "music_settings": { + "count": 6 + }, + "recommendation_events": { + "count": 43 + }, + "qa_stock": { + "count": 26 + }, + "calendar_set": { + "count": 209 + }, + "play_audiobook": { + "count": 41 + }, + "social_query": { + "count": 25 + }, + "qa_factoid": { + "count": 141 + }, + "transport_ticket": { + "count": 35 + }, + "recommendation_locations": { + "count": 31 + }, + "calendar_query": { + "count": 126 + }, + "recommendation_movies": { + "count": 20 + }, + "transport_query": { + "count": 51 + }, + "cooking_recipe": { + "count": 72 + }, + "play_game": { + "count": 35 + }, + "calendar_remove": { + "count": 67 + }, + "email_query": { + "count": 119 + }, + "email_sendemail": { + "count": 114 + }, + "play_radio": { + "count": 72 + }, + "play_podcasts": { + "count": 63 + }, + "lists_query": { + "count": 51 + }, + "lists_remove": { + "count": 52 + }, + "lists_createoradd": { + "count": 39 + }, + "transport_taxi": { + "count": 23 + }, + "transport_traffic": { + "count": 15 + }, + "qa_definition": { + "count": 57 + }, + "qa_maths": { + "count": 25 + }, + "social_post": { + 
"count": 81 + }, + "qa_currency": { + "count": 39 + }, + "email_addcontact": { + "count": 12 + }, + "email_querycontact": { + "count": 26 + } + } + } + } + }, + "train": { + "num_samples": 587214, + "number_of_characters": 20507758, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 34.92382334208653, + "max_text_length": 295, + "unique_text": 565055, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 9282 + }, + "audio_volume_mute": { + "count": 5610 + }, + "iot_hue_lightchange": { + "count": 6375 + }, + "iot_hue_lightoff": { + "count": 7803 + }, + "iot_hue_lightdim": { + "count": 3876 + }, + "iot_cleaning": { + "count": 4743 + }, + "calendar_query": { + "count": 28866 + }, + "play_music": { + "count": 32589 + }, + "general_quirky": { + "count": 28305 + }, + "general_greet": { + "count": 1275 + }, + "datetime_query": { + "count": 17850 + }, + "datetime_convert": { + "count": 2652 + }, + "takeaway_query": { + "count": 6222 + }, + "alarm_remove": { + "count": 3978 + }, + "alarm_query": { + "count": 6630 + }, + "news_query": { + "count": 25653 + }, + "music_likeness": { + "count": 5763 + }, + "music_query": { + "count": 7854 + }, + "iot_hue_lightup": { + "count": 3876 + }, + "takeaway_order": { + "count": 6885 + }, + "weather_query": { + "count": 29223 + }, + "music_settings": { + "count": 2601 + }, + "general_joke": { + "count": 3672 + }, + "music_dislikeness": { + "count": 714 + }, + "audio_volume_other": { + "count": 918 + }, + "iot_coffee": { + "count": 6324 + }, + "audio_volume_up": { + "count": 5610 + }, + "iot_wemo_on": { + "count": 2448 + }, + "iot_hue_lighton": { + "count": 1122 + }, + "iot_wemo_off": { + "count": 2652 + }, + "audio_volume_down": { + "count": 2652 + }, + "qa_stock": { + "count": 7752 + }, + "play_radio": { + "count": 14433 + }, + "recommendation_locations": { + "count": 8823 + }, + "qa_factoid": { + "count": 27744 + }, + "calendar_set": { + "count": 41310 + }, + "play_audiobook": { + 
"count": 7650 + }, + "play_podcasts": { + "count": 9843 + }, + "social_query": { + "count": 5508 + }, + "transport_query": { + "count": 11577 + }, + "email_sendemail": { + "count": 18054 + }, + "recommendation_movies": { + "count": 3570 + }, + "lists_query": { + "count": 10098 + }, + "play_game": { + "count": 5712 + }, + "transport_ticket": { + "count": 6477 + }, + "recommendation_events": { + "count": 9690 + }, + "email_query": { + "count": 21318 + }, + "transport_traffic": { + "count": 5967 + }, + "cooking_query": { + "count": 204 + }, + "qa_definition": { + "count": 13617 + }, + "calendar_remove": { + "count": 15912 + }, + "lists_remove": { + "count": 8364 + }, + "cooking_recipe": { + "count": 10557 + }, + "email_querycontact": { + "count": 6477 + }, + "lists_createoradd": { + "count": 9027 + }, + "transport_taxi": { + "count": 5100 + }, + "qa_maths": { + "count": 3978 + }, + "social_post": { + "count": 14433 + }, + "qa_currency": { + "count": 7242 + }, + "email_addcontact": { + "count": 2754 + } + }, + "hf_subset_descriptive_stats": { + "af": { + "num_samples": 11514, + "number_of_characters": 432629, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.574170574952234, + "max_text_length": 192, + "unique_text": 11315, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + 
"music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "am": { + "num_samples": 11514, + "number_of_characters": 276957, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 
24.053934340802503, + "max_text_length": 114, + "unique_text": 11250, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + 
"count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "ar": { + "num_samples": 11514, + "number_of_characters": 310505, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 26.96760465520236, + "max_text_length": 152, + "unique_text": 10991, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + 
"iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "az": { + "num_samples": 11514, + "number_of_characters": 421607, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 36.61690116380059, + "max_text_length": 208, + "unique_text": 10971, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": 
{ + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + 
"count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "bn": { + "num_samples": 11514, + "number_of_characters": 398927, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 34.64712523883967, + "max_text_length": 295, + "unique_text": 11209, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { 
+ "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "cy": { + "num_samples": 11514, + "number_of_characters": 448116, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.91922876498176, + "max_text_length": 189, + "unique_text": 11214, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + 
}, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "da": { + "num_samples": 11514, + "number_of_characters": 402298, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 34.939899253083205, + "max_text_length": 184, + "unique_text": 11206, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + 
"audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + 
"qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "de": { + "num_samples": 11514, + "number_of_characters": 455798, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 39.58641653639048, + "max_text_length": 200, + "unique_text": 11267, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 
52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "el": { + "num_samples": 11514, + "number_of_characters": 475367, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 41.285999652596836, + "max_text_length": 209, + "unique_text": 11209, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + 
"takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "en": 
{ + "num_samples": 11514, + "number_of_characters": 403436, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.03873545249262, + "max_text_length": 189, + "unique_text": 11468, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, 
+ "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "es": { + "num_samples": 11514, + "number_of_characters": 456222, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 39.62324127149557, + "max_text_length": 205, + "unique_text": 11170, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + 
"count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "fa": { + "num_samples": 11514, + "number_of_characters": 387146, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 33.62393607781831, + "max_text_length": 175, + "unique_text": 11032, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 
76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + 
"email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "fi": { + "num_samples": 11514, + "number_of_characters": 426132, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.00990099009901, + "max_text_length": 230, + "unique_text": 11100, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 
544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "fr": { + "num_samples": 11514, + "number_of_characters": 492591, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 42.781917665450756, + "max_text_length": 221, + "unique_text": 11189, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + 
"music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "he": { + "num_samples": 11514, + "number_of_characters": 326685, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 
28.37285044293903, + "max_text_length": 137, + "unique_text": 11167, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + 
"count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "hi": { + "num_samples": 11514, + "number_of_characters": 412758, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.84835852006253, + "max_text_length": 178, + "unique_text": 11212, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + 
"iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "hu": { + "num_samples": 11514, + "number_of_characters": 426261, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.021104742053154, + "max_text_length": 214, + "unique_text": 11139, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + 
"general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + 
"qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "hy": { + "num_samples": 11514, + "number_of_characters": 427568, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.1346187250304, + "max_text_length": 202, + "unique_text": 11174, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + 
"social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "id": { + "num_samples": 11514, + "number_of_characters": 440413, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 38.250217126975855, + "max_text_length": 190, + "unique_text": 10998, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": 
{ + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "is": { + "num_samples": 11514, + "number_of_characters": 431245, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.453969081118636, + "max_text_length": 183, + "unique_text": 11151, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + 
"audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + 
"qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "it": { + "num_samples": 11514, + "number_of_characters": 444123, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.57243355914539, + "max_text_length": 176, + "unique_text": 10961, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 
52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "ja": { + "num_samples": 11514, + "number_of_characters": 179863, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 15.6212437033177, + "max_text_length": 75, + "unique_text": 11025, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + 
"takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "jv": 
{ + "num_samples": 11514, + "number_of_characters": 402160, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 34.92791384401598, + "max_text_length": 198, + "unique_text": 11152, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, 
+ "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "ka": { + "num_samples": 11514, + "number_of_characters": 363520, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 31.57199930519368, + "max_text_length": 177, + "unique_text": 10732, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + 
"count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "km": { + "num_samples": 11514, + "number_of_characters": 367358, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 31.90533263852701, + "max_text_length": 190, + "unique_text": 10320, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 
76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + 
"email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "kn": { + "num_samples": 11514, + "number_of_characters": 445768, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.71530310925829, + "max_text_length": 214, + "unique_text": 11279, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 
544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "ko": { + "num_samples": 11514, + "number_of_characters": 179441, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 15.584592669793295, + "max_text_length": 96, + "unique_text": 11100, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + 
"music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "lv": { + "num_samples": 11514, + "number_of_characters": 425766, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 
36.978113600833765, + "max_text_length": 208, + "unique_text": 11169, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + 
"count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "ml": { + "num_samples": 11514, + "number_of_characters": 499847, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 43.4121070001737, + "max_text_length": 207, + "unique_text": 11195, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + 
"iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "mn": { + "num_samples": 11514, + "number_of_characters": 442389, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.421834288692025, + "max_text_length": 171, + "unique_text": 11082, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + 
"general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + 
"qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "ms": { + "num_samples": 11514, + "number_of_characters": 460564, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 40.00034740316137, + "max_text_length": 200, + "unique_text": 11151, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + 
"social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "my": { + "num_samples": 11514, + "number_of_characters": 473258, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 41.10283133576515, + "max_text_length": 202, + "unique_text": 11342, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { 
+ "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "nb": { + "num_samples": 11514, + "number_of_characters": 404200, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 35.105089456314055, + "max_text_length": 176, + "unique_text": 11222, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + 
"audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + 
"qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "nl": { + "num_samples": 11514, + "number_of_characters": 458912, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 39.85686989751607, + "max_text_length": 199, + "unique_text": 11260, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 
52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "pl": { + "num_samples": 11514, + "number_of_characters": 418669, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 36.36173354177523, + "max_text_length": 197, + "unique_text": 10912, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + 
"takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "pt": 
{ + "num_samples": 11514, + "number_of_characters": 443661, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 38.532308494007296, + "max_text_length": 199, + "unique_text": 11078, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + 
}, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "ro": { + "num_samples": 11514, + "number_of_characters": 430447, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.38466215042557, + "max_text_length": 187, + "unique_text": 11224, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + 
"count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "ru": { + "num_samples": 11514, + "number_of_characters": 424549, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 36.87241618898732, + "max_text_length": 203, + "unique_text": 11002, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 
76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + 
"email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "sl": { + "num_samples": 11514, + "number_of_characters": 410081, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.61585895431649, + "max_text_length": 167, + "unique_text": 10987, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 
544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "sq": { + "num_samples": 11514, + "number_of_characters": 434272, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.716866423484454, + "max_text_length": 202, + "unique_text": 11011, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + 
"music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "sv": { + "num_samples": 11514, + "number_of_characters": 404954, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 
35.170574952232066, + "max_text_length": 200, + "unique_text": 11107, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + 
"count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "sw": { + "num_samples": 11514, + "number_of_characters": 439885, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 38.204359909675176, + "max_text_length": 221, + "unique_text": 11119, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + 
"iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "ta": { + "num_samples": 11514, + "number_of_characters": 491066, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 42.64947021017891, + "max_text_length": 211, + "unique_text": 11196, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": 
{ + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + 
"count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "te": { + "num_samples": 11514, + "number_of_characters": 443490, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.51745700885878, + "max_text_length": 215, + "unique_text": 11351, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { 
+ "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "th": { + "num_samples": 11514, + "number_of_characters": 361003, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 31.353395865902378, + "max_text_length": 141, + "unique_text": 10912, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + 
}, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "tl": { + "num_samples": 11514, + "number_of_characters": 515338, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 44.7575125933646, + "max_text_length": 253, + "unique_text": 11173, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": 
{ + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + 
"count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "tr": { + "num_samples": 11514, + "number_of_characters": 403389, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.03465346534654, + "max_text_length": 206, + "unique_text": 11131, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + 
"qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "ur": { + "num_samples": 11514, + "number_of_characters": 407108, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.35765155462915, + "max_text_length": 184, + "unique_text": 11242, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + 
"takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "vi": 
{ + "num_samples": 11514, + "number_of_characters": 443651, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.53143998610388, + "max_text_length": 166, + "unique_text": 11126, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, 
+ "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "zh-CN": { + "num_samples": 11514, + "number_of_characters": 121615, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 10.562358867465694, + "max_text_length": 58, + "unique_text": 11022, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + 
"count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + "email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + }, + "zh-TW": { + "num_samples": 11514, + "number_of_characters": 114750, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 9.966128191766545, + "max_text_length": 74, + "unique_text": 10755, + "unique_labels": 60, + "labels": { + "alarm_set": { + "count": 182 + }, + "audio_volume_mute": { + "count": 110 + }, + "iot_hue_lightchange": { + "count": 125 + }, + "iot_hue_lightoff": { + "count": 153 + }, + "iot_hue_lightdim": { + "count": 
76 + }, + "iot_cleaning": { + "count": 93 + }, + "calendar_query": { + "count": 566 + }, + "play_music": { + "count": 639 + }, + "general_quirky": { + "count": 555 + }, + "general_greet": { + "count": 25 + }, + "datetime_query": { + "count": 350 + }, + "datetime_convert": { + "count": 52 + }, + "takeaway_query": { + "count": 122 + }, + "alarm_remove": { + "count": 78 + }, + "alarm_query": { + "count": 130 + }, + "news_query": { + "count": 503 + }, + "music_likeness": { + "count": 113 + }, + "music_query": { + "count": 154 + }, + "iot_hue_lightup": { + "count": 76 + }, + "takeaway_order": { + "count": 135 + }, + "weather_query": { + "count": 573 + }, + "music_settings": { + "count": 51 + }, + "general_joke": { + "count": 72 + }, + "music_dislikeness": { + "count": 14 + }, + "audio_volume_other": { + "count": 18 + }, + "iot_coffee": { + "count": 124 + }, + "audio_volume_up": { + "count": 110 + }, + "iot_wemo_on": { + "count": 48 + }, + "iot_hue_lighton": { + "count": 22 + }, + "iot_wemo_off": { + "count": 52 + }, + "audio_volume_down": { + "count": 52 + }, + "qa_stock": { + "count": 152 + }, + "play_radio": { + "count": 283 + }, + "recommendation_locations": { + "count": 173 + }, + "qa_factoid": { + "count": 544 + }, + "calendar_set": { + "count": 810 + }, + "play_audiobook": { + "count": 150 + }, + "play_podcasts": { + "count": 193 + }, + "social_query": { + "count": 108 + }, + "transport_query": { + "count": 227 + }, + "email_sendemail": { + "count": 354 + }, + "recommendation_movies": { + "count": 70 + }, + "lists_query": { + "count": 198 + }, + "play_game": { + "count": 112 + }, + "transport_ticket": { + "count": 127 + }, + "recommendation_events": { + "count": 190 + }, + "email_query": { + "count": 418 + }, + "transport_traffic": { + "count": 117 + }, + "cooking_query": { + "count": 4 + }, + "qa_definition": { + "count": 267 + }, + "calendar_remove": { + "count": 312 + }, + "lists_remove": { + "count": 164 + }, + "cooking_recipe": { + "count": 207 + }, + 
"email_querycontact": { + "count": 127 + }, + "lists_createoradd": { + "count": 177 + }, + "transport_taxi": { + "count": 100 + }, + "qa_maths": { + "count": 78 + }, + "social_post": { + "count": 283 + }, + "qa_currency": { + "count": 142 + }, + "email_addcontact": { + "count": 54 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MassiveScenarioClassification.json b/mteb/descriptive_stats/Classification/MassiveScenarioClassification.json new file mode 100644 index 0000000000..e399ad4a29 --- /dev/null +++ b/mteb/descriptive_stats/Classification/MassiveScenarioClassification.json @@ -0,0 +1,10304 @@ +{ + "validation": { + "num_samples": 103683, + "number_of_characters": 3583467, + "number_texts_intersect_with_train": 5457, + "min_text_length": 1, + "average_text_length": 34.56176036573016, + "max_text_length": 224, + "unique_text": 102325, + "unique_labels": 18, + "labels": { + "iot": { + "count": 6018 + }, + "general": { + "count": 6222 + }, + "takeaway": { + "count": 2244 + }, + "play": { + "count": 13260 + }, + "music": { + "count": 2856 + }, + "weather": { + "count": 6426 + }, + "audio": { + "count": 1785 + }, + "datetime": { + "count": 3723 + }, + "alarm": { + "count": 3264 + }, + "news": { + "count": 4182 + }, + "social": { + "count": 3468 + }, + "cooking": { + "count": 2193 + }, + "calendar": { + "count": 14280 + }, + "qa": { + "count": 10914 + }, + "recommendation": { + "count": 3519 + }, + "email": { + "count": 8007 + }, + "lists": { + "count": 5712 + }, + "transport": { + "count": 5610 + } + }, + "hf_subset_descriptive_stats": { + "af": { + "num_samples": 2033, + "number_of_characters": 75759, + "number_texts_intersect_with_train": 66, + "min_text_length": 2, + "average_text_length": 37.26463354648303, + "max_text_length": 161, + "unique_text": 2024, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + 
"count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "am": { + "num_samples": 2033, + "number_of_characters": 48678, + "number_texts_intersect_with_train": 68, + "min_text_length": 2, + "average_text_length": 23.94392523364486, + "max_text_length": 104, + "unique_text": 2023, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "ar": { + "num_samples": 2033, + "number_of_characters": 54567, + "number_texts_intersect_with_train": 129, + "min_text_length": 2, + "average_text_length": 26.840629611411707, + "max_text_length": 126, + "unique_text": 2007, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 
68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "az": { + "num_samples": 2033, + "number_of_characters": 73329, + "number_texts_intersect_with_train": 144, + "min_text_length": 3, + "average_text_length": 36.06935563207083, + "max_text_length": 158, + "unique_text": 2005, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "bn": { + "num_samples": 2033, + "number_of_characters": 69343, + "number_texts_intersect_with_train": 85, + "min_text_length": 2, + "average_text_length": 34.10870634530251, + "max_text_length": 153, + "unique_text": 2017, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 
110 + } + } + }, + "cy": { + "num_samples": 2033, + "number_of_characters": 78652, + "number_texts_intersect_with_train": 72, + "min_text_length": 3, + "average_text_length": 38.68765371372356, + "max_text_length": 166, + "unique_text": 2024, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "da": { + "num_samples": 2033, + "number_of_characters": 70289, + "number_texts_intersect_with_train": 91, + "min_text_length": 3, + "average_text_length": 34.574028529267096, + "max_text_length": 154, + "unique_text": 2020, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "de": { + "num_samples": 2033, + "number_of_characters": 79734, + "number_texts_intersect_with_train": 85, + "min_text_length": 4, + "average_text_length": 39.219872110181996, + "max_text_length": 156, + "unique_text": 
2022, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "el": { + "num_samples": 2033, + "number_of_characters": 83301, + "number_texts_intersect_with_train": 89, + "min_text_length": 2, + "average_text_length": 40.97442203639941, + "max_text_length": 179, + "unique_text": 2020, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "en": { + "num_samples": 2033, + "number_of_characters": 70729, + "number_texts_intersect_with_train": 11, + "min_text_length": 3, + "average_text_length": 34.790457452041316, + "max_text_length": 153, + "unique_text": 2031, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, 
+ "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "es": { + "num_samples": 2033, + "number_of_characters": 79868, + "number_texts_intersect_with_train": 99, + "min_text_length": 3, + "average_text_length": 39.28578455484506, + "max_text_length": 145, + "unique_text": 2015, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "fa": { + "num_samples": 2033, + "number_of_characters": 67524, + "number_texts_intersect_with_train": 116, + "min_text_length": 2, + "average_text_length": 33.21396950319725, + "max_text_length": 147, + "unique_text": 2003, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { 
+ "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "fi": { + "num_samples": 2033, + "number_of_characters": 74125, + "number_texts_intersect_with_train": 129, + "min_text_length": 3, + "average_text_length": 36.460895228726024, + "max_text_length": 166, + "unique_text": 2013, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "fr": { + "num_samples": 2033, + "number_of_characters": 85978, + "number_texts_intersect_with_train": 90, + "min_text_length": 2, + "average_text_length": 42.29119527791441, + "max_text_length": 170, + "unique_text": 2016, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "he": { + "num_samples": 2033, + "number_of_characters": 57225, + 
"number_texts_intersect_with_train": 91, + "min_text_length": 3, + "average_text_length": 28.148057058534185, + "max_text_length": 116, + "unique_text": 2017, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "hi": { + "num_samples": 2033, + "number_of_characters": 71207, + "number_texts_intersect_with_train": 101, + "min_text_length": 3, + "average_text_length": 35.02557796360059, + "max_text_length": 169, + "unique_text": 2013, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "hu": { + "num_samples": 2033, + "number_of_characters": 74798, + "number_texts_intersect_with_train": 112, + "min_text_length": 2, + "average_text_length": 36.79193310378751, + "max_text_length": 160, + "unique_text": 2018, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { 
+ "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "hy": { + "num_samples": 2033, + "number_of_characters": 74198, + "number_texts_intersect_with_train": 90, + "min_text_length": 3, + "average_text_length": 36.496802754549925, + "max_text_length": 172, + "unique_text": 2016, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "id": { + "num_samples": 2033, + "number_of_characters": 77036, + "number_texts_intersect_with_train": 147, + "min_text_length": 3, + "average_text_length": 37.89276930644368, + "max_text_length": 172, + "unique_text": 2008, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + 
"count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "is": { + "num_samples": 2033, + "number_of_characters": 75521, + "number_texts_intersect_with_train": 99, + "min_text_length": 2, + "average_text_length": 37.14756517461879, + "max_text_length": 182, + "unique_text": 2014, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "it": { + "num_samples": 2033, + "number_of_characters": 77604, + "number_texts_intersect_with_train": 143, + "min_text_length": 3, + "average_text_length": 38.17215937038859, + "max_text_length": 173, + "unique_text": 2001, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + 
"count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "ja": { + "num_samples": 2033, + "number_of_characters": 31479, + "number_texts_intersect_with_train": 134, + "min_text_length": 1, + "average_text_length": 15.48401377274963, + "max_text_length": 63, + "unique_text": 2015, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "jv": { + "num_samples": 2033, + "number_of_characters": 69773, + "number_texts_intersect_with_train": 103, + "min_text_length": 3, + "average_text_length": 34.32021642892278, + "max_text_length": 169, + "unique_text": 2019, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "ka": { + "num_samples": 2033, + "number_of_characters": 63256, + "number_texts_intersect_with_train": 170, + "min_text_length": 2, + 
"average_text_length": 31.11460895228726, + "max_text_length": 147, + "unique_text": 1989, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "km": { + "num_samples": 2033, + "number_of_characters": 64843, + "number_texts_intersect_with_train": 196, + "min_text_length": 2, + "average_text_length": 31.895228726020658, + "max_text_length": 147, + "unique_text": 1932, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "kn": { + "num_samples": 2033, + "number_of_characters": 77652, + "number_texts_intersect_with_train": 91, + "min_text_length": 2, + "average_text_length": 38.195769798327596, + "max_text_length": 164, + "unique_text": 2025, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { 
+ "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "ko": { + "num_samples": 2033, + "number_of_characters": 31722, + "number_texts_intersect_with_train": 116, + "min_text_length": 1, + "average_text_length": 15.60354156419085, + "max_text_length": 69, + "unique_text": 2014, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "lv": { + "num_samples": 2033, + "number_of_characters": 74498, + "number_texts_intersect_with_train": 110, + "min_text_length": 3, + "average_text_length": 36.644367929168716, + "max_text_length": 132, + "unique_text": 2021, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 
68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "ml": { + "num_samples": 2033, + "number_of_characters": 87266, + "number_texts_intersect_with_train": 107, + "min_text_length": 4, + "average_text_length": 42.924741760944414, + "max_text_length": 196, + "unique_text": 2013, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "mn": { + "num_samples": 2033, + "number_of_characters": 78032, + "number_texts_intersect_with_train": 110, + "min_text_length": 3, + "average_text_length": 38.382685686178064, + "max_text_length": 155, + "unique_text": 2010, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + 
"count": 110 + } + } + }, + "ms": { + "num_samples": 2033, + "number_of_characters": 80301, + "number_texts_intersect_with_train": 83, + "min_text_length": 3, + "average_text_length": 39.49877029021151, + "max_text_length": 191, + "unique_text": 2014, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "my": { + "num_samples": 2033, + "number_of_characters": 82073, + "number_texts_intersect_with_train": 49, + "min_text_length": 3, + "average_text_length": 40.37038858829316, + "max_text_length": 167, + "unique_text": 2025, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "nb": { + "num_samples": 2033, + "number_of_characters": 70801, + "number_texts_intersect_with_train": 79, + "min_text_length": 3, + "average_text_length": 34.82587309394983, + "max_text_length": 161, + 
"unique_text": 2022, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "nl": { + "num_samples": 2033, + "number_of_characters": 80046, + "number_texts_intersect_with_train": 86, + "min_text_length": 3, + "average_text_length": 39.37333989178554, + "max_text_length": 165, + "unique_text": 2017, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "pl": { + "num_samples": 2033, + "number_of_characters": 73462, + "number_texts_intersect_with_train": 151, + "min_text_length": 4, + "average_text_length": 36.13477619281849, + "max_text_length": 156, + "unique_text": 2009, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + 
"count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "pt": { + "num_samples": 2033, + "number_of_characters": 77129, + "number_texts_intersect_with_train": 119, + "min_text_length": 3, + "average_text_length": 37.9385145105755, + "max_text_length": 160, + "unique_text": 2004, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "ro": { + "num_samples": 2033, + "number_of_characters": 75176, + "number_texts_intersect_with_train": 88, + "min_text_length": 3, + "average_text_length": 36.97786522380718, + "max_text_length": 166, + "unique_text": 2027, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 
280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "ru": { + "num_samples": 2033, + "number_of_characters": 74623, + "number_texts_intersect_with_train": 128, + "min_text_length": 4, + "average_text_length": 36.705853418593215, + "max_text_length": 163, + "unique_text": 2002, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "sl": { + "num_samples": 2033, + "number_of_characters": 71648, + "number_texts_intersect_with_train": 134, + "min_text_length": 3, + "average_text_length": 35.24249877029021, + "max_text_length": 145, + "unique_text": 2005, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "sq": { + "num_samples": 2033, + 
"number_of_characters": 75812, + "number_texts_intersect_with_train": 121, + "min_text_length": 2, + "average_text_length": 37.29070339399902, + "max_text_length": 163, + "unique_text": 2004, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "sv": { + "num_samples": 2033, + "number_of_characters": 70697, + "number_texts_intersect_with_train": 110, + "min_text_length": 3, + "average_text_length": 34.774717166748644, + "max_text_length": 169, + "unique_text": 2015, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "sw": { + "num_samples": 2033, + "number_of_characters": 77779, + "number_texts_intersect_with_train": 97, + "min_text_length": 4, + "average_text_length": 38.258239055582884, + "max_text_length": 161, + "unique_text": 2011, + "unique_labels": 18, + "labels": { + "iot": { 
+ "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "ta": { + "num_samples": 2033, + "number_of_characters": 85963, + "number_texts_intersect_with_train": 94, + "min_text_length": 1, + "average_text_length": 42.28381701918347, + "max_text_length": 171, + "unique_text": 2013, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "te": { + "num_samples": 2033, + "number_of_characters": 77244, + "number_texts_intersect_with_train": 55, + "min_text_length": 2, + "average_text_length": 37.99508116084604, + "max_text_length": 148, + "unique_text": 2022, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + 
"count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "th": { + "num_samples": 2033, + "number_of_characters": 63528, + "number_texts_intersect_with_train": 148, + "min_text_length": 3, + "average_text_length": 31.248401377274963, + "max_text_length": 146, + "unique_text": 2004, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "tl": { + "num_samples": 2033, + "number_of_characters": 89203, + "number_texts_intersect_with_train": 100, + "min_text_length": 3, + "average_text_length": 43.877520905066405, + "max_text_length": 224, + "unique_text": 2014, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + 
"count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "tr": { + "num_samples": 2033, + "number_of_characters": 70353, + "number_texts_intersect_with_train": 113, + "min_text_length": 3, + "average_text_length": 34.60550909985243, + "max_text_length": 150, + "unique_text": 2018, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "ur": { + "num_samples": 2033, + "number_of_characters": 70462, + "number_texts_intersect_with_train": 92, + "min_text_length": 3, + "average_text_length": 34.65912444663059, + "max_text_length": 175, + "unique_text": 2019, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "vi": { + "num_samples": 2033, + "number_of_characters": 77911, + "number_texts_intersect_with_train": 103, + 
"min_text_length": 1, + "average_text_length": 38.32316773241515, + "max_text_length": 184, + "unique_text": 2020, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "zh-CN": { + "num_samples": 2033, + "number_of_characters": 21222, + "number_texts_intersect_with_train": 132, + "min_text_length": 2, + "average_text_length": 10.438760452533202, + "max_text_length": 45, + "unique_text": 2018, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + "count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + }, + "zh-TW": { + "num_samples": 2033, + "number_of_characters": 20048, + "number_texts_intersect_with_train": 177, + "min_text_length": 1, + "average_text_length": 9.861288735858338, + "max_text_length": 55, + "unique_text": 1989, + "unique_labels": 18, + "labels": { + "iot": { + "count": 118 + }, + "general": { + "count": 122 + }, + "takeaway": { + 
"count": 44 + }, + "play": { + "count": 260 + }, + "music": { + "count": 56 + }, + "weather": { + "count": 126 + }, + "audio": { + "count": 35 + }, + "datetime": { + "count": 73 + }, + "alarm": { + "count": 64 + }, + "news": { + "count": 82 + }, + "social": { + "count": 68 + }, + "cooking": { + "count": 43 + }, + "calendar": { + "count": 280 + }, + "qa": { + "count": 214 + }, + "recommendation": { + "count": 69 + }, + "email": { + "count": 157 + }, + "lists": { + "count": 112 + }, + "transport": { + "count": 110 + } + } + } + } + }, + "test": { + "num_samples": 151674, + "number_of_characters": 5230011, + "number_texts_intersect_with_train": 7273, + "min_text_length": 1, + "average_text_length": 34.48192175323391, + "max_text_length": 495, + "unique_text": 148972, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 4896 + }, + "audio": { + "count": 3162 + }, + "iot": { + "count": 11220 + }, + "general": { + "count": 9639 + }, + "datetime": { + "count": 5253 + }, + "music": { + "count": 4131 + }, + "takeaway": { + "count": 2907 + }, + "weather": { + "count": 7956 + }, + "play": { + "count": 19737 + }, + "news": { + "count": 6324 + }, + "recommendation": { + "count": 4794 + }, + "qa": { + "count": 14688 + }, + "calendar": { + "count": 20502 + }, + "social": { + "count": 5406 + }, + "transport": { + "count": 6324 + }, + "cooking": { + "count": 3672 + }, + "email": { + "count": 13821 + }, + "lists": { + "count": 7242 + } + }, + "hf_subset_descriptive_stats": { + "af": { + "num_samples": 2974, + "number_of_characters": 110262, + "number_texts_intersect_with_train": 80, + "min_text_length": 2, + "average_text_length": 37.075319435104234, + "max_text_length": 397, + "unique_text": 2961, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + 
"weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "am": { + "num_samples": 2974, + "number_of_characters": 70145, + "number_texts_intersect_with_train": 89, + "min_text_length": 2, + "average_text_length": 23.58607935440484, + "max_text_length": 209, + "unique_text": 2947, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "ar": { + "num_samples": 2974, + "number_of_characters": 79720, + "number_texts_intersect_with_train": 187, + "min_text_length": 1, + "average_text_length": 26.80564895763282, + "max_text_length": 247, + "unique_text": 2907, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + 
}, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "az": { + "num_samples": 2974, + "number_of_characters": 107751, + "number_texts_intersect_with_train": 182, + "min_text_length": 2, + "average_text_length": 36.23100201748487, + "max_text_length": 366, + "unique_text": 2907, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "bn": { + "num_samples": 2974, + "number_of_characters": 101528, + "number_texts_intersect_with_train": 130, + "min_text_length": 2, + "average_text_length": 34.13853396099529, + "max_text_length": 339, + "unique_text": 2943, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "cy": { + "num_samples": 2974, + "number_of_characters": 114977, + 
"number_texts_intersect_with_train": 125, + "min_text_length": 2, + "average_text_length": 38.66072629455279, + "max_text_length": 367, + "unique_text": 2950, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "da": { + "num_samples": 2974, + "number_of_characters": 102537, + "number_texts_intersect_with_train": 125, + "min_text_length": 1, + "average_text_length": 34.4778076664425, + "max_text_length": 327, + "unique_text": 2941, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "de": { + "num_samples": 2974, + "number_of_characters": 117616, + "number_texts_intersect_with_train": 109, + "min_text_length": 3, + "average_text_length": 39.54808338937458, + "max_text_length": 393, + "unique_text": 2955, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + 
"audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "el": { + "num_samples": 2974, + "number_of_characters": 119736, + "number_texts_intersect_with_train": 115, + "min_text_length": 2, + "average_text_length": 40.26092804303968, + "max_text_length": 409, + "unique_text": 2946, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "en": { + "num_samples": 2974, + "number_of_characters": 102785, + "number_texts_intersect_with_train": 21, + "min_text_length": 2, + "average_text_length": 34.56119704102219, + "max_text_length": 365, + "unique_text": 2970, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, 
+ "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "es": { + "num_samples": 2974, + "number_of_characters": 116394, + "number_texts_intersect_with_train": 119, + "min_text_length": 1, + "average_text_length": 39.137188971082715, + "max_text_length": 457, + "unique_text": 2944, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "fa": { + "num_samples": 2974, + "number_of_characters": 98823, + "number_texts_intersect_with_train": 177, + "min_text_length": 2, + "average_text_length": 33.228984532616, + "max_text_length": 306, + "unique_text": 2921, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 
124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "fi": { + "num_samples": 2974, + "number_of_characters": 108496, + "number_texts_intersect_with_train": 161, + "min_text_length": 2, + "average_text_length": 36.48150638870209, + "max_text_length": 278, + "unique_text": 2930, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "fr": { + "num_samples": 2974, + "number_of_characters": 125811, + "number_texts_intersect_with_train": 132, + "min_text_length": 2, + "average_text_length": 42.30363147276395, + "max_text_length": 479, + "unique_text": 2943, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "he": { + "num_samples": 2974, + "number_of_characters": 83417, + "number_texts_intersect_with_train": 136, + 
"min_text_length": 1, + "average_text_length": 28.048755884330866, + "max_text_length": 283, + "unique_text": 2946, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "hi": { + "num_samples": 2974, + "number_of_characters": 105274, + "number_texts_intersect_with_train": 115, + "min_text_length": 2, + "average_text_length": 35.39811701412239, + "max_text_length": 405, + "unique_text": 2945, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "hu": { + "num_samples": 2974, + "number_of_characters": 108969, + "number_texts_intersect_with_train": 152, + "min_text_length": 2, + "average_text_length": 36.640551445864155, + "max_text_length": 426, + "unique_text": 2920, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + 
"count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "hy": { + "num_samples": 2974, + "number_of_characters": 109134, + "number_texts_intersect_with_train": 104, + "min_text_length": 2, + "average_text_length": 36.6960322797579, + "max_text_length": 359, + "unique_text": 2938, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "id": { + "num_samples": 2974, + "number_of_characters": 112577, + "number_texts_intersect_with_train": 166, + "min_text_length": 2, + "average_text_length": 37.853732347007394, + "max_text_length": 437, + "unique_text": 2913, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": 
{ + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "is": { + "num_samples": 2974, + "number_of_characters": 109732, + "number_texts_intersect_with_train": 129, + "min_text_length": 2, + "average_text_length": 36.89710827168796, + "max_text_length": 349, + "unique_text": 2940, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "it": { + "num_samples": 2974, + "number_of_characters": 113060, + "number_texts_intersect_with_train": 197, + "min_text_length": 3, + "average_text_length": 38.01613987895091, + "max_text_length": 445, + "unique_text": 2918, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, 
+ "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "ja": { + "num_samples": 2974, + "number_of_characters": 46059, + "number_texts_intersect_with_train": 171, + "min_text_length": 1, + "average_text_length": 15.48722259583053, + "max_text_length": 132, + "unique_text": 2919, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "jv": { + "num_samples": 2974, + "number_of_characters": 101977, + "number_texts_intersect_with_train": 144, + "min_text_length": 1, + "average_text_length": 34.28950907868191, + "max_text_length": 361, + "unique_text": 2932, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "ka": { + "num_samples": 2974, + "number_of_characters": 93772, + "number_texts_intersect_with_train": 227, + "min_text_length": 2, + "average_text_length": 
31.530598520511095, + "max_text_length": 327, + "unique_text": 2880, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "km": { + "num_samples": 2974, + "number_of_characters": 93992, + "number_texts_intersect_with_train": 267, + "min_text_length": 2, + "average_text_length": 31.604572965702758, + "max_text_length": 389, + "unique_text": 2786, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "kn": { + "num_samples": 2974, + "number_of_characters": 114246, + "number_texts_intersect_with_train": 101, + "min_text_length": 2, + "average_text_length": 38.41492938802959, + "max_text_length": 337, + "unique_text": 2950, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + 
}, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "ko": { + "num_samples": 2974, + "number_of_characters": 45762, + "number_texts_intersect_with_train": 155, + "min_text_length": 1, + "average_text_length": 15.38735709482179, + "max_text_length": 157, + "unique_text": 2919, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "lv": { + "num_samples": 2974, + "number_of_characters": 108878, + "number_texts_intersect_with_train": 135, + "min_text_length": 2, + "average_text_length": 36.60995292535306, + "max_text_length": 317, + "unique_text": 2946, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + 
"count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "ml": { + "num_samples": 2974, + "number_of_characters": 127789, + "number_texts_intersect_with_train": 133, + "min_text_length": 2, + "average_text_length": 42.96872898453262, + "max_text_length": 168, + "unique_text": 2937, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "mn": { + "num_samples": 2974, + "number_of_characters": 111998, + "number_texts_intersect_with_train": 163, + "min_text_length": 2, + "average_text_length": 37.65904505716207, + "max_text_length": 167, + "unique_text": 2931, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { 
+ "count": 142 + } + } + }, + "ms": { + "num_samples": 2974, + "number_of_characters": 117326, + "number_texts_intersect_with_train": 136, + "min_text_length": 2, + "average_text_length": 39.45057162071284, + "max_text_length": 322, + "unique_text": 2930, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "my": { + "num_samples": 2974, + "number_of_characters": 120671, + "number_texts_intersect_with_train": 66, + "min_text_length": 3, + "average_text_length": 40.575319435104234, + "max_text_length": 191, + "unique_text": 2962, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "nb": { + "num_samples": 2974, + "number_of_characters": 102940, + "number_texts_intersect_with_train": 121, + "min_text_length": 2, + "average_text_length": 34.613315400134496, + "max_text_length": 317, 
+ "unique_text": 2951, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "nl": { + "num_samples": 2974, + "number_of_characters": 117126, + "number_texts_intersect_with_train": 124, + "min_text_length": 1, + "average_text_length": 39.383322125084064, + "max_text_length": 406, + "unique_text": 2953, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "pl": { + "num_samples": 2974, + "number_of_characters": 106914, + "number_texts_intersect_with_train": 207, + "min_text_length": 2, + "average_text_length": 35.949562878278414, + "max_text_length": 389, + "unique_text": 2906, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + 
"music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "pt": { + "num_samples": 2974, + "number_of_characters": 113079, + "number_texts_intersect_with_train": 164, + "min_text_length": 2, + "average_text_length": 38.022528581035644, + "max_text_length": 439, + "unique_text": 2932, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "ro": { + "num_samples": 2974, + "number_of_characters": 109519, + "number_texts_intersect_with_train": 106, + "min_text_length": 3, + "average_text_length": 36.82548755884331, + "max_text_length": 403, + "unique_text": 2937, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + 
}, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "ru": { + "num_samples": 2974, + "number_of_characters": 108199, + "number_texts_intersect_with_train": 167, + "min_text_length": 3, + "average_text_length": 36.381640887693344, + "max_text_length": 416, + "unique_text": 2913, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "sl": { + "num_samples": 2974, + "number_of_characters": 104437, + "number_texts_intersect_with_train": 163, + "min_text_length": 2, + "average_text_length": 35.116677874915936, + "max_text_length": 316, + "unique_text": 2933, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "sq": { + 
"num_samples": 2974, + "number_of_characters": 110202, + "number_texts_intersect_with_train": 168, + "min_text_length": 1, + "average_text_length": 37.0551445864156, + "max_text_length": 406, + "unique_text": 2927, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "sv": { + "num_samples": 2974, + "number_of_characters": 103142, + "number_texts_intersect_with_train": 150, + "min_text_length": 2, + "average_text_length": 34.681237390719566, + "max_text_length": 352, + "unique_text": 2932, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "sw": { + "num_samples": 2974, + "number_of_characters": 112209, + "number_texts_intersect_with_train": 143, + "min_text_length": 3, + "average_text_length": 37.72999327505044, + "max_text_length": 407, + "unique_text": 2935, + 
"unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "ta": { + "num_samples": 2974, + "number_of_characters": 125008, + "number_texts_intersect_with_train": 129, + "min_text_length": 3, + "average_text_length": 42.03362474781439, + "max_text_length": 408, + "unique_text": 2946, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "te": { + "num_samples": 2974, + "number_of_characters": 113476, + "number_texts_intersect_with_train": 78, + "min_text_length": 2, + "average_text_length": 38.15601882985877, + "max_text_length": 390, + "unique_text": 2963, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + 
"takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "th": { + "num_samples": 2974, + "number_of_characters": 91732, + "number_texts_intersect_with_train": 190, + "min_text_length": 3, + "average_text_length": 30.84465366509751, + "max_text_length": 300, + "unique_text": 2906, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "tl": { + "num_samples": 2974, + "number_of_characters": 130918, + "number_texts_intersect_with_train": 131, + "min_text_length": 2, + "average_text_length": 44.020847343644924, + "max_text_length": 495, + "unique_text": 2943, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 
+ }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "tr": { + "num_samples": 2974, + "number_of_characters": 103538, + "number_texts_intersect_with_train": 129, + "min_text_length": 2, + "average_text_length": 34.81439139206456, + "max_text_length": 366, + "unique_text": 2937, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "ur": { + "num_samples": 2974, + "number_of_characters": 103680, + "number_texts_intersect_with_train": 101, + "min_text_length": 1, + "average_text_length": 34.862138533960994, + "max_text_length": 137, + "unique_text": 2943, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "vi": { + "num_samples": 2974, + 
"number_of_characters": 112612, + "number_texts_intersect_with_train": 143, + "min_text_length": 1, + "average_text_length": 37.865501008742434, + "max_text_length": 426, + "unique_text": 2935, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "zh-CN": { + "num_samples": 2974, + "number_of_characters": 31013, + "number_texts_intersect_with_train": 187, + "min_text_length": 1, + "average_text_length": 10.428043039677203, + "max_text_length": 87, + "unique_text": 2921, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + }, + "zh-TW": { + "num_samples": 2974, + "number_of_characters": 29053, + "number_texts_intersect_with_train": 225, + "min_text_length": 1, + "average_text_length": 9.768997982515131, + "max_text_length": 90, + "unique_text": 2880, + "unique_labels": 18, + "labels": { 
+ "alarm": { + "count": 96 + }, + "audio": { + "count": 62 + }, + "iot": { + "count": 220 + }, + "general": { + "count": 189 + }, + "datetime": { + "count": 103 + }, + "music": { + "count": 81 + }, + "takeaway": { + "count": 57 + }, + "weather": { + "count": 156 + }, + "play": { + "count": 387 + }, + "news": { + "count": 124 + }, + "recommendation": { + "count": 94 + }, + "qa": { + "count": 288 + }, + "calendar": { + "count": 402 + }, + "social": { + "count": 106 + }, + "transport": { + "count": 124 + }, + "cooking": { + "count": 72 + }, + "email": { + "count": 271 + }, + "lists": { + "count": 142 + } + } + } + } + }, + "train": { + "num_samples": 587214, + "number_of_characters": 20507758, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 34.92382334208653, + "max_text_length": 295, + "unique_text": 565055, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 19890 + }, + "audio": { + "count": 14790 + }, + "iot": { + "count": 39219 + }, + "calendar": { + "count": 86088 + }, + "play": { + "count": 70227 + }, + "general": { + "count": 33252 + }, + "datetime": { + "count": 20502 + }, + "takeaway": { + "count": 13107 + }, + "news": { + "count": 25653 + }, + "music": { + "count": 16932 + }, + "weather": { + "count": 29223 + }, + "qa": { + "count": 60333 + }, + "recommendation": { + "count": 22083 + }, + "social": { + "count": 19941 + }, + "transport": { + "count": 29121 + }, + "email": { + "count": 48603 + }, + "lists": { + "count": 27489 + }, + "cooking": { + "count": 10761 + } + }, + "hf_subset_descriptive_stats": { + "af": { + "num_samples": 11514, + "number_of_characters": 432629, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.574170574952234, + "max_text_length": 192, + "unique_text": 11315, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + 
"play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "am": { + "num_samples": 11514, + "number_of_characters": 276957, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 24.053934340802503, + "max_text_length": 114, + "unique_text": 11250, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "ar": { + "num_samples": 11514, + "number_of_characters": 310505, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 26.96760465520236, + "max_text_length": 152, + "unique_text": 10991, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + 
"weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "az": { + "num_samples": 11514, + "number_of_characters": 421607, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 36.61690116380059, + "max_text_length": 208, + "unique_text": 10971, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "bn": { + "num_samples": 11514, + "number_of_characters": 398927, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 34.64712523883967, + "max_text_length": 295, + "unique_text": 11209, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, 
+ "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "cy": { + "num_samples": 11514, + "number_of_characters": 448116, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.91922876498176, + "max_text_length": 189, + "unique_text": 11214, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "da": { + "num_samples": 11514, + "number_of_characters": 402298, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 34.939899253083205, + "max_text_length": 184, + "unique_text": 11206, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "de": { + "num_samples": 11514, + "number_of_characters": 455798, + "number_texts_intersect_with_train": null, + 
"min_text_length": 2, + "average_text_length": 39.58641653639048, + "max_text_length": 200, + "unique_text": 11267, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "el": { + "num_samples": 11514, + "number_of_characters": 475367, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 41.285999652596836, + "max_text_length": 209, + "unique_text": 11209, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "en": { + "num_samples": 11514, + "number_of_characters": 403436, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.03873545249262, + "max_text_length": 189, + "unique_text": 11468, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + 
"count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "es": { + "num_samples": 11514, + "number_of_characters": 456222, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 39.62324127149557, + "max_text_length": 205, + "unique_text": 11170, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "fa": { + "num_samples": 11514, + "number_of_characters": 387146, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 33.62393607781831, + "max_text_length": 175, + "unique_text": 11032, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + 
"count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "fi": { + "num_samples": 11514, + "number_of_characters": 426132, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.00990099009901, + "max_text_length": 230, + "unique_text": 11100, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "fr": { + "num_samples": 11514, + "number_of_characters": 492591, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 42.781917665450756, + "max_text_length": 221, + "unique_text": 11189, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + 
"count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "he": { + "num_samples": 11514, + "number_of_characters": 326685, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 28.37285044293903, + "max_text_length": 137, + "unique_text": 11167, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "hi": { + "num_samples": 11514, + "number_of_characters": 412758, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.84835852006253, + "max_text_length": 178, + "unique_text": 11212, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "hu": { + "num_samples": 11514, + 
"number_of_characters": 426261, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.021104742053154, + "max_text_length": 214, + "unique_text": 11139, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "hy": { + "num_samples": 11514, + "number_of_characters": 427568, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.1346187250304, + "max_text_length": 202, + "unique_text": 11174, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "id": { + "num_samples": 11514, + "number_of_characters": 440413, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 38.250217126975855, + "max_text_length": 190, + "unique_text": 10998, + 
"unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "is": { + "num_samples": 11514, + "number_of_characters": 431245, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.453969081118636, + "max_text_length": 183, + "unique_text": 11151, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "it": { + "num_samples": 11514, + "number_of_characters": 444123, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.57243355914539, + "max_text_length": 176, + "unique_text": 10961, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + 
"general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "ja": { + "num_samples": 11514, + "number_of_characters": 179863, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 15.6212437033177, + "max_text_length": 75, + "unique_text": 11025, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "jv": { + "num_samples": 11514, + "number_of_characters": 402160, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 34.92791384401598, + "max_text_length": 198, + "unique_text": 11152, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + 
"qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "ka": { + "num_samples": 11514, + "number_of_characters": 363520, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 31.57199930519368, + "max_text_length": 177, + "unique_text": 10732, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "km": { + "num_samples": 11514, + "number_of_characters": 367358, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 31.90533263852701, + "max_text_length": 190, + "unique_text": 10320, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + 
"cooking": { + "count": 211 + } + } + }, + "kn": { + "num_samples": 11514, + "number_of_characters": 445768, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.71530310925829, + "max_text_length": 214, + "unique_text": 11279, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "ko": { + "num_samples": 11514, + "number_of_characters": 179441, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 15.584592669793295, + "max_text_length": 96, + "unique_text": 11100, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "lv": { + "num_samples": 11514, + "number_of_characters": 425766, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 
36.978113600833765, + "max_text_length": 208, + "unique_text": 11169, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "ml": { + "num_samples": 11514, + "number_of_characters": 499847, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 43.4121070001737, + "max_text_length": 207, + "unique_text": 11195, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "mn": { + "num_samples": 11514, + "number_of_characters": 442389, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.421834288692025, + "max_text_length": 171, + "unique_text": 11082, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + 
"calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "ms": { + "num_samples": 11514, + "number_of_characters": 460564, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 40.00034740316137, + "max_text_length": 200, + "unique_text": 11151, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "my": { + "num_samples": 11514, + "number_of_characters": 473258, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 41.10283133576515, + "max_text_length": 202, + "unique_text": 11342, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + 
"music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "nb": { + "num_samples": 11514, + "number_of_characters": 404200, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 35.105089456314055, + "max_text_length": 176, + "unique_text": 11222, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "nl": { + "num_samples": 11514, + "number_of_characters": 458912, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 39.85686989751607, + "max_text_length": 199, + "unique_text": 11260, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + 
}, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "pl": { + "num_samples": 11514, + "number_of_characters": 418669, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 36.36173354177523, + "max_text_length": 197, + "unique_text": 10912, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "pt": { + "num_samples": 11514, + "number_of_characters": 443661, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 38.532308494007296, + "max_text_length": 199, + "unique_text": 11078, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "ro": { + "num_samples": 11514, + "number_of_characters": 430447, + 
"number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.38466215042557, + "max_text_length": 187, + "unique_text": 11224, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "ru": { + "num_samples": 11514, + "number_of_characters": 424549, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 36.87241618898732, + "max_text_length": 203, + "unique_text": 11002, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "sl": { + "num_samples": 11514, + "number_of_characters": 410081, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.61585895431649, + "max_text_length": 167, + "unique_text": 10987, + "unique_labels": 18, + "labels": { + "alarm": 
{ + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "sq": { + "num_samples": 11514, + "number_of_characters": 434272, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 37.716866423484454, + "max_text_length": 202, + "unique_text": 11011, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "sv": { + "num_samples": 11514, + "number_of_characters": 404954, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.170574952232066, + "max_text_length": 200, + "unique_text": 11107, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { 
+ "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "sw": { + "num_samples": 11514, + "number_of_characters": 439885, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 38.204359909675176, + "max_text_length": 221, + "unique_text": 11119, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "ta": { + "num_samples": 11514, + "number_of_characters": 491066, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 42.64947021017891, + "max_text_length": 211, + "unique_text": 11196, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { 
+ "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "te": { + "num_samples": 11514, + "number_of_characters": 443490, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.51745700885878, + "max_text_length": 215, + "unique_text": 11351, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "th": { + "num_samples": 11514, + "number_of_characters": 361003, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 31.353395865902378, + "max_text_length": 141, + "unique_text": 10912, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "tl": 
{ + "num_samples": 11514, + "number_of_characters": 515338, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 44.7575125933646, + "max_text_length": 253, + "unique_text": 11173, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "tr": { + "num_samples": 11514, + "number_of_characters": 403389, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.03465346534654, + "max_text_length": 206, + "unique_text": 11131, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "ur": { + "num_samples": 11514, + "number_of_characters": 407108, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 35.35765155462915, + "max_text_length": 184, + 
"unique_text": 11242, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "vi": { + "num_samples": 11514, + "number_of_characters": 443651, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 38.53143998610388, + "max_text_length": 166, + "unique_text": 11126, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "zh-CN": { + "num_samples": 11514, + "number_of_characters": 121615, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 10.562358867465694, + "max_text_length": 58, + "unique_text": 11022, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + 
"count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + }, + "zh-TW": { + "num_samples": 11514, + "number_of_characters": 114750, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 9.966128191766545, + "max_text_length": 74, + "unique_text": 10755, + "unique_labels": 18, + "labels": { + "alarm": { + "count": 390 + }, + "audio": { + "count": 290 + }, + "iot": { + "count": 769 + }, + "calendar": { + "count": 1688 + }, + "play": { + "count": 1377 + }, + "general": { + "count": 652 + }, + "datetime": { + "count": 402 + }, + "takeaway": { + "count": 257 + }, + "news": { + "count": 503 + }, + "music": { + "count": 332 + }, + "weather": { + "count": 573 + }, + "qa": { + "count": 1183 + }, + "recommendation": { + "count": 433 + }, + "social": { + "count": 391 + }, + "transport": { + "count": 571 + }, + "email": { + "count": 953 + }, + "lists": { + "count": 539 + }, + "cooking": { + "count": 211 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/Moroco.json b/mteb/descriptive_stats/Classification/Moroco.json new file mode 100644 index 0000000000..1c9bb1912c --- /dev/null +++ b/mteb/descriptive_stats/Classification/Moroco.json @@ -0,0 +1,62 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 3474068, + "number_texts_intersect_with_train": 113, + "min_text_length": 37, + "average_text_length": 1696.322265625, + "max_text_length": 26938, + "unique_text": 2042, + "unique_labels": 6, + "labels": { + "5": { + "count": 284 + }, + "1": { + "count": 521 + }, + 
"2": { + "count": 557 + }, + "0": { + "count": 140 + }, + "4": { + "count": 368 + }, + "3": { + "count": 178 + } + } + }, + "train": { + "num_samples": 21719, + "number_of_characters": 37160017, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 1710.9451171785072, + "max_text_length": 92966, + "unique_text": 21025, + "unique_labels": 6, + "labels": { + "2": { + "count": 5910 + }, + "3": { + "count": 1890 + }, + "4": { + "count": 3899 + }, + "5": { + "count": 3014 + }, + "1": { + "count": 5522 + }, + "0": { + "count": 1484 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MovieReviewSentimentClassification.json b/mteb/descriptive_stats/Classification/MovieReviewSentimentClassification.json new file mode 100644 index 0000000000..58c4670b0d --- /dev/null +++ b/mteb/descriptive_stats/Classification/MovieReviewSentimentClassification.json @@ -0,0 +1,56 @@ +{ + "validation": { + "num_samples": 2048, + "number_of_characters": 1087029, + "number_texts_intersect_with_train": 2, + "min_text_length": 5, + "average_text_length": 530.77587890625, + "max_text_length": 2000, + "unique_text": 2047, + "unique_labels": 2, + "labels": { + "0": { + "count": 1045 + }, + "1": { + "count": 1003 + } + } + }, + "test": { + "num_samples": 2048, + "number_of_characters": 1117580, + "number_texts_intersect_with_train": 6, + "min_text_length": 3, + "average_text_length": 545.693359375, + "max_text_length": 2000, + "unique_text": 2047, + "unique_labels": 2, + "labels": { + "0": { + "count": 1066 + }, + "1": { + "count": 982 + } + } + }, + "train": { + "num_samples": 160000, + "number_of_characters": 86798181, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 542.48863125, + "max_text_length": 2000, + "unique_text": 159461, + "unique_labels": 2, + "labels": { + "0": { + "count": 79413 + }, + "1": { + "count": 80587 + } + } + } +} \ No newline at end of file diff 
--git a/mteb/descriptive_stats/Classification/MultiHateClassification.json b/mteb/descriptive_stats/Classification/MultiHateClassification.json new file mode 100644 index 0000000000..f897d08594 --- /dev/null +++ b/mteb/descriptive_stats/Classification/MultiHateClassification.json @@ -0,0 +1,438 @@ +{ + "test": { + "num_samples": 11000, + "number_of_characters": 502013, + "number_texts_intersect_with_train": 16, + "min_text_length": 1, + "average_text_length": 45.63754545454545, + "max_text_length": 135, + "unique_text": 10990, + "unique_labels": 2, + "labels": { + "0": { + "count": 7661 + }, + "1": { + "count": 3339 + } + }, + "hf_subset_descriptive_stats": { + "ara": { + "num_samples": 1000, + "number_of_characters": 33644, + "number_texts_intersect_with_train": 5, + "min_text_length": 6, + "average_text_length": 33.644, + "max_text_length": 83, + "unique_text": 994, + "unique_labels": 2, + "labels": { + "0": { + "count": 699 + }, + "1": { + "count": 301 + } + } + }, + "cmn": { + "num_samples": 1000, + "number_of_characters": 14940, + "number_texts_intersect_with_train": 6, + "min_text_length": 5, + "average_text_length": 14.94, + "max_text_length": 34, + "unique_text": 999, + "unique_labels": 2, + "labels": { + "1": { + "count": 327 + }, + "0": { + "count": 673 + } + } + }, + "eng": { + "num_samples": 1000, + "number_of_characters": 48378, + "number_texts_intersect_with_train": 0, + "min_text_length": 11, + "average_text_length": 48.378, + "max_text_length": 100, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 686 + }, + "1": { + "count": 314 + } + } + }, + "deu": { + "num_samples": 1000, + "number_of_characters": 53350, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 53.35, + "max_text_length": 118, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "1": { + "count": 300 + }, + "0": { + "count": 700 + } + } + }, + "fra": { + "num_samples": 1000, + "number_of_characters": 55169, 
+ "number_texts_intersect_with_train": 0, + "min_text_length": 1, + "average_text_length": 55.169, + "max_text_length": 135, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 699 + }, + "1": { + "count": 301 + } + } + }, + "hin": { + "num_samples": 1000, + "number_of_characters": 47262, + "number_texts_intersect_with_train": 3, + "min_text_length": 13, + "average_text_length": 47.262, + "max_text_length": 130, + "unique_text": 999, + "unique_labels": 2, + "labels": { + "0": { + "count": 698 + }, + "1": { + "count": 302 + } + } + }, + "ita": { + "num_samples": 1000, + "number_of_characters": 50502, + "number_texts_intersect_with_train": 1, + "min_text_length": 8, + "average_text_length": 50.502, + "max_text_length": 114, + "unique_text": 999, + "unique_labels": 2, + "labels": { + "0": { + "count": 700 + }, + "1": { + "count": 300 + } + } + }, + "nld": { + "num_samples": 1000, + "number_of_characters": 53056, + "number_texts_intersect_with_train": 1, + "min_text_length": 15, + "average_text_length": 53.056, + "max_text_length": 121, + "unique_text": 999, + "unique_labels": 2, + "labels": { + "1": { + "count": 298 + }, + "0": { + "count": 702 + } + } + }, + "pol": { + "num_samples": 1000, + "number_of_characters": 48907, + "number_texts_intersect_with_train": 0, + "min_text_length": 9, + "average_text_length": 48.907, + "max_text_length": 109, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "1": { + "count": 298 + }, + "0": { + "count": 702 + } + } + }, + "por": { + "num_samples": 1000, + "number_of_characters": 48400, + "number_texts_intersect_with_train": 0, + "min_text_length": 9, + "average_text_length": 48.4, + "max_text_length": 109, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 698 + }, + "1": { + "count": 302 + } + } + }, + "spa": { + "num_samples": 1000, + "number_of_characters": 48405, + "number_texts_intersect_with_train": 0, + "min_text_length": 11, + "average_text_length": 48.405, 
+ "max_text_length": 106, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "1": { + "count": 296 + }, + "0": { + "count": 704 + } + } + } + } + }, + "train": { + "num_samples": 11000, + "number_of_characters": 505993, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 45.99936363636364, + "max_text_length": 131, + "unique_text": 10993, + "unique_labels": 2, + "labels": { + "0": { + "count": 7659 + }, + "1": { + "count": 3341 + } + }, + "hf_subset_descriptive_stats": { + "ara": { + "num_samples": 1000, + "number_of_characters": 34165, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 34.165, + "max_text_length": 86, + "unique_text": 997, + "unique_labels": 2, + "labels": { + "0": { + "count": 699 + }, + "1": { + "count": 301 + } + } + }, + "cmn": { + "num_samples": 1000, + "number_of_characters": 14946, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 14.946, + "max_text_length": 34, + "unique_text": 996, + "unique_labels": 2, + "labels": { + "1": { + "count": 331 + }, + "0": { + "count": 669 + } + } + }, + "eng": { + "num_samples": 1000, + "number_of_characters": 48134, + "number_texts_intersect_with_train": null, + "min_text_length": 13, + "average_text_length": 48.134, + "max_text_length": 101, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 687 + }, + "1": { + "count": 313 + } + } + }, + "deu": { + "num_samples": 1000, + "number_of_characters": 52717, + "number_texts_intersect_with_train": null, + "min_text_length": 13, + "average_text_length": 52.717, + "max_text_length": 121, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "1": { + "count": 302 + }, + "0": { + "count": 698 + } + } + }, + "fra": { + "num_samples": 1000, + "number_of_characters": 56635, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 56.635, + "max_text_length": 
131, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 700 + }, + "1": { + "count": 300 + } + } + }, + "hin": { + "num_samples": 1000, + "number_of_characters": 47185, + "number_texts_intersect_with_train": null, + "min_text_length": 15, + "average_text_length": 47.185, + "max_text_length": 128, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "1": { + "count": 301 + }, + "0": { + "count": 699 + } + } + }, + "ita": { + "num_samples": 1000, + "number_of_characters": 51270, + "number_texts_intersect_with_train": null, + "min_text_length": 10, + "average_text_length": 51.27, + "max_text_length": 125, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 701 + }, + "1": { + "count": 299 + } + } + }, + "nld": { + "num_samples": 1000, + "number_of_characters": 53534, + "number_texts_intersect_with_train": null, + "min_text_length": 14, + "average_text_length": 53.534, + "max_text_length": 122, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 699 + }, + "1": { + "count": 301 + } + } + }, + "pol": { + "num_samples": 1000, + "number_of_characters": 50010, + "number_texts_intersect_with_train": null, + "min_text_length": 13, + "average_text_length": 50.01, + "max_text_length": 111, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 703 + }, + "1": { + "count": 297 + } + } + }, + "por": { + "num_samples": 1000, + "number_of_characters": 49011, + "number_texts_intersect_with_train": null, + "min_text_length": 14, + "average_text_length": 49.011, + "max_text_length": 108, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 701 + }, + "1": { + "count": 299 + } + } + }, + "spa": { + "num_samples": 1000, + "number_of_characters": 48386, + "number_texts_intersect_with_train": null, + "min_text_length": 13, + "average_text_length": 48.386, + "max_text_length": 103, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + 
"count": 703 + }, + "1": { + "count": 297 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MultilingualSentiment.json b/mteb/descriptive_stats/Classification/MultilingualSentiment.json new file mode 100644 index 0000000000..ea2fd58517 --- /dev/null +++ b/mteb/descriptive_stats/Classification/MultilingualSentiment.json @@ -0,0 +1,65 @@ +{ + "validation": { + "num_samples": 3000, + "number_of_characters": 153027, + "number_texts_intersect_with_train": 92, + "min_text_length": 16, + "average_text_length": 51.009, + "max_text_length": 1378, + "unique_text": 2989, + "unique_labels": 3, + "labels": { + "2": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "0": { + "count": 1000 + } + } + }, + "test": { + "num_samples": 3000, + "number_of_characters": 155863, + "number_texts_intersect_with_train": 89, + "min_text_length": 18, + "average_text_length": 51.95433333333333, + "max_text_length": 833, + "unique_text": 2998, + "unique_labels": 3, + "labels": { + "2": { + "count": 1000 + }, + "1": { + "count": 1000 + }, + "0": { + "count": 1000 + } + } + }, + "train": { + "num_samples": 120000, + "number_of_characters": 6210602, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 51.75501666666667, + "max_text_length": 2377, + "unique_text": 117494, + "unique_labels": 3, + "labels": { + "2": { + "count": 40000 + }, + "1": { + "count": 40000 + }, + "0": { + "count": 40000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MultilingualSentimentClassification.json b/mteb/descriptive_stats/Classification/MultilingualSentimentClassification.json new file mode 100644 index 0000000000..1b8ce1aadf --- /dev/null +++ b/mteb/descriptive_stats/Classification/MultilingualSentimentClassification.json @@ -0,0 +1,1086 @@ +{ + "test": { + "num_samples": 49450, + "number_of_characters": 11764042, + "number_texts_intersect_with_train": 7395, + 
"min_text_length": 1, + "average_text_length": 237.8977148634985, + "max_text_length": 37249, + "unique_text": 49415, + "unique_labels": 2, + "labels": { + "1": { + "count": 30554 + }, + "0": { + "count": 18896 + } + }, + "hf_subset_descriptive_stats": { + "urd": { + "num_samples": 294, + "number_of_characters": 24186, + "number_texts_intersect_with_train": 0, + "min_text_length": 8, + "average_text_length": 82.26530612244898, + "max_text_length": 157, + "unique_text": 294, + "unique_labels": 2, + "labels": { + "1": { + "count": 143 + }, + "0": { + "count": 151 + } + } + }, + "vie": { + "num_samples": 685, + "number_of_characters": 75578, + "number_texts_intersect_with_train": 11, + "min_text_length": 3, + "average_text_length": 110.33284671532847, + "max_text_length": 2753, + "unique_text": 685, + "unique_labels": 2, + "labels": { + "1": { + "count": 352 + }, + "0": { + "count": 333 + } + } + }, + "dza": { + "num_samples": 92, + "number_of_characters": 5630, + "number_texts_intersect_with_train": 0, + "min_text_length": 9, + "average_text_length": 61.19565217391305, + "max_text_length": 225, + "unique_text": 92, + "unique_labels": 2, + "labels": { + "1": { + "count": 59 + }, + "0": { + "count": 33 + } + } + }, + "tha": { + "num_samples": 2344, + "number_of_characters": 185436, + "number_texts_intersect_with_train": 0, + "min_text_length": 1, + "average_text_length": 79.11092150170649, + "max_text_length": 1978, + "unique_text": 2344, + "unique_labels": 2, + "labels": { + "0": { + "count": 1388 + }, + "1": { + "count": 956 + } + } + }, + "tur": { + "num_samples": 211, + "number_of_characters": 12960, + "number_texts_intersect_with_train": 0, + "min_text_length": 12, + "average_text_length": 61.4218009478673, + "max_text_length": 373, + "unique_text": 211, + "unique_labels": 2, + "labels": { + "1": { + "count": 125 + }, + "0": { + "count": 86 + } + } + }, + "slk": { + "num_samples": 1042, + "number_of_characters": 95814, + "number_texts_intersect_with_train": 51, + 
"min_text_length": 1, + "average_text_length": 91.95201535508637, + "max_text_length": 1020, + "unique_text": 1042, + "unique_labels": 2, + "labels": { + "0": { + "count": 128 + }, + "1": { + "count": 914 + } + } + }, + "nor": { + "num_samples": 417, + "number_of_characters": 42489, + "number_texts_intersect_with_train": 1, + "min_text_length": 11, + "average_text_length": 101.89208633093526, + "max_text_length": 333, + "unique_text": 417, + "unique_labels": 2, + "labels": { + "0": { + "count": 118 + }, + "1": { + "count": 299 + } + } + }, + "spa": { + "num_samples": 296, + "number_of_characters": 26022, + "number_texts_intersect_with_train": 0, + "min_text_length": 5, + "average_text_length": 87.91216216216216, + "max_text_length": 636, + "unique_text": 296, + "unique_labels": 2, + "labels": { + "1": { + "count": 244 + }, + "0": { + "count": 52 + } + } + }, + "rus": { + "num_samples": 867, + "number_of_characters": 2823548, + "number_texts_intersect_with_train": 6, + "min_text_length": 51, + "average_text_length": 3256.6874279123413, + "max_text_length": 37249, + "unique_text": 867, + "unique_labels": 2, + "labels": { + "1": { + "count": 582 + }, + "0": { + "count": 285 + } + } + }, + "mlt": { + "num_samples": 171, + "number_of_characters": 22643, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 132.41520467836258, + "max_text_length": 2037, + "unique_text": 171, + "unique_labels": 2, + "labels": { + "1": { + "count": 60 + }, + "0": { + "count": 111 + } + } + }, + "kor": { + "num_samples": 2667, + "number_of_characters": 93842, + "number_texts_intersect_with_train": 3, + "min_text_length": 1, + "average_text_length": 35.186351706036746, + "max_text_length": 143, + "unique_text": 2667, + "unique_labels": 2, + "labels": { + "0": { + "count": 1365 + }, + "1": { + "count": 1302 + } + } + }, + "ind": { + "num_samples": 2266, + "number_of_characters": 446547, + "number_texts_intersect_with_train": 0, + "min_text_length": 3, + 
"average_text_length": 197.0639894086496, + "max_text_length": 526, + "unique_text": 2266, + "unique_labels": 2, + "labels": { + "1": { + "count": 1467 + }, + "0": { + "count": 799 + } + } + }, + "heb": { + "num_samples": 2305, + "number_of_characters": 262910, + "number_texts_intersect_with_train": 1053, + "min_text_length": 3, + "average_text_length": 114.06073752711497, + "max_text_length": 2763, + "unique_text": 2305, + "unique_labels": 2, + "labels": { + "1": { + "count": 1578 + }, + "0": { + "count": 727 + } + } + }, + "jpn": { + "num_samples": 2552, + "number_of_characters": 420128, + "number_texts_intersect_with_train": 1389, + "min_text_length": 20, + "average_text_length": 164.6269592476489, + "max_text_length": 1407, + "unique_text": 2552, + "unique_labels": 2, + "labels": { + "1": { + "count": 1618 + }, + "0": { + "count": 934 + } + } + }, + "ell": { + "num_samples": 767, + "number_of_characters": 158317, + "number_texts_intersect_with_train": 212, + "min_text_length": 6, + "average_text_length": 206.41069100391135, + "max_text_length": 1014, + "unique_text": 761, + "unique_labels": 2, + "labels": { + "0": { + "count": 400 + }, + "1": { + "count": 367 + } + } + }, + "deu": { + "num_samples": 1490, + "number_of_characters": 498076, + "number_texts_intersect_with_train": 6, + "min_text_length": 13, + "average_text_length": 334.2791946308725, + "max_text_length": 12036, + "unique_text": 1487, + "unique_labels": 2, + "labels": { + "0": { + "count": 1277 + }, + "1": { + "count": 213 + } + } + }, + "eng": { + "num_samples": 1821, + "number_of_characters": 187754, + "number_texts_intersect_with_train": 2, + "min_text_length": 5, + "average_text_length": 103.10488742449203, + "max_text_length": 256, + "unique_text": 1821, + "unique_labels": 2, + "labels": { + "1": { + "count": 909 + }, + "0": { + "count": 912 + } + } + }, + "fin": { + "num_samples": 1267, + "number_of_characters": 98192, + "number_texts_intersect_with_train": 22, + "min_text_length": 2, + 
"average_text_length": 77.49960536700868, + "max_text_length": 3264, + "unique_text": 1257, + "unique_labels": 2, + "labels": { + "0": { + "count": 703 + }, + "1": { + "count": 564 + } + } + }, + "hrv": { + "num_samples": 437, + "number_of_characters": 66150, + "number_texts_intersect_with_train": 1, + "min_text_length": 7, + "average_text_length": 151.37299771167048, + "max_text_length": 1037, + "unique_text": 437, + "unique_labels": 2, + "labels": { + "1": { + "count": 341 + }, + "0": { + "count": 96 + } + } + }, + "zho": { + "num_samples": 4896, + "number_of_characters": 636279, + "number_texts_intersect_with_train": 3482, + "min_text_length": 2, + "average_text_length": 129.95894607843138, + "max_text_length": 1431, + "unique_text": 4896, + "unique_labels": 2, + "labels": { + "1": { + "count": 2975 + }, + "0": { + "count": 1921 + } + } + }, + "cmn": { + "num_samples": 8915, + "number_of_characters": 1432948, + "number_texts_intersect_with_train": 941, + "min_text_length": 3, + "average_text_length": 160.7344924284913, + "max_text_length": 248, + "unique_text": 8913, + "unique_labels": 2, + "labels": { + "1": { + "count": 7642 + }, + "0": { + "count": 1273 + } + } + }, + "bul": { + "num_samples": 1673, + "number_of_characters": 77419, + "number_texts_intersect_with_train": 172, + "min_text_length": 1, + "average_text_length": 46.27555289898386, + "max_text_length": 140, + "unique_text": 1672, + "unique_labels": 2, + "labels": { + "1": { + "count": 1334 + }, + "0": { + "count": 339 + } + } + }, + "eus": { + "num_samples": 227, + "number_of_characters": 13419, + "number_texts_intersect_with_train": 0, + "min_text_length": 10, + "average_text_length": 59.11453744493392, + "max_text_length": 234, + "unique_text": 227, + "unique_labels": 2, + "labels": { + "0": { + "count": 35 + }, + "1": { + "count": 192 + } + } + }, + "uig": { + "num_samples": 841, + "number_of_characters": 206190, + "number_texts_intersect_with_train": 22, + "min_text_length": 11, + 
"average_text_length": 245.17241379310346, + "max_text_length": 1354, + "unique_text": 829, + "unique_labels": 2, + "labels": { + "1": { + "count": 741 + }, + "0": { + "count": 100 + } + } + }, + "bam": { + "num_samples": 673, + "number_of_characters": 19801, + "number_texts_intersect_with_train": 0, + "min_text_length": 4, + "average_text_length": 29.421991084695392, + "max_text_length": 290, + "unique_text": 673, + "unique_labels": 2, + "labels": { + "1": { + "count": 493 + }, + "0": { + "count": 180 + } + } + }, + "pol": { + "num_samples": 480, + "number_of_characters": 363268, + "number_texts_intersect_with_train": 0, + "min_text_length": 211, + "average_text_length": 756.8083333333333, + "max_text_length": 3917, + "unique_text": 480, + "unique_labels": 2, + "labels": { + "0": { + "count": 271 + }, + "1": { + "count": 209 + } + } + }, + "cym": { + "num_samples": 2048, + "number_of_characters": 2784179, + "number_texts_intersect_with_train": 19, + "min_text_length": 66, + "average_text_length": 1359.46240234375, + "max_text_length": 6351, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "1": { + "count": 1024 + }, + "0": { + "count": 1024 + } + } + }, + "ara": { + "num_samples": 706, + "number_of_characters": 50229, + "number_texts_intersect_with_train": 0, + "min_text_length": 4, + "average_text_length": 71.14589235127478, + "max_text_length": 2096, + "unique_text": 706, + "unique_labels": 2, + "labels": { + "1": { + "count": 351 + }, + "0": { + "count": 355 + } + } + }, + "fas": { + "num_samples": 7000, + "number_of_characters": 634088, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 90.584, + "max_text_length": 834, + "unique_text": 7000, + "unique_labels": 2, + "labels": { + "1": { + "count": 3500 + }, + "0": { + "count": 3500 + } + } + } + } + }, + "train": { + "num_samples": 243325, + "number_of_characters": 63920925, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + 
"average_text_length": 262.69772937429366, + "max_text_length": 390168, + "unique_text": 240760, + "unique_labels": 2, + "labels": { + "0": { + "count": 101833 + }, + "1": { + "count": 141492 + } + }, + "hf_subset_descriptive_stats": { + "urd": { + "num_samples": 685, + "number_of_characters": 56821, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 82.95036496350365, + "max_text_length": 155, + "unique_text": 684, + "unique_labels": 2, + "labels": { + "0": { + "count": 348 + }, + "1": { + "count": 337 + } + } + }, + "vie": { + "num_samples": 2384, + "number_of_characters": 251071, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 105.31501677852349, + "max_text_length": 1532, + "unique_text": 2367, + "unique_labels": 2, + "labels": { + "1": { + "count": 1186 + }, + "0": { + "count": 1198 + } + } + }, + "dza": { + "num_samples": 564, + "number_of_characters": 38627, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 68.48758865248227, + "max_text_length": 395, + "unique_text": 563, + "unique_labels": 2, + "labels": { + "0": { + "count": 274 + }, + "1": { + "count": 290 + } + } + }, + "tha": { + "num_samples": 8103, + "number_of_characters": 595292, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 73.46563001357522, + "max_text_length": 1626, + "unique_text": 8102, + "unique_labels": 2, + "labels": { + "1": { + "count": 3363 + }, + "0": { + "count": 4740 + } + } + }, + "tur": { + "num_samples": 4486, + "number_of_characters": 724148, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 161.42398573339278, + "max_text_length": 1816, + "unique_text": 4431, + "unique_labels": 2, + "labels": { + "1": { + "count": 2311 + }, + "0": { + "count": 2175 + } + } + }, + "slk": { + "num_samples": 3560, + "number_of_characters": 313145, + 
"number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 87.96207865168539, + "max_text_length": 1040, + "unique_text": 3560, + "unique_labels": 2, + "labels": { + "1": { + "count": 3026 + }, + "0": { + "count": 534 + } + } + }, + "nor": { + "num_samples": 2675, + "number_of_characters": 262617, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 98.17457943925234, + "max_text_length": 421, + "unique_text": 2669, + "unique_labels": 2, + "labels": { + "0": { + "count": 909 + }, + "1": { + "count": 1766 + } + } + }, + "spa": { + "num_samples": 1029, + "number_of_characters": 87318, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 84.85714285714286, + "max_text_length": 608, + "unique_text": 1029, + "unique_labels": 2, + "labels": { + "1": { + "count": 851 + }, + "0": { + "count": 178 + } + } + }, + "rus": { + "num_samples": 2938, + "number_of_characters": 11486879, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 3909.7614023144997, + "max_text_length": 390168, + "unique_text": 2934, + "unique_labels": 2, + "labels": { + "0": { + "count": 996 + }, + "1": { + "count": 1942 + } + } + }, + "mlt": { + "num_samples": 595, + "number_of_characters": 73363, + "number_texts_intersect_with_train": null, + "min_text_length": 10, + "average_text_length": 123.29915966386555, + "max_text_length": 1260, + "unique_text": 595, + "unique_labels": 2, + "labels": { + "1": { + "count": 182 + }, + "0": { + "count": 413 + } + } + }, + "kor": { + "num_samples": 36000, + "number_of_characters": 1290066, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 35.835166666666666, + "max_text_length": 146, + "unique_text": 35992, + "unique_labels": 2, + "labels": { + "1": { + "count": 17897 + }, + "0": { + "count": 18103 + } + } + }, + "ind": { + "num_samples": 7926, + "number_of_characters": 
1578732, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 199.18395155185465, + "max_text_length": 567, + "unique_text": 7926, + "unique_labels": 2, + "labels": { + "1": { + "count": 5129 + }, + "0": { + "count": 2797 + } + } + }, + "heb": { + "num_samples": 6621, + "number_of_characters": 721542, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 108.9777979157227, + "max_text_length": 4767, + "unique_text": 6621, + "unique_labels": 2, + "labels": { + "1": { + "count": 4679 + }, + "0": { + "count": 1942 + } + } + }, + "jpn": { + "num_samples": 9831, + "number_of_characters": 1697449, + "number_texts_intersect_with_train": null, + "min_text_length": 17, + "average_text_length": 172.66290306174346, + "max_text_length": 1587, + "unique_text": 9831, + "unique_labels": 2, + "labels": { + "1": { + "count": 6080 + }, + "0": { + "count": 3751 + } + } + }, + "ell": { + "num_samples": 5936, + "number_of_characters": 2083879, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 351.05778301886795, + "max_text_length": 4992, + "unique_text": 3968, + "unique_labels": 2, + "labels": { + "0": { + "count": 716 + }, + "1": { + "count": 5220 + } + } + }, + "deu": { + "num_samples": 6444, + "number_of_characters": 1855246, + "number_texts_intersect_with_train": null, + "min_text_length": 8, + "average_text_length": 287.90285536933584, + "max_text_length": 15575, + "unique_text": 6207, + "unique_labels": 2, + "labels": { + "1": { + "count": 1216 + }, + "0": { + "count": 5228 + } + } + }, + "eng": { + "num_samples": 6920, + "number_of_characters": 716918, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 103.60086705202312, + "max_text_length": 283, + "unique_text": 6911, + "unique_labels": 2, + "labels": { + "1": { + "count": 3610 + }, + "0": { + "count": 3310 + } + } + }, + "fin": { + "num_samples": 4432, + 
"number_of_characters": 353568, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 79.77617328519855, + "max_text_length": 8973, + "unique_text": 4349, + "unique_labels": 2, + "labels": { + "1": { + "count": 1880 + }, + "0": { + "count": 2552 + } + } + }, + "hrv": { + "num_samples": 1507, + "number_of_characters": 243021, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 161.26144658261447, + "max_text_length": 1851, + "unique_text": 1503, + "unique_labels": 2, + "labels": { + "1": { + "count": 1148 + }, + "0": { + "count": 359 + } + } + }, + "zho": { + "num_samples": 12348, + "number_of_characters": 1615558, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 130.8356009070295, + "max_text_length": 4722, + "unique_text": 12348, + "unique_labels": 2, + "labels": { + "1": { + "count": 7400 + }, + "0": { + "count": 4948 + } + } + }, + "cmn": { + "num_samples": 28204, + "number_of_characters": 4518788, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 160.21798326478515, + "max_text_length": 266, + "unique_text": 28198, + "unique_labels": 2, + "labels": { + "1": { + "count": 23945 + }, + "0": { + "count": 4259 + } + } + }, + "bul": { + "num_samples": 5412, + "number_of_characters": 260756, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 48.18107908351811, + "max_text_length": 134, + "unique_text": 5407, + "unique_labels": 2, + "labels": { + "1": { + "count": 4587 + }, + "0": { + "count": 825 + } + } + }, + "eus": { + "num_samples": 789, + "number_of_characters": 48794, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 61.842839036755386, + "max_text_length": 446, + "unique_text": 789, + "unique_labels": 2, + "labels": { + "1": { + "count": 668 + }, + "0": { + "count": 121 + } + } + }, + "uig": { + 
"num_samples": 1962, + "number_of_characters": 483107, + "number_texts_intersect_with_train": null, + "min_text_length": 16, + "average_text_length": 246.23190621814476, + "max_text_length": 1288, + "unique_text": 1923, + "unique_labels": 2, + "labels": { + "1": { + "count": 1709 + }, + "0": { + "count": 253 + } + } + }, + "bam": { + "num_samples": 1569, + "number_of_characters": 44912, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 28.62460165710644, + "max_text_length": 686, + "unique_text": 1569, + "unique_labels": 2, + "labels": { + "1": { + "count": 1170 + }, + "0": { + "count": 399 + } + } + }, + "pol": { + "num_samples": 3737, + "number_of_characters": 2909471, + "number_texts_intersect_with_train": null, + "min_text_length": 47, + "average_text_length": 778.5579341717955, + "max_text_length": 10735, + "unique_text": 3737, + "unique_labels": 2, + "labels": { + "1": { + "count": 1553 + }, + "0": { + "count": 2184 + } + } + }, + "cym": { + "num_samples": 17500, + "number_of_characters": 24364519, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 1392.2582285714286, + "max_text_length": 13751, + "unique_text": 17395, + "unique_labels": 2, + "labels": { + "1": { + "count": 8750 + }, + "0": { + "count": 8750 + } + } + }, + "ara": { + "num_samples": 2468, + "number_of_characters": 163891, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 66.40640194489465, + "max_text_length": 954, + "unique_text": 2468, + "unique_labels": 2, + "labels": { + "1": { + "count": 1247 + }, + "0": { + "count": 1221 + } + } + }, + "fas": { + "num_samples": 56700, + "number_of_characters": 5081427, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 89.61952380952381, + "max_text_length": 1700, + "unique_text": 56700, + "unique_labels": 2, + "labels": { + "0": { + "count": 28350 + }, + "1": { + "count": 28350 + 
} + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/MyanmarNews.json b/mteb/descriptive_stats/Classification/MyanmarNews.json new file mode 100644 index 0000000000..a8dac1c2a1 --- /dev/null +++ b/mteb/descriptive_stats/Classification/MyanmarNews.json @@ -0,0 +1,26 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 354794, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 173.2392578125, + "max_text_length": 2268, + "unique_text": 2042, + "unique_labels": 4, + "labels": { + "2": { + "count": 523 + }, + "0": { + "count": 511 + }, + "3": { + "count": 507 + }, + "1": { + "count": 507 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/NYSJudicialEthicsLegalBenchClassification.json b/mteb/descriptive_stats/Classification/NYSJudicialEthicsLegalBenchClassification.json new file mode 100644 index 0000000000..80522a46f7 --- /dev/null +++ b/mteb/descriptive_stats/Classification/NYSJudicialEthicsLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 292, + "number_of_characters": 46562, + "number_texts_intersect_with_train": 0, + "min_text_length": 65, + "average_text_length": 159.45890410958904, + "max_text_length": 458, + "unique_text": 292, + "unique_labels": 2, + "labels": { + "1": { + "count": 152 + }, + "0": { + "count": 140 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 1507, + "number_texts_intersect_with_train": null, + "min_text_length": 152, + "average_text_length": 188.375, + "max_text_length": 243, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "0": { + "count": 4 + }, + "1": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/NaijaSenti.json b/mteb/descriptive_stats/Classification/NaijaSenti.json new file mode 100644 index 0000000000..436639be61 --- /dev/null +++ 
b/mteb/descriptive_stats/Classification/NaijaSenti.json @@ -0,0 +1,216 @@ +{ + "test": { + "num_samples": 17654, + "number_of_characters": 1295492, + "number_texts_intersect_with_train": 926, + "min_text_length": 6, + "average_text_length": 73.38234960915374, + "max_text_length": 276, + "unique_text": 17654, + "unique_labels": 3, + "labels": { + "0": { + "count": 6188 + }, + "1": { + "count": 5457 + }, + "2": { + "count": 6009 + } + }, + "hf_subset_descriptive_stats": { + "hau": { + "num_samples": 5303, + "number_of_characters": 355133, + "number_texts_intersect_with_train": 0, + "min_text_length": 8, + "average_text_length": 66.9683198189704, + "max_text_length": 275, + "unique_text": 5303, + "unique_labels": 3, + "labels": { + "0": { + "count": 1755 + }, + "1": { + "count": 1789 + }, + "2": { + "count": 1759 + } + } + }, + "ibo": { + "num_samples": 3682, + "number_of_characters": 175228, + "number_texts_intersect_with_train": 0, + "min_text_length": 9, + "average_text_length": 47.59043997827268, + "max_text_length": 269, + "unique_text": 3682, + "unique_labels": 3, + "labels": { + "0": { + "count": 1118 + }, + "1": { + "count": 1621 + }, + "2": { + "count": 943 + } + } + }, + "pcm": { + "num_samples": 4154, + "number_of_characters": 375268, + "number_texts_intersect_with_train": 926, + "min_text_length": 8, + "average_text_length": 90.3389504092441, + "max_text_length": 276, + "unique_text": 4154, + "unique_labels": 3, + "labels": { + "0": { + "count": 1397 + }, + "1": { + "count": 431 + }, + "2": { + "count": 2326 + } + } + }, + "yor": { + "num_samples": 4515, + "number_of_characters": 389863, + "number_texts_intersect_with_train": 0, + "min_text_length": 6, + "average_text_length": 86.3483942414175, + "max_text_length": 266, + "unique_text": 4515, + "unique_labels": 3, + "labels": { + "0": { + "count": 1918 + }, + "1": { + "count": 1616 + }, + "2": { + "count": 981 + } + } + } + } + }, + "train": { + "num_samples": 38007, + "number_of_characters": 3412356, + 
"number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 89.7823032599258, + "max_text_length": 354, + "unique_text": 37495, + "unique_labels": 3, + "labels": { + "2": { + "count": 12286 + }, + "1": { + "count": 12600 + }, + "0": { + "count": 13121 + } + }, + "hf_subset_descriptive_stats": { + "hau": { + "num_samples": 14172, + "number_of_characters": 1106209, + "number_texts_intersect_with_train": null, + "min_text_length": 17, + "average_text_length": 78.055955405024, + "max_text_length": 337, + "unique_text": 14172, + "unique_labels": 3, + "labels": { + "2": { + "count": 4573 + }, + "1": { + "count": 4912 + }, + "0": { + "count": 4687 + } + } + }, + "ibo": { + "num_samples": 10192, + "number_of_characters": 709705, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 69.6335361067504, + "max_text_length": 354, + "unique_text": 10192, + "unique_labels": 3, + "labels": { + "2": { + "count": 2600 + }, + "1": { + "count": 4508 + }, + "0": { + "count": 3084 + } + } + }, + "pcm": { + "num_samples": 5121, + "number_of_characters": 594073, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 116.00722515133764, + "max_text_length": 279, + "unique_text": 4609, + "unique_labels": 3, + "labels": { + "2": { + "count": 3241 + }, + "1": { + "count": 72 + }, + "0": { + "count": 1808 + } + } + }, + "yor": { + "num_samples": 8522, + "number_of_characters": 1002369, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 117.62133302041774, + "max_text_length": 354, + "unique_text": 8522, + "unique_labels": 3, + "labels": { + "2": { + "count": 1872 + }, + "1": { + "count": 3108 + }, + "0": { + "count": 3542 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/NepaliNewsClassification.json b/mteb/descriptive_stats/Classification/NepaliNewsClassification.json new file mode 100644 
index 0000000000..f230dd8e5b --- /dev/null +++ b/mteb/descriptive_stats/Classification/NepaliNewsClassification.json @@ -0,0 +1,23 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 402747, + "number_texts_intersect_with_train": null, + "min_text_length": 71, + "average_text_length": 196.65380859375, + "max_text_length": 459, + "unique_text": 2047, + "unique_labels": 3, + "labels": { + "2": { + "count": 699 + }, + "1": { + "count": 623 + }, + "0": { + "count": 726 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/NewsClassification.json b/mteb/descriptive_stats/Classification/NewsClassification.json new file mode 100644 index 0000000000..9bc416c63b --- /dev/null +++ b/mteb/descriptive_stats/Classification/NewsClassification.json @@ -0,0 +1,50 @@ +{ + "test": { + "num_samples": 7600, + "number_of_characters": 1788274, + "number_texts_intersect_with_train": 0, + "min_text_length": 100, + "average_text_length": 235.2992105263158, + "max_text_length": 892, + "unique_text": 7600, + "unique_labels": 4, + "labels": { + "2": { + "count": 1900 + }, + "3": { + "count": 1900 + }, + "1": { + "count": 1900 + }, + "0": { + "count": 1900 + } + } + }, + "train": { + "num_samples": 120000, + "number_of_characters": 28377303, + "number_texts_intersect_with_train": null, + "min_text_length": 100, + "average_text_length": 236.477525, + "max_text_length": 1012, + "unique_text": 120000, + "unique_labels": 4, + "labels": { + "2": { + "count": 30000 + }, + "3": { + "count": 30000 + }, + "1": { + "count": 30000 + }, + "0": { + "count": 30000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/NoRecClassification.json b/mteb/descriptive_stats/Classification/NoRecClassification.json new file mode 100644 index 0000000000..2245602467 --- /dev/null +++ b/mteb/descriptive_stats/Classification/NoRecClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 2048, + 
"number_of_characters": 183540, + "number_texts_intersect_with_train": 1, + "min_text_length": 2, + "average_text_length": 89.619140625, + "max_text_length": 417, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "positive": { + "count": 715 + }, + "neutral": { + "count": 998 + }, + "negative": { + "count": 335 + } + } + }, + "train": { + "num_samples": 1024, + "number_of_characters": 88991, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 86.9052734375, + "max_text_length": 397, + "unique_text": 1024, + "unique_labels": 3, + "labels": { + "neutral": { + "count": 544 + }, + "positive": { + "count": 330 + }, + "negative": { + "count": 150 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/NordicLangClassification.json b/mteb/descriptive_stats/Classification/NordicLangClassification.json new file mode 100644 index 0000000000..515f11c2fb --- /dev/null +++ b/mteb/descriptive_stats/Classification/NordicLangClassification.json @@ -0,0 +1,62 @@ +{ + "test": { + "num_samples": 3000, + "number_of_characters": 234686, + "number_texts_intersect_with_train": 320, + "min_text_length": 10, + "average_text_length": 78.22866666666667, + "max_text_length": 294, + "unique_text": 2841, + "unique_labels": 6, + "labels": { + "1": { + "count": 510 + }, + "5": { + "count": 480 + }, + "4": { + "count": 522 + }, + "2": { + "count": 455 + }, + "0": { + "count": 532 + }, + "3": { + "count": 501 + } + } + }, + "train": { + "num_samples": 56985, + "number_of_characters": 4471932, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 78.47559884180048, + "max_text_length": 294, + "unique_text": 49950, + "unique_labels": 6, + "labels": { + "2": { + "count": 9543 + }, + "0": { + "count": 9462 + }, + "5": { + "count": 9519 + }, + "1": { + "count": 9490 + }, + "4": { + "count": 9477 + }, + "3": { + "count": 9494 + } + } + } +} \ No newline at end of file diff 
--git a/mteb/descriptive_stats/Classification/NorwegianParliamentClassification.json b/mteb/descriptive_stats/Classification/NorwegianParliamentClassification.json new file mode 100644 index 0000000000..e7516387a6 --- /dev/null +++ b/mteb/descriptive_stats/Classification/NorwegianParliamentClassification.json @@ -0,0 +1,56 @@ +{ + "test": { + "num_samples": 1200, + "number_of_characters": 2260808, + "number_texts_intersect_with_train": 1, + "min_text_length": 26, + "average_text_length": 1884.0066666666667, + "max_text_length": 31458, + "unique_text": 1200, + "unique_labels": 2, + "labels": { + "1": { + "count": 600 + }, + "0": { + "count": 600 + } + } + }, + "validation": { + "num_samples": 1200, + "number_of_characters": 2293204, + "number_texts_intersect_with_train": 1, + "min_text_length": 33, + "average_text_length": 1911.0033333333333, + "max_text_length": 30118, + "unique_text": 1200, + "unique_labels": 2, + "labels": { + "0": { + "count": 600 + }, + "1": { + "count": 600 + } + } + }, + "train": { + "num_samples": 3600, + "number_of_characters": 6385292, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 1773.6922222222222, + "max_text_length": 16395, + "unique_text": 3600, + "unique_labels": 2, + "labels": { + "1": { + "count": 1800 + }, + "0": { + "count": 1800 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/NusaParagraphEmotionClassification.json b/mteb/descriptive_stats/Classification/NusaParagraphEmotionClassification.json new file mode 100644 index 0000000000..c92c7d8c5c --- /dev/null +++ b/mteb/descriptive_stats/Classification/NusaParagraphEmotionClassification.json @@ -0,0 +1,732 @@ +{ + "test": { + "num_samples": 5700, + "number_of_characters": 4194411, + "number_texts_intersect_with_train": 9, + "min_text_length": 495, + "average_text_length": 735.8615789473685, + "max_text_length": 1842, + "unique_text": 5697, + "unique_labels": 7, + "labels": { + "4": { + 
"count": 649 + }, + "5": { + "count": 687 + }, + "3": { + "count": 896 + }, + "0": { + "count": 1518 + }, + "6": { + "count": 496 + }, + "2": { + "count": 778 + }, + "1": { + "count": 676 + } + }, + "hf_subset_descriptive_stats": { + "btk": { + "num_samples": 500, + "number_of_characters": 339185, + "number_texts_intersect_with_train": 4, + "min_text_length": 495, + "average_text_length": 678.37, + "max_text_length": 1808, + "unique_text": 499, + "unique_labels": 7, + "labels": { + "4": { + "count": 58 + }, + "5": { + "count": 71 + }, + "3": { + "count": 84 + }, + "0": { + "count": 103 + }, + "6": { + "count": 51 + }, + "2": { + "count": 73 + }, + "1": { + "count": 60 + } + } + }, + "bew": { + "num_samples": 800, + "number_of_characters": 625862, + "number_texts_intersect_with_train": 3, + "min_text_length": 561, + "average_text_length": 782.3275, + "max_text_length": 1598, + "unique_text": 798, + "unique_labels": 7, + "labels": { + "0": { + "count": 221 + }, + "5": { + "count": 96 + }, + "6": { + "count": 82 + }, + "1": { + "count": 126 + }, + "3": { + "count": 100 + }, + "4": { + "count": 83 + }, + "2": { + "count": 92 + } + } + }, + "bug": { + "num_samples": 300, + "number_of_characters": 234950, + "number_texts_intersect_with_train": 0, + "min_text_length": 583, + "average_text_length": 783.1666666666666, + "max_text_length": 1255, + "unique_text": 300, + "unique_labels": 7, + "labels": { + "0": { + "count": 82 + }, + "4": { + "count": 45 + }, + "3": { + "count": 65 + }, + "5": { + "count": 23 + }, + "1": { + "count": 24 + }, + "6": { + "count": 23 + }, + "2": { + "count": 38 + } + } + }, + "jav": { + "num_samples": 800, + "number_of_characters": 548221, + "number_texts_intersect_with_train": 0, + "min_text_length": 564, + "average_text_length": 685.27625, + "max_text_length": 1106, + "unique_text": 800, + "unique_labels": 7, + "labels": { + "3": { + "count": 101 + }, + "5": { + "count": 87 + }, + "6": { + "count": 90 + }, + "1": { + "count": 93 + }, + "4": { + 
"count": 102 + }, + "0": { + "count": 222 + }, + "2": { + "count": 105 + } + } + }, + "mad": { + "num_samples": 500, + "number_of_characters": 352867, + "number_texts_intersect_with_train": 2, + "min_text_length": 585, + "average_text_length": 705.734, + "max_text_length": 1260, + "unique_text": 500, + "unique_labels": 7, + "labels": { + "5": { + "count": 49 + }, + "0": { + "count": 163 + }, + "3": { + "count": 110 + }, + "1": { + "count": 28 + }, + "2": { + "count": 96 + }, + "4": { + "count": 51 + }, + "6": { + "count": 3 + } + } + }, + "mak": { + "num_samples": 500, + "number_of_characters": 352366, + "number_texts_intersect_with_train": 0, + "min_text_length": 498, + "average_text_length": 704.732, + "max_text_length": 1096, + "unique_text": 500, + "unique_labels": 7, + "labels": { + "5": { + "count": 78 + }, + "3": { + "count": 110 + }, + "4": { + "count": 69 + }, + "1": { + "count": 44 + }, + "2": { + "count": 71 + }, + "6": { + "count": 25 + }, + "0": { + "count": 103 + } + } + }, + "min": { + "num_samples": 800, + "number_of_characters": 590388, + "number_texts_intersect_with_train": 0, + "min_text_length": 558, + "average_text_length": 737.985, + "max_text_length": 1636, + "unique_text": 800, + "unique_labels": 7, + "labels": { + "6": { + "count": 86 + }, + "1": { + "count": 130 + }, + "0": { + "count": 239 + }, + "5": { + "count": 89 + }, + "3": { + "count": 103 + }, + "4": { + "count": 66 + }, + "2": { + "count": 87 + } + } + }, + "mui": { + "num_samples": 400, + "number_of_characters": 322255, + "number_texts_intersect_with_train": 0, + "min_text_length": 590, + "average_text_length": 805.6375, + "max_text_length": 1352, + "unique_text": 400, + "unique_labels": 7, + "labels": { + "0": { + "count": 117 + }, + "3": { + "count": 58 + }, + "4": { + "count": 61 + }, + "2": { + "count": 57 + }, + "5": { + "count": 58 + }, + "6": { + "count": 18 + }, + "1": { + "count": 31 + } + } + }, + "rej": { + "num_samples": 300, + "number_of_characters": 218191, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 520, + "average_text_length": 727.3033333333333, + "max_text_length": 1187, + "unique_text": 300, + "unique_labels": 7, + "labels": { + "3": { + "count": 60 + }, + "4": { + "count": 26 + }, + "2": { + "count": 62 + }, + "0": { + "count": 59 + }, + "6": { + "count": 26 + }, + "1": { + "count": 35 + }, + "5": { + "count": 32 + } + } + }, + "sun": { + "num_samples": 800, + "number_of_characters": 610126, + "number_texts_intersect_with_train": 0, + "min_text_length": 564, + "average_text_length": 762.6575, + "max_text_length": 1842, + "unique_text": 800, + "unique_labels": 7, + "labels": { + "3": { + "count": 105 + }, + "6": { + "count": 92 + }, + "4": { + "count": 88 + }, + "5": { + "count": 104 + }, + "0": { + "count": 209 + }, + "2": { + "count": 97 + }, + "1": { + "count": 105 + } + } + } + } + }, + "train": { + "num_samples": 13963, + "number_of_characters": 10210343, + "number_texts_intersect_with_train": null, + "min_text_length": 467, + "average_text_length": 731.2427845018979, + "max_text_length": 2156, + "unique_text": 13959, + "unique_labels": 7, + "labels": { + "6": { + "count": 1343 + }, + "3": { + "count": 2070 + }, + "5": { + "count": 1686 + }, + "4": { + "count": 1648 + }, + "0": { + "count": 3609 + }, + "1": { + "count": 1730 + }, + "2": { + "count": 1877 + } + }, + "hf_subset_descriptive_stats": { + "btk": { + "num_samples": 1149, + "number_of_characters": 785657, + "number_texts_intersect_with_train": null, + "min_text_length": 467, + "average_text_length": 683.7745865970409, + "max_text_length": 1807, + "unique_text": 1149, + "unique_labels": 7, + "labels": { + "6": { + "count": 107 + }, + "3": { + "count": 186 + }, + "5": { + "count": 145 + }, + "4": { + "count": 141 + }, + "0": { + "count": 259 + }, + "1": { + "count": 155 + }, + "2": { + "count": 156 + } + } + }, + "bew": { + "num_samples": 2698, + "number_of_characters": 2120349, + "number_texts_intersect_with_train": null, + 
"min_text_length": 535, + "average_text_length": 785.896590066716, + "max_text_length": 1715, + "unique_text": 2694, + "unique_labels": 7, + "labels": { + "3": { + "count": 319 + }, + "5": { + "count": 279 + }, + "6": { + "count": 307 + }, + "0": { + "count": 744 + }, + "1": { + "count": 399 + }, + "2": { + "count": 347 + }, + "4": { + "count": 303 + } + } + }, + "bug": { + "num_samples": 87, + "number_of_characters": 66895, + "number_texts_intersect_with_train": null, + "min_text_length": 622, + "average_text_length": 768.9080459770115, + "max_text_length": 1150, + "unique_text": 87, + "unique_labels": 7, + "labels": { + "1": { + "count": 11 + }, + "5": { + "count": 7 + }, + "0": { + "count": 25 + }, + "2": { + "count": 8 + }, + "3": { + "count": 21 + }, + "4": { + "count": 11 + }, + "6": { + "count": 4 + } + } + }, + "jav": { + "num_samples": 2800, + "number_of_characters": 1918633, + "number_texts_intersect_with_train": null, + "min_text_length": 562, + "average_text_length": 685.2260714285715, + "max_text_length": 1405, + "unique_text": 2800, + "unique_labels": 7, + "labels": { + "5": { + "count": 348 + }, + "1": { + "count": 340 + }, + "0": { + "count": 678 + }, + "3": { + "count": 369 + }, + "4": { + "count": 362 + }, + "6": { + "count": 354 + }, + "2": { + "count": 349 + } + } + }, + "mad": { + "num_samples": 999, + "number_of_characters": 705416, + "number_texts_intersect_with_train": null, + "min_text_length": 564, + "average_text_length": 706.1221221221222, + "max_text_length": 2156, + "unique_text": 999, + "unique_labels": 7, + "labels": { + "5": { + "count": 100 + }, + "0": { + "count": 335 + }, + "2": { + "count": 185 + }, + "3": { + "count": 205 + }, + "4": { + "count": 117 + }, + "1": { + "count": 49 + }, + "6": { + "count": 8 + } + } + }, + "mak": { + "num_samples": 1499, + "number_of_characters": 1061229, + "number_texts_intersect_with_train": null, + "min_text_length": 484, + "average_text_length": 707.9579719813208, + "max_text_length": 1168, + 
"unique_text": 1499, + "unique_labels": 7, + "labels": { + "3": { + "count": 324 + }, + "4": { + "count": 189 + }, + "2": { + "count": 237 + }, + "0": { + "count": 304 + }, + "1": { + "count": 127 + }, + "6": { + "count": 81 + }, + "5": { + "count": 237 + } + } + }, + "min": { + "num_samples": 1996, + "number_of_characters": 1473263, + "number_texts_intersect_with_train": null, + "min_text_length": 543, + "average_text_length": 738.1077154308617, + "max_text_length": 1321, + "unique_text": 1996, + "unique_labels": 7, + "labels": { + "0": { + "count": 537 + }, + "6": { + "count": 230 + }, + "4": { + "count": 178 + }, + "2": { + "count": 240 + }, + "1": { + "count": 317 + }, + "3": { + "count": 301 + }, + "5": { + "count": 193 + } + } + }, + "mui": { + "num_samples": 201, + "number_of_characters": 162437, + "number_texts_intersect_with_train": null, + "min_text_length": 623, + "average_text_length": 808.1442786069651, + "max_text_length": 1404, + "unique_text": 201, + "unique_labels": 7, + "labels": { + "0": { + "count": 62 + }, + "5": { + "count": 32 + }, + "3": { + "count": 23 + }, + "1": { + "count": 17 + }, + "2": { + "count": 31 + }, + "4": { + "count": 31 + }, + "6": { + "count": 5 + } + } + }, + "rej": { + "num_samples": 136, + "number_of_characters": 96411, + "number_texts_intersect_with_train": null, + "min_text_length": 528, + "average_text_length": 708.9044117647059, + "max_text_length": 1138, + "unique_text": 136, + "unique_labels": 7, + "labels": { + "0": { + "count": 29 + }, + "3": { + "count": 26 + }, + "2": { + "count": 27 + }, + "1": { + "count": 12 + }, + "5": { + "count": 10 + }, + "4": { + "count": 20 + }, + "6": { + "count": 12 + } + } + }, + "sun": { + "num_samples": 2398, + "number_of_characters": 1820053, + "number_texts_intersect_with_train": null, + "min_text_length": 558, + "average_text_length": 758.987906588824, + "max_text_length": 1546, + "unique_text": 2398, + "unique_labels": 7, + "labels": { + "1": { + "count": 303 + }, + "4": { + 
"count": 296 + }, + "0": { + "count": 636 + }, + "2": { + "count": 297 + }, + "3": { + "count": 296 + }, + "6": { + "count": 235 + }, + "5": { + "count": 335 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/NusaParagraphTopicClassification.json b/mteb/descriptive_stats/Classification/NusaParagraphTopicClassification.json new file mode 100644 index 0000000000..0b71146d85 --- /dev/null +++ b/mteb/descriptive_stats/Classification/NusaParagraphTopicClassification.json @@ -0,0 +1,792 @@ +{ + "test": { + "num_samples": 6250, + "number_of_characters": 4629468, + "number_texts_intersect_with_train": 1, + "min_text_length": 502, + "average_text_length": 740.71488, + "max_text_length": 1849, + "unique_text": 6250, + "unique_labels": 8, + "labels": { + "0": { + "count": 1198 + }, + "3": { + "count": 778 + }, + "1": { + "count": 851 + }, + "7": { + "count": 440 + }, + "5": { + "count": 580 + }, + "2": { + "count": 928 + }, + "6": { + "count": 819 + }, + "4": { + "count": 656 + } + }, + "hf_subset_descriptive_stats": { + "btk": { + "num_samples": 500, + "number_of_characters": 341829, + "number_texts_intersect_with_train": 0, + "min_text_length": 530, + "average_text_length": 683.658, + "max_text_length": 1777, + "unique_text": 500, + "unique_labels": 8, + "labels": { + "0": { + "count": 110 + }, + "3": { + "count": 64 + }, + "1": { + "count": 49 + }, + "7": { + "count": 31 + }, + "5": { + "count": 53 + }, + "2": { + "count": 84 + }, + "6": { + "count": 48 + }, + "4": { + "count": 61 + } + } + }, + "bew": { + "num_samples": 800, + "number_of_characters": 648577, + "number_texts_intersect_with_train": 0, + "min_text_length": 561, + "average_text_length": 810.72125, + "max_text_length": 1849, + "unique_text": 800, + "unique_labels": 8, + "labels": { + "6": { + "count": 119 + }, + "4": { + "count": 71 + }, + "0": { + "count": 143 + }, + "7": { + "count": 71 + }, + "1": { + "count": 122 + }, + "3": { + "count": 100 + }, + "5": { 
+ "count": 74 + }, + "2": { + "count": 100 + } + } + }, + "bug": { + "num_samples": 300, + "number_of_characters": 225792, + "number_texts_intersect_with_train": 0, + "min_text_length": 594, + "average_text_length": 752.64, + "max_text_length": 1159, + "unique_text": 300, + "unique_labels": 8, + "labels": { + "7": { + "count": 33 + }, + "4": { + "count": 13 + }, + "1": { + "count": 33 + }, + "5": { + "count": 37 + }, + "0": { + "count": 65 + }, + "3": { + "count": 71 + }, + "2": { + "count": 33 + }, + "6": { + "count": 15 + } + } + }, + "jav": { + "num_samples": 800, + "number_of_characters": 560251, + "number_texts_intersect_with_train": 0, + "min_text_length": 578, + "average_text_length": 700.31375, + "max_text_length": 1190, + "unique_text": 800, + "unique_labels": 8, + "labels": { + "4": { + "count": 101 + }, + "6": { + "count": 125 + }, + "1": { + "count": 112 + }, + "3": { + "count": 94 + }, + "7": { + "count": 36 + }, + "2": { + "count": 106 + }, + "5": { + "count": 113 + }, + "0": { + "count": 113 + } + } + }, + "mad": { + "num_samples": 700, + "number_of_characters": 504078, + "number_texts_intersect_with_train": 0, + "min_text_length": 583, + "average_text_length": 720.1114285714285, + "max_text_length": 1128, + "unique_text": 700, + "unique_labels": 8, + "labels": { + "1": { + "count": 107 + }, + "7": { + "count": 53 + }, + "6": { + "count": 94 + }, + "0": { + "count": 187 + }, + "3": { + "count": 61 + }, + "5": { + "count": 16 + }, + "4": { + "count": 59 + }, + "2": { + "count": 123 + } + } + }, + "mak": { + "num_samples": 700, + "number_of_characters": 506143, + "number_texts_intersect_with_train": 0, + "min_text_length": 526, + "average_text_length": 723.0614285714286, + "max_text_length": 1153, + "unique_text": 700, + "unique_labels": 8, + "labels": { + "0": { + "count": 166 + }, + "4": { + "count": 69 + }, + "6": { + "count": 82 + }, + "1": { + "count": 96 + }, + "7": { + "count": 53 + }, + "2": { + "count": 108 + }, + "3": { + "count": 94 + }, + 
"5": { + "count": 32 + } + } + }, + "min": { + "num_samples": 800, + "number_of_characters": 589491, + "number_texts_intersect_with_train": 1, + "min_text_length": 541, + "average_text_length": 736.86375, + "max_text_length": 1571, + "unique_text": 800, + "unique_labels": 8, + "labels": { + "6": { + "count": 93 + }, + "3": { + "count": 78 + }, + "0": { + "count": 156 + }, + "4": { + "count": 73 + }, + "5": { + "count": 101 + }, + "7": { + "count": 59 + }, + "2": { + "count": 128 + }, + "1": { + "count": 112 + } + } + }, + "mui": { + "num_samples": 400, + "number_of_characters": 319747, + "number_texts_intersect_with_train": 0, + "min_text_length": 593, + "average_text_length": 799.3675, + "max_text_length": 1524, + "unique_text": 400, + "unique_labels": 7, + "labels": { + "6": { + "count": 65 + }, + "1": { + "count": 65 + }, + "4": { + "count": 65 + }, + "7": { + "count": 30 + }, + "3": { + "count": 55 + }, + "0": { + "count": 60 + }, + "2": { + "count": 60 + } + } + }, + "rej": { + "num_samples": 350, + "number_of_characters": 245109, + "number_texts_intersect_with_train": 0, + "min_text_length": 502, + "average_text_length": 700.3114285714286, + "max_text_length": 1067, + "unique_text": 350, + "unique_labels": 8, + "labels": { + "0": { + "count": 65 + }, + "5": { + "count": 33 + }, + "1": { + "count": 31 + }, + "6": { + "count": 37 + }, + "4": { + "count": 46 + }, + "2": { + "count": 71 + }, + "7": { + "count": 15 + }, + "3": { + "count": 52 + } + } + }, + "sun": { + "num_samples": 900, + "number_of_characters": 688451, + "number_texts_intersect_with_train": 0, + "min_text_length": 543, + "average_text_length": 764.9455555555555, + "max_text_length": 1425, + "unique_text": 900, + "unique_labels": 8, + "labels": { + "5": { + "count": 121 + }, + "4": { + "count": 98 + }, + "6": { + "count": 141 + }, + "3": { + "count": 109 + }, + "7": { + "count": 59 + }, + "1": { + "count": 124 + }, + "2": { + "count": 115 + }, + "0": { + "count": 133 + } + } + } + } + }, + 
"train": { + "num_samples": 15516, + "number_of_characters": 11485555, + "number_texts_intersect_with_train": null, + "min_text_length": 504, + "average_text_length": 740.2394302655324, + "max_text_length": 2300, + "unique_text": 15514, + "unique_labels": 8, + "labels": { + "3": { + "count": 1890 + }, + "4": { + "count": 1664 + }, + "0": { + "count": 2997 + }, + "5": { + "count": 1511 + }, + "6": { + "count": 1765 + }, + "2": { + "count": 2350 + }, + "1": { + "count": 2233 + }, + "7": { + "count": 1106 + } + }, + "hf_subset_descriptive_stats": { + "btk": { + "num_samples": 1350, + "number_of_characters": 927651, + "number_texts_intersect_with_train": null, + "min_text_length": 504, + "average_text_length": 687.1488888888889, + "max_text_length": 2267, + "unique_text": 1350, + "unique_labels": 8, + "labels": { + "3": { + "count": 176 + }, + "4": { + "count": 152 + }, + "0": { + "count": 288 + }, + "5": { + "count": 129 + }, + "6": { + "count": 124 + }, + "2": { + "count": 209 + }, + "1": { + "count": 184 + }, + "7": { + "count": 88 + } + } + }, + "bew": { + "num_samples": 2650, + "number_of_characters": 2145717, + "number_texts_intersect_with_train": null, + "min_text_length": 565, + "average_text_length": 809.7045283018867, + "max_text_length": 2300, + "unique_text": 2650, + "unique_labels": 8, + "labels": { + "5": { + "count": 308 + }, + "7": { + "count": 178 + }, + "0": { + "count": 482 + }, + "3": { + "count": 331 + }, + "1": { + "count": 399 + }, + "6": { + "count": 299 + }, + "2": { + "count": 341 + }, + "4": { + "count": 312 + } + } + }, + "bug": { + "num_samples": 93, + "number_of_characters": 69528, + "number_texts_intersect_with_train": null, + "min_text_length": 608, + "average_text_length": 747.6129032258065, + "max_text_length": 965, + "unique_text": 93, + "unique_labels": 8, + "labels": { + "2": { + "count": 7 + }, + "4": { + "count": 5 + }, + "7": { + "count": 10 + }, + "1": { + "count": 12 + }, + "6": { + "count": 4 + }, + "3": { + "count": 20 + }, + 
"5": { + "count": 15 + }, + "0": { + "count": 20 + } + } + }, + "jav": { + "num_samples": 2650, + "number_of_characters": 1841858, + "number_texts_intersect_with_train": null, + "min_text_length": 556, + "average_text_length": 695.0407547169812, + "max_text_length": 1354, + "unique_text": 2650, + "unique_labels": 8, + "labels": { + "5": { + "count": 337 + }, + "0": { + "count": 416 + }, + "1": { + "count": 338 + }, + "3": { + "count": 337 + }, + "4": { + "count": 343 + }, + "6": { + "count": 328 + }, + "2": { + "count": 372 + }, + "7": { + "count": 179 + } + } + }, + "mad": { + "num_samples": 1800, + "number_of_characters": 1293049, + "number_texts_intersect_with_train": null, + "min_text_length": 566, + "average_text_length": 718.3605555555556, + "max_text_length": 1157, + "unique_text": 1800, + "unique_labels": 8, + "labels": { + "0": { + "count": 483 + }, + "3": { + "count": 182 + }, + "2": { + "count": 303 + }, + "1": { + "count": 303 + }, + "6": { + "count": 204 + }, + "5": { + "count": 67 + }, + "4": { + "count": 130 + }, + "7": { + "count": 128 + } + } + }, + "mak": { + "num_samples": 1500, + "number_of_characters": 1084894, + "number_texts_intersect_with_train": null, + "min_text_length": 504, + "average_text_length": 723.2626666666666, + "max_text_length": 1187, + "unique_text": 1500, + "unique_labels": 8, + "labels": { + "0": { + "count": 332 + }, + "7": { + "count": 111 + }, + "3": { + "count": 223 + }, + "2": { + "count": 247 + }, + "1": { + "count": 226 + }, + "4": { + "count": 159 + }, + "6": { + "count": 146 + }, + "5": { + "count": 56 + } + } + }, + "min": { + "num_samples": 2400, + "number_of_characters": 1766506, + "number_texts_intersect_with_train": null, + "min_text_length": 520, + "average_text_length": 736.0441666666667, + "max_text_length": 1300, + "unique_text": 2398, + "unique_labels": 8, + "labels": { + "1": { + "count": 361 + }, + "4": { + "count": 193 + }, + "7": { + "count": 169 + }, + "2": { + "count": 415 + }, + "6": { + "count": 238 
+ }, + "0": { + "count": 540 + }, + "3": { + "count": 231 + }, + "5": { + "count": 253 + } + } + }, + "mui": { + "num_samples": 168, + "number_of_characters": 133585, + "number_texts_intersect_with_train": null, + "min_text_length": 616, + "average_text_length": 795.1488095238095, + "max_text_length": 1663, + "unique_text": 168, + "unique_labels": 7, + "labels": { + "3": { + "count": 36 + }, + "0": { + "count": 29 + }, + "6": { + "count": 27 + }, + "2": { + "count": 26 + }, + "1": { + "count": 20 + }, + "4": { + "count": 21 + }, + "7": { + "count": 9 + } + } + }, + "rej": { + "num_samples": 105, + "number_of_characters": 72800, + "number_texts_intersect_with_train": null, + "min_text_length": 539, + "average_text_length": 693.3333333333334, + "max_text_length": 935, + "unique_text": 105, + "unique_labels": 8, + "labels": { + "3": { + "count": 14 + }, + "0": { + "count": 19 + }, + "5": { + "count": 13 + }, + "4": { + "count": 13 + }, + "1": { + "count": 12 + }, + "2": { + "count": 21 + }, + "6": { + "count": 8 + }, + "7": { + "count": 5 + } + } + }, + "sun": { + "num_samples": 2800, + "number_of_characters": 2149967, + "number_texts_intersect_with_train": null, + "min_text_length": 562, + "average_text_length": 767.8453571428571, + "max_text_length": 1764, + "unique_text": 2800, + "unique_labels": 8, + "labels": { + "0": { + "count": 388 + }, + "7": { + "count": 229 + }, + "4": { + "count": 336 + }, + "3": { + "count": 340 + }, + "1": { + "count": 378 + }, + "5": { + "count": 333 + }, + "6": { + "count": 387 + }, + "2": { + "count": 409 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/NusaX-senti.json b/mteb/descriptive_stats/Classification/NusaX-senti.json new file mode 100644 index 0000000000..329ecc20bd --- /dev/null +++ b/mteb/descriptive_stats/Classification/NusaX-senti.json @@ -0,0 +1,552 @@ +{ + "test": { + "num_samples": 4800, + "number_of_characters": 739028, + "number_texts_intersect_with_train": 0, + 
"min_text_length": 5, + "average_text_length": 153.96416666666667, + "max_text_length": 539, + "unique_text": 4800, + "unique_labels": 3, + "labels": { + "2": { + "count": 1812 + }, + "1": { + "count": 1152 + }, + "0": { + "count": 1836 + } + }, + "hf_subset_descriptive_stats": { + "ace": { + "num_samples": 400, + "number_of_characters": 59587, + "number_texts_intersect_with_train": 0, + "min_text_length": 25, + "average_text_length": 148.9675, + "max_text_length": 433, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + }, + "ban": { + "num_samples": 400, + "number_of_characters": 61348, + "number_texts_intersect_with_train": 0, + "min_text_length": 32, + "average_text_length": 153.37, + "max_text_length": 472, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + }, + "bjn": { + "num_samples": 400, + "number_of_characters": 61042, + "number_texts_intersect_with_train": 0, + "min_text_length": 5, + "average_text_length": 152.605, + "max_text_length": 428, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + }, + "bug": { + "num_samples": 400, + "number_of_characters": 65138, + "number_texts_intersect_with_train": 0, + "min_text_length": 25, + "average_text_length": 162.845, + "max_text_length": 469, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + }, + "eng": { + "num_samples": 400, + "number_of_characters": 63803, + "number_texts_intersect_with_train": 0, + "min_text_length": 25, + "average_text_length": 159.5075, + "max_text_length": 465, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + 
} + }, + "ind": { + "num_samples": 400, + "number_of_characters": 61680, + "number_texts_intersect_with_train": 0, + "min_text_length": 31, + "average_text_length": 154.2, + "max_text_length": 433, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + }, + "jav": { + "num_samples": 400, + "number_of_characters": 59839, + "number_texts_intersect_with_train": 0, + "min_text_length": 28, + "average_text_length": 149.5975, + "max_text_length": 414, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + }, + "mad": { + "num_samples": 400, + "number_of_characters": 62098, + "number_texts_intersect_with_train": 0, + "min_text_length": 30, + "average_text_length": 155.245, + "max_text_length": 464, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + }, + "min": { + "num_samples": 400, + "number_of_characters": 59808, + "number_texts_intersect_with_train": 0, + "min_text_length": 28, + "average_text_length": 149.52, + "max_text_length": 412, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + }, + "nij": { + "num_samples": 400, + "number_of_characters": 60965, + "number_texts_intersect_with_train": 0, + "min_text_length": 28, + "average_text_length": 152.4125, + "max_text_length": 539, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + }, + "sun": { + "num_samples": 400, + "number_of_characters": 61212, + "number_texts_intersect_with_train": 0, + "min_text_length": 31, + "average_text_length": 153.03, + "max_text_length": 431, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + 
"count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + }, + "bbc": { + "num_samples": 400, + "number_of_characters": 62508, + "number_texts_intersect_with_train": 0, + "min_text_length": 12, + "average_text_length": 156.27, + "max_text_length": 461, + "unique_text": 400, + "unique_labels": 3, + "labels": { + "2": { + "count": 151 + }, + "1": { + "count": 96 + }, + "0": { + "count": 153 + } + } + } + } + }, + "train": { + "num_samples": 6000, + "number_of_characters": 920296, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 153.38266666666667, + "max_text_length": 562, + "unique_text": 5998, + "unique_labels": 3, + "labels": { + "1": { + "count": 1428 + }, + "2": { + "count": 2268 + }, + "0": { + "count": 2304 + } + }, + "hf_subset_descriptive_stats": { + "ace": { + "num_samples": 500, + "number_of_characters": 73892, + "number_texts_intersect_with_train": null, + "min_text_length": 20, + "average_text_length": 147.784, + "max_text_length": 550, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "ban": { + "num_samples": 500, + "number_of_characters": 76550, + "number_texts_intersect_with_train": null, + "min_text_length": 22, + "average_text_length": 153.1, + "max_text_length": 491, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "bjn": { + "num_samples": 500, + "number_of_characters": 76498, + "number_texts_intersect_with_train": null, + "min_text_length": 22, + "average_text_length": 152.996, + "max_text_length": 526, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "bug": { + "num_samples": 500, + "number_of_characters": 80722, + "number_texts_intersect_with_train": null, + 
"min_text_length": 7, + "average_text_length": 161.444, + "max_text_length": 549, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "eng": { + "num_samples": 500, + "number_of_characters": 80830, + "number_texts_intersect_with_train": null, + "min_text_length": 18, + "average_text_length": 161.66, + "max_text_length": 562, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "ind": { + "num_samples": 500, + "number_of_characters": 76650, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 153.3, + "max_text_length": 498, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "jav": { + "num_samples": 500, + "number_of_characters": 74441, + "number_texts_intersect_with_train": null, + "min_text_length": 21, + "average_text_length": 148.882, + "max_text_length": 507, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "mad": { + "num_samples": 500, + "number_of_characters": 77112, + "number_texts_intersect_with_train": null, + "min_text_length": 24, + "average_text_length": 154.224, + "max_text_length": 496, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "min": { + "num_samples": 500, + "number_of_characters": 74082, + "number_texts_intersect_with_train": null, + "min_text_length": 23, + "average_text_length": 148.164, + "max_text_length": 500, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "nij": { + 
"num_samples": 500, + "number_of_characters": 74970, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 149.94, + "max_text_length": 479, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "sun": { + "num_samples": 500, + "number_of_characters": 76195, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 152.39, + "max_text_length": 493, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + }, + "bbc": { + "num_samples": 500, + "number_of_characters": 78354, + "number_texts_intersect_with_train": null, + "min_text_length": 25, + "average_text_length": 156.708, + "max_text_length": 521, + "unique_text": 500, + "unique_labels": 3, + "labels": { + "1": { + "count": 119 + }, + "2": { + "count": 189 + }, + "0": { + "count": 192 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OPP115DataRetentionLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OPP115DataRetentionLegalBenchClassification.json new file mode 100644 index 0000000000..8c19ae1e99 --- /dev/null +++ b/mteb/descriptive_stats/Classification/OPP115DataRetentionLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 88, + "number_of_characters": 17178, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 195.20454545454547, + "max_text_length": 1274, + "unique_text": 88, + "unique_labels": 2, + "labels": { + "1": { + "count": 44 + }, + "0": { + "count": 44 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 1667, + "number_texts_intersect_with_train": null, + "min_text_length": 76, + "average_text_length": 208.375, + "max_text_length": 537, + "unique_text": 8, + "unique_labels": 2, + 
"labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OPP115DataSecurityLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OPP115DataSecurityLegalBenchClassification.json new file mode 100644 index 0000000000..ddc2b9d4f7 --- /dev/null +++ b/mteb/descriptive_stats/Classification/OPP115DataSecurityLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1334, + "number_of_characters": 329096, + "number_texts_intersect_with_train": 1, + "min_text_length": 48, + "average_text_length": 246.69865067466267, + "max_text_length": 1736, + "unique_text": 1334, + "unique_labels": 2, + "labels": { + "1": { + "count": 669 + }, + "0": { + "count": 665 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 1999, + "number_texts_intersect_with_train": null, + "min_text_length": 77, + "average_text_length": 249.875, + "max_text_length": 451, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OPP115DoNotTrackLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OPP115DoNotTrackLegalBenchClassification.json new file mode 100644 index 0000000000..4347a026b6 --- /dev/null +++ b/mteb/descriptive_stats/Classification/OPP115DoNotTrackLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 110, + "number_of_characters": 24548, + "number_texts_intersect_with_train": 1, + "min_text_length": 49, + "average_text_length": 223.16363636363636, + "max_text_length": 1644, + "unique_text": 110, + "unique_labels": 2, + "labels": { + "1": { + "count": 55 + }, + "0": { + "count": 55 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 2383, + "number_texts_intersect_with_train": null, + "min_text_length": 58, + "average_text_length": 297.875, + 
"max_text_length": 855, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OPP115FirstPartyCollectionUseLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OPP115FirstPartyCollectionUseLegalBenchClassification.json new file mode 100644 index 0000000000..a3390d5e4e --- /dev/null +++ b/mteb/descriptive_stats/Classification/OPP115FirstPartyCollectionUseLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2086, + "number_of_characters": 426083, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 204.25838926174498, + "max_text_length": 1644, + "unique_text": 2086, + "unique_labels": 2, + "labels": { + "1": { + "count": 1045 + }, + "0": { + "count": 1041 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 2103, + "number_texts_intersect_with_train": null, + "min_text_length": 66, + "average_text_length": 262.875, + "max_text_length": 824, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OPP115InternationalAndSpecificAudiencesLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OPP115InternationalAndSpecificAudiencesLegalBenchClassification.json new file mode 100644 index 0000000000..be3bee42f3 --- /dev/null +++ b/mteb/descriptive_stats/Classification/OPP115InternationalAndSpecificAudiencesLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 980, + "number_of_characters": 321158, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 327.7122448979592, + "max_text_length": 1873, + "unique_text": 980, + "unique_labels": 2, + "labels": { + "1": { + "count": 488 + }, + "0": { + "count": 492 + } + } + 
}, + "train": { + "num_samples": 8, + "number_of_characters": 1519, + "number_texts_intersect_with_train": null, + "min_text_length": 67, + "average_text_length": 189.875, + "max_text_length": 582, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OPP115PolicyChangeLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OPP115PolicyChangeLegalBenchClassification.json new file mode 100644 index 0000000000..f5aed6fd35 --- /dev/null +++ b/mteb/descriptive_stats/Classification/OPP115PolicyChangeLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 431, + "number_of_characters": 86628, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 200.99303944315545, + "max_text_length": 1873, + "unique_text": 431, + "unique_labels": 2, + "labels": { + "1": { + "count": 215 + }, + "0": { + "count": 216 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 1077, + "number_texts_intersect_with_train": null, + "min_text_length": 55, + "average_text_length": 134.625, + "max_text_length": 337, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OPP115ThirdPartySharingCollectionLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OPP115ThirdPartySharingCollectionLegalBenchClassification.json new file mode 100644 index 0000000000..23582721e5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/OPP115ThirdPartySharingCollectionLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1590, + "number_of_characters": 355601, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 223.64842767295596, + "max_text_length": 1816, + 
"unique_text": 1590, + "unique_labels": 2, + "labels": { + "1": { + "count": 798 + }, + "0": { + "count": 792 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 1093, + "number_texts_intersect_with_train": null, + "min_text_length": 68, + "average_text_length": 136.625, + "max_text_length": 278, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OPP115UserAccessEditAndDeletionLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OPP115UserAccessEditAndDeletionLegalBenchClassification.json new file mode 100644 index 0000000000..4fa7428964 --- /dev/null +++ b/mteb/descriptive_stats/Classification/OPP115UserAccessEditAndDeletionLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 462, + "number_of_characters": 100993, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 218.5995670995671, + "max_text_length": 1265, + "unique_text": 462, + "unique_labels": 2, + "labels": { + "1": { + "count": 232 + }, + "0": { + "count": 230 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 1127, + "number_texts_intersect_with_train": null, + "min_text_length": 61, + "average_text_length": 140.875, + "max_text_length": 261, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OPP115UserChoiceControlLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OPP115UserChoiceControlLegalBenchClassification.json new file mode 100644 index 0000000000..ce056d40bd --- /dev/null +++ b/mteb/descriptive_stats/Classification/OPP115UserChoiceControlLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1546, + "number_of_characters": 325620, + 
"number_texts_intersect_with_train": 1, + "min_text_length": 45, + "average_text_length": 210.620957309185, + "max_text_length": 1736, + "unique_text": 1546, + "unique_labels": 2, + "labels": { + "1": { + "count": 775 + }, + "0": { + "count": 771 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 1571, + "number_texts_intersect_with_train": null, + "min_text_length": 98, + "average_text_length": 196.375, + "max_text_length": 369, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OdiaNewsClassification.json b/mteb/descriptive_stats/Classification/OdiaNewsClassification.json new file mode 100644 index 0000000000..a36883ae33 --- /dev/null +++ b/mteb/descriptive_stats/Classification/OdiaNewsClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 101081, + "number_texts_intersect_with_train": 2, + "min_text_length": 11, + "average_text_length": 49.35595703125, + "max_text_length": 107, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "2": { + "count": 932 + }, + "1": { + "count": 488 + }, + "0": { + "count": 628 + } + } + }, + "train": { + "num_samples": 15200, + "number_of_characters": 751899, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 49.46703947368421, + "max_text_length": 110, + "unique_text": 15192, + "unique_labels": 3, + "labels": { + "0": { + "count": 4613 + }, + "1": { + "count": 3666 + }, + "2": { + "count": 6921 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OnlineShopping.json b/mteb/descriptive_stats/Classification/OnlineShopping.json new file mode 100644 index 0000000000..763067c631 --- /dev/null +++ b/mteb/descriptive_stats/Classification/OnlineShopping.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1000, + 
"number_of_characters": 56543, + "number_texts_intersect_with_train": 0, + "min_text_length": 4, + "average_text_length": 56.543, + "max_text_length": 759, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "1": { + "count": 524 + }, + "0": { + "count": 476 + } + } + }, + "train": { + "num_samples": 8000, + "number_of_characters": 460973, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 57.621625, + "max_text_length": 1359, + "unique_text": 8000, + "unique_labels": 2, + "labels": { + "0": { + "count": 3926 + }, + "1": { + "count": 4074 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OnlineStoreReviewSentimentClassification.json b/mteb/descriptive_stats/Classification/OnlineStoreReviewSentimentClassification.json new file mode 100644 index 0000000000..a2305b247b --- /dev/null +++ b/mteb/descriptive_stats/Classification/OnlineStoreReviewSentimentClassification.json @@ -0,0 +1,29 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 99858, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 48.7587890625, + "max_text_length": 506, + "unique_text": 1872, + "unique_labels": 5, + "labels": { + "1": { + "count": 160 + }, + "4": { + "count": 1167 + }, + "0": { + "count": 221 + }, + "2": { + "count": 335 + }, + "3": { + "count": 165 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OralArgumentQuestionPurposeLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OralArgumentQuestionPurposeLegalBenchClassification.json new file mode 100644 index 0000000000..b2875f5b8e --- /dev/null +++ b/mteb/descriptive_stats/Classification/OralArgumentQuestionPurposeLegalBenchClassification.json @@ -0,0 +1,68 @@ +{ + "test": { + "num_samples": 312, + "number_of_characters": 84152, + "number_texts_intersect_with_train": 0, + "min_text_length": 4, + "average_text_length": 
269.71794871794873, + "max_text_length": 2152, + "unique_text": 312, + "unique_labels": 7, + "labels": { + "Background": { + "count": 57 + }, + "Clarification": { + "count": 83 + }, + "Communicate": { + "count": 14 + }, + "Criticism": { + "count": 51 + }, + "Humor": { + "count": 28 + }, + "Implications": { + "count": 67 + }, + "Support": { + "count": 12 + } + } + }, + "train": { + "num_samples": 7, + "number_of_characters": 2184, + "number_texts_intersect_with_train": null, + "min_text_length": 72, + "average_text_length": 312.0, + "max_text_length": 928, + "unique_text": 7, + "unique_labels": 7, + "labels": { + "Background": { + "count": 1 + }, + "Clarification": { + "count": 1 + }, + "Communicate": { + "count": 1 + }, + "Criticism": { + "count": 1 + }, + "Humor": { + "count": 1 + }, + "Implications": { + "count": 1 + }, + "Support": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/OverrulingLegalBenchClassification.json b/mteb/descriptive_stats/Classification/OverrulingLegalBenchClassification.json new file mode 100644 index 0000000000..8be732c94c --- /dev/null +++ b/mteb/descriptive_stats/Classification/OverrulingLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 342772, + "number_texts_intersect_with_train": 0, + "min_text_length": 1, + "average_text_length": 167.369140625, + "max_text_length": 958, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1008 + }, + "1": { + "count": 1040 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 536, + "number_texts_intersect_with_train": null, + "min_text_length": 17, + "average_text_length": 89.33333333333333, + "max_text_length": 131, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/PAC.json 
b/mteb/descriptive_stats/Classification/PAC.json new file mode 100644 index 0000000000..a85ed4df91 --- /dev/null +++ b/mteb/descriptive_stats/Classification/PAC.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 3453, + "number_of_characters": 639765, + "number_texts_intersect_with_train": 0, + "min_text_length": 10, + "average_text_length": 185.27801911381408, + "max_text_length": 1189, + "unique_text": 3452, + "unique_labels": 2, + "labels": { + "0": { + "count": 1120 + }, + "1": { + "count": 2333 + } + } + }, + "train": { + "num_samples": 4284, + "number_of_characters": 793748, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 185.28197945845005, + "max_text_length": 1325, + "unique_text": 4282, + "unique_labels": 2, + "labels": { + "0": { + "count": 1946 + }, + "1": { + "count": 2338 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/PROALegalBenchClassification.json b/mteb/descriptive_stats/Classification/PROALegalBenchClassification.json new file mode 100644 index 0000000000..80c2365f52 --- /dev/null +++ b/mteb/descriptive_stats/Classification/PROALegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 95, + "number_of_characters": 23915, + "number_texts_intersect_with_train": 0, + "min_text_length": 93, + "average_text_length": 251.73684210526315, + "max_text_length": 598, + "unique_text": 95, + "unique_labels": 2, + "labels": { + "1": { + "count": 47 + }, + "0": { + "count": 48 + } + } + }, + "train": { + "num_samples": 5, + "number_of_characters": 979, + "number_texts_intersect_with_train": null, + "min_text_length": 91, + "average_text_length": 195.8, + "max_text_length": 266, + "unique_text": 5, + "unique_labels": 2, + "labels": { + "0": { + "count": 2 + }, + "1": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/PatentClassification.json 
b/mteb/descriptive_stats/Classification/PatentClassification.json new file mode 100644 index 0000000000..8b634e99ac --- /dev/null +++ b/mteb/descriptive_stats/Classification/PatentClassification.json @@ -0,0 +1,80 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 38376596, + "number_texts_intersect_with_train": 9, + "min_text_length": 2168, + "average_text_length": 18738.572265625, + "max_text_length": 226050, + "unique_text": 2048, + "unique_labels": 9, + "labels": { + "7": { + "count": 424 + }, + "0": { + "count": 309 + }, + "6": { + "count": 453 + }, + "2": { + "count": 161 + }, + "1": { + "count": 266 + }, + "8": { + "count": 206 + }, + "4": { + "count": 64 + }, + "5": { + "count": 147 + }, + "3": { + "count": 18 + } + } + }, + "train": { + "num_samples": 25000, + "number_of_characters": 465511243, + "number_texts_intersect_with_train": null, + "min_text_length": 1551, + "average_text_length": 18620.44972, + "max_text_length": 331797, + "unique_text": 24950, + "unique_labels": 9, + "labels": { + "6": { + "count": 5408 + }, + "0": { + "count": 3614 + }, + "7": { + "count": 5321 + }, + "8": { + "count": 2562 + }, + "2": { + "count": 2099 + }, + "4": { + "count": 705 + }, + "1": { + "count": 3357 + }, + "3": { + "count": 204 + }, + "5": { + "count": 1730 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/PersianFoodSentimentClassification.json b/mteb/descriptive_stats/Classification/PersianFoodSentimentClassification.json new file mode 100644 index 0000000000..5fc81a4077 --- /dev/null +++ b/mteb/descriptive_stats/Classification/PersianFoodSentimentClassification.json @@ -0,0 +1,56 @@ +{ + "validation": { + "num_samples": 2048, + "number_of_characters": 182972, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 89.341796875, + "max_text_length": 801, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "1": { + "count": 1024 + }, + "0": { + "count": 1024 + } 
+ } + }, + "test": { + "num_samples": 2048, + "number_of_characters": 185530, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 90.5908203125, + "max_text_length": 795, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1024 + }, + "1": { + "count": 1024 + } + } + }, + "train": { + "num_samples": 56700, + "number_of_characters": 5081427, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 89.61952380952381, + "max_text_length": 1700, + "unique_text": 56700, + "unique_labels": 2, + "labels": { + "0": { + "count": 28350 + }, + "1": { + "count": 28350 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/PersonalJurisdictionLegalBenchClassification.json b/mteb/descriptive_stats/Classification/PersonalJurisdictionLegalBenchClassification.json new file mode 100644 index 0000000000..611ce1e454 --- /dev/null +++ b/mteb/descriptive_stats/Classification/PersonalJurisdictionLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 50, + "number_of_characters": 19057, + "number_texts_intersect_with_train": 0, + "min_text_length": 262, + "average_text_length": 381.14, + "max_text_length": 553, + "unique_text": 50, + "unique_labels": 2, + "labels": { + "0": { + "count": 29 + }, + "1": { + "count": 21 + } + } + }, + "train": { + "num_samples": 4, + "number_of_characters": 1501, + "number_texts_intersect_with_train": null, + "min_text_length": 307, + "average_text_length": 375.25, + "max_text_length": 572, + "unique_text": 4, + "unique_labels": 2, + "labels": { + "1": { + "count": 2 + }, + "0": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/PoemSentimentClassification.json b/mteb/descriptive_stats/Classification/PoemSentimentClassification.json new file mode 100644 index 0000000000..2e04745ba1 --- /dev/null +++ 
b/mteb/descriptive_stats/Classification/PoemSentimentClassification.json @@ -0,0 +1,68 @@ +{ + "validation": { + "num_samples": 105, + "number_of_characters": 4096, + "number_texts_intersect_with_train": 0, + "min_text_length": 12, + "average_text_length": 39.00952380952381, + "max_text_length": 64, + "unique_text": 105, + "unique_labels": 3, + "labels": { + "2": { + "count": 69 + }, + "1": { + "count": 17 + }, + "0": { + "count": 19 + } + } + }, + "test": { + "num_samples": 104, + "number_of_characters": 3907, + "number_texts_intersect_with_train": 0, + "min_text_length": 9, + "average_text_length": 37.56730769230769, + "max_text_length": 75, + "unique_text": 104, + "unique_labels": 3, + "labels": { + "2": { + "count": 69 + }, + "1": { + "count": 16 + }, + "0": { + "count": 19 + } + } + }, + "train": { + "num_samples": 892, + "number_of_characters": 34197, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 38.337443946188344, + "max_text_length": 109, + "unique_text": 892, + "unique_labels": 4, + "labels": { + "1": { + "count": 133 + }, + "2": { + "count": 555 + }, + "0": { + "count": 155 + }, + "3": { + "count": 49 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/PolEmo2.0-IN.json b/mteb/descriptive_stats/Classification/PolEmo2.0-IN.json new file mode 100644 index 0000000000..0c0375dc69 --- /dev/null +++ b/mteb/descriptive_stats/Classification/PolEmo2.0-IN.json @@ -0,0 +1,50 @@ +{ + "test": { + "num_samples": 722, + "number_of_characters": 545967, + "number_texts_intersect_with_train": 0, + "min_text_length": 29, + "average_text_length": 756.1869806094182, + "max_text_length": 2567, + "unique_text": 722, + "unique_labels": 4, + "labels": { + "1": { + "count": 300 + }, + "3": { + "count": 117 + }, + "2": { + "count": 197 + }, + "0": { + "count": 108 + } + } + }, + "train": { + "num_samples": 5783, + "number_of_characters": 4514027, + "number_texts_intersect_with_train": null, + 
"min_text_length": 1, + "average_text_length": 780.56839010894, + "max_text_length": 5391, + "unique_text": 5783, + "unique_labels": 4, + "labels": { + "2": { + "count": 1568 + }, + "1": { + "count": 2194 + }, + "0": { + "count": 1050 + }, + "3": { + "count": 971 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/PolEmo2.0-OUT.json b/mteb/descriptive_stats/Classification/PolEmo2.0-OUT.json new file mode 100644 index 0000000000..4a05f4122f --- /dev/null +++ b/mteb/descriptive_stats/Classification/PolEmo2.0-OUT.json @@ -0,0 +1,50 @@ +{ + "test": { + "num_samples": 494, + "number_of_characters": 289999, + "number_texts_intersect_with_train": 0, + "min_text_length": 8, + "average_text_length": 587.0425101214574, + "max_text_length": 1831, + "unique_text": 494, + "unique_labels": 4, + "labels": { + "2": { + "count": 149 + }, + "0": { + "count": 162 + }, + "1": { + "count": 182 + }, + "3": { + "count": 1 + } + } + }, + "train": { + "num_samples": 5783, + "number_of_characters": 4514027, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 780.56839010894, + "max_text_length": 5391, + "unique_text": 5783, + "unique_labels": 4, + "labels": { + "2": { + "count": 1568 + }, + "1": { + "count": 2194 + }, + "0": { + "count": 1050 + }, + "3": { + "count": 971 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/PunjabiNewsClassification.json b/mteb/descriptive_stats/Classification/PunjabiNewsClassification.json new file mode 100644 index 0000000000..3b8728790f --- /dev/null +++ b/mteb/descriptive_stats/Classification/PunjabiNewsClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 157, + "number_of_characters": 646077, + "number_texts_intersect_with_train": 0, + "min_text_length": 110, + "average_text_length": 4115.140127388535, + "max_text_length": 22115, + "unique_text": 157, + "unique_labels": 2, + "labels": { + "False": { + "count": 131 
+ }, + "True": { + "count": 26 + } + } + }, + "train": { + "num_samples": 627, + "number_of_characters": 2647333, + "number_texts_intersect_with_train": null, + "min_text_length": 95, + "average_text_length": 4222.221690590111, + "max_text_length": 18911, + "unique_text": 627, + "unique_labels": 2, + "labels": { + "True": { + "count": 88 + }, + "False": { + "count": 539 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/RestaurantReviewSentimentClassification.json b/mteb/descriptive_stats/Classification/RestaurantReviewSentimentClassification.json new file mode 100644 index 0000000000..cdc87b9235 --- /dev/null +++ b/mteb/descriptive_stats/Classification/RestaurantReviewSentimentClassification.json @@ -0,0 +1,20 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 478718, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 233.7490234375, + "max_text_length": 4250, + "unique_text": 2043, + "unique_labels": 2, + "labels": { + "1": { + "count": 1456 + }, + "0": { + "count": 592 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/RomanianReviewsSentiment.json b/mteb/descriptive_stats/Classification/RomanianReviewsSentiment.json new file mode 100644 index 0000000000..a3bed7901b --- /dev/null +++ b/mteb/descriptive_stats/Classification/RomanianReviewsSentiment.json @@ -0,0 +1,50 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 404298, + "number_texts_intersect_with_train": 15, + "min_text_length": 4, + "average_text_length": 197.4111328125, + "max_text_length": 4427, + "unique_text": 2048, + "unique_labels": 4, + "labels": { + "1": { + "count": 272 + }, + "0": { + "count": 752 + }, + "3": { + "count": 857 + }, + "2": { + "count": 167 + } + } + }, + "train": { + "num_samples": 12000, + "number_of_characters": 2479161, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 
206.59675, + "max_text_length": 5876, + "unique_text": 11959, + "unique_labels": 4, + "labels": { + "0": { + "count": 4459 + }, + "3": { + "count": 4982 + }, + "2": { + "count": 1018 + }, + "1": { + "count": 1541 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/RomanianSentimentClassification.json b/mteb/descriptive_stats/Classification/RomanianSentimentClassification.json new file mode 100644 index 0000000000..87077359fc --- /dev/null +++ b/mteb/descriptive_stats/Classification/RomanianSentimentClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 1207509, + "number_texts_intersect_with_train": 17, + "min_text_length": 10, + "average_text_length": 589.60400390625, + "max_text_length": 1080, + "unique_text": 2043, + "unique_labels": 2, + "labels": { + "0": { + "count": 898 + }, + "1": { + "count": 1150 + } + } + }, + "train": { + "num_samples": 17941, + "number_of_characters": 7859610, + "number_texts_intersect_with_train": null, + "min_text_length": 0, + "average_text_length": 438.0809319435929, + "max_text_length": 1977, + "unique_text": 16553, + "unique_labels": 2, + "labels": { + "1": { + "count": 11094 + }, + "0": { + "count": 6847 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/RuReviewsClassification.json b/mteb/descriptive_stats/Classification/RuReviewsClassification.json new file mode 100644 index 0000000000..ac298682c3 --- /dev/null +++ b/mteb/descriptive_stats/Classification/RuReviewsClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 265017, + "number_texts_intersect_with_train": 0, + "min_text_length": 1, + "average_text_length": 129.40283203125, + "max_text_length": 1000, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "0": { + "count": 682 + }, + "2": { + "count": 683 + }, + "1": { + "count": 683 + } + } + }, + "train": { + "num_samples": 45000, + 
"number_of_characters": 5995314, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 133.2292, + "max_text_length": 1000, + "unique_text": 45000, + "unique_labels": 3, + "labels": { + "2": { + "count": 15000 + }, + "1": { + "count": 15000 + }, + "0": { + "count": 15000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/RuSciBenchGRNTIClassification.json b/mteb/descriptive_stats/Classification/RuSciBenchGRNTIClassification.json new file mode 100644 index 0000000000..fb45f0f5e5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/RuSciBenchGRNTIClassification.json @@ -0,0 +1,194 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 1822339, + "number_texts_intersect_with_train": 0, + "min_text_length": 84, + "average_text_length": 889.81396484375, + "max_text_length": 3143, + "unique_text": 2048, + "unique_labels": 28, + "labels": { + "3": { + "count": 73 + }, + "4": { + "count": 73 + }, + "20": { + "count": 73 + }, + "9": { + "count": 73 + }, + "21": { + "count": 73 + }, + "15": { + "count": 73 + }, + "16": { + "count": 74 + }, + "2": { + "count": 73 + }, + "8": { + "count": 73 + }, + "23": { + "count": 73 + }, + "6": { + "count": 73 + }, + "24": { + "count": 73 + }, + "10": { + "count": 73 + }, + "1": { + "count": 73 + }, + "17": { + "count": 74 + }, + "14": { + "count": 74 + }, + "18": { + "count": 73 + }, + "27": { + "count": 73 + }, + "19": { + "count": 73 + }, + "22": { + "count": 73 + }, + "12": { + "count": 73 + }, + "25": { + "count": 73 + }, + "5": { + "count": 74 + }, + "0": { + "count": 73 + }, + "26": { + "count": 73 + }, + "11": { + "count": 73 + }, + "13": { + "count": 73 + }, + "7": { + "count": 73 + } + } + }, + "train": { + "num_samples": 28476, + "number_of_characters": 24504627, + "number_texts_intersect_with_train": null, + "min_text_length": 58, + "average_text_length": 860.5361356932153, + "max_text_length": 10142, + "unique_text": 28476, + 
"unique_labels": 28, + "labels": { + "18": { + "count": 1017 + }, + "12": { + "count": 1017 + }, + "22": { + "count": 1017 + }, + "3": { + "count": 1017 + }, + "25": { + "count": 1017 + }, + "20": { + "count": 1017 + }, + "16": { + "count": 1017 + }, + "5": { + "count": 1017 + }, + "11": { + "count": 1017 + }, + "6": { + "count": 1017 + }, + "24": { + "count": 1017 + }, + "2": { + "count": 1017 + }, + "10": { + "count": 1017 + }, + "13": { + "count": 1017 + }, + "4": { + "count": 1017 + }, + "21": { + "count": 1017 + }, + "17": { + "count": 1017 + }, + "9": { + "count": 1017 + }, + "14": { + "count": 1017 + }, + "0": { + "count": 1017 + }, + "7": { + "count": 1017 + }, + "26": { + "count": 1017 + }, + "23": { + "count": 1017 + }, + "8": { + "count": 1017 + }, + "15": { + "count": 1017 + }, + "1": { + "count": 1017 + }, + "19": { + "count": 1017 + }, + "27": { + "count": 1017 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/RuSciBenchOECDClassification.json b/mteb/descriptive_stats/Classification/RuSciBenchOECDClassification.json new file mode 100644 index 0000000000..afb277ba82 --- /dev/null +++ b/mteb/descriptive_stats/Classification/RuSciBenchOECDClassification.json @@ -0,0 +1,200 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 1725558, + "number_texts_intersect_with_train": 0, + "min_text_length": 83, + "average_text_length": 842.5576171875, + "max_text_length": 4538, + "unique_text": 2048, + "unique_labels": 29, + "labels": { + "13": { + "count": 70 + }, + "25": { + "count": 71 + }, + "8": { + "count": 71 + }, + "12": { + "count": 71 + }, + "6": { + "count": 71 + }, + "4": { + "count": 71 + }, + "26": { + "count": 71 + }, + "2": { + "count": 70 + }, + "0": { + "count": 71 + }, + "24": { + "count": 71 + }, + "17": { + "count": 71 + }, + "7": { + "count": 71 + }, + "14": { + "count": 71 + }, + "9": { + "count": 70 + }, + "10": { + "count": 71 + }, + "18": { + "count": 70 + }, + "21": { + "count": 71 
+ }, + "27": { + "count": 70 + }, + "19": { + "count": 71 + }, + "28": { + "count": 70 + }, + "1": { + "count": 70 + }, + "23": { + "count": 71 + }, + "22": { + "count": 70 + }, + "3": { + "count": 71 + }, + "16": { + "count": 71 + }, + "15": { + "count": 70 + }, + "11": { + "count": 71 + }, + "20": { + "count": 70 + }, + "5": { + "count": 70 + } + } + }, + "train": { + "num_samples": 27782, + "number_of_characters": 23279078, + "number_texts_intersect_with_train": null, + "min_text_length": 57, + "average_text_length": 837.9194442444748, + "max_text_length": 5830, + "unique_text": 27782, + "unique_labels": 29, + "labels": { + "22": { + "count": 958 + }, + "24": { + "count": 958 + }, + "10": { + "count": 958 + }, + "20": { + "count": 958 + }, + "13": { + "count": 958 + }, + "4": { + "count": 958 + }, + "9": { + "count": 958 + }, + "1": { + "count": 958 + }, + "2": { + "count": 958 + }, + "16": { + "count": 958 + }, + "14": { + "count": 958 + }, + "23": { + "count": 958 + }, + "11": { + "count": 958 + }, + "3": { + "count": 958 + }, + "21": { + "count": 958 + }, + "19": { + "count": 958 + }, + "5": { + "count": 958 + }, + "7": { + "count": 958 + }, + "6": { + "count": 958 + }, + "17": { + "count": 958 + }, + "12": { + "count": 958 + }, + "8": { + "count": 958 + }, + "26": { + "count": 958 + }, + "18": { + "count": 958 + }, + "28": { + "count": 958 + }, + "0": { + "count": 958 + }, + "15": { + "count": 958 + }, + "25": { + "count": 958 + }, + "27": { + "count": 958 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SCDBPAccountabilityLegalBenchClassification.json b/mteb/descriptive_stats/Classification/SCDBPAccountabilityLegalBenchClassification.json new file mode 100644 index 0000000000..324a3908f2 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SCDBPAccountabilityLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 379, + "number_of_characters": 1334189, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 3520.287598944591, + "max_text_length": 24886, + "unique_text": 379, + "unique_labels": 2, + "labels": { + "1": { + "count": 328 + }, + "0": { + "count": 51 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 18805, + "number_texts_intersect_with_train": null, + "min_text_length": 980, + "average_text_length": 2350.625, + "max_text_length": 4262, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SCDBPAuditsLegalBenchClassification.json b/mteb/descriptive_stats/Classification/SCDBPAuditsLegalBenchClassification.json new file mode 100644 index 0000000000..2a29736503 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SCDBPAuditsLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 379, + "number_of_characters": 1329321, + "number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 3507.44327176781, + "max_text_length": 24886, + "unique_text": 379, + "unique_labels": 2, + "labels": { + "1": { + "count": 260 + }, + "0": { + "count": 119 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 23673, + "number_texts_intersect_with_train": null, + "min_text_length": 1432, + "average_text_length": 2959.125, + "max_text_length": 5327, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SCDBPCertificationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/SCDBPCertificationLegalBenchClassification.json new file mode 100644 index 0000000000..b2a4a4bd25 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SCDBPCertificationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + 
"test": { + "num_samples": 378, + "number_of_characters": 1325776, + "number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 3507.3439153439153, + "max_text_length": 24886, + "unique_text": 378, + "unique_labels": 2, + "labels": { + "1": { + "count": 284 + }, + "0": { + "count": 94 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 21853, + "number_texts_intersect_with_train": null, + "min_text_length": 980, + "average_text_length": 2731.625, + "max_text_length": 5327, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SCDBPTrainingLegalBenchClassification.json b/mteb/descriptive_stats/Classification/SCDBPTrainingLegalBenchClassification.json new file mode 100644 index 0000000000..0e0abd8799 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SCDBPTrainingLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 379, + "number_of_characters": 1329135, + "number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 3506.952506596306, + "max_text_length": 24886, + "unique_text": 379, + "unique_labels": 2, + "labels": { + "1": { + "count": 272 + }, + "0": { + "count": 107 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 23859, + "number_texts_intersect_with_train": null, + "min_text_length": 1449, + "average_text_length": 2982.375, + "max_text_length": 5327, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SCDBPVerificationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/SCDBPVerificationLegalBenchClassification.json new file mode 100644 index 0000000000..33dfa106ae --- /dev/null +++ 
b/mteb/descriptive_stats/Classification/SCDBPVerificationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 379, + "number_of_characters": 1326083, + "number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 3498.8997361477573, + "max_text_length": 24886, + "unique_text": 379, + "unique_labels": 2, + "labels": { + "1": { + "count": 213 + }, + "0": { + "count": 166 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 26911, + "number_texts_intersect_with_train": null, + "min_text_length": 2504, + "average_text_length": 3363.875, + "max_text_length": 5327, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SCDDAccountabilityLegalBenchClassification.json b/mteb/descriptive_stats/Classification/SCDDAccountabilityLegalBenchClassification.json new file mode 100644 index 0000000000..d9cc892d6d --- /dev/null +++ b/mteb/descriptive_stats/Classification/SCDDAccountabilityLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 378, + "number_of_characters": 1331610, + "number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 3522.777777777778, + "max_text_length": 24886, + "unique_text": 378, + "unique_labels": 2, + "labels": { + "1": { + "count": 357 + }, + "0": { + "count": 21 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 18706, + "number_texts_intersect_with_train": null, + "min_text_length": 980, + "average_text_length": 2338.25, + "max_text_length": 4262, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SCDDAuditsLegalBenchClassification.json 
b/mteb/descriptive_stats/Classification/SCDDAuditsLegalBenchClassification.json new file mode 100644 index 0000000000..1b5ec2e41e --- /dev/null +++ b/mteb/descriptive_stats/Classification/SCDDAuditsLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 379, + "number_of_characters": 1328858, + "number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 3506.221635883905, + "max_text_length": 24886, + "unique_text": 379, + "unique_labels": 2, + "labels": { + "1": { + "count": 344 + }, + "0": { + "count": 35 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 24136, + "number_texts_intersect_with_train": null, + "min_text_length": 712, + "average_text_length": 3017.0, + "max_text_length": 8439, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SCDDCertificationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/SCDDCertificationLegalBenchClassification.json new file mode 100644 index 0000000000..2def2826fd --- /dev/null +++ b/mteb/descriptive_stats/Classification/SCDDCertificationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 378, + "number_of_characters": 1329829, + "number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 3518.0661375661375, + "max_text_length": 24886, + "unique_text": 378, + "unique_labels": 2, + "labels": { + "1": { + "count": 343 + }, + "0": { + "count": 35 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 17800, + "number_texts_intersect_with_train": null, + "min_text_length": 712, + "average_text_length": 2225.0, + "max_text_length": 4262, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Classification/SCDDTrainingLegalBenchClassification.json b/mteb/descriptive_stats/Classification/SCDDTrainingLegalBenchClassification.json new file mode 100644 index 0000000000..fd856a1230 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SCDDTrainingLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 379, + "number_of_characters": 1326137, + "number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 3499.042216358839, + "max_text_length": 24886, + "unique_text": 379, + "unique_labels": 2, + "labels": { + "1": { + "count": 328 + }, + "0": { + "count": 51 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 26857, + "number_texts_intersect_with_train": null, + "min_text_length": 980, + "average_text_length": 3357.125, + "max_text_length": 9825, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SCDDVerificationLegalBenchClassification.json b/mteb/descriptive_stats/Classification/SCDDVerificationLegalBenchClassification.json new file mode 100644 index 0000000000..e05f64cf80 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SCDDVerificationLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 379, + "number_of_characters": 1327828, + "number_texts_intersect_with_train": 0, + "min_text_length": 468, + "average_text_length": 3503.503957783641, + "max_text_length": 24886, + "unique_text": 379, + "unique_labels": 2, + "labels": { + "1": { + "count": 281 + }, + "0": { + "count": 98 + } + } + }, + "train": { + "num_samples": 8, + "number_of_characters": 25166, + "number_texts_intersect_with_train": null, + "min_text_length": 2504, + "average_text_length": 3145.75, + "max_text_length": 4262, + "unique_text": 8, + "unique_labels": 2, + "labels": { + "1": { + "count": 4 + }, + "0": { + 
"count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SIB200Classification.json b/mteb/descriptive_stats/Classification/SIB200Classification.json new file mode 100644 index 0000000000..484686bdee --- /dev/null +++ b/mteb/descriptive_stats/Classification/SIB200Classification.json @@ -0,0 +1,19610 @@ +{ + "train": { + "num_samples": 138097, + "number_of_characters": 18730984, + "number_texts_intersect_with_train": null, + "min_text_length": 10, + "average_text_length": 135.63642946624475, + "max_text_length": 585, + "unique_text": 137968, + "unique_labels": 7, + "labels": { + "1": { + "count": 11426 + }, + "4": { + "count": 34672 + }, + "0": { + "count": 12805 + }, + "3": { + "count": 20094 + }, + "2": { + "count": 15169 + }, + "6": { + "count": 27186 + }, + "5": { + "count": 16745 + } + }, + "hf_subset_descriptive_stats": { + "ace_Latn": { + "num_samples": 701, + "number_of_characters": 97915, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 139.679029957204, + "max_text_length": 338, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "acm_Arab": { + "num_samples": 701, + "number_of_characters": 79815, + "number_texts_intersect_with_train": null, + "min_text_length": 32, + "average_text_length": 113.85877318116975, + "max_text_length": 296, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "acq_Arab": { + "num_samples": 701, + "number_of_characters": 80504, + "number_texts_intersect_with_train": null, + "min_text_length": 36, + 
"average_text_length": 114.8416547788873, + "max_text_length": 295, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "aeb_Arab": { + "num_samples": 701, + "number_of_characters": 78757, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 112.34950071326676, + "max_text_length": 288, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "afr_Latn": { + "num_samples": 701, + "number_of_characters": 96967, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 138.32667617689015, + "max_text_length": 343, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ajp_Arab": { + "num_samples": 701, + "number_of_characters": 76057, + "number_texts_intersect_with_train": null, + "min_text_length": 28, + "average_text_length": 108.4978601997147, + "max_text_length": 281, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "aka_Latn": { + "num_samples": 701, + "number_of_characters": 92832, + "number_texts_intersect_with_train": null, + "min_text_length": 32, + "average_text_length": 132.42796005706134, + "max_text_length": 350, + "unique_text": 701, + 
"unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "als_Latn": { + "num_samples": 701, + "number_of_characters": 102839, + "number_texts_intersect_with_train": null, + "min_text_length": 36, + "average_text_length": 146.70328102710414, + "max_text_length": 399, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "amh_Ethi": { + "num_samples": 701, + "number_of_characters": 61587, + "number_texts_intersect_with_train": null, + "min_text_length": 21, + "average_text_length": 87.85592011412268, + "max_text_length": 219, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "apc_Arab": { + "num_samples": 701, + "number_of_characters": 75709, + "number_texts_intersect_with_train": null, + "min_text_length": 24, + "average_text_length": 108.00142653352354, + "max_text_length": 256, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "arb_Latn": { + "num_samples": 701, + "number_of_characters": 106696, + "number_texts_intersect_with_train": null, + "min_text_length": 42, + "average_text_length": 152.20542082738945, + "max_text_length": 406, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, 
+ "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ars_Arab": { + "num_samples": 701, + "number_of_characters": 81473, + "number_texts_intersect_with_train": null, + "min_text_length": 36, + "average_text_length": 116.22396576319544, + "max_text_length": 299, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ary_Arab": { + "num_samples": 701, + "number_of_characters": 79587, + "number_texts_intersect_with_train": null, + "min_text_length": 20, + "average_text_length": 113.53352353780313, + "max_text_length": 300, + "unique_text": 700, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "arz_Arab": { + "num_samples": 701, + "number_of_characters": 79337, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 113.17689015691869, + "max_text_length": 322, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "asm_Beng": { + "num_samples": 701, + "number_of_characters": 87672, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 125.06704707560628, + "max_text_length": 329, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": 
{ + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ast_Latn": { + "num_samples": 701, + "number_of_characters": 93716, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 133.68901569186875, + "max_text_length": 360, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "awa_Deva": { + "num_samples": 701, + "number_of_characters": 89302, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 127.3922967189729, + "max_text_length": 378, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ayr_Latn": { + "num_samples": 701, + "number_of_characters": 95463, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 136.1811697574893, + "max_text_length": 383, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "azb_Arab": { + "num_samples": 701, + "number_of_characters": 81716, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 116.57061340941512, + "max_text_length": 306, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "azj_Latn": { + "num_samples": 
701, + "number_of_characters": 100629, + "number_texts_intersect_with_train": null, + "min_text_length": 47, + "average_text_length": 143.5506419400856, + "max_text_length": 383, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "bak_Cyrl": { + "num_samples": 701, + "number_of_characters": 92860, + "number_texts_intersect_with_train": null, + "min_text_length": 28, + "average_text_length": 132.4679029957204, + "max_text_length": 389, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "bam_Latn": { + "num_samples": 701, + "number_of_characters": 88077, + "number_texts_intersect_with_train": null, + "min_text_length": 26, + "average_text_length": 125.64479315263908, + "max_text_length": 281, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ban_Latn": { + "num_samples": 701, + "number_of_characters": 101525, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 144.82881597717545, + "max_text_length": 365, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "bel_Cyrl": { + "num_samples": 701, + "number_of_characters": 104088, + "number_texts_intersect_with_train": null, + 
"min_text_length": 33, + "average_text_length": 148.48502139800286, + "max_text_length": 429, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "bem_Latn": { + "num_samples": 701, + "number_of_characters": 112088, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 159.89728958630528, + "max_text_length": 401, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ben_Beng": { + "num_samples": 701, + "number_of_characters": 89816, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 128.12553495007134, + "max_text_length": 333, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "bho_Deva": { + "num_samples": 701, + "number_of_characters": 88956, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 126.89871611982882, + "max_text_length": 352, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "bjn_Latn": { + "num_samples": 701, + "number_of_characters": 95595, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 136.3694721825963, + "max_text_length": 
320, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "bod_Tibt": { + "num_samples": 701, + "number_of_characters": 103404, + "number_texts_intersect_with_train": null, + "min_text_length": 45, + "average_text_length": 147.509272467903, + "max_text_length": 431, + "unique_text": 700, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "bos_Latn": { + "num_samples": 701, + "number_of_characters": 92359, + "number_texts_intersect_with_train": null, + "min_text_length": 36, + "average_text_length": 131.75320970042796, + "max_text_length": 337, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "bug_Latn": { + "num_samples": 701, + "number_of_characters": 97574, + "number_texts_intersect_with_train": null, + "min_text_length": 22, + "average_text_length": 139.1925820256776, + "max_text_length": 348, + "unique_text": 700, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "bul_Cyrl": { + "num_samples": 701, + "number_of_characters": 96005, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 136.9543509272468, + "max_text_length": 369, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + 
"4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "cat_Latn": { + "num_samples": 701, + "number_of_characters": 100860, + "number_texts_intersect_with_train": null, + "min_text_length": 36, + "average_text_length": 143.88017118402283, + "max_text_length": 388, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ceb_Latn": { + "num_samples": 701, + "number_of_characters": 109741, + "number_texts_intersect_with_train": null, + "min_text_length": 48, + "average_text_length": 156.54921540656204, + "max_text_length": 370, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ces_Latn": { + "num_samples": 701, + "number_of_characters": 88810, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 126.6904422253923, + "max_text_length": 362, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "cjk_Latn": { + "num_samples": 701, + "number_of_characters": 97590, + "number_texts_intersect_with_train": null, + "min_text_length": 31, + "average_text_length": 139.2154065620542, + "max_text_length": 354, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": 
{ + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ckb_Arab": { + "num_samples": 701, + "number_of_characters": 89352, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 127.46362339514978, + "max_text_length": 333, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "crh_Latn": { + "num_samples": 701, + "number_of_characters": 93571, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 133.48216833095577, + "max_text_length": 354, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "cym_Latn": { + "num_samples": 701, + "number_of_characters": 97068, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 138.47075606276746, + "max_text_length": 330, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "dan_Latn": { + "num_samples": 701, + "number_of_characters": 93611, + "number_texts_intersect_with_train": null, + "min_text_length": 32, + "average_text_length": 133.5392296718973, + "max_text_length": 338, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + 
"deu_Latn": { + "num_samples": 701, + "number_of_characters": 106784, + "number_texts_intersect_with_train": null, + "min_text_length": 42, + "average_text_length": 152.33095577746076, + "max_text_length": 407, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "dik_Latn": { + "num_samples": 701, + "number_of_characters": 79037, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 112.74893009985735, + "max_text_length": 585, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "dyu_Latn": { + "num_samples": 701, + "number_of_characters": 93105, + "number_texts_intersect_with_train": null, + "min_text_length": 32, + "average_text_length": 132.81740370898717, + "max_text_length": 342, + "unique_text": 700, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "dzo_Tibt": { + "num_samples": 701, + "number_of_characters": 115048, + "number_texts_intersect_with_train": null, + "min_text_length": 55, + "average_text_length": 164.11982881597717, + "max_text_length": 435, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ell_Grek": { + "num_samples": 701, + "number_of_characters": 109411, + 
"number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 156.07845934379458, + "max_text_length": 404, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "eng_Latn": { + "num_samples": 701, + "number_of_characters": 90953, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 129.74750356633382, + "max_text_length": 328, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "epo_Latn": { + "num_samples": 701, + "number_of_characters": 91268, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 130.19686162624822, + "max_text_length": 345, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "est_Latn": { + "num_samples": 701, + "number_of_characters": 89574, + "number_texts_intersect_with_train": null, + "min_text_length": 36, + "average_text_length": 127.78031383737518, + "max_text_length": 356, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "eus_Latn": { + "num_samples": 701, + "number_of_characters": 97870, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + 
"average_text_length": 139.61483594864478, + "max_text_length": 333, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ewe_Latn": { + "num_samples": 701, + "number_of_characters": 89367, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 127.48502139800286, + "max_text_length": 344, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "fao_Latn": { + "num_samples": 701, + "number_of_characters": 93639, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 133.57917261055636, + "max_text_length": 335, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "fij_Latn": { + "num_samples": 701, + "number_of_characters": 107684, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 153.61483594864478, + "max_text_length": 371, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "fin_Latn": { + "num_samples": 701, + "number_of_characters": 97170, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 138.61626248216834, + "max_text_length": 378, + "unique_text": 701, 
+ "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "fon_Latn": { + "num_samples": 701, + "number_of_characters": 94820, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 135.2639087018545, + "max_text_length": 481, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "fra_Latn": { + "num_samples": 701, + "number_of_characters": 109242, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 155.83737517831668, + "max_text_length": 396, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "fur_Latn": { + "num_samples": 701, + "number_of_characters": 100420, + "number_texts_intersect_with_train": null, + "min_text_length": 40, + "average_text_length": 143.25249643366618, + "max_text_length": 371, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "fuv_Latn": { + "num_samples": 701, + "number_of_characters": 85613, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 122.12981455064194, + "max_text_length": 308, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + 
}, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "gaz_Latn": { + "num_samples": 701, + "number_of_characters": 108345, + "number_texts_intersect_with_train": null, + "min_text_length": 42, + "average_text_length": 154.5577746077033, + "max_text_length": 380, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "gla_Latn": { + "num_samples": 701, + "number_of_characters": 113878, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 162.45078459343796, + "max_text_length": 393, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "gle_Latn": { + "num_samples": 701, + "number_of_characters": 105661, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 150.7289586305278, + "max_text_length": 352, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "glg_Latn": { + "num_samples": 701, + "number_of_characters": 100978, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 144.0485021398003, + "max_text_length": 362, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + 
"6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "grn_Latn": { + "num_samples": 701, + "number_of_characters": 92162, + "number_texts_intersect_with_train": null, + "min_text_length": 41, + "average_text_length": 131.47218259629102, + "max_text_length": 331, + "unique_text": 700, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "guj_Gujr": { + "num_samples": 701, + "number_of_characters": 87469, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 124.7774607703281, + "max_text_length": 308, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "hat_Latn": { + "num_samples": 701, + "number_of_characters": 84277, + "number_texts_intersect_with_train": null, + "min_text_length": 31, + "average_text_length": 120.22396576319544, + "max_text_length": 291, + "unique_text": 700, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "hau_Latn": { + "num_samples": 701, + "number_of_characters": 97809, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 139.52781740370898, + "max_text_length": 357, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "heb_Hebr": { + 
"num_samples": 701, + "number_of_characters": 71283, + "number_texts_intersect_with_train": null, + "min_text_length": 20, + "average_text_length": 101.68758915834522, + "max_text_length": 277, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "hin_Deva": { + "num_samples": 701, + "number_of_characters": 90985, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 129.79315263908703, + "max_text_length": 338, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "hne_Deva": { + "num_samples": 701, + "number_of_characters": 88245, + "number_texts_intersect_with_train": null, + "min_text_length": 28, + "average_text_length": 125.88445078459344, + "max_text_length": 326, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "hrv_Latn": { + "num_samples": 701, + "number_of_characters": 90682, + "number_texts_intersect_with_train": null, + "min_text_length": 31, + "average_text_length": 129.36091298145507, + "max_text_length": 336, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "hun_Latn": { + "num_samples": 701, + "number_of_characters": 96232, + "number_texts_intersect_with_train": 
null, + "min_text_length": 42, + "average_text_length": 137.27817403708988, + "max_text_length": 384, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "hye_Armn": { + "num_samples": 701, + "number_of_characters": 102287, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 145.91583452211128, + "max_text_length": 381, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ibo_Latn": { + "num_samples": 701, + "number_of_characters": 93186, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 132.93295292439373, + "max_text_length": 335, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ilo_Latn": { + "num_samples": 701, + "number_of_characters": 111437, + "number_texts_intersect_with_train": null, + "min_text_length": 44, + "average_text_length": 158.96861626248216, + "max_text_length": 399, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ind_Latn": { + "num_samples": 701, + "number_of_characters": 98849, + "number_texts_intersect_with_train": null, + "min_text_length": 41, + "average_text_length": 141.0114122681883, + 
"max_text_length": 332, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "isl_Latn": { + "num_samples": 701, + "number_of_characters": 90986, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 129.79457917261055, + "max_text_length": 324, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ita_Latn": { + "num_samples": 701, + "number_of_characters": 108257, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 154.43223965763195, + "max_text_length": 398, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "jav_Latn": { + "num_samples": 701, + "number_of_characters": 95325, + "number_texts_intersect_with_train": null, + "min_text_length": 40, + "average_text_length": 135.98430813124108, + "max_text_length": 319, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "jpn_Jpan": { + "num_samples": 701, + "number_of_characters": 40148, + "number_texts_intersect_with_train": null, + "min_text_length": 18, + "average_text_length": 57.27246790299572, + "max_text_length": 137, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { 
+ "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kab_Latn": { + "num_samples": 701, + "number_of_characters": 90568, + "number_texts_intersect_with_train": null, + "min_text_length": 31, + "average_text_length": 129.19828815977175, + "max_text_length": 352, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kac_Latn": { + "num_samples": 701, + "number_of_characters": 117135, + "number_texts_intersect_with_train": null, + "min_text_length": 56, + "average_text_length": 167.09700427960058, + "max_text_length": 513, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kam_Latn": { + "num_samples": 701, + "number_of_characters": 89379, + "number_texts_intersect_with_train": null, + "min_text_length": 15, + "average_text_length": 127.5021398002853, + "max_text_length": 315, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kan_Knda": { + "num_samples": 701, + "number_of_characters": 95914, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 136.82453637660484, + "max_text_length": 380, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + 
"count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kas_Deva": { + "num_samples": 701, + "number_of_characters": 88105, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 125.68473609129815, + "max_text_length": 452, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kat_Geor": { + "num_samples": 701, + "number_of_characters": 101292, + "number_texts_intersect_with_train": null, + "min_text_length": 44, + "average_text_length": 144.49643366619117, + "max_text_length": 366, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kaz_Cyrl": { + "num_samples": 701, + "number_of_characters": 93950, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 134.02282453637662, + "max_text_length": 388, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kbp_Latn": { + "num_samples": 701, + "number_of_characters": 99739, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 142.28102710413694, + "max_text_length": 377, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + 
"count": 85 + } + } + }, + "kea_Latn": { + "num_samples": 701, + "number_of_characters": 90861, + "number_texts_intersect_with_train": null, + "min_text_length": 32, + "average_text_length": 129.61626248216834, + "max_text_length": 331, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "khk_Cyrl": { + "num_samples": 701, + "number_of_characters": 96054, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 137.02425106990015, + "max_text_length": 335, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "khm_Khmr": { + "num_samples": 701, + "number_of_characters": 108816, + "number_texts_intersect_with_train": null, + "min_text_length": 49, + "average_text_length": 155.2296718972896, + "max_text_length": 507, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kik_Latn": { + "num_samples": 701, + "number_of_characters": 106633, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 152.11554921540656, + "max_text_length": 515, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kin_Latn": { + "num_samples": 701, + "number_of_characters": 
101859, + "number_texts_intersect_with_train": null, + "min_text_length": 42, + "average_text_length": 145.30527817403708, + "max_text_length": 391, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kir_Cyrl": { + "num_samples": 701, + "number_of_characters": 94184, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 134.35663338088446, + "max_text_length": 325, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kmb_Latn": { + "num_samples": 701, + "number_of_characters": 101484, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 144.7703281027104, + "max_text_length": 379, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kmr_Latn": { + "num_samples": 701, + "number_of_characters": 91358, + "number_texts_intersect_with_train": null, + "min_text_length": 31, + "average_text_length": 130.32524964336662, + "max_text_length": 347, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "knc_Latn": { + "num_samples": 701, + "number_of_characters": 97410, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + 
"average_text_length": 138.9586305278174, + "max_text_length": 427, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kon_Latn": { + "num_samples": 701, + "number_of_characters": 103634, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 147.83737517831668, + "max_text_length": 390, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "kor_Hang": { + "num_samples": 701, + "number_of_characters": 46038, + "number_texts_intersect_with_train": null, + "min_text_length": 22, + "average_text_length": 65.67475035663338, + "max_text_length": 177, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "lao_Laoo": { + "num_samples": 701, + "number_of_characters": 90928, + "number_texts_intersect_with_train": null, + "min_text_length": 42, + "average_text_length": 129.71184022824536, + "max_text_length": 296, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "lij_Latn": { + "num_samples": 701, + "number_of_characters": 100784, + "number_texts_intersect_with_train": null, + "min_text_length": 45, + "average_text_length": 143.77175463623396, + "max_text_length": 378, + "unique_text": 701, 
+ "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "lim_Latn": { + "num_samples": 701, + "number_of_characters": 94626, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 134.98716119828816, + "max_text_length": 351, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "lin_Latn": { + "num_samples": 701, + "number_of_characters": 98467, + "number_texts_intersect_with_train": null, + "min_text_length": 32, + "average_text_length": 140.46647646219685, + "max_text_length": 369, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "lit_Latn": { + "num_samples": 701, + "number_of_characters": 92240, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 131.58345221112697, + "max_text_length": 357, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "lmo_Latn": { + "num_samples": 701, + "number_of_characters": 98437, + "number_texts_intersect_with_train": null, + "min_text_length": 45, + "average_text_length": 140.42368045649073, + "max_text_length": 379, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + 
}, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ltg_Latn": { + "num_samples": 701, + "number_of_characters": 90763, + "number_texts_intersect_with_train": null, + "min_text_length": 25, + "average_text_length": 129.47646219686163, + "max_text_length": 326, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ltz_Latn": { + "num_samples": 701, + "number_of_characters": 102242, + "number_texts_intersect_with_train": null, + "min_text_length": 42, + "average_text_length": 145.85164051355207, + "max_text_length": 375, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "lua_Latn": { + "num_samples": 701, + "number_of_characters": 98191, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 140.07275320970044, + "max_text_length": 361, + "unique_text": 700, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "lug_Latn": { + "num_samples": 701, + "number_of_characters": 93725, + "number_texts_intersect_with_train": null, + "min_text_length": 31, + "average_text_length": 133.7018544935806, + "max_text_length": 333, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + 
"6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "luo_Latn": { + "num_samples": 701, + "number_of_characters": 94990, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 135.50641940085592, + "max_text_length": 365, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "lus_Latn": { + "num_samples": 701, + "number_of_characters": 99679, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 142.19543509272467, + "max_text_length": 418, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "lvs_Latn": { + "num_samples": 701, + "number_of_characters": 93650, + "number_texts_intersect_with_train": null, + "min_text_length": 26, + "average_text_length": 133.59486447931528, + "max_text_length": 380, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "mag_Deva": { + "num_samples": 701, + "number_of_characters": 87997, + "number_texts_intersect_with_train": null, + "min_text_length": 36, + "average_text_length": 125.53067047075606, + "max_text_length": 319, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "mai_Deva": { + 
"num_samples": 701, + "number_of_characters": 89566, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 127.76890156918688, + "max_text_length": 350, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "mal_Mlym": { + "num_samples": 701, + "number_of_characters": 104011, + "number_texts_intersect_with_train": null, + "min_text_length": 31, + "average_text_length": 148.37517831669044, + "max_text_length": 367, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "mar_Deva": { + "num_samples": 701, + "number_of_characters": 92423, + "number_texts_intersect_with_train": null, + "min_text_length": 44, + "average_text_length": 131.84450784593437, + "max_text_length": 327, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "min_Latn": { + "num_samples": 701, + "number_of_characters": 97560, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 139.17261055634808, + "max_text_length": 348, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "mkd_Cyrl": { + "num_samples": 701, + "number_of_characters": 95693, + 
"number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 136.509272467903, + "max_text_length": 360, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "mlt_Latn": { + "num_samples": 701, + "number_of_characters": 101232, + "number_texts_intersect_with_train": null, + "min_text_length": 28, + "average_text_length": 144.4108416547789, + "max_text_length": 354, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "mni_Beng": { + "num_samples": 701, + "number_of_characters": 94335, + "number_texts_intersect_with_train": null, + "min_text_length": 40, + "average_text_length": 134.57203994293866, + "max_text_length": 354, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "mos_Latn": { + "num_samples": 701, + "number_of_characters": 87446, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 124.74465049928673, + "max_text_length": 342, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "mri_Latn": { + "num_samples": 701, + "number_of_characters": 101206, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 
144.3737517831669, + "max_text_length": 348, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "mya_Mymr": { + "num_samples": 701, + "number_of_characters": 113484, + "number_texts_intersect_with_train": null, + "min_text_length": 44, + "average_text_length": 161.88873038516405, + "max_text_length": 405, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "nld_Latn": { + "num_samples": 701, + "number_of_characters": 101519, + "number_texts_intersect_with_train": null, + "min_text_length": 36, + "average_text_length": 144.82025677603423, + "max_text_length": 366, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "nno_Latn": { + "num_samples": 701, + "number_of_characters": 92265, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 131.6191155492154, + "max_text_length": 339, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "nob_Latn": { + "num_samples": 701, + "number_of_characters": 92019, + "number_texts_intersect_with_train": null, + "min_text_length": 32, + "average_text_length": 131.2681883024251, + "max_text_length": 324, + "unique_text": 701, + "unique_labels": 7, + 
"labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "npi_Deva": { + "num_samples": 701, + "number_of_characters": 87687, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 125.08844507845934, + "max_text_length": 314, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "nqo_Nkoo": { + "num_samples": 701, + "number_of_characters": 116951, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 166.83452211126962, + "max_text_length": 408, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "nso_Latn": { + "num_samples": 701, + "number_of_characters": 104634, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 149.2639087018545, + "max_text_length": 371, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "nus_Latn": { + "num_samples": 701, + "number_of_characters": 97535, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 139.13694721825962, + "max_text_length": 346, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 
65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "nya_Latn": { + "num_samples": 701, + "number_of_characters": 102810, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 146.66191155492155, + "max_text_length": 351, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "oci_Latn": { + "num_samples": 701, + "number_of_characters": 104030, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 148.40228245363767, + "max_text_length": 373, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ory_Orya": { + "num_samples": 701, + "number_of_characters": 93540, + "number_texts_intersect_with_train": null, + "min_text_length": 40, + "average_text_length": 133.4379457917261, + "max_text_length": 354, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "pag_Latn": { + "num_samples": 701, + "number_of_characters": 92135, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 131.43366619115548, + "max_text_length": 347, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + 
}, + "5": { + "count": 85 + } + } + }, + "pan_Guru": { + "num_samples": 701, + "number_of_characters": 92283, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 131.6447931526391, + "max_text_length": 380, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "pap_Latn": { + "num_samples": 701, + "number_of_characters": 96322, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 137.40656205420828, + "max_text_length": 388, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "pbt_Arab": { + "num_samples": 701, + "number_of_characters": 87842, + "number_texts_intersect_with_train": null, + "min_text_length": 32, + "average_text_length": 125.3095577746077, + "max_text_length": 295, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "pes_Arab": { + "num_samples": 701, + "number_of_characters": 86813, + "number_texts_intersect_with_train": null, + "min_text_length": 25, + "average_text_length": 123.8416547788873, + "max_text_length": 286, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "plt_Latn": { + "num_samples": 701, + 
"number_of_characters": 112616, + "number_texts_intersect_with_train": null, + "min_text_length": 56, + "average_text_length": 160.65049928673324, + "max_text_length": 432, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "pol_Latn": { + "num_samples": 701, + "number_of_characters": 97338, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 138.85592011412268, + "max_text_length": 367, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "por_Latn": { + "num_samples": 701, + "number_of_characters": 99525, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 141.97574893009985, + "max_text_length": 391, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "prs_Arab": { + "num_samples": 701, + "number_of_characters": 84319, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 120.28388017118402, + "max_text_length": 297, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "quy_Latn": { + "num_samples": 701, + "number_of_characters": 97924, + "number_texts_intersect_with_train": null, + 
"min_text_length": 43, + "average_text_length": 139.69186875891583, + "max_text_length": 378, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ron_Latn": { + "num_samples": 701, + "number_of_characters": 103161, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 147.16262482168332, + "max_text_length": 397, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "run_Latn": { + "num_samples": 701, + "number_of_characters": 103164, + "number_texts_intersect_with_train": null, + "min_text_length": 40, + "average_text_length": 147.16690442225394, + "max_text_length": 411, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "rus_Cyrl": { + "num_samples": 701, + "number_of_characters": 100261, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 143.02567760342367, + "max_text_length": 351, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "sag_Latn": { + "num_samples": 701, + "number_of_characters": 99560, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 142.02567760342367, + "max_text_length": 
406, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "san_Deva": { + "num_samples": 701, + "number_of_characters": 90200, + "number_texts_intersect_with_train": null, + "min_text_length": 33, + "average_text_length": 128.67332382310985, + "max_text_length": 323, + "unique_text": 699, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "sat_Olck": { + "num_samples": 701, + "number_of_characters": 96526, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 137.69757489301, + "max_text_length": 366, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "scn_Latn": { + "num_samples": 701, + "number_of_characters": 96203, + "number_texts_intersect_with_train": null, + "min_text_length": 36, + "average_text_length": 137.2368045649073, + "max_text_length": 343, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "shn_Mymr": { + "num_samples": 701, + "number_of_characters": 131897, + "number_texts_intersect_with_train": null, + "min_text_length": 48, + "average_text_length": 188.15549215406563, + "max_text_length": 518, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + 
"4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "sin_Sinh": { + "num_samples": 701, + "number_of_characters": 92212, + "number_texts_intersect_with_train": null, + "min_text_length": 36, + "average_text_length": 131.5435092724679, + "max_text_length": 359, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "slk_Latn": { + "num_samples": 701, + "number_of_characters": 91793, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 130.94579172610557, + "max_text_length": 370, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "slv_Latn": { + "num_samples": 701, + "number_of_characters": 91909, + "number_texts_intersect_with_train": null, + "min_text_length": 43, + "average_text_length": 131.11126961483595, + "max_text_length": 353, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "smo_Latn": { + "num_samples": 701, + "number_of_characters": 106696, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 152.20542082738945, + "max_text_length": 412, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": 
{ + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "sna_Latn": { + "num_samples": 701, + "number_of_characters": 102694, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 146.49643366619117, + "max_text_length": 424, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "snd_Arab": { + "num_samples": 701, + "number_of_characters": 82729, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 118.01569186875892, + "max_text_length": 304, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "som_Latn": { + "num_samples": 701, + "number_of_characters": 103971, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 148.31811697574892, + "max_text_length": 367, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "sot_Latn": { + "num_samples": 701, + "number_of_characters": 110424, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 157.52353780313837, + "max_text_length": 387, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + 
"spa_Latn": { + "num_samples": 701, + "number_of_characters": 108558, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 154.86162624821682, + "max_text_length": 367, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "srd_Latn": { + "num_samples": 701, + "number_of_characters": 105786, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 150.90727532097003, + "max_text_length": 391, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "srp_Cyrl": { + "num_samples": 701, + "number_of_characters": 91036, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 129.86590584878743, + "max_text_length": 344, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ssw_Latn": { + "num_samples": 701, + "number_of_characters": 103117, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 147.09985734664764, + "max_text_length": 364, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "sun_Latn": { + "num_samples": 701, + "number_of_characters": 94797, + 
"number_texts_intersect_with_train": null, + "min_text_length": 41, + "average_text_length": 135.23109843081312, + "max_text_length": 336, + "unique_text": 700, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "swe_Latn": { + "num_samples": 701, + "number_of_characters": 91921, + "number_texts_intersect_with_train": null, + "min_text_length": 35, + "average_text_length": 131.1283880171184, + "max_text_length": 330, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "swh_Latn": { + "num_samples": 701, + "number_of_characters": 95890, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 136.79029957203994, + "max_text_length": 370, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "szl_Latn": { + "num_samples": 701, + "number_of_characters": 95087, + "number_texts_intersect_with_train": null, + "min_text_length": 32, + "average_text_length": 135.6447931526391, + "max_text_length": 349, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tam_Taml": { + "num_samples": 701, + "number_of_characters": 106223, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 
151.53067047075606, + "max_text_length": 404, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "taq_Tfng": { + "num_samples": 701, + "number_of_characters": 85263, + "number_texts_intersect_with_train": null, + "min_text_length": 21, + "average_text_length": 121.6305278174037, + "max_text_length": 294, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tat_Cyrl": { + "num_samples": 701, + "number_of_characters": 93196, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 132.9472182596291, + "max_text_length": 329, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tel_Telu": { + "num_samples": 701, + "number_of_characters": 92683, + "number_texts_intersect_with_train": null, + "min_text_length": 46, + "average_text_length": 132.2154065620542, + "max_text_length": 359, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tgk_Cyrl": { + "num_samples": 701, + "number_of_characters": 101795, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 145.21398002853067, + "max_text_length": 324, + "unique_text": 700, + "unique_labels": 7, + 
"labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tgl_Latn": { + "num_samples": 701, + "number_of_characters": 115629, + "number_texts_intersect_with_train": null, + "min_text_length": 46, + "average_text_length": 164.94864479315265, + "max_text_length": 410, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tha_Thai": { + "num_samples": 701, + "number_of_characters": 88339, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 126.01854493580599, + "max_text_length": 325, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tir_Ethi": { + "num_samples": 701, + "number_of_characters": 63365, + "number_texts_intersect_with_train": null, + "min_text_length": 31, + "average_text_length": 90.3922967189729, + "max_text_length": 246, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tpi_Latn": { + "num_samples": 701, + "number_of_characters": 116396, + "number_texts_intersect_with_train": null, + "min_text_length": 50, + "average_text_length": 166.04279600570612, + "max_text_length": 441, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 
+ }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tsn_Latn": { + "num_samples": 701, + "number_of_characters": 113370, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 161.72610556348073, + "max_text_length": 427, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tso_Latn": { + "num_samples": 701, + "number_of_characters": 109970, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 156.87589158345222, + "max_text_length": 406, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tuk_Latn": { + "num_samples": 701, + "number_of_characters": 97526, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 139.12410841654778, + "max_text_length": 397, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tum_Latn": { + "num_samples": 701, + "number_of_characters": 120121, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 171.35663338088446, + "max_text_length": 542, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + 
}, + "5": { + "count": 85 + } + } + }, + "tur_Latn": { + "num_samples": 701, + "number_of_characters": 94294, + "number_texts_intersect_with_train": null, + "min_text_length": 25, + "average_text_length": 134.51355206847362, + "max_text_length": 343, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "twi_Latn": { + "num_samples": 701, + "number_of_characters": 89189, + "number_texts_intersect_with_train": null, + "min_text_length": 28, + "average_text_length": 127.23109843081312, + "max_text_length": 295, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "tzm_Tfng": { + "num_samples": 701, + "number_of_characters": 82014, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 116.99572039942939, + "max_text_length": 288, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "uig_Arab": { + "num_samples": 701, + "number_of_characters": 97856, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 139.59486447931528, + "max_text_length": 354, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ukr_Cyrl": { + "num_samples": 701, + 
"number_of_characters": 93746, + "number_texts_intersect_with_train": null, + "min_text_length": 37, + "average_text_length": 133.7318116975749, + "max_text_length": 339, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "umb_Latn": { + "num_samples": 701, + "number_of_characters": 92602, + "number_texts_intersect_with_train": null, + "min_text_length": 32, + "average_text_length": 132.09985734664764, + "max_text_length": 343, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "urd_Arab": { + "num_samples": 701, + "number_of_characters": 90940, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 129.7289586305278, + "max_text_length": 348, + "unique_text": 700, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "uzn_Latn": { + "num_samples": 701, + "number_of_characters": 103997, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 148.3552068473609, + "max_text_length": 381, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "vec_Latn": { + "num_samples": 701, + "number_of_characters": 91655, + "number_texts_intersect_with_train": null, + 
"min_text_length": 34, + "average_text_length": 130.74893009985735, + "max_text_length": 358, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "vie_Latn": { + "num_samples": 701, + "number_of_characters": 96362, + "number_texts_intersect_with_train": null, + "min_text_length": 39, + "average_text_length": 137.4636233951498, + "max_text_length": 357, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "war_Latn": { + "num_samples": 701, + "number_of_characters": 113561, + "number_texts_intersect_with_train": null, + "min_text_length": 38, + "average_text_length": 161.99857346647647, + "max_text_length": 390, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "wol_Latn": { + "num_samples": 701, + "number_of_characters": 87380, + "number_texts_intersect_with_train": null, + "min_text_length": 30, + "average_text_length": 124.65049928673324, + "max_text_length": 323, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "xho_Latn": { + "num_samples": 701, + "number_of_characters": 97482, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 139.06134094151213, + "max_text_length": 
369, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "ydd_Hebr": { + "num_samples": 701, + "number_of_characters": 97748, + "number_texts_intersect_with_train": null, + "min_text_length": 45, + "average_text_length": 139.44079885877318, + "max_text_length": 365, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "yor_Latn": { + "num_samples": 701, + "number_of_characters": 88263, + "number_texts_intersect_with_train": null, + "min_text_length": 27, + "average_text_length": 125.91012838801711, + "max_text_length": 306, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "yue_Hant": { + "num_samples": 701, + "number_of_characters": 27975, + "number_texts_intersect_with_train": null, + "min_text_length": 10, + "average_text_length": 39.907275320970044, + "max_text_length": 118, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "zho_Hant": { + "num_samples": 701, + "number_of_characters": 28551, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 40.72895863052782, + "max_text_length": 136, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + 
"4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "zsm_Latn": { + "num_samples": 701, + "number_of_characters": 102112, + "number_texts_intersect_with_train": null, + "min_text_length": 43, + "average_text_length": 145.66619115549216, + "max_text_length": 362, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + }, + "zul_Latn": { + "num_samples": 701, + "number_of_characters": 102585, + "number_texts_intersect_with_train": null, + "min_text_length": 34, + "average_text_length": 146.34094151212554, + "max_text_length": 406, + "unique_text": 701, + "unique_labels": 7, + "labels": { + "1": { + "count": 58 + }, + "4": { + "count": 176 + }, + "0": { + "count": 65 + }, + "3": { + "count": 102 + }, + "2": { + "count": 77 + }, + "6": { + "count": 138 + }, + "5": { + "count": 85 + } + } + } + } + }, + "validation": { + "num_samples": 19503, + "number_of_characters": 2455481, + "number_texts_intersect_with_train": 1, + "min_text_length": 15, + "average_text_length": 125.9027329128852, + "max_text_length": 450, + "unique_text": 19488, + "unique_labels": 7, + "labels": { + "5": { + "count": 2364 + }, + "6": { + "count": 3940 + }, + "1": { + "count": 1576 + }, + "4": { + "count": 4925 + }, + "0": { + "count": 1773 + }, + "2": { + "count": 2167 + }, + "3": { + "count": 2758 + } + }, + "hf_subset_descriptive_stats": { + "ace_Latn": { + "num_samples": 99, + "number_of_characters": 12866, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 129.95959595959596, + "max_text_length": 311, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + 
"count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "acm_Arab": { + "num_samples": 99, + "number_of_characters": 10545, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 106.51515151515152, + "max_text_length": 246, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "acq_Arab": { + "num_samples": 99, + "number_of_characters": 10573, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 106.79797979797979, + "max_text_length": 252, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "aeb_Arab": { + "num_samples": 99, + "number_of_characters": 10284, + "number_texts_intersect_with_train": 0, + "min_text_length": 36, + "average_text_length": 103.87878787878788, + "max_text_length": 251, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "afr_Latn": { + "num_samples": 99, + "number_of_characters": 12741, + "number_texts_intersect_with_train": 0, + "min_text_length": 61, + "average_text_length": 128.6969696969697, + "max_text_length": 306, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + 
}, + "ajp_Arab": { + "num_samples": 99, + "number_of_characters": 10029, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 101.3030303030303, + "max_text_length": 253, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "aka_Latn": { + "num_samples": 99, + "number_of_characters": 11846, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 119.65656565656566, + "max_text_length": 275, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "als_Latn": { + "num_samples": 99, + "number_of_characters": 13356, + "number_texts_intersect_with_train": 0, + "min_text_length": 59, + "average_text_length": 134.9090909090909, + "max_text_length": 320, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "amh_Ethi": { + "num_samples": 99, + "number_of_characters": 7952, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 80.32323232323232, + "max_text_length": 173, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "apc_Arab": { + "num_samples": 99, + "number_of_characters": 9823, + "number_texts_intersect_with_train": 0, + "min_text_length": 
44, + "average_text_length": 99.22222222222223, + "max_text_length": 261, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "arb_Latn": { + "num_samples": 99, + "number_of_characters": 14005, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 141.46464646464648, + "max_text_length": 299, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ars_Arab": { + "num_samples": 99, + "number_of_characters": 10766, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 108.74747474747475, + "max_text_length": 253, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ary_Arab": { + "num_samples": 99, + "number_of_characters": 10566, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 106.72727272727273, + "max_text_length": 308, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "arz_Arab": { + "num_samples": 99, + "number_of_characters": 10569, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 106.75757575757575, + "max_text_length": 252, + "unique_text": 99, + "unique_labels": 7, + "labels": { 
+ "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "asm_Beng": { + "num_samples": 99, + "number_of_characters": 11410, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 115.25252525252525, + "max_text_length": 276, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ast_Latn": { + "num_samples": 99, + "number_of_characters": 12410, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 125.35353535353535, + "max_text_length": 279, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "awa_Deva": { + "num_samples": 99, + "number_of_characters": 11719, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 118.37373737373737, + "max_text_length": 270, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ayr_Latn": { + "num_samples": 99, + "number_of_characters": 12759, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 128.87878787878788, + "max_text_length": 271, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 
9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "azb_Arab": { + "num_samples": 99, + "number_of_characters": 10659, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 107.66666666666667, + "max_text_length": 198, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "azj_Latn": { + "num_samples": 99, + "number_of_characters": 12999, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 131.3030303030303, + "max_text_length": 297, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "bak_Cyrl": { + "num_samples": 99, + "number_of_characters": 12162, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 122.84848484848484, + "max_text_length": 338, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "bam_Latn": { + "num_samples": 99, + "number_of_characters": 11659, + "number_texts_intersect_with_train": 1, + "min_text_length": 55, + "average_text_length": 117.76767676767676, + "max_text_length": 291, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ban_Latn": { + "num_samples": 99, + 
"number_of_characters": 13253, + "number_texts_intersect_with_train": 0, + "min_text_length": 61, + "average_text_length": 133.86868686868686, + "max_text_length": 274, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "bel_Cyrl": { + "num_samples": 99, + "number_of_characters": 13674, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 138.12121212121212, + "max_text_length": 282, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "bem_Latn": { + "num_samples": 99, + "number_of_characters": 14649, + "number_texts_intersect_with_train": 0, + "min_text_length": 65, + "average_text_length": 147.96969696969697, + "max_text_length": 337, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ben_Beng": { + "num_samples": 99, + "number_of_characters": 11633, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 117.5050505050505, + "max_text_length": 262, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "bho_Deva": { + "num_samples": 99, + "number_of_characters": 12008, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 
121.29292929292929, + "max_text_length": 278, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "bjn_Latn": { + "num_samples": 99, + "number_of_characters": 12463, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 125.88888888888889, + "max_text_length": 329, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "bod_Tibt": { + "num_samples": 99, + "number_of_characters": 13657, + "number_texts_intersect_with_train": 0, + "min_text_length": 61, + "average_text_length": 137.94949494949495, + "max_text_length": 284, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "bos_Latn": { + "num_samples": 99, + "number_of_characters": 11994, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 121.15151515151516, + "max_text_length": 269, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "bug_Latn": { + "num_samples": 99, + "number_of_characters": 12952, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 130.82828282828282, + "max_text_length": 229, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, 
+ "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "bul_Cyrl": { + "num_samples": 99, + "number_of_characters": 12284, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 124.08080808080808, + "max_text_length": 287, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "cat_Latn": { + "num_samples": 99, + "number_of_characters": 13145, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 132.77777777777777, + "max_text_length": 327, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ceb_Latn": { + "num_samples": 99, + "number_of_characters": 14309, + "number_texts_intersect_with_train": 0, + "min_text_length": 59, + "average_text_length": 144.53535353535352, + "max_text_length": 375, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ces_Latn": { + "num_samples": 99, + "number_of_characters": 11745, + "number_texts_intersect_with_train": 0, + "min_text_length": 55, + "average_text_length": 118.63636363636364, + "max_text_length": 236, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 
11 + }, + "3": { + "count": 14 + } + } + }, + "cjk_Latn": { + "num_samples": 99, + "number_of_characters": 12655, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 127.82828282828282, + "max_text_length": 308, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ckb_Arab": { + "num_samples": 99, + "number_of_characters": 11442, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 115.57575757575758, + "max_text_length": 241, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "crh_Latn": { + "num_samples": 99, + "number_of_characters": 12000, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 121.21212121212122, + "max_text_length": 219, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "cym_Latn": { + "num_samples": 99, + "number_of_characters": 12866, + "number_texts_intersect_with_train": 0, + "min_text_length": 64, + "average_text_length": 129.95959595959596, + "max_text_length": 305, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "dan_Latn": { + "num_samples": 99, + "number_of_characters": 12552, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 126.78787878787878, + "max_text_length": 309, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "deu_Latn": { + "num_samples": 99, + "number_of_characters": 13849, + "number_texts_intersect_with_train": 0, + "min_text_length": 55, + "average_text_length": 139.88888888888889, + "max_text_length": 314, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "dik_Latn": { + "num_samples": 99, + "number_of_characters": 10617, + "number_texts_intersect_with_train": 0, + "min_text_length": 34, + "average_text_length": 107.24242424242425, + "max_text_length": 315, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "dyu_Latn": { + "num_samples": 99, + "number_of_characters": 12006, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 121.27272727272727, + "max_text_length": 314, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "dzo_Tibt": { + "num_samples": 99, + "number_of_characters": 14736, + "number_texts_intersect_with_train": 0, + "min_text_length": 78, + "average_text_length": 148.84848484848484, + "max_text_length": 
327, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ell_Grek": { + "num_samples": 99, + "number_of_characters": 14330, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 144.74747474747474, + "max_text_length": 275, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "eng_Latn": { + "num_samples": 99, + "number_of_characters": 11925, + "number_texts_intersect_with_train": 0, + "min_text_length": 56, + "average_text_length": 120.45454545454545, + "max_text_length": 278, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "epo_Latn": { + "num_samples": 99, + "number_of_characters": 12087, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 122.0909090909091, + "max_text_length": 306, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "est_Latn": { + "num_samples": 99, + "number_of_characters": 11850, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 119.6969696969697, + "max_text_length": 274, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + 
"count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "eus_Latn": { + "num_samples": 99, + "number_of_characters": 12607, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 127.34343434343434, + "max_text_length": 306, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ewe_Latn": { + "num_samples": 99, + "number_of_characters": 12246, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 123.6969696969697, + "max_text_length": 289, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "fao_Latn": { + "num_samples": 99, + "number_of_characters": 12233, + "number_texts_intersect_with_train": 0, + "min_text_length": 55, + "average_text_length": 123.56565656565657, + "max_text_length": 273, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "fij_Latn": { + "num_samples": 99, + "number_of_characters": 14314, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 144.58585858585857, + "max_text_length": 299, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + 
}, + "fin_Latn": { + "num_samples": 99, + "number_of_characters": 12901, + "number_texts_intersect_with_train": 0, + "min_text_length": 60, + "average_text_length": 130.31313131313132, + "max_text_length": 296, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "fon_Latn": { + "num_samples": 99, + "number_of_characters": 12044, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 121.65656565656566, + "max_text_length": 272, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "fra_Latn": { + "num_samples": 99, + "number_of_characters": 14258, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 144.02020202020202, + "max_text_length": 301, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "fur_Latn": { + "num_samples": 99, + "number_of_characters": 13233, + "number_texts_intersect_with_train": 0, + "min_text_length": 62, + "average_text_length": 133.66666666666666, + "max_text_length": 268, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "fuv_Latn": { + "num_samples": 99, + "number_of_characters": 11242, + "number_texts_intersect_with_train": 0, + 
"min_text_length": 48, + "average_text_length": 113.55555555555556, + "max_text_length": 301, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "gaz_Latn": { + "num_samples": 99, + "number_of_characters": 14295, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 144.3939393939394, + "max_text_length": 342, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "gla_Latn": { + "num_samples": 99, + "number_of_characters": 14630, + "number_texts_intersect_with_train": 0, + "min_text_length": 66, + "average_text_length": 147.77777777777777, + "max_text_length": 330, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "gle_Latn": { + "num_samples": 99, + "number_of_characters": 13673, + "number_texts_intersect_with_train": 0, + "min_text_length": 62, + "average_text_length": 138.11111111111111, + "max_text_length": 262, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "glg_Latn": { + "num_samples": 99, + "number_of_characters": 13351, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 134.85858585858585, + "max_text_length": 278, + "unique_text": 99, + 
"unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "grn_Latn": { + "num_samples": 99, + "number_of_characters": 12080, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 122.02020202020202, + "max_text_length": 224, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "guj_Gujr": { + "num_samples": 99, + "number_of_characters": 11458, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 115.73737373737374, + "max_text_length": 241, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "hat_Latn": { + "num_samples": 99, + "number_of_characters": 11162, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 112.74747474747475, + "max_text_length": 232, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "hau_Latn": { + "num_samples": 99, + "number_of_characters": 13103, + "number_texts_intersect_with_train": 0, + "min_text_length": 56, + "average_text_length": 132.35353535353536, + "max_text_length": 264, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + 
"count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "heb_Hebr": { + "num_samples": 99, + "number_of_characters": 9368, + "number_texts_intersect_with_train": 0, + "min_text_length": 30, + "average_text_length": 94.62626262626263, + "max_text_length": 234, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "hin_Deva": { + "num_samples": 99, + "number_of_characters": 12141, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 122.63636363636364, + "max_text_length": 265, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "hne_Deva": { + "num_samples": 99, + "number_of_characters": 11629, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 117.46464646464646, + "max_text_length": 277, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "hrv_Latn": { + "num_samples": 99, + "number_of_characters": 11612, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 117.29292929292929, + "max_text_length": 266, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "hun_Latn": { + 
"num_samples": 99, + "number_of_characters": 12615, + "number_texts_intersect_with_train": 0, + "min_text_length": 56, + "average_text_length": 127.42424242424242, + "max_text_length": 279, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "hye_Armn": { + "num_samples": 99, + "number_of_characters": 13012, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 131.43434343434345, + "max_text_length": 313, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ibo_Latn": { + "num_samples": 99, + "number_of_characters": 12297, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 124.21212121212122, + "max_text_length": 282, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ilo_Latn": { + "num_samples": 99, + "number_of_characters": 14531, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 146.77777777777777, + "max_text_length": 382, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ind_Latn": { + "num_samples": 99, + "number_of_characters": 13151, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + 
"average_text_length": 132.83838383838383, + "max_text_length": 322, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "isl_Latn": { + "num_samples": 99, + "number_of_characters": 11691, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 118.0909090909091, + "max_text_length": 277, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ita_Latn": { + "num_samples": 99, + "number_of_characters": 14301, + "number_texts_intersect_with_train": 0, + "min_text_length": 59, + "average_text_length": 144.45454545454547, + "max_text_length": 354, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "jav_Latn": { + "num_samples": 99, + "number_of_characters": 12507, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 126.33333333333333, + "max_text_length": 319, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "jpn_Jpan": { + "num_samples": 99, + "number_of_characters": 5230, + "number_texts_intersect_with_train": 0, + "min_text_length": 23, + "average_text_length": 52.82828282828283, + "max_text_length": 139, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { 
+ "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kab_Latn": { + "num_samples": 99, + "number_of_characters": 12310, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 124.34343434343434, + "max_text_length": 273, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kac_Latn": { + "num_samples": 99, + "number_of_characters": 14674, + "number_texts_intersect_with_train": 0, + "min_text_length": 67, + "average_text_length": 148.22222222222223, + "max_text_length": 270, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kam_Latn": { + "num_samples": 99, + "number_of_characters": 11973, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 120.93939393939394, + "max_text_length": 294, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kan_Knda": { + "num_samples": 99, + "number_of_characters": 12536, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 126.62626262626263, + "max_text_length": 290, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + 
"2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kas_Deva": { + "num_samples": 99, + "number_of_characters": 11616, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 117.33333333333333, + "max_text_length": 321, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kat_Geor": { + "num_samples": 99, + "number_of_characters": 12645, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 127.72727272727273, + "max_text_length": 258, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kaz_Cyrl": { + "num_samples": 99, + "number_of_characters": 12138, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 122.60606060606061, + "max_text_length": 262, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kbp_Latn": { + "num_samples": 99, + "number_of_characters": 13290, + "number_texts_intersect_with_train": 0, + "min_text_length": 60, + "average_text_length": 134.24242424242425, + "max_text_length": 285, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kea_Latn": { + "num_samples": 99, + "number_of_characters": 12097, 
+ "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 122.1919191919192, + "max_text_length": 274, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "khk_Cyrl": { + "num_samples": 99, + "number_of_characters": 12472, + "number_texts_intersect_with_train": 0, + "min_text_length": 59, + "average_text_length": 125.97979797979798, + "max_text_length": 294, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "khm_Khmr": { + "num_samples": 99, + "number_of_characters": 14257, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 144.010101010101, + "max_text_length": 335, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kik_Latn": { + "num_samples": 99, + "number_of_characters": 14642, + "number_texts_intersect_with_train": 0, + "min_text_length": 56, + "average_text_length": 147.8989898989899, + "max_text_length": 450, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kin_Latn": { + "num_samples": 99, + "number_of_characters": 13598, + "number_texts_intersect_with_train": 0, + "min_text_length": 60, + "average_text_length": 137.35353535353536, + "max_text_length": 
304, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kir_Cyrl": { + "num_samples": 99, + "number_of_characters": 11912, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 120.32323232323232, + "max_text_length": 304, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kmb_Latn": { + "num_samples": 99, + "number_of_characters": 13454, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 135.8989898989899, + "max_text_length": 344, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kmr_Latn": { + "num_samples": 99, + "number_of_characters": 12294, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 124.18181818181819, + "max_text_length": 252, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "knc_Latn": { + "num_samples": 99, + "number_of_characters": 12857, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 129.86868686868686, + "max_text_length": 336, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + 
"count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kon_Latn": { + "num_samples": 99, + "number_of_characters": 13817, + "number_texts_intersect_with_train": 0, + "min_text_length": 56, + "average_text_length": 139.56565656565655, + "max_text_length": 369, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "kor_Hang": { + "num_samples": 99, + "number_of_characters": 6002, + "number_texts_intersect_with_train": 0, + "min_text_length": 28, + "average_text_length": 60.62626262626262, + "max_text_length": 153, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "lao_Laoo": { + "num_samples": 99, + "number_of_characters": 11735, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 118.53535353535354, + "max_text_length": 275, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "lij_Latn": { + "num_samples": 99, + "number_of_characters": 13313, + "number_texts_intersect_with_train": 0, + "min_text_length": 61, + "average_text_length": 134.4747474747475, + "max_text_length": 280, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, 
+ "lim_Latn": { + "num_samples": 99, + "number_of_characters": 12281, + "number_texts_intersect_with_train": 0, + "min_text_length": 64, + "average_text_length": 124.05050505050505, + "max_text_length": 297, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "lin_Latn": { + "num_samples": 99, + "number_of_characters": 13158, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 132.9090909090909, + "max_text_length": 351, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "lit_Latn": { + "num_samples": 99, + "number_of_characters": 11627, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 117.44444444444444, + "max_text_length": 283, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "lmo_Latn": { + "num_samples": 99, + "number_of_characters": 12958, + "number_texts_intersect_with_train": 0, + "min_text_length": 56, + "average_text_length": 130.88888888888889, + "max_text_length": 292, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ltg_Latn": { + "num_samples": 99, + "number_of_characters": 11656, + "number_texts_intersect_with_train": 0, + "min_text_length": 
52, + "average_text_length": 117.73737373737374, + "max_text_length": 267, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ltz_Latn": { + "num_samples": 99, + "number_of_characters": 13281, + "number_texts_intersect_with_train": 0, + "min_text_length": 59, + "average_text_length": 134.15151515151516, + "max_text_length": 305, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "lua_Latn": { + "num_samples": 99, + "number_of_characters": 12611, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 127.38383838383838, + "max_text_length": 279, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "lug_Latn": { + "num_samples": 99, + "number_of_characters": 12375, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 125.0, + "max_text_length": 269, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "luo_Latn": { + "num_samples": 99, + "number_of_characters": 12626, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 127.53535353535354, + "max_text_length": 284, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + 
"count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "lus_Latn": { + "num_samples": 99, + "number_of_characters": 13049, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 131.8080808080808, + "max_text_length": 269, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "lvs_Latn": { + "num_samples": 99, + "number_of_characters": 12030, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 121.51515151515152, + "max_text_length": 303, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "mag_Deva": { + "num_samples": 99, + "number_of_characters": 11645, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 117.62626262626263, + "max_text_length": 288, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "mai_Deva": { + "num_samples": 99, + "number_of_characters": 11954, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 120.74747474747475, + "max_text_length": 269, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + 
"2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "mal_Mlym": { + "num_samples": 99, + "number_of_characters": 13505, + "number_texts_intersect_with_train": 0, + "min_text_length": 62, + "average_text_length": 136.41414141414143, + "max_text_length": 289, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "mar_Deva": { + "num_samples": 99, + "number_of_characters": 11943, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 120.63636363636364, + "max_text_length": 278, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "min_Latn": { + "num_samples": 99, + "number_of_characters": 12986, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 131.17171717171718, + "max_text_length": 334, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "mkd_Cyrl": { + "num_samples": 99, + "number_of_characters": 12250, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 123.73737373737374, + "max_text_length": 262, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "mlt_Latn": { + "num_samples": 99, + "number_of_characters": 13403, 
+ "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 135.3838383838384, + "max_text_length": 297, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "mni_Beng": { + "num_samples": 99, + "number_of_characters": 12540, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 126.66666666666667, + "max_text_length": 302, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "mos_Latn": { + "num_samples": 99, + "number_of_characters": 11316, + "number_texts_intersect_with_train": 0, + "min_text_length": 30, + "average_text_length": 114.3030303030303, + "max_text_length": 273, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "mri_Latn": { + "num_samples": 99, + "number_of_characters": 13526, + "number_texts_intersect_with_train": 0, + "min_text_length": 63, + "average_text_length": 136.62626262626262, + "max_text_length": 262, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "mya_Mymr": { + "num_samples": 99, + "number_of_characters": 14699, + "number_texts_intersect_with_train": 0, + "min_text_length": 72, + "average_text_length": 148.4747474747475, + "max_text_length": 
323, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "nld_Latn": { + "num_samples": 99, + "number_of_characters": 13304, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 134.3838383838384, + "max_text_length": 302, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "nno_Latn": { + "num_samples": 99, + "number_of_characters": 11901, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 120.21212121212122, + "max_text_length": 247, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "nob_Latn": { + "num_samples": 99, + "number_of_characters": 12022, + "number_texts_intersect_with_train": 0, + "min_text_length": 61, + "average_text_length": 121.43434343434343, + "max_text_length": 256, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "npi_Deva": { + "num_samples": 99, + "number_of_characters": 11376, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 114.9090909090909, + "max_text_length": 234, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + 
"count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "nqo_Nkoo": { + "num_samples": 99, + "number_of_characters": 15376, + "number_texts_intersect_with_train": 0, + "min_text_length": 62, + "average_text_length": 155.31313131313132, + "max_text_length": 405, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "nso_Latn": { + "num_samples": 99, + "number_of_characters": 13908, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 140.4848484848485, + "max_text_length": 339, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "nus_Latn": { + "num_samples": 99, + "number_of_characters": 12769, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 128.97979797979798, + "max_text_length": 294, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "nya_Latn": { + "num_samples": 99, + "number_of_characters": 13540, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 136.76767676767676, + "max_text_length": 302, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + 
}, + "oci_Latn": { + "num_samples": 99, + "number_of_characters": 13612, + "number_texts_intersect_with_train": 0, + "min_text_length": 60, + "average_text_length": 137.4949494949495, + "max_text_length": 303, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ory_Orya": { + "num_samples": 99, + "number_of_characters": 12294, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 124.18181818181819, + "max_text_length": 285, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "pag_Latn": { + "num_samples": 99, + "number_of_characters": 11999, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 121.20202020202021, + "max_text_length": 268, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "pan_Guru": { + "num_samples": 99, + "number_of_characters": 12092, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 122.14141414141415, + "max_text_length": 267, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "pap_Latn": { + "num_samples": 99, + "number_of_characters": 12612, + "number_texts_intersect_with_train": 0, + 
"min_text_length": 47, + "average_text_length": 127.39393939393939, + "max_text_length": 297, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "pbt_Arab": { + "num_samples": 99, + "number_of_characters": 11511, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 116.27272727272727, + "max_text_length": 233, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "pes_Arab": { + "num_samples": 99, + "number_of_characters": 11316, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 114.3030303030303, + "max_text_length": 266, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "plt_Latn": { + "num_samples": 99, + "number_of_characters": 14961, + "number_texts_intersect_with_train": 0, + "min_text_length": 65, + "average_text_length": 151.12121212121212, + "max_text_length": 318, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "pol_Latn": { + "num_samples": 99, + "number_of_characters": 12809, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 129.3838383838384, + "max_text_length": 275, + "unique_text": 99, + "unique_labels": 
7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "por_Latn": { + "num_samples": 99, + "number_of_characters": 13032, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 131.63636363636363, + "max_text_length": 275, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "prs_Arab": { + "num_samples": 99, + "number_of_characters": 11080, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 111.91919191919192, + "max_text_length": 254, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "quy_Latn": { + "num_samples": 99, + "number_of_characters": 12709, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 128.37373737373738, + "max_text_length": 269, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ron_Latn": { + "num_samples": 99, + "number_of_characters": 13716, + "number_texts_intersect_with_train": 0, + "min_text_length": 56, + "average_text_length": 138.54545454545453, + "max_text_length": 293, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + 
"0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "run_Latn": { + "num_samples": 99, + "number_of_characters": 13240, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 133.73737373737373, + "max_text_length": 359, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "rus_Cyrl": { + "num_samples": 99, + "number_of_characters": 12914, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 130.44444444444446, + "max_text_length": 300, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "sag_Latn": { + "num_samples": 99, + "number_of_characters": 12837, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 129.66666666666666, + "max_text_length": 333, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "san_Deva": { + "num_samples": 99, + "number_of_characters": 11898, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 120.18181818181819, + "max_text_length": 244, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "sat_Olck": { + "num_samples": 99, + 
"number_of_characters": 13143, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 132.75757575757575, + "max_text_length": 341, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "scn_Latn": { + "num_samples": 99, + "number_of_characters": 12608, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 127.35353535353535, + "max_text_length": 253, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "shn_Mymr": { + "num_samples": 99, + "number_of_characters": 17115, + "number_texts_intersect_with_train": 0, + "min_text_length": 69, + "average_text_length": 172.87878787878788, + "max_text_length": 438, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "sin_Sinh": { + "num_samples": 99, + "number_of_characters": 11972, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 120.92929292929293, + "max_text_length": 270, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "slk_Latn": { + "num_samples": 99, + "number_of_characters": 12072, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 
121.93939393939394, + "max_text_length": 260, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "slv_Latn": { + "num_samples": 99, + "number_of_characters": 11818, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 119.37373737373737, + "max_text_length": 277, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "smo_Latn": { + "num_samples": 99, + "number_of_characters": 14067, + "number_texts_intersect_with_train": 0, + "min_text_length": 55, + "average_text_length": 142.0909090909091, + "max_text_length": 328, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "sna_Latn": { + "num_samples": 99, + "number_of_characters": 13417, + "number_texts_intersect_with_train": 0, + "min_text_length": 62, + "average_text_length": 135.5252525252525, + "max_text_length": 338, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "snd_Arab": { + "num_samples": 99, + "number_of_characters": 10877, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 109.86868686868686, + "max_text_length": 236, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + 
"6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "som_Latn": { + "num_samples": 99, + "number_of_characters": 13911, + "number_texts_intersect_with_train": 0, + "min_text_length": 67, + "average_text_length": 140.5151515151515, + "max_text_length": 309, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "sot_Latn": { + "num_samples": 99, + "number_of_characters": 14701, + "number_texts_intersect_with_train": 0, + "min_text_length": 66, + "average_text_length": 148.4949494949495, + "max_text_length": 327, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "spa_Latn": { + "num_samples": 99, + "number_of_characters": 14103, + "number_texts_intersect_with_train": 0, + "min_text_length": 59, + "average_text_length": 142.45454545454547, + "max_text_length": 299, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "srd_Latn": { + "num_samples": 99, + "number_of_characters": 13989, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 141.3030303030303, + "max_text_length": 301, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + 
}, + "3": { + "count": 14 + } + } + }, + "srp_Cyrl": { + "num_samples": 99, + "number_of_characters": 11739, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 118.57575757575758, + "max_text_length": 267, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ssw_Latn": { + "num_samples": 99, + "number_of_characters": 13602, + "number_texts_intersect_with_train": 0, + "min_text_length": 56, + "average_text_length": 137.3939393939394, + "max_text_length": 351, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "sun_Latn": { + "num_samples": 99, + "number_of_characters": 12583, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 127.1010101010101, + "max_text_length": 284, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "swe_Latn": { + "num_samples": 99, + "number_of_characters": 11944, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 120.64646464646465, + "max_text_length": 287, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "swh_Latn": { + "num_samples": 99, + "number_of_characters": 12885, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 130.15151515151516, + "max_text_length": 333, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "szl_Latn": { + "num_samples": 99, + "number_of_characters": 12522, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 126.48484848484848, + "max_text_length": 260, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tam_Taml": { + "num_samples": 99, + "number_of_characters": 14152, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 142.94949494949495, + "max_text_length": 299, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "taq_Tfng": { + "num_samples": 99, + "number_of_characters": 11019, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 111.3030303030303, + "max_text_length": 251, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tat_Cyrl": { + "num_samples": 99, + "number_of_characters": 12041, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 121.62626262626263, + "max_text_length": 
286, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tel_Telu": { + "num_samples": 99, + "number_of_characters": 12173, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 122.95959595959596, + "max_text_length": 301, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tgk_Cyrl": { + "num_samples": 99, + "number_of_characters": 13159, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 132.91919191919192, + "max_text_length": 289, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tgl_Latn": { + "num_samples": 99, + "number_of_characters": 15191, + "number_texts_intersect_with_train": 0, + "min_text_length": 74, + "average_text_length": 153.44444444444446, + "max_text_length": 357, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tha_Thai": { + "num_samples": 99, + "number_of_characters": 11151, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 112.63636363636364, + "max_text_length": 258, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + 
"count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tir_Ethi": { + "num_samples": 99, + "number_of_characters": 8259, + "number_texts_intersect_with_train": 0, + "min_text_length": 33, + "average_text_length": 83.42424242424242, + "max_text_length": 217, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tpi_Latn": { + "num_samples": 99, + "number_of_characters": 15190, + "number_texts_intersect_with_train": 0, + "min_text_length": 57, + "average_text_length": 153.43434343434345, + "max_text_length": 294, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tsn_Latn": { + "num_samples": 99, + "number_of_characters": 15178, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 153.31313131313132, + "max_text_length": 311, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tso_Latn": { + "num_samples": 99, + "number_of_characters": 14338, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 144.82828282828282, + "max_text_length": 302, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + 
}, + "tuk_Latn": { + "num_samples": 99, + "number_of_characters": 12568, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 126.94949494949495, + "max_text_length": 272, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tum_Latn": { + "num_samples": 99, + "number_of_characters": 15459, + "number_texts_intersect_with_train": 0, + "min_text_length": 75, + "average_text_length": 156.15151515151516, + "max_text_length": 428, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tur_Latn": { + "num_samples": 99, + "number_of_characters": 12340, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 124.64646464646465, + "max_text_length": 266, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "twi_Latn": { + "num_samples": 99, + "number_of_characters": 11987, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 121.08080808080808, + "max_text_length": 277, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "tzm_Tfng": { + "num_samples": 99, + "number_of_characters": 10671, + "number_texts_intersect_with_train": 0, + 
"min_text_length": 45, + "average_text_length": 107.78787878787878, + "max_text_length": 230, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "uig_Arab": { + "num_samples": 99, + "number_of_characters": 12983, + "number_texts_intersect_with_train": 0, + "min_text_length": 56, + "average_text_length": 131.14141414141415, + "max_text_length": 259, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ukr_Cyrl": { + "num_samples": 99, + "number_of_characters": 12157, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 122.79797979797979, + "max_text_length": 309, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "umb_Latn": { + "num_samples": 99, + "number_of_characters": 12458, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 125.83838383838383, + "max_text_length": 262, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "urd_Arab": { + "num_samples": 99, + "number_of_characters": 11951, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 120.71717171717172, + "max_text_length": 272, + "unique_text": 99, + 
"unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "uzn_Latn": { + "num_samples": 99, + "number_of_characters": 13488, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 136.24242424242425, + "max_text_length": 306, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "vec_Latn": { + "num_samples": 99, + "number_of_characters": 12257, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 123.8080808080808, + "max_text_length": 281, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "vie_Latn": { + "num_samples": 99, + "number_of_characters": 12512, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 126.38383838383838, + "max_text_length": 308, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "war_Latn": { + "num_samples": 99, + "number_of_characters": 14978, + "number_texts_intersect_with_train": 0, + "min_text_length": 55, + "average_text_length": 151.2929292929293, + "max_text_length": 367, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + 
"count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "wol_Latn": { + "num_samples": 99, + "number_of_characters": 11448, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 115.63636363636364, + "max_text_length": 286, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "xho_Latn": { + "num_samples": 99, + "number_of_characters": 12889, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 130.1919191919192, + "max_text_length": 321, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "ydd_Hebr": { + "num_samples": 99, + "number_of_characters": 12835, + "number_texts_intersect_with_train": 0, + "min_text_length": 55, + "average_text_length": 129.64646464646464, + "max_text_length": 292, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "yor_Latn": { + "num_samples": 99, + "number_of_characters": 11868, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 119.87878787878788, + "max_text_length": 291, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "yue_Hant": { + 
"num_samples": 99, + "number_of_characters": 3639, + "number_texts_intersect_with_train": 0, + "min_text_length": 15, + "average_text_length": 36.75757575757576, + "max_text_length": 104, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "zho_Hant": { + "num_samples": 99, + "number_of_characters": 3823, + "number_texts_intersect_with_train": 0, + "min_text_length": 15, + "average_text_length": 38.61616161616162, + "max_text_length": 152, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "zsm_Latn": { + "num_samples": 99, + "number_of_characters": 13428, + "number_texts_intersect_with_train": 0, + "min_text_length": 65, + "average_text_length": 135.63636363636363, + "max_text_length": 328, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + }, + "zul_Latn": { + "num_samples": 99, + "number_of_characters": 13650, + "number_texts_intersect_with_train": 0, + "min_text_length": 55, + "average_text_length": 137.87878787878788, + "max_text_length": 305, + "unique_text": 99, + "unique_labels": 7, + "labels": { + "5": { + "count": 12 + }, + "6": { + "count": 20 + }, + "1": { + "count": 8 + }, + "4": { + "count": 25 + }, + "0": { + "count": 9 + }, + "2": { + "count": 11 + }, + "3": { + "count": 14 + } + } + } + } + }, + "test": { + "num_samples": 40188, + "number_of_characters": 5446774, + "number_texts_intersect_with_train": 6, + "min_text_length": 13, + 
"average_text_length": 135.53234796456653, + "max_text_length": 597, + "unique_text": 40140, + "unique_labels": 7, + "labels": { + "4": { + "count": 10047 + }, + "6": { + "count": 7880 + }, + "3": { + "count": 5910 + }, + "5": { + "count": 4925 + }, + "2": { + "count": 4334 + }, + "0": { + "count": 3743 + }, + "1": { + "count": 3349 + } + }, + "hf_subset_descriptive_stats": { + "ace_Latn": { + "num_samples": 204, + "number_of_characters": 28384, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 139.13725490196077, + "max_text_length": 355, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "acm_Arab": { + "num_samples": 204, + "number_of_characters": 23343, + "number_texts_intersect_with_train": 0, + "min_text_length": 36, + "average_text_length": 114.42647058823529, + "max_text_length": 303, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "acq_Arab": { + "num_samples": 204, + "number_of_characters": 23661, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 115.98529411764706, + "max_text_length": 318, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "aeb_Arab": { + "num_samples": 204, + "number_of_characters": 22835, + "number_texts_intersect_with_train": 0, + "min_text_length": 34, + "average_text_length": 111.93627450980392, + "max_text_length": 
305, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "afr_Latn": { + "num_samples": 204, + "number_of_characters": 27684, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 135.7058823529412, + "max_text_length": 385, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ajp_Arab": { + "num_samples": 204, + "number_of_characters": 22226, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 108.95098039215686, + "max_text_length": 310, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "aka_Latn": { + "num_samples": 204, + "number_of_characters": 26703, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 130.89705882352942, + "max_text_length": 347, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "als_Latn": { + "num_samples": 204, + "number_of_characters": 29938, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 146.7549019607843, + "max_text_length": 392, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + 
}, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "amh_Ethi": { + "num_samples": 204, + "number_of_characters": 17950, + "number_texts_intersect_with_train": 0, + "min_text_length": 33, + "average_text_length": 87.99019607843137, + "max_text_length": 203, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "apc_Arab": { + "num_samples": 204, + "number_of_characters": 22036, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 108.01960784313725, + "max_text_length": 306, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "arb_Latn": { + "num_samples": 204, + "number_of_characters": 31280, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 153.33333333333334, + "max_text_length": 420, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ars_Arab": { + "num_samples": 204, + "number_of_characters": 23962, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 117.46078431372548, + "max_text_length": 316, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + 
"1": { + "count": 17 + } + } + }, + "ary_Arab": { + "num_samples": 204, + "number_of_characters": 22941, + "number_texts_intersect_with_train": 1, + "min_text_length": 23, + "average_text_length": 112.45588235294117, + "max_text_length": 315, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "arz_Arab": { + "num_samples": 204, + "number_of_characters": 23362, + "number_texts_intersect_with_train": 0, + "min_text_length": 36, + "average_text_length": 114.51960784313725, + "max_text_length": 315, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "asm_Beng": { + "num_samples": 204, + "number_of_characters": 25327, + "number_texts_intersect_with_train": 0, + "min_text_length": 35, + "average_text_length": 124.15196078431373, + "max_text_length": 324, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ast_Latn": { + "num_samples": 204, + "number_of_characters": 27367, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 134.15196078431373, + "max_text_length": 388, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "awa_Deva": { + "num_samples": 204, + "number_of_characters": 25526, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 125.12745098039215, + "max_text_length": 344, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ayr_Latn": { + "num_samples": 204, + "number_of_characters": 27752, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 136.0392156862745, + "max_text_length": 376, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "azb_Arab": { + "num_samples": 204, + "number_of_characters": 24246, + "number_texts_intersect_with_train": 1, + "min_text_length": 30, + "average_text_length": 118.8529411764706, + "max_text_length": 327, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "azj_Latn": { + "num_samples": 204, + "number_of_characters": 29238, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 143.3235294117647, + "max_text_length": 378, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "bak_Cyrl": { + "num_samples": 204, + "number_of_characters": 27075, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 132.72058823529412, + 
"max_text_length": 270, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "bam_Latn": { + "num_samples": 204, + "number_of_characters": 25429, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 124.65196078431373, + "max_text_length": 372, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ban_Latn": { + "num_samples": 204, + "number_of_characters": 29593, + "number_texts_intersect_with_train": 0, + "min_text_length": 59, + "average_text_length": 145.06372549019608, + "max_text_length": 354, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "bel_Cyrl": { + "num_samples": 204, + "number_of_characters": 30294, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 148.5, + "max_text_length": 375, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "bem_Latn": { + "num_samples": 204, + "number_of_characters": 32751, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 160.5441176470588, + "max_text_length": 422, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 
40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ben_Beng": { + "num_samples": 204, + "number_of_characters": 25782, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 126.38235294117646, + "max_text_length": 325, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "bho_Deva": { + "num_samples": 204, + "number_of_characters": 25557, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 125.27941176470588, + "max_text_length": 308, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "bjn_Latn": { + "num_samples": 204, + "number_of_characters": 27830, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 136.42156862745097, + "max_text_length": 368, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "bod_Tibt": { + "num_samples": 204, + "number_of_characters": 29634, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 145.26470588235293, + "max_text_length": 351, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 
+ }, + "1": { + "count": 17 + } + } + }, + "bos_Latn": { + "num_samples": 204, + "number_of_characters": 26891, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 131.8186274509804, + "max_text_length": 345, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "bug_Latn": { + "num_samples": 204, + "number_of_characters": 28817, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 141.25980392156862, + "max_text_length": 308, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "bul_Cyrl": { + "num_samples": 204, + "number_of_characters": 27995, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 137.23039215686273, + "max_text_length": 352, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "cat_Latn": { + "num_samples": 204, + "number_of_characters": 29147, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 142.87745098039215, + "max_text_length": 378, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ceb_Latn": { + "num_samples": 204, + "number_of_characters": 32095, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 61, + "average_text_length": 157.32843137254903, + "max_text_length": 427, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ces_Latn": { + "num_samples": 204, + "number_of_characters": 25794, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 126.44117647058823, + "max_text_length": 327, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "cjk_Latn": { + "num_samples": 204, + "number_of_characters": 28701, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 140.69117647058823, + "max_text_length": 366, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ckb_Arab": { + "num_samples": 204, + "number_of_characters": 25571, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 125.34803921568627, + "max_text_length": 353, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "crh_Latn": { + "num_samples": 204, + "number_of_characters": 27141, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 133.0441176470588, + 
"max_text_length": 382, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "cym_Latn": { + "num_samples": 204, + "number_of_characters": 28271, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 138.58333333333334, + "max_text_length": 360, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "dan_Latn": { + "num_samples": 204, + "number_of_characters": 27166, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 133.16666666666666, + "max_text_length": 369, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "deu_Latn": { + "num_samples": 204, + "number_of_characters": 30845, + "number_texts_intersect_with_train": 0, + "min_text_length": 55, + "average_text_length": 151.20098039215685, + "max_text_length": 408, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "dik_Latn": { + "num_samples": 204, + "number_of_characters": 22760, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 111.56862745098039, + "max_text_length": 357, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + 
"6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "dyu_Latn": { + "num_samples": 204, + "number_of_characters": 27058, + "number_texts_intersect_with_train": 1, + "min_text_length": 42, + "average_text_length": 132.63725490196077, + "max_text_length": 308, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "dzo_Tibt": { + "num_samples": 204, + "number_of_characters": 32668, + "number_texts_intersect_with_train": 0, + "min_text_length": 67, + "average_text_length": 160.13725490196077, + "max_text_length": 390, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ell_Grek": { + "num_samples": 204, + "number_of_characters": 31839, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 156.0735294117647, + "max_text_length": 464, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "eng_Latn": { + "num_samples": 204, + "number_of_characters": 26599, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 130.38725490196077, + "max_text_length": 368, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": 
{ + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "epo_Latn": { + "num_samples": 204, + "number_of_characters": 26512, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 129.9607843137255, + "max_text_length": 353, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "est_Latn": { + "num_samples": 204, + "number_of_characters": 26362, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 129.22549019607843, + "max_text_length": 322, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "eus_Latn": { + "num_samples": 204, + "number_of_characters": 28399, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 139.2107843137255, + "max_text_length": 421, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ewe_Latn": { + "num_samples": 204, + "number_of_characters": 25812, + "number_texts_intersect_with_train": 0, + "min_text_length": 30, + "average_text_length": 126.52941176470588, + "max_text_length": 351, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "fao_Latn": { + "num_samples": 204, + 
"number_of_characters": 26765, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 131.20098039215685, + "max_text_length": 361, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "fij_Latn": { + "num_samples": 204, + "number_of_characters": 31471, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 154.26960784313727, + "max_text_length": 522, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "fin_Latn": { + "num_samples": 204, + "number_of_characters": 28721, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 140.7892156862745, + "max_text_length": 371, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "fon_Latn": { + "num_samples": 204, + "number_of_characters": 27878, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 136.65686274509804, + "max_text_length": 345, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "fra_Latn": { + "num_samples": 204, + "number_of_characters": 31697, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + 
"average_text_length": 155.37745098039215, + "max_text_length": 415, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "fur_Latn": { + "num_samples": 204, + "number_of_characters": 28935, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 141.83823529411765, + "max_text_length": 399, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "fuv_Latn": { + "num_samples": 204, + "number_of_characters": 24909, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 122.1029411764706, + "max_text_length": 304, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "gaz_Latn": { + "num_samples": 204, + "number_of_characters": 31464, + "number_texts_intersect_with_train": 0, + "min_text_length": 65, + "average_text_length": 154.23529411764707, + "max_text_length": 456, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "gla_Latn": { + "num_samples": 204, + "number_of_characters": 33094, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 162.22549019607843, + "max_text_length": 478, + "unique_text": 204, + "unique_labels": 7, + 
"labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "gle_Latn": { + "num_samples": 204, + "number_of_characters": 30917, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 151.55392156862746, + "max_text_length": 435, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "glg_Latn": { + "num_samples": 204, + "number_of_characters": 29378, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 144.00980392156862, + "max_text_length": 368, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "grn_Latn": { + "num_samples": 204, + "number_of_characters": 26844, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 131.58823529411765, + "max_text_length": 311, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "guj_Gujr": { + "num_samples": 204, + "number_of_characters": 25270, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 123.87254901960785, + "max_text_length": 321, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 
25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "hat_Latn": { + "num_samples": 204, + "number_of_characters": 24345, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 119.33823529411765, + "max_text_length": 337, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "hau_Latn": { + "num_samples": 204, + "number_of_characters": 28577, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 140.08333333333334, + "max_text_length": 372, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "heb_Hebr": { + "num_samples": 204, + "number_of_characters": 20857, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 102.24019607843137, + "max_text_length": 281, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "hin_Deva": { + "num_samples": 204, + "number_of_characters": 26621, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 130.4950980392157, + "max_text_length": 381, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "hne_Deva": { 
+ "num_samples": 204, + "number_of_characters": 25395, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 124.48529411764706, + "max_text_length": 326, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "hrv_Latn": { + "num_samples": 204, + "number_of_characters": 26333, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 129.08333333333334, + "max_text_length": 346, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "hun_Latn": { + "num_samples": 204, + "number_of_characters": 28073, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 137.61274509803923, + "max_text_length": 387, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "hye_Armn": { + "num_samples": 204, + "number_of_characters": 30352, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 148.7843137254902, + "max_text_length": 386, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ibo_Latn": { + "num_samples": 204, + "number_of_characters": 27175, + "number_texts_intersect_with_train": 0, + 
"min_text_length": 39, + "average_text_length": 133.2107843137255, + "max_text_length": 356, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ilo_Latn": { + "num_samples": 204, + "number_of_characters": 32366, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 158.65686274509804, + "max_text_length": 432, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ind_Latn": { + "num_samples": 204, + "number_of_characters": 28851, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 141.4264705882353, + "max_text_length": 353, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "isl_Latn": { + "num_samples": 204, + "number_of_characters": 25951, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 127.21078431372548, + "max_text_length": 359, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ita_Latn": { + "num_samples": 204, + "number_of_characters": 31292, + "number_texts_intersect_with_train": 0, + "min_text_length": 55, + "average_text_length": 153.3921568627451, + "max_text_length": 375, + "unique_text": 204, + 
"unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "jav_Latn": { + "num_samples": 204, + "number_of_characters": 27262, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 133.63725490196077, + "max_text_length": 358, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "jpn_Jpan": { + "num_samples": 204, + "number_of_characters": 11555, + "number_texts_intersect_with_train": 0, + "min_text_length": 19, + "average_text_length": 56.6421568627451, + "max_text_length": 137, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kab_Latn": { + "num_samples": 204, + "number_of_characters": 26355, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 129.19117647058823, + "max_text_length": 358, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kac_Latn": { + "num_samples": 204, + "number_of_characters": 34990, + "number_texts_intersect_with_train": 0, + "min_text_length": 63, + "average_text_length": 171.51960784313727, + "max_text_length": 492, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, 
+ "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kam_Latn": { + "num_samples": 204, + "number_of_characters": 25972, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 127.31372549019608, + "max_text_length": 398, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kan_Knda": { + "num_samples": 204, + "number_of_characters": 28048, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 137.49019607843138, + "max_text_length": 338, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kas_Deva": { + "num_samples": 204, + "number_of_characters": 25363, + "number_texts_intersect_with_train": 0, + "min_text_length": 32, + "average_text_length": 124.32843137254902, + "max_text_length": 390, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kat_Geor": { + "num_samples": 204, + "number_of_characters": 29764, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 145.90196078431373, + "max_text_length": 369, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } 
+ }, + "kaz_Cyrl": { + "num_samples": 204, + "number_of_characters": 27794, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 136.2450980392157, + "max_text_length": 357, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kbp_Latn": { + "num_samples": 204, + "number_of_characters": 28972, + "number_texts_intersect_with_train": 1, + "min_text_length": 42, + "average_text_length": 142.01960784313727, + "max_text_length": 371, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kea_Latn": { + "num_samples": 204, + "number_of_characters": 26231, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 128.58333333333334, + "max_text_length": 360, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "khk_Cyrl": { + "num_samples": 204, + "number_of_characters": 28116, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 137.8235294117647, + "max_text_length": 355, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "khm_Khmr": { + "num_samples": 204, + "number_of_characters": 31492, + "number_texts_intersect_with_train": 0, 
+ "min_text_length": 49, + "average_text_length": 154.37254901960785, + "max_text_length": 417, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kik_Latn": { + "num_samples": 204, + "number_of_characters": 30896, + "number_texts_intersect_with_train": 0, + "min_text_length": 56, + "average_text_length": 151.45098039215685, + "max_text_length": 366, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kin_Latn": { + "num_samples": 204, + "number_of_characters": 30079, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 147.44607843137254, + "max_text_length": 440, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kir_Cyrl": { + "num_samples": 204, + "number_of_characters": 27764, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 136.09803921568627, + "max_text_length": 401, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kmb_Latn": { + "num_samples": 204, + "number_of_characters": 29702, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 145.59803921568627, + "max_text_length": 426, + "unique_text": 
204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kmr_Latn": { + "num_samples": 204, + "number_of_characters": 26643, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 130.60294117647058, + "max_text_length": 359, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "knc_Latn": { + "num_samples": 204, + "number_of_characters": 27712, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 135.84313725490196, + "max_text_length": 356, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kon_Latn": { + "num_samples": 204, + "number_of_characters": 29941, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 146.76960784313727, + "max_text_length": 410, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "kor_Hang": { + "num_samples": 204, + "number_of_characters": 13175, + "number_texts_intersect_with_train": 0, + "min_text_length": 25, + "average_text_length": 64.58333333333333, + "max_text_length": 155, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 
30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "lao_Laoo": { + "num_samples": 204, + "number_of_characters": 26239, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 128.62254901960785, + "max_text_length": 322, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "lij_Latn": { + "num_samples": 204, + "number_of_characters": 29331, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 143.77941176470588, + "max_text_length": 346, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "lim_Latn": { + "num_samples": 204, + "number_of_characters": 27445, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 134.5343137254902, + "max_text_length": 416, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "lin_Latn": { + "num_samples": 204, + "number_of_characters": 28657, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 140.47549019607843, + "max_text_length": 379, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + 
} + } + }, + "lit_Latn": { + "num_samples": 204, + "number_of_characters": 26929, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 132.0049019607843, + "max_text_length": 318, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "lmo_Latn": { + "num_samples": 204, + "number_of_characters": 28464, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 139.52941176470588, + "max_text_length": 355, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ltg_Latn": { + "num_samples": 204, + "number_of_characters": 26264, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 128.7450980392157, + "max_text_length": 348, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ltz_Latn": { + "num_samples": 204, + "number_of_characters": 29693, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 145.55392156862746, + "max_text_length": 404, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "lua_Latn": { + "num_samples": 204, + "number_of_characters": 28641, + 
"number_texts_intersect_with_train": 1, + "min_text_length": 41, + "average_text_length": 140.39705882352942, + "max_text_length": 407, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "lug_Latn": { + "num_samples": 204, + "number_of_characters": 27146, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 133.0686274509804, + "max_text_length": 396, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "luo_Latn": { + "num_samples": 204, + "number_of_characters": 27557, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 135.08333333333334, + "max_text_length": 372, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "lus_Latn": { + "num_samples": 204, + "number_of_characters": 28791, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 141.13235294117646, + "max_text_length": 322, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "lvs_Latn": { + "num_samples": 204, + "number_of_characters": 27262, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 133.63725490196077, + 
"max_text_length": 360, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "mag_Deva": { + "num_samples": 204, + "number_of_characters": 25212, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 123.58823529411765, + "max_text_length": 343, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "mai_Deva": { + "num_samples": 204, + "number_of_characters": 25849, + "number_texts_intersect_with_train": 0, + "min_text_length": 36, + "average_text_length": 126.71078431372548, + "max_text_length": 310, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "mal_Mlym": { + "num_samples": 204, + "number_of_characters": 29679, + "number_texts_intersect_with_train": 0, + "min_text_length": 55, + "average_text_length": 145.48529411764707, + "max_text_length": 335, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "mar_Deva": { + "num_samples": 204, + "number_of_characters": 26352, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 129.1764705882353, + "max_text_length": 355, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": 
{ + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "min_Latn": { + "num_samples": 204, + "number_of_characters": 28474, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 139.57843137254903, + "max_text_length": 360, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "mkd_Cyrl": { + "num_samples": 204, + "number_of_characters": 27917, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 136.84803921568627, + "max_text_length": 364, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "mlt_Latn": { + "num_samples": 204, + "number_of_characters": 29459, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 144.40686274509804, + "max_text_length": 400, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "mni_Beng": { + "num_samples": 204, + "number_of_characters": 27253, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 133.59313725490196, + "max_text_length": 353, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + 
"count": 19 + }, + "1": { + "count": 17 + } + } + }, + "mos_Latn": { + "num_samples": 204, + "number_of_characters": 25387, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 124.44607843137256, + "max_text_length": 262, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "mri_Latn": { + "num_samples": 204, + "number_of_characters": 29696, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 145.5686274509804, + "max_text_length": 392, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "mya_Mymr": { + "num_samples": 204, + "number_of_characters": 32871, + "number_texts_intersect_with_train": 0, + "min_text_length": 50, + "average_text_length": 161.13235294117646, + "max_text_length": 426, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "nld_Latn": { + "num_samples": 204, + "number_of_characters": 29778, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 145.97058823529412, + "max_text_length": 384, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "nno_Latn": { + "num_samples": 204, + "number_of_characters": 
26902, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 131.87254901960785, + "max_text_length": 359, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "nob_Latn": { + "num_samples": 204, + "number_of_characters": 26532, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + "average_text_length": 130.05882352941177, + "max_text_length": 351, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "npi_Deva": { + "num_samples": 204, + "number_of_characters": 25793, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 126.43627450980392, + "max_text_length": 362, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "nqo_Nkoo": { + "num_samples": 204, + "number_of_characters": 34042, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 166.87254901960785, + "max_text_length": 479, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "nso_Latn": { + "num_samples": 204, + "number_of_characters": 30704, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 
150.50980392156862, + "max_text_length": 423, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "nus_Latn": { + "num_samples": 204, + "number_of_characters": 28604, + "number_texts_intersect_with_train": 0, + "min_text_length": 28, + "average_text_length": 140.2156862745098, + "max_text_length": 490, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "nya_Latn": { + "num_samples": 204, + "number_of_characters": 29715, + "number_texts_intersect_with_train": 0, + "min_text_length": 54, + "average_text_length": 145.66176470588235, + "max_text_length": 421, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "oci_Latn": { + "num_samples": 204, + "number_of_characters": 30338, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 148.7156862745098, + "max_text_length": 408, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ory_Orya": { + "num_samples": 204, + "number_of_characters": 26813, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 131.43627450980392, + "max_text_length": 328, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + 
"count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "pag_Latn": { + "num_samples": 204, + "number_of_characters": 26363, + "number_texts_intersect_with_train": 0, + "min_text_length": 33, + "average_text_length": 129.23039215686273, + "max_text_length": 297, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "pan_Guru": { + "num_samples": 204, + "number_of_characters": 26641, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 130.59313725490196, + "max_text_length": 329, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "pap_Latn": { + "num_samples": 204, + "number_of_characters": 27792, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 136.23529411764707, + "max_text_length": 376, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "pbt_Arab": { + "num_samples": 204, + "number_of_characters": 25176, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 123.41176470588235, + "max_text_length": 307, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + 
"count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "pes_Arab": { + "num_samples": 204, + "number_of_characters": 25399, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 124.50490196078431, + "max_text_length": 324, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "plt_Latn": { + "num_samples": 204, + "number_of_characters": 32723, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 160.40686274509804, + "max_text_length": 479, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "pol_Latn": { + "num_samples": 204, + "number_of_characters": 28364, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 139.0392156862745, + "max_text_length": 337, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "por_Latn": { + "num_samples": 204, + "number_of_characters": 28599, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 140.19117647058823, + "max_text_length": 378, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "prs_Arab": { + "num_samples": 
204, + "number_of_characters": 24313, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 119.18137254901961, + "max_text_length": 289, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "quy_Latn": { + "num_samples": 204, + "number_of_characters": 28775, + "number_texts_intersect_with_train": 0, + "min_text_length": 35, + "average_text_length": 141.05392156862746, + "max_text_length": 434, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ron_Latn": { + "num_samples": 204, + "number_of_characters": 29923, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 146.6813725490196, + "max_text_length": 378, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "run_Latn": { + "num_samples": 204, + "number_of_characters": 29505, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 144.63235294117646, + "max_text_length": 395, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "rus_Cyrl": { + "num_samples": 204, + "number_of_characters": 29475, + "number_texts_intersect_with_train": 0, + "min_text_length": 37, + 
"average_text_length": 144.48529411764707, + "max_text_length": 348, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "sag_Latn": { + "num_samples": 204, + "number_of_characters": 28862, + "number_texts_intersect_with_train": 0, + "min_text_length": 49, + "average_text_length": 141.48039215686273, + "max_text_length": 360, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "san_Deva": { + "num_samples": 204, + "number_of_characters": 26025, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 127.57352941176471, + "max_text_length": 313, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "sat_Olck": { + "num_samples": 204, + "number_of_characters": 27930, + "number_texts_intersect_with_train": 1, + "min_text_length": 55, + "average_text_length": 136.91176470588235, + "max_text_length": 366, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "scn_Latn": { + "num_samples": 204, + "number_of_characters": 28427, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 139.34803921568627, + "max_text_length": 359, + "unique_text": 204, + "unique_labels": 7, + 
"labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "shn_Mymr": { + "num_samples": 204, + "number_of_characters": 38135, + "number_texts_intersect_with_train": 0, + "min_text_length": 63, + "average_text_length": 186.93627450980392, + "max_text_length": 597, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "sin_Sinh": { + "num_samples": 204, + "number_of_characters": 26524, + "number_texts_intersect_with_train": 0, + "min_text_length": 44, + "average_text_length": 130.01960784313727, + "max_text_length": 401, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "slk_Latn": { + "num_samples": 204, + "number_of_characters": 26588, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 130.33333333333334, + "max_text_length": 326, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "slv_Latn": { + "num_samples": 204, + "number_of_characters": 26563, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 130.2107843137255, + "max_text_length": 361, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 
25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "smo_Latn": { + "num_samples": 204, + "number_of_characters": 30669, + "number_texts_intersect_with_train": 0, + "min_text_length": 53, + "average_text_length": 150.33823529411765, + "max_text_length": 409, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "sna_Latn": { + "num_samples": 204, + "number_of_characters": 30444, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 149.23529411764707, + "max_text_length": 399, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "snd_Arab": { + "num_samples": 204, + "number_of_characters": 24002, + "number_texts_intersect_with_train": 0, + "min_text_length": 36, + "average_text_length": 117.65686274509804, + "max_text_length": 306, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "som_Latn": { + "num_samples": 204, + "number_of_characters": 30660, + "number_texts_intersect_with_train": 0, + "min_text_length": 63, + "average_text_length": 150.2941176470588, + "max_text_length": 414, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "sot_Latn": { 
+ "num_samples": 204, + "number_of_characters": 32203, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 157.8578431372549, + "max_text_length": 461, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "spa_Latn": { + "num_samples": 204, + "number_of_characters": 31649, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 155.1421568627451, + "max_text_length": 397, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "srd_Latn": { + "num_samples": 204, + "number_of_characters": 30880, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 151.37254901960785, + "max_text_length": 418, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "srp_Cyrl": { + "num_samples": 204, + "number_of_characters": 26415, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 129.48529411764707, + "max_text_length": 350, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ssw_Latn": { + "num_samples": 204, + "number_of_characters": 30083, + "number_texts_intersect_with_train": 0, + "min_text_length": 
50, + "average_text_length": 147.4656862745098, + "max_text_length": 394, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "sun_Latn": { + "num_samples": 204, + "number_of_characters": 27644, + "number_texts_intersect_with_train": 0, + "min_text_length": 42, + "average_text_length": 135.50980392156862, + "max_text_length": 354, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "swe_Latn": { + "num_samples": 204, + "number_of_characters": 26584, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 130.31372549019608, + "max_text_length": 382, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "swh_Latn": { + "num_samples": 204, + "number_of_characters": 28033, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 137.41666666666666, + "max_text_length": 384, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "szl_Latn": { + "num_samples": 204, + "number_of_characters": 27668, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 135.62745098039215, + "max_text_length": 360, + "unique_text": 204, + "unique_labels": 
7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tam_Taml": { + "num_samples": 204, + "number_of_characters": 31326, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 153.55882352941177, + "max_text_length": 385, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "taq_Tfng": { + "num_samples": 204, + "number_of_characters": 24888, + "number_texts_intersect_with_train": 0, + "min_text_length": 30, + "average_text_length": 122.0, + "max_text_length": 293, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tat_Cyrl": { + "num_samples": 204, + "number_of_characters": 26727, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 131.01470588235293, + "max_text_length": 328, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tel_Telu": { + "num_samples": 204, + "number_of_characters": 27168, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 133.1764705882353, + "max_text_length": 351, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, 
+ "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tgk_Cyrl": { + "num_samples": 204, + "number_of_characters": 29879, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 146.4656862745098, + "max_text_length": 345, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tgl_Latn": { + "num_samples": 204, + "number_of_characters": 33582, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 164.61764705882354, + "max_text_length": 435, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tha_Thai": { + "num_samples": 204, + "number_of_characters": 25327, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 124.15196078431373, + "max_text_length": 333, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tir_Ethi": { + "num_samples": 204, + "number_of_characters": 18732, + "number_texts_intersect_with_train": 0, + "min_text_length": 29, + "average_text_length": 91.82352941176471, + "max_text_length": 231, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tpi_Latn": { + 
"num_samples": 204, + "number_of_characters": 33747, + "number_texts_intersect_with_train": 0, + "min_text_length": 61, + "average_text_length": 165.4264705882353, + "max_text_length": 500, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tsn_Latn": { + "num_samples": 204, + "number_of_characters": 33583, + "number_texts_intersect_with_train": 0, + "min_text_length": 58, + "average_text_length": 164.62254901960785, + "max_text_length": 440, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tso_Latn": { + "num_samples": 204, + "number_of_characters": 32245, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 158.06372549019608, + "max_text_length": 429, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tuk_Latn": { + "num_samples": 204, + "number_of_characters": 28573, + "number_texts_intersect_with_train": 0, + "min_text_length": 46, + "average_text_length": 140.06372549019608, + "max_text_length": 332, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tum_Latn": { + "num_samples": 204, + "number_of_characters": 35435, + "number_texts_intersect_with_train": 0, + "min_text_length": 
47, + "average_text_length": 173.70098039215685, + "max_text_length": 484, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tur_Latn": { + "num_samples": 204, + "number_of_characters": 27515, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 134.87745098039215, + "max_text_length": 386, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "twi_Latn": { + "num_samples": 204, + "number_of_characters": 25915, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 127.0343137254902, + "max_text_length": 335, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "tzm_Tfng": { + "num_samples": 204, + "number_of_characters": 24071, + "number_texts_intersect_with_train": 0, + "min_text_length": 41, + "average_text_length": 117.99509803921569, + "max_text_length": 330, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "uig_Arab": { + "num_samples": 204, + "number_of_characters": 28610, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 140.2450980392157, + "max_text_length": 346, + "unique_text": 204, + "unique_labels": 
7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ukr_Cyrl": { + "num_samples": 204, + "number_of_characters": 27494, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 134.77450980392157, + "max_text_length": 343, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "umb_Latn": { + "num_samples": 204, + "number_of_characters": 26710, + "number_texts_intersect_with_train": 0, + "min_text_length": 38, + "average_text_length": 130.9313725490196, + "max_text_length": 372, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "urd_Arab": { + "num_samples": 204, + "number_of_characters": 26147, + "number_texts_intersect_with_train": 0, + "min_text_length": 40, + "average_text_length": 128.17156862745097, + "max_text_length": 312, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "uzn_Latn": { + "num_samples": 204, + "number_of_characters": 30345, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 148.75, + "max_text_length": 378, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, 
+ "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "vec_Latn": { + "num_samples": 204, + "number_of_characters": 26773, + "number_texts_intersect_with_train": 0, + "min_text_length": 43, + "average_text_length": 131.24019607843138, + "max_text_length": 374, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "vie_Latn": { + "num_samples": 204, + "number_of_characters": 28106, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 137.77450980392157, + "max_text_length": 329, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "war_Latn": { + "num_samples": 204, + "number_of_characters": 33306, + "number_texts_intersect_with_train": 0, + "min_text_length": 60, + "average_text_length": 163.26470588235293, + "max_text_length": 434, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "wol_Latn": { + "num_samples": 204, + "number_of_characters": 26045, + "number_texts_intersect_with_train": 0, + "min_text_length": 47, + "average_text_length": 127.67156862745098, + "max_text_length": 308, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "xho_Latn": { + 
"num_samples": 204, + "number_of_characters": 28212, + "number_texts_intersect_with_train": 0, + "min_text_length": 52, + "average_text_length": 138.2941176470588, + "max_text_length": 395, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "ydd_Hebr": { + "num_samples": 204, + "number_of_characters": 28374, + "number_texts_intersect_with_train": 0, + "min_text_length": 39, + "average_text_length": 139.08823529411765, + "max_text_length": 385, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "yor_Latn": { + "num_samples": 204, + "number_of_characters": 25936, + "number_texts_intersect_with_train": 0, + "min_text_length": 29, + "average_text_length": 127.13725490196079, + "max_text_length": 406, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "yue_Hant": { + "num_samples": 204, + "number_of_characters": 8045, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 39.43627450980392, + "max_text_length": 102, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "zho_Hant": { + "num_samples": 204, + "number_of_characters": 8236, + "number_texts_intersect_with_train": 0, + "min_text_length": 16, 
+ "average_text_length": 40.372549019607845, + "max_text_length": 116, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "zsm_Latn": { + "num_samples": 204, + "number_of_characters": 29520, + "number_texts_intersect_with_train": 0, + "min_text_length": 51, + "average_text_length": 144.7058823529412, + "max_text_length": 354, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + }, + "zul_Latn": { + "num_samples": 204, + "number_of_characters": 29871, + "number_texts_intersect_with_train": 0, + "min_text_length": 45, + "average_text_length": 146.4264705882353, + "max_text_length": 425, + "unique_text": 204, + "unique_labels": 7, + "labels": { + "4": { + "count": 51 + }, + "6": { + "count": 40 + }, + "3": { + "count": 30 + }, + "5": { + "count": 25 + }, + "2": { + "count": 22 + }, + "0": { + "count": 19 + }, + "1": { + "count": 17 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SanskritShlokasClassification.json b/mteb/descriptive_stats/Classification/SanskritShlokasClassification.json new file mode 100644 index 0000000000..92c8b0e4e9 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SanskritShlokasClassification.json @@ -0,0 +1,44 @@ +{ + "train": { + "num_samples": 383, + "number_of_characters": 37693, + "number_texts_intersect_with_train": null, + "min_text_length": 17, + "average_text_length": 98.41514360313316, + "max_text_length": 276, + "unique_text": 383, + "unique_labels": 3, + "labels": { + "Vidur Niti Slokas": { + "count": 118 + }, + "Chanakya Slokas": { + "count": 138 + }, + 
"sanskrit-slogan": { + "count": 127 + } + } + }, + "validation": { + "num_samples": 96, + "number_of_characters": 9277, + "number_texts_intersect_with_train": 0, + "min_text_length": 20, + "average_text_length": 96.63541666666667, + "max_text_length": 170, + "unique_text": 96, + "unique_labels": 3, + "labels": { + "sanskrit-slogan": { + "count": 29 + }, + "Vidur Niti Slokas": { + "count": 28 + }, + "Chanakya Slokas": { + "count": 39 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ScalaClassification.json b/mteb/descriptive_stats/Classification/ScalaClassification.json new file mode 100644 index 0000000000..8d151ab6e0 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ScalaClassification.json @@ -0,0 +1,186 @@ +{ + "test": { + "num_samples": 8192, + "number_of_characters": 839257, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 102.4483642578125, + "max_text_length": 613, + "unique_text": 8192, + "unique_labels": 2, + "labels": { + "0": { + "count": 4096 + }, + "1": { + "count": 4096 + } + }, + "hf_subset_descriptive_stats": { + "Danish": { + "num_samples": 2048, + "number_of_characters": 224132, + "number_texts_intersect_with_train": 0, + "min_text_length": 13, + "average_text_length": 109.439453125, + "max_text_length": 443, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1024 + }, + "1": { + "count": 1024 + } + } + }, + "Norwegian_b": { + "num_samples": 2048, + "number_of_characters": 201596, + "number_texts_intersect_with_train": 0, + "min_text_length": 18, + "average_text_length": 98.435546875, + "max_text_length": 397, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "1": { + "count": 1024 + }, + "0": { + "count": 1024 + } + } + }, + "Norwegian_n": { + "num_samples": 2048, + "number_of_characters": 212059, + "number_texts_intersect_with_train": 0, + "min_text_length": 18, + "average_text_length": 103.54443359375, + 
"max_text_length": 349, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "1": { + "count": 1024 + }, + "0": { + "count": 1024 + } + } + }, + "Swedish": { + "num_samples": 2048, + "number_of_characters": 201470, + "number_texts_intersect_with_train": 0, + "min_text_length": 17, + "average_text_length": 98.3740234375, + "max_text_length": 613, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "1": { + "count": 1024 + }, + "0": { + "count": 1024 + } + } + } + } + }, + "train": { + "num_samples": 4096, + "number_of_characters": 421198, + "number_texts_intersect_with_train": null, + "min_text_length": 14, + "average_text_length": 102.83154296875, + "max_text_length": 402, + "unique_text": 4096, + "unique_labels": 2, + "labels": { + "1": { + "count": 2048 + }, + "0": { + "count": 2048 + } + }, + "hf_subset_descriptive_stats": { + "Danish": { + "num_samples": 1024, + "number_of_characters": 110271, + "number_texts_intersect_with_train": null, + "min_text_length": 14, + "average_text_length": 107.6865234375, + "max_text_length": 392, + "unique_text": 1024, + "unique_labels": 2, + "labels": { + "1": { + "count": 512 + }, + "0": { + "count": 512 + } + } + }, + "Norwegian_b": { + "num_samples": 1024, + "number_of_characters": 97878, + "number_texts_intersect_with_train": null, + "min_text_length": 18, + "average_text_length": 95.583984375, + "max_text_length": 350, + "unique_text": 1024, + "unique_labels": 2, + "labels": { + "1": { + "count": 512 + }, + "0": { + "count": 512 + } + } + }, + "Norwegian_n": { + "num_samples": 1024, + "number_of_characters": 107913, + "number_texts_intersect_with_train": null, + "min_text_length": 20, + "average_text_length": 105.3837890625, + "max_text_length": 402, + "unique_text": 1024, + "unique_labels": 2, + "labels": { + "1": { + "count": 512 + }, + "0": { + "count": 512 + } + } + }, + "Swedish": { + "num_samples": 1024, + "number_of_characters": 105136, + "number_texts_intersect_with_train": null, + 
"min_text_length": 19, + "average_text_length": 102.671875, + "max_text_length": 326, + "unique_text": 1024, + "unique_labels": 2, + "labels": { + "1": { + "count": 512 + }, + "0": { + "count": 512 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SentimentAnalysisHindi.json b/mteb/descriptive_stats/Classification/SentimentAnalysisHindi.json new file mode 100644 index 0000000000..57ffebdd71 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SentimentAnalysisHindi.json @@ -0,0 +1,23 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 166918, + "number_texts_intersect_with_train": null, + "min_text_length": 13, + "average_text_length": 81.5029296875, + "max_text_length": 339, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "2": { + "count": 941 + }, + "1": { + "count": 819 + }, + "0": { + "count": 288 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SinhalaNewsClassification.json b/mteb/descriptive_stats/Classification/SinhalaNewsClassification.json new file mode 100644 index 0000000000..4c5a7afb3a --- /dev/null +++ b/mteb/descriptive_stats/Classification/SinhalaNewsClassification.json @@ -0,0 +1,29 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 306830, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 149.8193359375, + "max_text_length": 1094, + "unique_text": 2015, + "unique_labels": 5, + "labels": { + "2": { + "count": 526 + }, + "3": { + "count": 626 + }, + "0": { + "count": 345 + }, + "4": { + "count": 269 + }, + "1": { + "count": 282 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SinhalaNewsSourceClassification.json b/mteb/descriptive_stats/Classification/SinhalaNewsSourceClassification.json new file mode 100644 index 0000000000..cbac14de47 --- /dev/null +++ 
b/mteb/descriptive_stats/Classification/SinhalaNewsSourceClassification.json @@ -0,0 +1,41 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 115676, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 56.482421875, + "max_text_length": 181, + "unique_text": 2040, + "unique_labels": 9, + "labels": { + "0": { + "count": 254 + }, + "8": { + "count": 255 + }, + "3": { + "count": 129 + }, + "2": { + "count": 253 + }, + "5": { + "count": 254 + }, + "6": { + "count": 253 + }, + "7": { + "count": 252 + }, + "4": { + "count": 255 + }, + "1": { + "count": 143 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SiswatiNewsClassification.json b/mteb/descriptive_stats/Classification/SiswatiNewsClassification.json new file mode 100644 index 0000000000..fb06756173 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SiswatiNewsClassification.json @@ -0,0 +1,50 @@ +{ + "train": { + "num_samples": 80, + "number_of_characters": 28336, + "number_texts_intersect_with_train": null, + "min_text_length": 147, + "average_text_length": 354.2, + "max_text_length": 748, + "unique_text": 80, + "unique_labels": 12, + "labels": { + "arts, culture, entertainment and media": { + "count": 14 + }, + "society": { + "count": 32 + }, + "health": { + "count": 3 + }, + "crime, law and justice": { + "count": 7 + }, + "labour": { + "count": 1 + }, + "disaster, accident and emergency incident": { + "count": 1 + }, + "education": { + "count": 6 + }, + "human interest": { + "count": 9 + }, + "religion and belief": { + "count": 4 + }, + "economy, business and finance": { + "count": 1 + }, + "religion and belief#human interest": { + "count": 1 + }, + "politics": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SlovakMovieReviewSentimentClassification.json b/mteb/descriptive_stats/Classification/SlovakMovieReviewSentimentClassification.json new 
file mode 100644 index 0000000000..37bb43b141 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SlovakMovieReviewSentimentClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 740805, + "number_texts_intersect_with_train": 11, + "min_text_length": 3, + "average_text_length": 361.72119140625, + "max_text_length": 3904, + "unique_text": 2044, + "unique_labels": 2, + "labels": { + "1": { + "count": 829 + }, + "0": { + "count": 1219 + } + } + }, + "train": { + "num_samples": 20832, + "number_of_characters": 7585694, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 364.13661674347156, + "max_text_length": 11278, + "unique_text": 20750, + "unique_labels": 2, + "labels": { + "0": { + "count": 12503 + }, + "1": { + "count": 8329 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SouthAfricanLangClassification.json b/mteb/descriptive_stats/Classification/SouthAfricanLangClassification.json new file mode 100644 index 0000000000..550d3b52e5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SouthAfricanLangClassification.json @@ -0,0 +1,92 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 505523, + "number_texts_intersect_with_train": 347, + "min_text_length": 204, + "average_text_length": 246.83740234375, + "max_text_length": 302, + "unique_text": 2041, + "unique_labels": 11, + "labels": { + "9": { + "count": 186 + }, + "2": { + "count": 186 + }, + "10": { + "count": 186 + }, + "5": { + "count": 187 + }, + "8": { + "count": 186 + }, + "0": { + "count": 186 + }, + "4": { + "count": 186 + }, + "1": { + "count": 187 + }, + "7": { + "count": 186 + }, + "3": { + "count": 186 + }, + "6": { + "count": 186 + } + } + }, + "train": { + "num_samples": 33000, + "number_of_characters": 8180949, + "number_texts_intersect_with_train": null, + "min_text_length": 204, + "average_text_length": 247.90754545454544, + 
"max_text_length": 302, + "unique_text": 29948, + "unique_labels": 11, + "labels": { + "9": { + "count": 3000 + }, + "1": { + "count": 3000 + }, + "3": { + "count": 3000 + }, + "8": { + "count": 3000 + }, + "6": { + "count": 3000 + }, + "2": { + "count": 3000 + }, + "10": { + "count": 3000 + }, + "5": { + "count": 3000 + }, + "7": { + "count": 3000 + }, + "4": { + "count": 3000 + }, + "0": { + "count": 3000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SpanishNewsClassification.json b/mteb/descriptive_stats/Classification/SpanishNewsClassification.json new file mode 100644 index 0000000000..483f3ac530 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SpanishNewsClassification.json @@ -0,0 +1,50 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 8681746, + "number_texts_intersect_with_train": null, + "min_text_length": 504, + "average_text_length": 4239.1337890625, + "max_text_length": 204324, + "unique_text": 2048, + "unique_labels": 12, + "labels": { + "0": { + "count": 170 + }, + "9": { + "count": 171 + }, + "5": { + "count": 170 + }, + "11": { + "count": 171 + }, + "8": { + "count": 171 + }, + "7": { + "count": 170 + }, + "10": { + "count": 170 + }, + "1": { + "count": 171 + }, + "3": { + "count": 171 + }, + "4": { + "count": 171 + }, + "6": { + "count": 171 + }, + "2": { + "count": 171 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SpanishSentimentClassification.json b/mteb/descriptive_stats/Classification/SpanishSentimentClassification.json new file mode 100644 index 0000000000..1dc3004743 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SpanishSentimentClassification.json @@ -0,0 +1,56 @@ +{ + "validation": { + "num_samples": 147, + "number_of_characters": 12499, + "number_texts_intersect_with_train": 0, + "min_text_length": 7, + "average_text_length": 85.02721088435374, + "max_text_length": 426, + "unique_text": 147, + 
"unique_labels": 2, + "labels": { + "1": { + "count": 121 + }, + "0": { + "count": 26 + } + } + }, + "test": { + "num_samples": 296, + "number_of_characters": 26022, + "number_texts_intersect_with_train": 0, + "min_text_length": 5, + "average_text_length": 87.91216216216216, + "max_text_length": 636, + "unique_text": 296, + "unique_labels": 2, + "labels": { + "1": { + "count": 244 + }, + "0": { + "count": 52 + } + } + }, + "train": { + "num_samples": 1029, + "number_of_characters": 87318, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 84.85714285714286, + "max_text_length": 608, + "unique_text": 1029, + "unique_labels": 2, + "labels": { + "1": { + "count": 851 + }, + "0": { + "count": 178 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SwahiliNewsClassification.json b/mteb/descriptive_stats/Classification/SwahiliNewsClassification.json new file mode 100644 index 0000000000..85ae5d824b --- /dev/null +++ b/mteb/descriptive_stats/Classification/SwahiliNewsClassification.json @@ -0,0 +1,32 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 4938689, + "number_texts_intersect_with_train": null, + "min_text_length": 1002, + "average_text_length": 2411.46923828125, + "max_text_length": 17327, + "unique_text": 1955, + "unique_labels": 6, + "labels": { + "3": { + "count": 977 + }, + "2": { + "count": 122 + }, + "0": { + "count": 68 + }, + "1": { + "count": 77 + }, + "5": { + "count": 254 + }, + "4": { + "count": 550 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SweRecClassification.json b/mteb/descriptive_stats/Classification/SweRecClassification.json new file mode 100644 index 0000000000..4b11611d45 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SweRecClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 652973, + "number_texts_intersect_with_train": 0, + 
"min_text_length": 12, + "average_text_length": 318.83447265625, + "max_text_length": 11715, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "negative": { + "count": 830 + }, + "neutral": { + "count": 256 + }, + "positive": { + "count": 962 + } + } + }, + "train": { + "num_samples": 1024, + "number_of_characters": 325359, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 317.7333984375, + "max_text_length": 3865, + "unique_text": 1024, + "unique_labels": 3, + "labels": { + "positive": { + "count": 481 + }, + "negative": { + "count": 415 + }, + "neutral": { + "count": 128 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SwedishSentimentClassification.json b/mteb/descriptive_stats/Classification/SwedishSentimentClassification.json new file mode 100644 index 0000000000..a9dfb276a4 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SwedishSentimentClassification.json @@ -0,0 +1,56 @@ +{ + "validation": { + "num_samples": 2048, + "number_of_characters": 574726, + "number_texts_intersect_with_train": 503, + "min_text_length": 19, + "average_text_length": 280.6279296875, + "max_text_length": 4159, + "unique_text": 2032, + "unique_labels": 2, + "labels": { + "1": { + "count": 1027 + }, + "0": { + "count": 1021 + } + } + }, + "test": { + "num_samples": 2048, + "number_of_characters": 558426, + "number_texts_intersect_with_train": 505, + "min_text_length": 3, + "average_text_length": 272.6689453125, + "max_text_length": 4181, + "unique_text": 2028, + "unique_labels": 2, + "labels": { + "0": { + "count": 1022 + }, + "1": { + "count": 1026 + } + } + }, + "train": { + "num_samples": 62089, + "number_of_characters": 17328750, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 279.0953308959719, + "max_text_length": 4995, + "unique_text": 51988, + "unique_labels": 2, + "labels": { + "0": { + "count": 31091 + }, + "1": { + "count": 
30998 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TNews.json b/mteb/descriptive_stats/Classification/TNews.json new file mode 100644 index 0000000000..06e33f35a3 --- /dev/null +++ b/mteb/descriptive_stats/Classification/TNews.json @@ -0,0 +1,116 @@ +{ + "validation": { + "num_samples": 10000, + "number_of_characters": 222089, + "number_texts_intersect_with_train": 924, + "min_text_length": 4, + "average_text_length": 22.2089, + "max_text_length": 129, + "unique_text": 9765, + "unique_labels": 15, + "labels": { + "2": { + "count": 910 + }, + "9": { + "count": 716 + }, + "4": { + "count": 956 + }, + "8": { + "count": 1089 + }, + "10": { + "count": 693 + }, + "1": { + "count": 736 + }, + "5": { + "count": 378 + }, + "7": { + "count": 646 + }, + "13": { + "count": 494 + }, + "6": { + "count": 791 + }, + "3": { + "count": 767 + }, + "11": { + "count": 905 + }, + "14": { + "count": 659 + }, + "0": { + "count": 215 + }, + "12": { + "count": 45 + } + } + }, + "train": { + "num_samples": 53360, + "number_of_characters": 1180923, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 22.13124062968516, + "max_text_length": 145, + "unique_text": 49726, + "unique_labels": 15, + "labels": { + "7": { + "count": 3437 + }, + "4": { + "count": 5200 + }, + "5": { + "count": 2107 + }, + "10": { + "count": 3368 + }, + "8": { + "count": 5955 + }, + "3": { + "count": 3991 + }, + "14": { + "count": 3390 + }, + "1": { + "count": 4081 + }, + "6": { + "count": 4118 + }, + "0": { + "count": 1111 + }, + "2": { + "count": 4976 + }, + "9": { + "count": 3632 + }, + "13": { + "count": 2886 + }, + "11": { + "count": 4851 + }, + "12": { + "count": 257 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TamilNewsClassification.json b/mteb/descriptive_stats/Classification/TamilNewsClassification.json new file mode 100644 index 0000000000..efb28658ff --- /dev/null +++ 
b/mteb/descriptive_stats/Classification/TamilNewsClassification.json @@ -0,0 +1,62 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 116048, + "number_texts_intersect_with_train": 1027, + "min_text_length": 21, + "average_text_length": 56.6640625, + "max_text_length": 110, + "unique_text": 1971, + "unique_labels": 6, + "labels": { + "4": { + "count": 687 + }, + "2": { + "count": 162 + }, + "1": { + "count": 522 + }, + "5": { + "count": 156 + }, + "0": { + "count": 273 + }, + "3": { + "count": 248 + } + } + }, + "train": { + "num_samples": 14521, + "number_of_characters": 820469, + "number_texts_intersect_with_train": null, + "min_text_length": 15, + "average_text_length": 56.50223813786929, + "max_text_length": 110, + "unique_text": 10575, + "unique_labels": 6, + "labels": { + "5": { + "count": 1099 + }, + "0": { + "count": 2050 + }, + "4": { + "count": 5028 + }, + "1": { + "count": 3443 + }, + "2": { + "count": 1200 + }, + "3": { + "count": 1701 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TelemarketingSalesRuleLegalBenchClassification.json b/mteb/descriptive_stats/Classification/TelemarketingSalesRuleLegalBenchClassification.json new file mode 100644 index 0000000000..1c457d1682 --- /dev/null +++ b/mteb/descriptive_stats/Classification/TelemarketingSalesRuleLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 47, + "number_of_characters": 16370, + "number_texts_intersect_with_train": 0, + "min_text_length": 209, + "average_text_length": 348.29787234042556, + "max_text_length": 637, + "unique_text": 47, + "unique_labels": 2, + "labels": { + "1": { + "count": 28 + }, + "0": { + "count": 19 + } + } + }, + "train": { + "num_samples": 4, + "number_of_characters": 1168, + "number_texts_intersect_with_train": null, + "min_text_length": 274, + "average_text_length": 292.0, + "max_text_length": 327, + "unique_text": 4, + "unique_labels": 2, + "labels": { + "1": { + "count": 2 + }, 
+ "0": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TeluguAndhraJyotiNewsClassification.json b/mteb/descriptive_stats/Classification/TeluguAndhraJyotiNewsClassification.json new file mode 100644 index 0000000000..4eeeed980e --- /dev/null +++ b/mteb/descriptive_stats/Classification/TeluguAndhraJyotiNewsClassification.json @@ -0,0 +1,56 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 2983880, + "number_texts_intersect_with_train": 3, + "min_text_length": 12, + "average_text_length": 1456.97265625, + "max_text_length": 29574, + "unique_text": 2048, + "unique_labels": 5, + "labels": { + "3": { + "count": 791 + }, + "2": { + "count": 610 + }, + "0": { + "count": 309 + }, + "4": { + "count": 207 + }, + "1": { + "count": 131 + } + } + }, + "train": { + "num_samples": 17312, + "number_of_characters": 24851827, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 1435.5260512939, + "max_text_length": 38670, + "unique_text": 17306, + "unique_labels": 5, + "labels": { + "0": { + "count": 2572 + }, + "4": { + "count": 1908 + }, + "3": { + "count": 6628 + }, + "2": { + "count": 5145 + }, + "1": { + "count": 1059 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TenKGnadClassification.json b/mteb/descriptive_stats/Classification/TenKGnadClassification.json new file mode 100644 index 0000000000..c117b5aef5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/TenKGnadClassification.json @@ -0,0 +1,80 @@ +{ + "test": { + "num_samples": 1028, + "number_of_characters": 2700877, + "number_texts_intersect_with_train": 1, + "min_text_length": 67, + "average_text_length": 2627.3122568093386, + "max_text_length": 32600, + "unique_text": 1028, + "unique_labels": 9, + "labels": { + "3": { + "count": 141 + }, + "4": { + "count": 120 + }, + "0": { + "count": 168 + }, + "2": { + "count": 151 + }, + "1": { + "count": 168 + 
}, + "7": { + "count": 57 + }, + "5": { + "count": 102 + }, + "8": { + "count": 54 + }, + "6": { + "count": 67 + } + } + }, + "train": { + "num_samples": 9245, + "number_of_characters": 23923005, + "number_texts_intersect_with_train": null, + "min_text_length": 25, + "average_text_length": 2587.6695511087073, + "max_text_length": 22120, + "unique_text": 9244, + "unique_labels": 9, + "labels": { + "4": { + "count": 1081 + }, + "8": { + "count": 485 + }, + "0": { + "count": 1509 + }, + "3": { + "count": 1270 + }, + "5": { + "count": 913 + }, + "6": { + "count": 601 + }, + "2": { + "count": 1360 + }, + "1": { + "count": 1510 + }, + "7": { + "count": 516 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TextualismToolDictionariesLegalBenchClassification.json b/mteb/descriptive_stats/Classification/TextualismToolDictionariesLegalBenchClassification.json new file mode 100644 index 0000000000..a3d97066f0 --- /dev/null +++ b/mteb/descriptive_stats/Classification/TextualismToolDictionariesLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 107, + "number_of_characters": 100926, + "number_texts_intersect_with_train": 0, + "min_text_length": 103, + "average_text_length": 943.2336448598131, + "max_text_length": 2711, + "unique_text": 107, + "unique_labels": 2, + "labels": { + "1": { + "count": 9 + }, + "0": { + "count": 98 + } + } + }, + "train": { + "num_samples": 4, + "number_of_characters": 4780, + "number_texts_intersect_with_train": null, + "min_text_length": 781, + "average_text_length": 1195.0, + "max_text_length": 1430, + "unique_text": 4, + "unique_labels": 2, + "labels": { + "1": { + "count": 2 + }, + "0": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TextualismToolPlainLegalBenchClassification.json b/mteb/descriptive_stats/Classification/TextualismToolPlainLegalBenchClassification.json new file mode 100644 index 0000000000..968002e18a 
--- /dev/null +++ b/mteb/descriptive_stats/Classification/TextualismToolPlainLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 165, + "number_of_characters": 164666, + "number_texts_intersect_with_train": 0, + "min_text_length": 147, + "average_text_length": 997.9757575757576, + "max_text_length": 3036, + "unique_text": 165, + "unique_labels": 2, + "labels": { + "1": { + "count": 67 + }, + "0": { + "count": 98 + } + } + }, + "train": { + "num_samples": 4, + "number_of_characters": 3276, + "number_texts_intersect_with_train": null, + "min_text_length": 368, + "average_text_length": 819.0, + "max_text_length": 1430, + "unique_text": 4, + "unique_labels": 2, + "labels": { + "1": { + "count": 2 + }, + "0": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ToxicChatClassification.json b/mteb/descriptive_stats/Classification/ToxicChatClassification.json new file mode 100644 index 0000000000..5b5625eaed --- /dev/null +++ b/mteb/descriptive_stats/Classification/ToxicChatClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1164, + "number_of_characters": 217486, + "number_texts_intersect_with_train": 38, + "min_text_length": 7, + "average_text_length": 186.84364261168386, + "max_text_length": 1536, + "unique_text": 1157, + "unique_labels": 2, + "labels": { + "1": { + "count": 146 + }, + "0": { + "count": 1018 + } + } + }, + "train": { + "num_samples": 2801, + "number_of_characters": 568898, + "number_texts_intersect_with_train": null, + "min_text_length": 12, + "average_text_length": 203.10531952873973, + "max_text_length": 1536, + "unique_text": 2764, + "unique_labels": 2, + "labels": { + "0": { + "count": 2417 + }, + "1": { + "count": 384 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/ToxicConversationsClassification.json b/mteb/descriptive_stats/Classification/ToxicConversationsClassification.json new file mode 100644 
index 0000000000..ce05103c84 --- /dev/null +++ b/mteb/descriptive_stats/Classification/ToxicConversationsClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 591942, + "number_texts_intersect_with_train": 3, + "min_text_length": 4, + "average_text_length": 289.0341796875, + "max_text_length": 1000, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1886 + }, + "1": { + "count": 162 + } + } + }, + "train": { + "num_samples": 50000, + "number_of_characters": 14940674, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 298.81348, + "max_text_length": 1000, + "unique_text": 49897, + "unique_labels": 2, + "labels": { + "0": { + "count": 46035 + }, + "1": { + "count": 3965 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TswanaNewsClassification.json b/mteb/descriptive_stats/Classification/TswanaNewsClassification.json new file mode 100644 index 0000000000..8c4576c08a --- /dev/null +++ b/mteb/descriptive_stats/Classification/TswanaNewsClassification.json @@ -0,0 +1,86 @@ +{ + "test": { + "num_samples": 487, + "number_of_characters": 1153959, + "number_texts_intersect_with_train": 0, + "min_text_length": 597, + "average_text_length": 2369.5256673511294, + "max_text_length": 6446, + "unique_text": 487, + "unique_labels": 10, + "labels": { + "7": { + "count": 102 + }, + "5": { + "count": 7 + }, + "9": { + "count": 110 + }, + "1": { + "count": 42 + }, + "4": { + "count": 59 + }, + "6": { + "count": 37 + }, + "8": { + "count": 27 + }, + "3": { + "count": 67 + }, + "0": { + "count": 21 + }, + "2": { + "count": 15 + } + } + }, + "train": { + "num_samples": 3893, + "number_of_characters": 9208125, + "number_texts_intersect_with_train": null, + "min_text_length": 565, + "average_text_length": 2365.3031081428203, + "max_text_length": 8027, + "unique_text": 3893, + "unique_labels": 10, + "labels": { + "7": { + "count": 
668 + }, + "4": { + "count": 476 + }, + "3": { + "count": 556 + }, + "9": { + "count": 957 + }, + "6": { + "count": 312 + }, + "1": { + "count": 364 + }, + "5": { + "count": 68 + }, + "0": { + "count": 172 + }, + "8": { + "count": 168 + }, + "2": { + "count": 152 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TurkicClassification.json b/mteb/descriptive_stats/Classification/TurkicClassification.json new file mode 100644 index 0000000000..6fe977b950 --- /dev/null +++ b/mteb/descriptive_stats/Classification/TurkicClassification.json @@ -0,0 +1,616 @@ +{ + "train": { + "num_samples": 6144, + "number_of_characters": 8711803, + "number_texts_intersect_with_train": null, + "min_text_length": 49, + "average_text_length": 1417.9366861979167, + "max_text_length": 71928, + "unique_text": 6104, + "unique_labels": 80, + "labels": { + "13": { + "count": 223 + }, + "4": { + "count": 259 + }, + "16": { + "count": 37 + }, + "2": { + "count": 969 + }, + "3": { + "count": 549 + }, + "6": { + "count": 366 + }, + "8": { + "count": 1256 + }, + "10": { + "count": 144 + }, + "15": { + "count": 17 + }, + "11": { + "count": 52 + }, + "0": { + "count": 58 + }, + "9": { + "count": 63 + }, + "17": { + "count": 19 + }, + "5": { + "count": 14 + }, + "12": { + "count": 8 + }, + "1": { + "count": 96 + }, + "14": { + "count": 7 + }, + "7": { + "count": 102 + }, + "38": { + "count": 6 + }, + "69": { + "count": 1415 + }, + "34": { + "count": 9 + }, + "71": { + "count": 6 + }, + "54": { + "count": 6 + }, + "43": { + "count": 10 + }, + "52": { + "count": 10 + }, + "21": { + "count": 6 + }, + "20": { + "count": 17 + }, + "76": { + "count": 6 + }, + "25": { + "count": 15 + }, + "57": { + "count": 9 + }, + "23": { + "count": 15 + }, + "78": { + "count": 9 + }, + "56": { + "count": 6 + }, + "61": { + "count": 6 + }, + "74": { + "count": 15 + }, + "60": { + "count": 6 + }, + "49": { + "count": 6 + }, + "26": { + "count": 6 + }, + "44": { + "count": 6 + }, + 
"55": { + "count": 11 + }, + "64": { + "count": 6 + }, + "40": { + "count": 6 + }, + "28": { + "count": 13 + }, + "42": { + "count": 6 + }, + "35": { + "count": 13 + }, + "41": { + "count": 14 + }, + "58": { + "count": 6 + }, + "53": { + "count": 8 + }, + "73": { + "count": 6 + }, + "79": { + "count": 6 + }, + "31": { + "count": 6 + }, + "22": { + "count": 6 + }, + "27": { + "count": 6 + }, + "33": { + "count": 6 + }, + "36": { + "count": 9 + }, + "72": { + "count": 7 + }, + "51": { + "count": 6 + }, + "24": { + "count": 6 + }, + "19": { + "count": 6 + }, + "68": { + "count": 11 + }, + "77": { + "count": 6 + }, + "18": { + "count": 11 + }, + "30": { + "count": 9 + }, + "65": { + "count": 10 + }, + "39": { + "count": 6 + }, + "32": { + "count": 6 + }, + "75": { + "count": 6 + }, + "45": { + "count": 6 + }, + "59": { + "count": 6 + }, + "48": { + "count": 6 + }, + "70": { + "count": 6 + }, + "67": { + "count": 6 + }, + "37": { + "count": 8 + }, + "63": { + "count": 15 + }, + "29": { + "count": 7 + }, + "62": { + "count": 8 + }, + "46": { + "count": 11 + }, + "50": { + "count": 6 + }, + "47": { + "count": 6 + }, + "66": { + "count": 6 + } + }, + "hf_subset_descriptive_stats": { + "ky": { + "num_samples": 2048, + "number_of_characters": 1967702, + "number_texts_intersect_with_train": null, + "min_text_length": 157, + "average_text_length": 960.7919921875, + "max_text_length": 9526, + "unique_text": 2048, + "unique_labels": 18, + "labels": { + "13": { + "count": 213 + }, + "4": { + "count": 34 + }, + "16": { + "count": 31 + }, + "2": { + "count": 834 + }, + "3": { + "count": 321 + }, + "6": { + "count": 107 + }, + "8": { + "count": 256 + }, + "10": { + "count": 138 + }, + "15": { + "count": 8 + }, + "11": { + "count": 46 + }, + "0": { + "count": 30 + }, + "9": { + "count": 9 + }, + "17": { + "count": 8 + }, + "5": { + "count": 2 + }, + "12": { + "count": 2 + }, + "1": { + "count": 4 + }, + "14": { + "count": 1 + }, + "7": { + "count": 4 + } + } + }, + "kk": { + 
"num_samples": 2048, + "number_of_characters": 2629102, + "number_texts_intersect_with_train": null, + "min_text_length": 195, + "average_text_length": 1283.7412109375, + "max_text_length": 10141, + "unique_text": 2046, + "unique_labels": 10, + "labels": { + "8": { + "count": 994 + }, + "6": { + "count": 253 + }, + "4": { + "count": 219 + }, + "7": { + "count": 81 + }, + "1": { + "count": 86 + }, + "3": { + "count": 221 + }, + "2": { + "count": 127 + }, + "0": { + "count": 17 + }, + "5": { + "count": 6 + }, + "9": { + "count": 44 + } + } + }, + "ba": { + "num_samples": 2048, + "number_of_characters": 4114999, + "number_texts_intersect_with_train": null, + "min_text_length": 49, + "average_text_length": 2009.27685546875, + "max_text_length": 71928, + "unique_text": 2010, + "unique_labels": 80, + "labels": { + "38": { + "count": 6 + }, + "69": { + "count": 1415 + }, + "34": { + "count": 9 + }, + "71": { + "count": 6 + }, + "54": { + "count": 6 + }, + "43": { + "count": 10 + }, + "52": { + "count": 10 + }, + "21": { + "count": 6 + }, + "20": { + "count": 17 + }, + "9": { + "count": 10 + }, + "76": { + "count": 6 + }, + "25": { + "count": 15 + }, + "57": { + "count": 9 + }, + "23": { + "count": 15 + }, + "78": { + "count": 9 + }, + "4": { + "count": 6 + }, + "56": { + "count": 6 + }, + "61": { + "count": 6 + }, + "74": { + "count": 15 + }, + "10": { + "count": 6 + }, + "60": { + "count": 6 + }, + "0": { + "count": 11 + }, + "13": { + "count": 10 + }, + "49": { + "count": 6 + }, + "26": { + "count": 6 + }, + "44": { + "count": 6 + }, + "55": { + "count": 11 + }, + "64": { + "count": 6 + }, + "40": { + "count": 6 + }, + "28": { + "count": 13 + }, + "1": { + "count": 6 + }, + "42": { + "count": 6 + }, + "35": { + "count": 13 + }, + "41": { + "count": 14 + }, + "58": { + "count": 6 + }, + "53": { + "count": 8 + }, + "73": { + "count": 6 + }, + "15": { + "count": 9 + }, + "7": { + "count": 17 + }, + "79": { + "count": 6 + }, + "6": { + "count": 6 + }, + "31": { + "count": 6 
+ }, + "22": { + "count": 6 + }, + "27": { + "count": 6 + }, + "2": { + "count": 8 + }, + "33": { + "count": 6 + }, + "3": { + "count": 7 + }, + "36": { + "count": 9 + }, + "5": { + "count": 6 + }, + "72": { + "count": 7 + }, + "51": { + "count": 6 + }, + "24": { + "count": 6 + }, + "19": { + "count": 6 + }, + "14": { + "count": 6 + }, + "68": { + "count": 11 + }, + "12": { + "count": 6 + }, + "77": { + "count": 6 + }, + "18": { + "count": 11 + }, + "30": { + "count": 9 + }, + "65": { + "count": 10 + }, + "39": { + "count": 6 + }, + "32": { + "count": 6 + }, + "75": { + "count": 6 + }, + "16": { + "count": 6 + }, + "45": { + "count": 6 + }, + "59": { + "count": 6 + }, + "8": { + "count": 6 + }, + "48": { + "count": 6 + }, + "70": { + "count": 6 + }, + "67": { + "count": 6 + }, + "17": { + "count": 11 + }, + "37": { + "count": 8 + }, + "63": { + "count": 15 + }, + "29": { + "count": 7 + }, + "62": { + "count": 8 + }, + "46": { + "count": 11 + }, + "11": { + "count": 6 + }, + "50": { + "count": 6 + }, + "47": { + "count": 6 + }, + "66": { + "count": 6 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TurkishMovieSentimentClassification.json b/mteb/descriptive_stats/Classification/TurkishMovieSentimentClassification.json new file mode 100644 index 0000000000..99806369b5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/TurkishMovieSentimentClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 288169, + "number_texts_intersect_with_train": 0, + "min_text_length": 8, + "average_text_length": 140.70751953125, + "max_text_length": 300, + "unique_text": 2048, + "unique_labels": 2, + "labels": { + "1": { + "count": 1024 + }, + "0": { + "count": 1024 + } + } + }, + "train": { + "num_samples": 7972, + "number_of_characters": 1124326, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 141.03437029603612, + "max_text_length": 300, 
+ "unique_text": 7972, + "unique_labels": 2, + "labels": { + "1": { + "count": 3986 + }, + "0": { + "count": 3986 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TurkishProductSentimentClassification.json b/mteb/descriptive_stats/Classification/TurkishProductSentimentClassification.json new file mode 100644 index 0000000000..ac5ad3f06e --- /dev/null +++ b/mteb/descriptive_stats/Classification/TurkishProductSentimentClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 800, + "number_of_characters": 197484, + "number_texts_intersect_with_train": 0, + "min_text_length": 26, + "average_text_length": 246.855, + "max_text_length": 1885, + "unique_text": 800, + "unique_labels": 2, + "labels": { + "1": { + "count": 400 + }, + "0": { + "count": 400 + } + } + }, + "train": { + "num_samples": 4800, + "number_of_characters": 1189220, + "number_texts_intersect_with_train": null, + "min_text_length": 20, + "average_text_length": 247.75416666666666, + "max_text_length": 1910, + "unique_text": 4800, + "unique_labels": 2, + "labels": { + "1": { + "count": 2400 + }, + "0": { + "count": 2400 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TweetEmotionClassification.json b/mteb/descriptive_stats/Classification/TweetEmotionClassification.json new file mode 100644 index 0000000000..e32b68de68 --- /dev/null +++ b/mteb/descriptive_stats/Classification/TweetEmotionClassification.json @@ -0,0 +1,38 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 160916, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 78.572265625, + "max_text_length": 161, + "unique_text": 2047, + "unique_labels": 8, + "labels": { + "2": { + "count": 261 + }, + "6": { + "count": 213 + }, + "3": { + "count": 255 + }, + "5": { + "count": 216 + }, + "7": { + "count": 246 + }, + "1": { + "count": 294 + }, + "4": { + "count": 248 + }, + "0": { + "count": 315 
+ } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TweetSarcasmClassification.json b/mteb/descriptive_stats/Classification/TweetSarcasmClassification.json new file mode 100644 index 0000000000..0f8a9a9dd5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/TweetSarcasmClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 2110, + "number_of_characters": 215552, + "number_texts_intersect_with_train": 130, + "min_text_length": 5, + "average_text_length": 102.15734597156398, + "max_text_length": 156, + "unique_text": 2094, + "unique_labels": 2, + "labels": { + "1": { + "count": 345 + }, + "0": { + "count": 1765 + } + } + }, + "train": { + "num_samples": 8437, + "number_of_characters": 861236, + "number_texts_intersect_with_train": null, + "min_text_length": 8, + "average_text_length": 102.07846390897238, + "max_text_length": 143, + "unique_text": 8178, + "unique_labels": 2, + "labels": { + "0": { + "count": 7100 + }, + "1": { + "count": 1337 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TweetSentimentClassification.json b/mteb/descriptive_stats/Classification/TweetSentimentClassification.json new file mode 100644 index 0000000000..42940543c8 --- /dev/null +++ b/mteb/descriptive_stats/Classification/TweetSentimentClassification.json @@ -0,0 +1,384 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 169117, + "number_texts_intersect_with_train": 0, + "min_text_length": 4, + "average_text_length": 82.57666015625, + "max_text_length": 200, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "1": { + "count": 688 + }, + "2": { + "count": 680 + }, + "0": { + "count": 680 + } + }, + "hf_subset_descriptive_stats": { + "arabic": { + "num_samples": 256, + "number_of_characters": 21637, + "number_texts_intersect_with_train": 0, + "min_text_length": 14, + "average_text_length": 84.51953125, + "max_text_length": 140, + "unique_text": 256, + 
"unique_labels": 3, + "labels": { + "1": { + "count": 86 + }, + "2": { + "count": 85 + }, + "0": { + "count": 85 + } + } + }, + "english": { + "num_samples": 256, + "number_of_characters": 23508, + "number_texts_intersect_with_train": 0, + "min_text_length": 17, + "average_text_length": 91.828125, + "max_text_length": 141, + "unique_text": 256, + "unique_labels": 3, + "labels": { + "1": { + "count": 86 + }, + "2": { + "count": 85 + }, + "0": { + "count": 85 + } + } + }, + "german": { + "num_samples": 256, + "number_of_characters": 19069, + "number_texts_intersect_with_train": 0, + "min_text_length": 9, + "average_text_length": 74.48828125, + "max_text_length": 142, + "unique_text": 256, + "unique_labels": 3, + "labels": { + "1": { + "count": 86 + }, + "2": { + "count": 85 + }, + "0": { + "count": 85 + } + } + }, + "french": { + "num_samples": 256, + "number_of_characters": 24130, + "number_texts_intersect_with_train": 0, + "min_text_length": 23, + "average_text_length": 94.2578125, + "max_text_length": 140, + "unique_text": 256, + "unique_labels": 3, + "labels": { + "1": { + "count": 86 + }, + "2": { + "count": 85 + }, + "0": { + "count": 85 + } + } + }, + "italian": { + "num_samples": 256, + "number_of_characters": 23564, + "number_texts_intersect_with_train": 0, + "min_text_length": 14, + "average_text_length": 92.046875, + "max_text_length": 140, + "unique_text": 256, + "unique_labels": 3, + "labels": { + "1": { + "count": 86 + }, + "2": { + "count": 85 + }, + "0": { + "count": 85 + } + } + }, + "portuguese": { + "num_samples": 256, + "number_of_characters": 18522, + "number_texts_intersect_with_train": 0, + "min_text_length": 24, + "average_text_length": 72.3515625, + "max_text_length": 140, + "unique_text": 256, + "unique_labels": 3, + "labels": { + "1": { + "count": 86 + }, + "2": { + "count": 85 + }, + "0": { + "count": 85 + } + } + }, + "spanish": { + "num_samples": 256, + "number_of_characters": 21014, + "number_texts_intersect_with_train": 0, + 
"min_text_length": 22, + "average_text_length": 82.0859375, + "max_text_length": 137, + "unique_text": 256, + "unique_labels": 3, + "labels": { + "1": { + "count": 86 + }, + "2": { + "count": 85 + }, + "0": { + "count": 85 + } + } + }, + "hindi": { + "num_samples": 256, + "number_of_characters": 17673, + "number_texts_intersect_with_train": 0, + "min_text_length": 4, + "average_text_length": 69.03515625, + "max_text_length": 200, + "unique_text": 256, + "unique_labels": 3, + "labels": { + "1": { + "count": 86 + }, + "2": { + "count": 85 + }, + "0": { + "count": 85 + } + } + } + } + }, + "train": { + "num_samples": 14712, + "number_of_characters": 1277720, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 86.84883088635128, + "max_text_length": 1085, + "unique_text": 14712, + "unique_labels": 3, + "labels": { + "0": { + "count": 4904 + }, + "1": { + "count": 4904 + }, + "2": { + "count": 4904 + } + }, + "hf_subset_descriptive_stats": { + "arabic": { + "num_samples": 1839, + "number_of_characters": 164305, + "number_texts_intersect_with_train": null, + "min_text_length": 11, + "average_text_length": 89.34475258292551, + "max_text_length": 140, + "unique_text": 1839, + "unique_labels": 3, + "labels": { + "0": { + "count": 613 + }, + "1": { + "count": 613 + }, + "2": { + "count": 613 + } + } + }, + "english": { + "num_samples": 1839, + "number_of_characters": 201493, + "number_texts_intersect_with_train": null, + "min_text_length": 29, + "average_text_length": 109.56661228928766, + "max_text_length": 185, + "unique_text": 1839, + "unique_labels": 3, + "labels": { + "0": { + "count": 613 + }, + "1": { + "count": 613 + }, + "2": { + "count": 613 + } + } + }, + "german": { + "num_samples": 1839, + "number_of_characters": 137071, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 74.53561718325177, + "max_text_length": 144, + "unique_text": 1839, + "unique_labels": 3, + "labels": { + 
"0": { + "count": 613 + }, + "1": { + "count": 613 + }, + "2": { + "count": 613 + } + } + }, + "french": { + "num_samples": 1839, + "number_of_characters": 178091, + "number_texts_intersect_with_train": null, + "min_text_length": 16, + "average_text_length": 96.84121805328984, + "max_text_length": 144, + "unique_text": 1839, + "unique_labels": 3, + "labels": { + "0": { + "count": 613 + }, + "1": { + "count": 613 + }, + "2": { + "count": 613 + } + } + }, + "italian": { + "num_samples": 1839, + "number_of_characters": 165828, + "number_texts_intersect_with_train": null, + "min_text_length": 6, + "average_text_length": 90.17292006525285, + "max_text_length": 150, + "unique_text": 1839, + "unique_labels": 3, + "labels": { + "0": { + "count": 613 + }, + "1": { + "count": 613 + }, + "2": { + "count": 613 + } + } + }, + "portuguese": { + "num_samples": 1839, + "number_of_characters": 135761, + "number_texts_intersect_with_train": null, + "min_text_length": 18, + "average_text_length": 73.82327351821642, + "max_text_length": 146, + "unique_text": 1839, + "unique_labels": 3, + "labels": { + "0": { + "count": 613 + }, + "1": { + "count": 613 + }, + "2": { + "count": 613 + } + } + }, + "spanish": { + "num_samples": 1839, + "number_of_characters": 153354, + "number_texts_intersect_with_train": null, + "min_text_length": 19, + "average_text_length": 83.38988580750407, + "max_text_length": 138, + "unique_text": 1839, + "unique_labels": 3, + "labels": { + "0": { + "count": 613 + }, + "1": { + "count": 613 + }, + "2": { + "count": 613 + } + } + }, + "hindi": { + "num_samples": 1839, + "number_of_characters": 141817, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 77.11636759108211, + "max_text_length": 1085, + "unique_text": 1839, + "unique_labels": 3, + "labels": { + "0": { + "count": 613 + }, + "1": { + "count": 613 + }, + "2": { + "count": 613 + } + } + } + } + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Classification/TweetSentimentExtractionClassification.json b/mteb/descriptive_stats/Classification/TweetSentimentExtractionClassification.json new file mode 100644 index 0000000000..6b666d527a --- /dev/null +++ b/mteb/descriptive_stats/Classification/TweetSentimentExtractionClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 3534, + "number_of_characters": 239476, + "number_texts_intersect_with_train": 0, + "min_text_length": 4, + "average_text_length": 67.76344086021506, + "max_text_length": 142, + "unique_text": 3534, + "unique_labels": 3, + "labels": { + "1": { + "count": 1430 + }, + "2": { + "count": 1103 + }, + "0": { + "count": 1001 + } + } + }, + "train": { + "num_samples": 27481, + "number_of_characters": 1877709, + "number_texts_intersect_with_train": null, + "min_text_length": 0, + "average_text_length": 68.32753538808632, + "max_text_length": 141, + "unique_text": 27481, + "unique_labels": 3, + "labels": { + "1": { + "count": 11118 + }, + "0": { + "count": 7781 + }, + "2": { + "count": 8582 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/TweetTopicSingleClassification.json b/mteb/descriptive_stats/Classification/TweetTopicSingleClassification.json new file mode 100644 index 0000000000..7b6809a8d5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/TweetTopicSingleClassification.json @@ -0,0 +1,62 @@ +{ + "test_2021": { + "num_samples": 1693, + "number_of_characters": 283844, + "number_texts_intersect_with_train": 0, + "min_text_length": 48, + "average_text_length": 167.6574128765505, + "max_text_length": 342, + "unique_text": 1692, + "unique_labels": 6, + "labels": { + "4": { + "count": 630 + }, + "3": { + "count": 178 + }, + "2": { + "count": 671 + }, + "1": { + "count": 78 + }, + "0": { + "count": 48 + }, + "5": { + "count": 88 + } + } + }, + "train": { + "num_samples": 1516, + "number_of_characters": 252299, + "number_texts_intersect_with_train": null, + 
"min_text_length": 38, + "average_text_length": 166.4241424802111, + "max_text_length": 326, + "unique_text": 1516, + "unique_labels": 6, + "labels": { + "2": { + "count": 606 + }, + "3": { + "count": 161 + }, + "4": { + "count": 562 + }, + "0": { + "count": 42 + }, + "1": { + "count": 80 + }, + "5": { + "count": 65 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/UCCVCommonLawLegalBenchClassification.json b/mteb/descriptive_stats/Classification/UCCVCommonLawLegalBenchClassification.json new file mode 100644 index 0000000000..93d6f56e94 --- /dev/null +++ b/mteb/descriptive_stats/Classification/UCCVCommonLawLegalBenchClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 94, + "number_of_characters": 10728, + "number_texts_intersect_with_train": 0, + "min_text_length": 81, + "average_text_length": 114.12765957446808, + "max_text_length": 207, + "unique_text": 94, + "unique_labels": 2, + "labels": { + "0": { + "count": 54 + }, + "1": { + "count": 40 + } + } + }, + "train": { + "num_samples": 6, + "number_of_characters": 787, + "number_texts_intersect_with_train": null, + "min_text_length": 89, + "average_text_length": 131.16666666666666, + "max_text_length": 172, + "unique_text": 6, + "unique_labels": 2, + "labels": { + "1": { + "count": 3 + }, + "0": { + "count": 3 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/UkrFormalityClassification.json b/mteb/descriptive_stats/Classification/UkrFormalityClassification.json new file mode 100644 index 0000000000..f751a11fda --- /dev/null +++ b/mteb/descriptive_stats/Classification/UkrFormalityClassification.json @@ -0,0 +1,38 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 106714, + "number_texts_intersect_with_train": null, + "min_text_length": 9, + "average_text_length": 52.1064453125, + "max_text_length": 655, + "unique_text": 2045, + "unique_labels": 2, + "labels": { + "0": { + "count": 1024 + }, + 
"1": { + "count": 1024 + } + } + }, + "test": { + "num_samples": 2048, + "number_of_characters": 108694, + "number_texts_intersect_with_train": 2, + "min_text_length": 6, + "average_text_length": 53.0732421875, + "max_text_length": 533, + "unique_text": 2044, + "unique_labels": 2, + "labels": { + "0": { + "count": 1161 + }, + "1": { + "count": 887 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/UnfairTOSLegalBenchClassification.json b/mteb/descriptive_stats/Classification/UnfairTOSLegalBenchClassification.json new file mode 100644 index 0000000000..ba0cb7ad00 --- /dev/null +++ b/mteb/descriptive_stats/Classification/UnfairTOSLegalBenchClassification.json @@ -0,0 +1,80 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 371930, + "number_texts_intersect_with_train": 0, + "min_text_length": 24, + "average_text_length": 181.6064453125, + "max_text_length": 1677, + "unique_text": 2048, + "unique_labels": 9, + "labels": { + "6": { + "count": 1855 + }, + "0": { + "count": 53 + }, + "8": { + "count": 17 + }, + "5": { + "count": 15 + }, + "7": { + "count": 38 + }, + "4": { + "count": 13 + }, + "1": { + "count": 20 + }, + "2": { + "count": 29 + }, + "3": { + "count": 8 + } + } + }, + "train": { + "num_samples": 9, + "number_of_characters": 1934, + "number_texts_intersect_with_train": null, + "min_text_length": 114, + "average_text_length": 214.88888888888889, + "max_text_length": 396, + "unique_text": 9, + "unique_labels": 9, + "labels": { + "0": { + "count": 1 + }, + "7": { + "count": 1 + }, + "2": { + "count": 1 + }, + "4": { + "count": 1 + }, + "1": { + "count": 1 + }, + "5": { + "count": 1 + }, + "8": { + "count": 1 + }, + "3": { + "count": 1 + }, + "6": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/UrduRomanSentimentClassification.json b/mteb/descriptive_stats/Classification/UrduRomanSentimentClassification.json new file mode 100644 index 
0000000000..3cf71b9843 --- /dev/null +++ b/mteb/descriptive_stats/Classification/UrduRomanSentimentClassification.json @@ -0,0 +1,23 @@ +{ + "train": { + "num_samples": 2048, + "number_of_characters": 140832, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 68.765625, + "max_text_length": 936, + "unique_text": 2038, + "unique_labels": 3, + "labels": { + "2": { + "count": 904 + }, + "1": { + "count": 535 + }, + "0": { + "count": 609 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/VieStudentFeedbackClassification.json b/mteb/descriptive_stats/Classification/VieStudentFeedbackClassification.json new file mode 100644 index 0000000000..aa2f7998d2 --- /dev/null +++ b/mteb/descriptive_stats/Classification/VieStudentFeedbackClassification.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 118851, + "number_texts_intersect_with_train": 0, + "min_text_length": 5, + "average_text_length": 58.03271484375, + "max_text_length": 411, + "unique_text": 2048, + "unique_labels": 3, + "labels": { + "1": { + "count": 108 + }, + "2": { + "count": 1029 + }, + "0": { + "count": 911 + } + } + }, + "train": { + "num_samples": 11426, + "number_of_characters": 675104, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 59.084894101172765, + "max_text_length": 660, + "unique_text": 11425, + "unique_labels": 3, + "labels": { + "2": { + "count": 5643 + }, + "0": { + "count": 5325 + }, + "1": { + "count": 458 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/WRIMEClassification.json b/mteb/descriptive_stats/Classification/WRIMEClassification.json new file mode 100644 index 0000000000..294639d55a --- /dev/null +++ b/mteb/descriptive_stats/Classification/WRIMEClassification.json @@ -0,0 +1,56 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 97440, + 
"number_texts_intersect_with_train": 2, + "min_text_length": 2, + "average_text_length": 47.578125, + "max_text_length": 154, + "unique_text": 2046, + "unique_labels": 5, + "labels": { + "2": { + "count": 589 + }, + "3": { + "count": 657 + }, + "0": { + "count": 484 + }, + "4": { + "count": 214 + }, + "1": { + "count": 104 + } + } + }, + "train": { + "num_samples": 30000, + "number_of_characters": 1147435, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 38.24783333333333, + "max_text_length": 173, + "unique_text": 29867, + "unique_labels": 5, + "labels": { + "0": { + "count": 9162 + }, + "3": { + "count": 7572 + }, + "2": { + "count": 9851 + }, + "4": { + "count": 1924 + }, + "1": { + "count": 1491 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/Waimai.json b/mteb/descriptive_stats/Classification/Waimai.json new file mode 100644 index 0000000000..25d146baad --- /dev/null +++ b/mteb/descriptive_stats/Classification/Waimai.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1000, + "number_of_characters": 25155, + "number_texts_intersect_with_train": 1, + "min_text_length": 5, + "average_text_length": 25.155, + "max_text_length": 268, + "unique_text": 1000, + "unique_labels": 2, + "labels": { + "0": { + "count": 675 + }, + "1": { + "count": 325 + } + } + }, + "train": { + "num_samples": 8000, + "number_of_characters": 200086, + "number_texts_intersect_with_train": null, + "min_text_length": 5, + "average_text_length": 25.01075, + "max_text_length": 463, + "unique_text": 7999, + "unique_labels": 2, + "labels": { + "1": { + "count": 2692 + }, + "0": { + "count": 5308 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/WisesightSentimentClassification.json b/mteb/descriptive_stats/Classification/WisesightSentimentClassification.json new file mode 100644 index 0000000000..bfae1005af --- /dev/null +++ 
b/mteb/descriptive_stats/Classification/WisesightSentimentClassification.json @@ -0,0 +1,50 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 184381, + "number_texts_intersect_with_train": 3, + "min_text_length": 1, + "average_text_length": 90.02978515625, + "max_text_length": 1978, + "unique_text": 2048, + "unique_labels": 4, + "labels": { + "1": { + "count": 1114 + }, + "0": { + "count": 366 + }, + "2": { + "count": 524 + }, + "3": { + "count": 44 + } + } + }, + "train": { + "num_samples": 21628, + "number_of_characters": 1942590, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 89.81829110412428, + "max_text_length": 1997, + "unique_text": 21612, + "unique_labels": 4, + "labels": { + "1": { + "count": 11795 + }, + "0": { + "count": 3866 + }, + "2": { + "count": 5491 + }, + "3": { + "count": 476 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/YahooAnswersTopicsClassification.json b/mteb/descriptive_stats/Classification/YahooAnswersTopicsClassification.json new file mode 100644 index 0000000000..4af24b0ffe --- /dev/null +++ b/mteb/descriptive_stats/Classification/YahooAnswersTopicsClassification.json @@ -0,0 +1,86 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 667365, + "number_texts_intersect_with_train": 8, + "min_text_length": 0, + "average_text_length": 325.86181640625, + "max_text_length": 3984, + "unique_text": 2018, + "unique_labels": 10, + "labels": { + "3": { + "count": 205 + }, + "1": { + "count": 205 + }, + "2": { + "count": 205 + }, + "0": { + "count": 204 + }, + "5": { + "count": 205 + }, + "6": { + "count": 205 + }, + "9": { + "count": 205 + }, + "7": { + "count": 204 + }, + "8": { + "count": 205 + }, + "4": { + "count": 205 + } + } + }, + "train": { + "num_samples": 2048, + "number_of_characters": 678605, + "number_texts_intersect_with_train": null, + "min_text_length": 0, + "average_text_length": 331.35009765625, + 
"max_text_length": 3980, + "unique_text": 2005, + "unique_labels": 10, + "labels": { + "5": { + "count": 205 + }, + "0": { + "count": 204 + }, + "1": { + "count": 205 + }, + "8": { + "count": 205 + }, + "2": { + "count": 205 + }, + "3": { + "count": 205 + }, + "6": { + "count": 205 + }, + "9": { + "count": 205 + }, + "4": { + "count": 205 + }, + "7": { + "count": 204 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/YelpReviewFullClassification.json b/mteb/descriptive_stats/Classification/YelpReviewFullClassification.json new file mode 100644 index 0000000000..629b034a3f --- /dev/null +++ b/mteb/descriptive_stats/Classification/YelpReviewFullClassification.json @@ -0,0 +1,56 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 1515088, + "number_texts_intersect_with_train": 0, + "min_text_length": 1, + "average_text_length": 739.7890625, + "max_text_length": 5041, + "unique_text": 2048, + "unique_labels": 5, + "labels": { + "0": { + "count": 409 + }, + "4": { + "count": 410 + }, + "2": { + "count": 410 + }, + "3": { + "count": 409 + }, + "1": { + "count": 410 + } + } + }, + "train": { + "num_samples": 650000, + "number_of_characters": 476011554, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 732.3254676923077, + "max_text_length": 5637, + "unique_text": 650000, + "unique_labels": 5, + "labels": { + "4": { + "count": 130000 + }, + "1": { + "count": 130000 + }, + "3": { + "count": 130000 + }, + "0": { + "count": 130000 + }, + "2": { + "count": 130000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/YueOpenriceReviewClassification.json b/mteb/descriptive_stats/Classification/YueOpenriceReviewClassification.json new file mode 100644 index 0000000000..a7d2a38fa2 --- /dev/null +++ b/mteb/descriptive_stats/Classification/YueOpenriceReviewClassification.json @@ -0,0 +1,56 @@ +{ + "test": { + "num_samples": 2048, + 
"number_of_characters": 351041, + "number_texts_intersect_with_train": 387, + "min_text_length": 7, + "average_text_length": 171.40673828125, + "max_text_length": 248, + "unique_text": 2043, + "unique_labels": 5, + "labels": { + "3": { + "count": 978 + }, + "1": { + "count": 172 + }, + "0": { + "count": 50 + }, + "2": { + "count": 568 + }, + "4": { + "count": 280 + } + } + }, + "train": { + "num_samples": 55449, + "number_of_characters": 9581743, + "number_texts_intersect_with_train": null, + "min_text_length": 3, + "average_text_length": 172.80280978917563, + "max_text_length": 294, + "unique_text": 50184, + "unique_labels": 5, + "labels": { + "3": { + "count": 26762 + }, + "2": { + "count": 15307 + }, + "1": { + "count": 4725 + }, + "4": { + "count": 7430 + }, + "0": { + "count": 1225 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/AlloProfClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/AlloProfClusteringP2P.v2.json new file mode 100644 index 0000000000..b3d8cc46d1 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/AlloProfClusteringP2P.v2.json @@ -0,0 +1,55 @@ +{ + "test": { + "num_samples": 2556, + "number_of_characters": 9047048, + "min_text_length": 42, + "average_text_length": 3539.533646322379, + "max_text_length": 47972, + "unique_texts": 2128, + "min_labels_per_text": 4, + "average_labels_per_text": 1.0, + "max_labels_per_text": 582, + "unique_labels": 13, + "labels": { + "4": { + "count": 582 + }, + "11": { + "count": 422 + }, + "7": { + "count": 498 + }, + "6": { + "count": 435 + }, + "2": { + "count": 206 + }, + "9": { + "count": 93 + }, + "1": { + "count": 88 + }, + "10": { + "count": 21 + }, + "0": { + "count": 71 + }, + "8": { + "count": 4 + }, + "5": { + "count": 84 + }, + "12": { + "count": 23 + }, + "3": { + "count": 29 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/AlloProfClusteringS2S.v2.json 
b/mteb/descriptive_stats/Clustering/AlloProfClusteringS2S.v2.json new file mode 100644 index 0000000000..df04c1f0d9 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/AlloProfClusteringS2S.v2.json @@ -0,0 +1,55 @@ +{ + "test": { + "num_samples": 2556, + "number_of_characters": 83909, + "min_text_length": 4, + "average_text_length": 32.82824726134585, + "max_text_length": 100, + "unique_texts": 83, + "min_labels_per_text": 4, + "average_labels_per_text": 1.0, + "max_labels_per_text": 582, + "unique_labels": 13, + "labels": { + "4": { + "count": 582 + }, + "11": { + "count": 422 + }, + "7": { + "count": 498 + }, + "6": { + "count": 435 + }, + "2": { + "count": 206 + }, + "9": { + "count": 93 + }, + "1": { + "count": 88 + }, + "10": { + "count": 21 + }, + "0": { + "count": 71 + }, + "8": { + "count": 4 + }, + "5": { + "count": 84 + }, + "12": { + "count": 23 + }, + "3": { + "count": 29 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringS2S.json b/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringS2S.json new file mode 100644 index 0000000000..16850c98cd --- /dev/null +++ b/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringS2S.json @@ -0,0 +1,403 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 150007, + "min_text_length": 8, + "average_text_length": 73.24560546875, + "max_text_length": 204, + "unique_texts": 148, + "min_labels_per_text": 1, + "average_labels_per_text": 1.46337890625, + "max_labels_per_text": 381, + "unique_labels": 129, + "labels": { + "cs": { + "count": 356 + }, + "math": { + "count": 381 + }, + "OC": { + "count": 11 + }, + "hep-lat": { + "count": 13 + }, + "hep": { + "count": 98 + }, + "astro-ph": { + "count": 213 + }, + "eess": { + "count": 76 + }, + "quant-ph": { + "count": 135 + }, + "DC": { + "count": 5 + }, + "cond-mat": { + "count": 274 + }, + "hep-th": { + "count": 66 + }, + "SP": { + "count": 33 + }, + "hep-ph": { + "count": 69 + }, + 
"FA": { + "count": 6 + }, + "nucl-th": { + "count": 17 + }, + "q-bio": { + "count": 80 + }, + "HE": { + "count": 22 + }, + "HC": { + "count": 2 + }, + "stat": { + "count": 60 + }, + "ML": { + "count": 16 + }, + "IV": { + "count": 13 + }, + "stat-mech": { + "count": 47 + }, + "DS": { + "count": 14 + }, + "ME": { + "count": 12 + }, + "CC": { + "count": 2 + }, + "mtrl-sci": { + "count": 22 + }, + "PE": { + "count": 16 + }, + "NT": { + "count": 11 + }, + "SC": { + "count": 6 + }, + "AG": { + "count": 13 + }, + "physics": { + "count": 81 + }, + "ins-det": { + "count": 9 + }, + "GA": { + "count": 18 + }, + "BM": { + "count": 6 + }, + "GN": { + "count": 17 + }, + "NA": { + "count": 15 + }, + "app-ph": { + "count": 7 + }, + "RT": { + "count": 6 + }, + "other": { + "count": 37 + }, + "soft": { + "count": 15 + }, + "CO": { + "count": 33 + }, + "supr-con": { + "count": 21 + }, + "chem-ph": { + "count": 3 + }, + "DM": { + "count": 2 + }, + "MN": { + "count": 12 + }, + "q-fin": { + "count": 27 + }, + "PM": { + "count": 2 + }, + "AP": { + "count": 27 + }, + "gr-qc": { + "count": 15 + }, + "quant-gas": { + "count": 8 + }, + "mes-hall": { + "count": 33 + }, + "IT": { + "count": 19 + }, + "SI": { + "count": 6 + }, + "SG": { + "count": 3 + }, + "bio-ph": { + "count": 2 + }, + "SR": { + "count": 16 + }, + "soc-ph": { + "count": 5 + }, + "hep-ex": { + "count": 15 + }, + "DG": { + "count": 11 + }, + "NE": { + "count": 5 + }, + "CR": { + "count": 6 + }, + "CL": { + "count": 12 + }, + "RM": { + "count": 3 + }, + "econ": { + "count": 17 + }, + "nlin": { + "count": 5 + }, + "PS": { + "count": 1 + }, + "LG": { + "count": 26 + }, + "QA": { + "count": 9 + }, + "str-el": { + "count": 26 + }, + "CV": { + "count": 34 + }, + "MF": { + "count": 6 + }, + "IM": { + "count": 7 + }, + "EM": { + "count": 6 + }, + "TH": { + "count": 5 + }, + "PR": { + "count": 20 + }, + "AT": { + "count": 4 + }, + "OA": { + "count": 4 + }, + "CP": { + "count": 6 + }, + "LO": { + "count": 14 + }, + "flu-dyn": { + 
"count": 6 + }, + "atom-ph": { + "count": 8 + }, + "class-ph": { + "count": 1 + }, + "SY": { + "count": 20 + }, + "IR": { + "count": 1 + }, + "plasm-ph": { + "count": 8 + }, + "CE": { + "count": 2 + }, + "AO": { + "count": 1 + }, + "comp-ph": { + "count": 3 + }, + "optics": { + "count": 12 + }, + "MG": { + "count": 4 + }, + "ST": { + "count": 6 + }, + "nucl-ex": { + "count": 6 + }, + "CY": { + "count": 9 + }, + "ao-ph": { + "count": 2 + }, + "DB": { + "count": 1 + }, + "math-ph": { + "count": 10 + }, + "NC": { + "count": 13 + }, + "GT": { + "count": 11 + }, + "TO": { + "count": 2 + }, + "AI": { + "count": 9 + }, + "NI": { + "count": 2 + }, + "gen-ph": { + "count": 4 + }, + "OT": { + "count": 4 + }, + "SD": { + "count": 2 + }, + "dis-nn": { + "count": 4 + }, + "RO": { + "count": 7 + }, + "CA": { + "count": 6 + }, + "FL": { + "count": 1 + }, + "SE": { + "count": 5 + }, + "EP": { + "count": 9 + }, + "hist-ph": { + "count": 1 + }, + "QM": { + "count": 9 + }, + "ed-ph": { + "count": 2 + }, + "GR": { + "count": 4 + }, + "MS": { + "count": 1 + }, + "CD": { + "count": 1 + }, + "ET": { + "count": 1 + }, + "acc-ph": { + "count": 5 + }, + "AC": { + "count": 2 + }, + "OH": { + "count": 1 + }, + "EC": { + "count": 2 + }, + "DL": { + "count": 1 + }, + "AS": { + "count": 3 + }, + "geo-ph": { + "count": 2 + }, + "CG": { + "count": 3 + }, + "CB": { + "count": 1 + }, + "AR": { + "count": 1 + }, + "TR": { + "count": 1 + }, + "atm-clus": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/BigPatentClustering.v2.json b/mteb/descriptive_stats/Clustering/BigPatentClustering.v2.json new file mode 100644 index 0000000000..04c6051092 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/BigPatentClustering.v2.json @@ -0,0 +1,43 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 65738274, + "min_text_length": 4907, + "average_text_length": 32098.7666015625, + "max_text_length": 3105802, + "unique_texts": 2007, + 
"min_labels_per_text": 17, + "average_labels_per_text": 1.0, + "max_labels_per_text": 439, + "unique_labels": 9, + "labels": { + "4": { + "count": 211 + }, + "7": { + "count": 274 + }, + "8": { + "count": 171 + }, + "3": { + "count": 439 + }, + "6": { + "count": 17 + }, + "5": { + "count": 436 + }, + "1": { + "count": 296 + }, + "2": { + "count": 145 + }, + "0": { + "count": 59 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/BiorxivClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/BiorxivClusteringP2P.v2.json new file mode 100644 index 0000000000..fc8ee44f44 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/BiorxivClusteringP2P.v2.json @@ -0,0 +1,94 @@ +{ + "test": { + "num_samples": 53787, + "number_of_characters": 89499827, + "min_text_length": 139, + "average_text_length": 1663.9676315838399, + "max_text_length": 8912, + "unique_texts": 3239, + "min_labels_per_text": 4, + "average_labels_per_text": 1.0, + "max_labels_per_text": 9821, + "unique_labels": 26, + "labels": { + "bioinformatics": { + "count": 4324 + }, + "evolutionary biology": { + "count": 2548 + }, + "synthetic biology": { + "count": 480 + }, + "genetics": { + "count": 1668 + }, + "plant biology": { + "count": 2005 + }, + "neuroscience": { + "count": 9821 + }, + "zoology": { + "count": 297 + }, + "biophysics": { + "count": 2700 + }, + "developmental biology": { + "count": 1720 + }, + "cell biology": { + "count": 3179 + }, + "bioengineering": { + "count": 1626 + }, + "microbiology": { + "count": 5368 + }, + "ecology": { + "count": 2467 + }, + "biochemistry": { + "count": 2167 + }, + "genomics": { + "count": 2423 + }, + "animal behavior and cognition": { + "count": 816 + }, + "cancer biology": { + "count": 2105 + }, + "immunology": { + "count": 2632 + }, + "scientific communication and education": { + "count": 245 + }, + "systems biology": { + "count": 1078 + }, + "molecular biology": { + "count": 2094 + }, + "physiology": { + "count": 936 + }, + 
"epidemiology": { + "count": 4 + }, + "pharmacology and toxicology": { + "count": 634 + }, + "pathology": { + "count": 364 + }, + "paleontology": { + "count": 86 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.v2.json new file mode 100644 index 0000000000..e35563ad81 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.v2.json @@ -0,0 +1,94 @@ +{ + "test": { + "num_samples": 53787, + "number_of_characters": 5471373, + "min_text_length": 13, + "average_text_length": 101.72296279770205, + "max_text_length": 378, + "unique_texts": 246, + "min_labels_per_text": 4, + "average_labels_per_text": 1.0, + "max_labels_per_text": 9821, + "unique_labels": 26, + "labels": { + "bioinformatics": { + "count": 4324 + }, + "evolutionary biology": { + "count": 2548 + }, + "synthetic biology": { + "count": 480 + }, + "genetics": { + "count": 1668 + }, + "plant biology": { + "count": 2005 + }, + "neuroscience": { + "count": 9821 + }, + "zoology": { + "count": 297 + }, + "biophysics": { + "count": 2700 + }, + "developmental biology": { + "count": 1720 + }, + "cell biology": { + "count": 3179 + }, + "bioengineering": { + "count": 1626 + }, + "microbiology": { + "count": 5368 + }, + "ecology": { + "count": 2467 + }, + "biochemistry": { + "count": 2167 + }, + "genomics": { + "count": 2423 + }, + "animal behavior and cognition": { + "count": 816 + }, + "cancer biology": { + "count": 2105 + }, + "immunology": { + "count": 2632 + }, + "scientific communication and education": { + "count": 245 + }, + "systems biology": { + "count": 1078 + }, + "molecular biology": { + "count": 2094 + }, + "physiology": { + "count": 936 + }, + "epidemiology": { + "count": 4 + }, + "pharmacology and toxicology": { + "count": 634 + }, + "pathology": { + "count": 364 + }, + "paleontology": { + "count": 86 + } + } + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Clustering/BlurbsClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/BlurbsClusteringP2P.v2.json new file mode 100644 index 0000000000..a71f1c6721 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/BlurbsClusteringP2P.v2.json @@ -0,0 +1,121 @@ +{ + "test": { + "num_samples": 18084, + "number_of_characters": 12006204, + "min_text_length": 79, + "average_text_length": 663.9130723291307, + "max_text_length": 4878, + "unique_texts": 1541, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 10480, + "unique_labels": 35, + "labels": { + "Literatur & Unterhaltung": { + "count": 10480 + }, + "Sachbuch": { + "count": 2359 + }, + "Ratgeber": { + "count": 1975 + }, + "Kinderbuch & Jugendbuch": { + "count": 1461 + }, + "Ganzheitliches Bewusstsein": { + "count": 799 + }, + "Architektur & Garten": { + "count": 175 + }, + "K\u00fcnste": { + "count": 175 + }, + "Glaube & Ethik": { + "count": 608 + }, + "Science Fiction": { + "count": 3 + }, + "Fantasy": { + "count": 3 + }, + "Frauenunterhaltung": { + "count": 4 + }, + "Romane & Erz\u00e4hlungen": { + "count": 4 + }, + "Krimi & Thriller": { + "count": 9 + }, + "Historische Romane": { + "count": 2 + }, + "Literatur & Unterhaltung Satire": { + "count": 1 + }, + "Klassiker & Lyrik": { + "count": 1 + }, + "Lebenshilfe & Psychologie": { + "count": 1 + }, + "Freizeit & Hobby": { + "count": 2 + }, + "Essen & Trinken": { + "count": 1 + }, + "Gesundheit & Ern\u00e4hrung": { + "count": 1 + }, + "Sachbuch Philosophie": { + "count": 1 + }, + "(Zeit-) Geschichte": { + "count": 3 + }, + "Biographien & Autobiographien": { + "count": 1 + }, + "Glaube und Grenzerfahrungen": { + "count": 1 + }, + "Politik & Gesellschaft": { + "count": 1 + }, + "Gemeindearbeit": { + "count": 2 + }, + "Abenteuer": { + "count": 1 + }, + "Krimis und Thriller": { + "count": 2 + }, + "Liebe, Beziehung und Freundschaft": { + "count": 1 + }, + "Fantasy und Science Fiction": { + "count": 1 + }, + "Echtes 
Leben, Realistischer Roman": { + "count": 1 + }, + "Energieheilung": { + "count": 2 + }, + "Ganzheitlich Leben": { + "count": 1 + }, + "Architektur": { + "count": 1 + }, + "Handwerk Farbe": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/BlurbsClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/BlurbsClusteringS2S.v2.json new file mode 100644 index 0000000000..1a0e66dd26 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/BlurbsClusteringS2S.v2.json @@ -0,0 +1,301 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 47170, + "min_text_length": 3, + "average_text_length": 23.0322265625, + "max_text_length": 115, + "unique_texts": 80, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 484, + "unique_labels": 95, + "labels": { + "24": { + "count": 102 + }, + "89": { + "count": 95 + }, + "67": { + "count": 5 + }, + "47": { + "count": 185 + }, + "58": { + "count": 484 + }, + "81": { + "count": 155 + }, + "14": { + "count": 18 + }, + "44": { + "count": 68 + }, + "73": { + "count": 92 + }, + "32": { + "count": 11 + }, + "28": { + "count": 38 + }, + "42": { + "count": 7 + }, + "20": { + "count": 57 + }, + "21": { + "count": 18 + }, + "55": { + "count": 21 + }, + "100": { + "count": 8 + }, + "83": { + "count": 109 + }, + "56": { + "count": 18 + }, + "70": { + "count": 27 + }, + "35": { + "count": 28 + }, + "16": { + "count": 16 + }, + "85": { + "count": 11 + }, + "34": { + "count": 21 + }, + "27": { + "count": 14 + }, + "57": { + "count": 5 + }, + "68": { + "count": 14 + }, + "8": { + "count": 22 + }, + "18": { + "count": 25 + }, + "52": { + "count": 7 + }, + "48": { + "count": 9 + }, + "49": { + "count": 6 + }, + "39": { + "count": 33 + }, + "82": { + "count": 12 + }, + "43": { + "count": 10 + }, + "46": { + "count": 6 + }, + "95": { + "count": 4 + }, + "45": { + "count": 11 + }, + "94": { + "count": 18 + }, + "74": { + "count": 10 + }, + "0": { + "count": 21 
+ }, + "22": { + "count": 9 + }, + "99": { + "count": 3 + }, + "65": { + "count": 12 + }, + "91": { + "count": 3 + }, + "15": { + "count": 5 + }, + "53": { + "count": 8 + }, + "41": { + "count": 11 + }, + "59": { + "count": 4 + }, + "80": { + "count": 11 + }, + "86": { + "count": 3 + }, + "84": { + "count": 3 + }, + "26": { + "count": 3 + }, + "98": { + "count": 3 + }, + "50": { + "count": 6 + }, + "88": { + "count": 2 + }, + "1": { + "count": 14 + }, + "23": { + "count": 3 + }, + "71": { + "count": 10 + }, + "25": { + "count": 18 + }, + "9": { + "count": 2 + }, + "90": { + "count": 8 + }, + "4": { + "count": 8 + }, + "97": { + "count": 6 + }, + "10": { + "count": 2 + }, + "96": { + "count": 9 + }, + "36": { + "count": 2 + }, + "13": { + "count": 7 + }, + "60": { + "count": 1 + }, + "66": { + "count": 2 + }, + "5": { + "count": 2 + }, + "29": { + "count": 4 + }, + "33": { + "count": 1 + }, + "77": { + "count": 1 + }, + "63": { + "count": 1 + }, + "62": { + "count": 2 + }, + "2": { + "count": 7 + }, + "12": { + "count": 1 + }, + "3": { + "count": 4 + }, + "78": { + "count": 1 + }, + "93": { + "count": 2 + }, + "6": { + "count": 3 + }, + "31": { + "count": 1 + }, + "30": { + "count": 1 + }, + "19": { + "count": 2 + }, + "72": { + "count": 1 + }, + "40": { + "count": 1 + }, + "69": { + "count": 3 + }, + "54": { + "count": 1 + }, + "51": { + "count": 3 + }, + "87": { + "count": 1 + }, + "92": { + "count": 2 + }, + "64": { + "count": 1 + }, + "17": { + "count": 1 + }, + "11": { + "count": 1 + }, + "37": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/CLSClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/CLSClusteringP2P.v2.json new file mode 100644 index 0000000000..872aa42ac7 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/CLSClusteringP2P.v2.json @@ -0,0 +1,55 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 435264, + "min_text_length": 24, + "average_text_length": 212.53125, + 
"max_text_length": 1507, + "unique_texts": 448, + "min_labels_per_text": 18, + "average_labels_per_text": 1.0, + "max_labels_per_text": 920, + "unique_labels": 13, + "labels": { + "1": { + "count": 202 + }, + "5": { + "count": 920 + }, + "10": { + "count": 122 + }, + "9": { + "count": 184 + }, + "2": { + "count": 191 + }, + "12": { + "count": 28 + }, + "8": { + "count": 110 + }, + "11": { + "count": 59 + }, + "4": { + "count": 39 + }, + "6": { + "count": 87 + }, + "7": { + "count": 55 + }, + "3": { + "count": 33 + }, + "0": { + "count": 18 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/CLSClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/CLSClusteringS2S.v2.json new file mode 100644 index 0000000000..b0108b39c1 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/CLSClusteringS2S.v2.json @@ -0,0 +1,55 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 39301, + "min_text_length": 2, + "average_text_length": 19.18994140625, + "max_text_length": 50, + "unique_texts": 45, + "min_labels_per_text": 18, + "average_labels_per_text": 1.0, + "max_labels_per_text": 920, + "unique_labels": 13, + "labels": { + "1": { + "count": 202 + }, + "5": { + "count": 920 + }, + "10": { + "count": 122 + }, + "9": { + "count": 184 + }, + "2": { + "count": 191 + }, + "12": { + "count": 28 + }, + "8": { + "count": 110 + }, + "11": { + "count": 59 + }, + "4": { + "count": 39 + }, + "6": { + "count": 87 + }, + "7": { + "count": 55 + }, + "3": { + "count": 33 + }, + "0": { + "count": 18 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/EightTagsClustering.v2.json b/mteb/descriptive_stats/Clustering/EightTagsClustering.v2.json new file mode 100644 index 0000000000..102cc423c3 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/EightTagsClustering.v2.json @@ -0,0 +1,40 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 161234, + "min_text_length": 31, + 
"average_text_length": 78.7275390625, + "max_text_length": 303, + "unique_texts": 200, + "min_labels_per_text": 202, + "average_labels_per_text": 1.0, + "max_labels_per_text": 297, + "unique_labels": 8, + "labels": { + "1": { + "count": 243 + }, + "5": { + "count": 256 + }, + "2": { + "count": 297 + }, + "7": { + "count": 266 + }, + "3": { + "count": 260 + }, + "0": { + "count": 260 + }, + "4": { + "count": 202 + }, + "6": { + "count": 264 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/GeoreviewClusteringP2P.json b/mteb/descriptive_stats/Clustering/GeoreviewClusteringP2P.json new file mode 100644 index 0000000000..c294a6094a --- /dev/null +++ b/mteb/descriptive_stats/Clustering/GeoreviewClusteringP2P.json @@ -0,0 +1,166 @@ +{ + "test": { + "num_samples": 2000, + "number_of_characters": 768911, + "min_text_length": 201, + "average_text_length": 384.4555, + "max_text_length": 993, + "unique_texts": 545, + "min_labels_per_text": 40, + "average_labels_per_text": 1.0, + "max_labels_per_text": 40, + "unique_labels": 50, + "labels": { + "\u0422\u043e\u0440\u0433\u043e\u0432\u044b\u0439 \u0446\u0435\u043d\u0442\u0440": { + "count": 40 + }, + "\u0411\u0430\u043d\u044f": { + "count": 40 + }, + "\u041f\u0430\u0440\u043a \u043a\u0443\u043b\u044c\u0442\u0443\u0440\u044b \u0438 \u043e\u0442\u0434\u044b\u0445\u0430": { + "count": 40 + }, + "\u041a\u043e\u043d\u0434\u0438\u0442\u0435\u0440\u0441\u043a\u0430\u044f": { + "count": 40 + }, + "\u041f\u0430\u0440\u0438\u043a\u043c\u0430\u0445\u0435\u0440\u0441\u043a\u0430\u044f": { + "count": 40 + }, + "\u041d\u043e\u0433\u0442\u0435\u0432\u0430\u044f \u0441\u0442\u0443\u0434\u0438\u044f": { + "count": 40 + }, + "\u041a\u043e\u0441\u043c\u0435\u0442\u043e\u043b\u043e\u0433\u0438\u044f": { + "count": 40 + }, + "\u0421\u0443\u043f\u0435\u0440\u043c\u0430\u0440\u043a\u0435\u0442": { + "count": 40 + }, + "\u0410\u0432\u0442\u043e\u0441\u0430\u043b\u043e\u043d": { + "count": 40 + }, + 
"\u041c\u0430\u0433\u0430\u0437\u0438\u043d \u0446\u0432\u0435\u0442\u043e\u0432": { + "count": 40 + }, + "\u0412\u0435\u0442\u0435\u0440\u0438\u043d\u0430\u0440\u043d\u0430\u044f \u043a\u043b\u0438\u043d\u0438\u043a\u0430": { + "count": 40 + }, + "\u0411\u0430\u0440, \u043f\u0430\u0431": { + "count": 40 + }, + "\u0411\u0430\u0437\u0430, \u0434\u043e\u043c \u043e\u0442\u0434\u044b\u0445\u0430": { + "count": 40 + }, + "\u0414\u043e\u0441\u0442\u043e\u043f\u0440\u0438\u043c\u0435\u0447\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c": { + "count": 40 + }, + "\u0410\u0432\u0442\u043e\u0448\u043a\u043e\u043b\u0430": { + "count": 40 + }, + "\u0421\u0442\u043e\u043b\u043e\u0432\u0430\u044f": { + "count": 40 + }, + "\u0414\u0435\u0442\u0441\u043a\u0438\u0439 \u0441\u0430\u0434, \u044f\u0441\u043b\u0438": { + "count": 40 + }, + "\u042d\u043f\u0438\u043b\u044f\u0446\u0438\u044f": { + "count": 40 + }, + "\u0424\u0438\u0442\u043d\u0435\u0441-\u043a\u043b\u0443\u0431": { + "count": 40 + }, + "\u0410\u0432\u0442\u043e\u0441\u0435\u0440\u0432\u0438\u0441, \u0430\u0432\u0442\u043e\u0442\u0435\u0445\u0446\u0435\u043d\u0442\u0440": { + "count": 40 + }, + "\u041c\u0435\u0434\u0446\u0435\u043d\u0442\u0440, \u043a\u043b\u0438\u043d\u0438\u043a\u0430": { + "count": 40 + }, + "\u041a\u043e\u0444\u0435\u0439\u043d\u044f": { + "count": 40 + }, + "\u0416\u0438\u043b\u043e\u0439 \u043a\u043e\u043c\u043f\u043b\u0435\u043a\u0441": { + "count": 40 + }, + "\u0410\u0432\u0442\u043e\u043c\u043e\u0439\u043a\u0430": { + "count": 40 + }, + "\u0413\u043e\u0441\u0442\u0438\u043d\u0438\u0446\u0430": { + "count": 40 + }, + "\u0411\u0430\u0441\u0441\u0435\u0439\u043d": { + "count": 40 + }, + "\u0421\u0430\u043d\u0430\u0442\u043e\u0440\u0438\u0439": { + "count": 40 + }, + "\u041c\u0430\u0433\u0430\u0437\u0438\u043d \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u043e\u0432": { + "count": 40 + }, + "\u0421\u0430\u043b\u043e\u043d \u043a\u0440\u0430\u0441\u043e\u0442\u044b": { + "count": 40 + }, + 
"\u041a\u0430\u043b\u044c\u044f\u043d-\u0431\u0430\u0440": { + "count": 40 + }, + "\u0411\u043e\u043b\u044c\u043d\u0438\u0446\u0430 \u0434\u043b\u044f \u0432\u0437\u0440\u043e\u0441\u043b\u044b\u0445": { + "count": 40 + }, + "\u0420\u0435\u0441\u0442\u043e\u0440\u0430\u043d": { + "count": 40 + }, + "\u041f\u043b\u044f\u0436": { + "count": 40 + }, + "\u0410\u043f\u0442\u0435\u043a\u0430": { + "count": 40 + }, + "\u0421\u0442\u043e\u043c\u0430\u0442\u043e\u043b\u043e\u0433\u0438\u0447\u0435\u0441\u043a\u0430\u044f \u043a\u043b\u0438\u043d\u0438\u043a\u0430": { + "count": 40 + }, + "\u041f\u0443\u043d\u043a\u0442 \u0432\u044b\u0434\u0430\u0447\u0438": { + "count": 40 + }, + "\u041f\u0435\u043a\u0430\u0440\u043d\u044f": { + "count": 40 + }, + "\u0422\u0435\u0430\u0442\u0440": { + "count": 40 + }, + "\u041a\u0430\u0444\u0435": { + "count": 40 + }, + "\u0421\u0443\u0448\u0438-\u0431\u0430\u0440": { + "count": 40 + }, + "\u0411\u0430\u043d\u043a": { + "count": 40 + }, + "\u0422\u0443\u0440\u0431\u0430\u0437\u0430": { + "count": 40 + }, + "\u041c\u0443\u0437\u0435\u0439": { + "count": 40 + }, + "\u0411\u0430\u0440\u0431\u0435\u0440\u0448\u043e\u043f": { + "count": 40 + }, + "\u0411\u044b\u0441\u0442\u0440\u043e\u0435 \u043f\u0438\u0442\u0430\u043d\u0438\u0435": { + "count": 40 + }, + "\u041f\u0438\u0446\u0446\u0435\u0440\u0438\u044f": { + "count": 40 + }, + "\u041c\u0430\u0433\u0430\u0437\u0438\u043d \u043e\u0434\u0435\u0436\u0434\u044b": { + "count": 40 + }, + "\u041c\u0430\u0433\u0430\u0437\u0438\u043d \u044d\u043b\u0435\u043a\u0442\u0440\u043e\u043d\u0438\u043a\u0438": { + "count": 40 + }, + "\u0414\u043e\u0441\u0442\u0430\u0432\u043a\u0430 \u0435\u0434\u044b \u0438 \u043e\u0431\u0435\u0434\u043e\u0432": { + "count": 40 + }, + "\u0410\u0433\u0435\u043d\u0442\u0441\u0442\u0432\u043e \u043d\u0435\u0434\u0432\u0438\u0436\u0438\u043c\u043e\u0441\u0442\u0438": { + "count": 40 + } + } + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Clustering/HALClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/HALClusteringS2S.v2.json new file mode 100644 index 0000000000..247ba24a7f --- /dev/null +++ b/mteb/descriptive_stats/Clustering/HALClusteringS2S.v2.json @@ -0,0 +1,49 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 177644, + "min_text_length": 5, + "average_text_length": 86.740234375, + "max_text_length": 442, + "unique_texts": 217, + "min_labels_per_text": 4, + "average_labels_per_text": 1.0, + "max_labels_per_text": 1410, + "unique_labels": 11, + "labels": { + "9": { + "count": 1410 + }, + "5": { + "count": 16 + }, + "8": { + "count": 265 + }, + "10": { + "count": 87 + }, + "6": { + "count": 68 + }, + "1": { + "count": 83 + }, + "7": { + "count": 28 + }, + "2": { + "count": 21 + }, + "0": { + "count": 18 + }, + "3": { + "count": 48 + }, + "4": { + "count": 4 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/IndicReviewsClusteringP2P.json b/mteb/descriptive_stats/Clustering/IndicReviewsClusteringP2P.json new file mode 100644 index 0000000000..d7fa8073ae --- /dev/null +++ b/mteb/descriptive_stats/Clustering/IndicReviewsClusteringP2P.json @@ -0,0 +1,830 @@ +{ + "test": { + "num_samples": 65, + "number_of_characters": 13000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 12972, + "min_labels_per_text": 117, + "average_labels_per_text": 200.0, + "max_labels_per_text": 2834, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 2834 + }, + "Health/Wellness": { + "count": 1066 + }, + "Office": { + "count": 182 + }, + "Hobbies": { + "count": 1716 + }, + "Building Material": { + "count": 117 + }, + "Food": { + "count": 221 + }, + "SPORTS/GAMES": { + "count": 208 + }, + "Education": { + "count": 1196 + }, + "Pets": { + "count": 1664 + }, + "Home": { + "count": 1872 + }, + "Travel": { + "count": 130 + }, + "Transportation": { + "count": 637 + }, + 
"Baby Products": { + "count": 507 + }, + "Vehicles": { + "count": 117 + }, + "Fashion": { + "count": 533 + } + }, + "hf_subset_descriptive_stats": { + "as": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 997, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "bd": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "bn": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 
200, + "unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "gu": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 997, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "hi": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + 
"count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "kn": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "ml": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + 
"count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "mr": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "or": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "pa": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + 
"unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "ta": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "te": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + 
"count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + }, + "ur": { + "num_samples": 5, + "number_of_characters": 1000, + "min_text_length": 200, + "average_text_length": 200.0, + "max_text_length": 200, + "unique_texts": 998, + "min_labels_per_text": 9, + "average_labels_per_text": 200.0, + "max_labels_per_text": 218, + "unique_labels": 15, + "labels": { + "Entertainment": { + "count": 218 + }, + "Health/Wellness": { + "count": 82 + }, + "Office": { + "count": 14 + }, + "Hobbies": { + "count": 132 + }, + "Building Material": { + "count": 9 + }, + "Food": { + "count": 17 + }, + "SPORTS/GAMES": { + "count": 16 + }, + "Education": { + "count": 92 + }, + "Pets": { + "count": 128 + }, + "Home": { + "count": 144 + }, + "Travel": { + "count": 10 + }, + "Transportation": { + "count": 49 + }, + "Baby Products": { + "count": 39 + }, + "Vehicles": { + "count": 9 + }, + "Fashion": { + "count": 41 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/LivedoorNewsClustering.v2.json b/mteb/descriptive_stats/Clustering/LivedoorNewsClustering.v2.json new file mode 100644 index 0000000000..aed178c95d --- /dev/null +++ b/mteb/descriptive_stats/Clustering/LivedoorNewsClustering.v2.json @@ -0,0 +1,43 @@ +{ + "test": { + "num_samples": 1106, + "number_of_characters": 1161698, + "min_text_length": 33, + "average_text_length": 1050.359855334539, + "max_text_length": 7608, + "unique_texts": 836, + "min_labels_per_text": 71, + "average_labels_per_text": 1.0, + "max_labels_per_text": 156, + "unique_labels": 9, + "labels": { + "5": { + "count": 121 + }, + "1": { + "count": 119 + }, + "2": { + 
"count": 156 + }, + "0": { + "count": 134 + }, + "8": { + "count": 107 + }, + "6": { + "count": 131 + }, + "4": { + "count": 129 + }, + "3": { + "count": 71 + }, + "7": { + "count": 138 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.v2.json new file mode 100644 index 0000000000..784b2968ef --- /dev/null +++ b/mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.v2.json @@ -0,0 +1,773 @@ +{ + "test": { + "num_samples": 6900, + "number_of_characters": 31106070, + "min_text_length": 296, + "average_text_length": 4508.126086956522, + "max_text_length": 135984, + "unique_texts": 4373, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 1040, + "unique_labels": 80, + "labels": { + "5": { + "count": 285 + }, + "11": { + "count": 453 + }, + "9": { + "count": 469 + }, + "7": { + "count": 1040 + }, + "4": { + "count": 65 + }, + "8": { + "count": 160 + }, + "2": { + "count": 132 + }, + "3": { + "count": 218 + }, + "6": { + "count": 137 + }, + "1": { + "count": 349 + }, + "10": { + "count": 64 + }, + "0": { + "count": 154 + }, + "33": { + "count": 160 + }, + "24": { + "count": 179 + }, + "19": { + "count": 350 + }, + "35": { + "count": 367 + }, + "59": { + "count": 53 + }, + "69": { + "count": 15 + }, + "15": { + "count": 61 + }, + "60": { + "count": 18 + }, + "56": { + "count": 2 + }, + "65": { + "count": 183 + }, + "40": { + "count": 7 + }, + "58": { + "count": 112 + }, + "55": { + "count": 86 + }, + "47": { + "count": 66 + }, + "23": { + "count": 27 + }, + "25": { + "count": 6 + }, + "66": { + "count": 109 + }, + "18": { + "count": 22 + }, + "41": { + "count": 61 + }, + "63": { + "count": 41 + }, + "29": { + "count": 66 + }, + "30": { + "count": 24 + }, + "32": { + "count": 8 + }, + "54": { + "count": 56 + }, + "49": { + "count": 82 + }, + "44": { + "count": 140 + }, + "46": { + "count": 38 + }, + "57": { + "count": 
239 + }, + "48": { + "count": 49 + }, + "17": { + "count": 4 + }, + "13": { + "count": 3 + }, + "61": { + "count": 8 + }, + "68": { + "count": 22 + }, + "37": { + "count": 3 + }, + "50": { + "count": 10 + }, + "26": { + "count": 13 + }, + "43": { + "count": 4 + }, + "16": { + "count": 98 + }, + "64": { + "count": 21 + }, + "51": { + "count": 2 + }, + "38": { + "count": 2 + }, + "67": { + "count": 100 + }, + "70": { + "count": 11 + }, + "42": { + "count": 6 + }, + "14": { + "count": 3 + }, + "31": { + "count": 33 + }, + "12": { + "count": 1 + }, + "36": { + "count": 2 + }, + "27": { + "count": 3 + }, + "53": { + "count": 8 + }, + "74": { + "count": 181 + }, + "22": { + "count": 2 + }, + "76": { + "count": 102 + }, + "72": { + "count": 12 + }, + "71": { + "count": 8 + }, + "52": { + "count": 17 + }, + "28": { + "count": 4 + }, + "79": { + "count": 23 + }, + "34": { + "count": 25 + }, + "62": { + "count": 2 + }, + "20": { + "count": 2 + }, + "75": { + "count": 2 + }, + "73": { + "count": 4 + }, + "21": { + "count": 1 + }, + "39": { + "count": 1 + }, + "45": { + "count": 1 + }, + "77": { + "count": 1 + }, + "78": { + "count": 2 + } + }, + "hf_subset_descriptive_stats": { + "de": { + "num_samples": 2048, + "number_of_characters": 8267329, + "min_text_length": 693, + "average_text_length": 4036.78173828125, + "max_text_length": 26023, + "unique_texts": 1731, + "min_labels_per_text": 20, + "average_labels_per_text": 1.0, + "max_labels_per_text": 692, + "unique_labels": 12, + "labels": { + "5": { + "count": 106 + }, + "11": { + "count": 450 + }, + "9": { + "count": 467 + }, + "7": { + "count": 692 + }, + "4": { + "count": 24 + }, + "8": { + "count": 42 + }, + "2": { + "count": 45 + }, + "3": { + "count": 20 + }, + "6": { + "count": 103 + }, + "1": { + "count": 24 + }, + "10": { + "count": 40 + }, + "0": { + "count": 35 + } + } + }, + "fr": { + "num_samples": 2048, + "number_of_characters": 7885898, + "min_text_length": 359, + "average_text_length": 3850.5361328125, + 
"max_text_length": 50130, + "unique_texts": 1514, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 333, + "unique_labels": 61, + "labels": { + "33": { + "count": 160 + }, + "24": { + "count": 178 + }, + "19": { + "count": 179 + }, + "7": { + "count": 5 + }, + "35": { + "count": 333 + }, + "59": { + "count": 1 + }, + "69": { + "count": 8 + }, + "1": { + "count": 158 + }, + "15": { + "count": 1 + }, + "60": { + "count": 7 + }, + "56": { + "count": 1 + }, + "2": { + "count": 28 + }, + "65": { + "count": 154 + }, + "40": { + "count": 1 + }, + "58": { + "count": 111 + }, + "55": { + "count": 85 + }, + "47": { + "count": 58 + }, + "23": { + "count": 24 + }, + "10": { + "count": 23 + }, + "25": { + "count": 5 + }, + "66": { + "count": 107 + }, + "18": { + "count": 17 + }, + "41": { + "count": 36 + }, + "63": { + "count": 41 + }, + "29": { + "count": 46 + }, + "30": { + "count": 20 + }, + "32": { + "count": 5 + }, + "54": { + "count": 54 + }, + "49": { + "count": 37 + }, + "44": { + "count": 28 + }, + "46": { + "count": 5 + }, + "6": { + "count": 19 + }, + "57": { + "count": 11 + }, + "48": { + "count": 21 + }, + "0": { + "count": 10 + }, + "9": { + "count": 1 + }, + "17": { + "count": 4 + }, + "13": { + "count": 1 + }, + "61": { + "count": 4 + }, + "68": { + "count": 2 + }, + "37": { + "count": 3 + }, + "50": { + "count": 4 + }, + "26": { + "count": 13 + }, + "43": { + "count": 3 + }, + "11": { + "count": 2 + }, + "8": { + "count": 6 + }, + "5": { + "count": 7 + }, + "16": { + "count": 2 + }, + "64": { + "count": 3 + }, + "51": { + "count": 2 + }, + "38": { + "count": 1 + }, + "67": { + "count": 1 + }, + "70": { + "count": 2 + }, + "42": { + "count": 1 + }, + "14": { + "count": 3 + }, + "31": { + "count": 1 + }, + "4": { + "count": 1 + }, + "12": { + "count": 1 + }, + "36": { + "count": 1 + }, + "27": { + "count": 1 + }, + "53": { + "count": 1 + } + } + }, + "ru": { + "num_samples": 756, + "number_of_characters": 5174721, + 
"min_text_length": 444, + "average_text_length": 6844.869047619048, + "max_text_length": 135984, + "unique_texts": 720, + "min_labels_per_text": 15, + "average_labels_per_text": 1.0, + "max_labels_per_text": 203, + "unique_labels": 9, + "labels": { + "6": { + "count": 15 + }, + "5": { + "count": 161 + }, + "4": { + "count": 38 + }, + "0": { + "count": 108 + }, + "7": { + "count": 203 + }, + "2": { + "count": 51 + }, + "1": { + "count": 43 + }, + "8": { + "count": 82 + }, + "3": { + "count": 55 + } + } + }, + "es": { + "num_samples": 2048, + "number_of_characters": 9778122, + "min_text_length": 296, + "average_text_length": 4774.4736328125, + "max_text_length": 85821, + "unique_texts": 1784, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 228, + "unique_labels": 71, + "labels": { + "41": { + "count": 25 + }, + "2": { + "count": 8 + }, + "74": { + "count": 181 + }, + "64": { + "count": 18 + }, + "22": { + "count": 2 + }, + "16": { + "count": 96 + }, + "67": { + "count": 99 + }, + "1": { + "count": 124 + }, + "3": { + "count": 143 + }, + "65": { + "count": 29 + }, + "48": { + "count": 28 + }, + "57": { + "count": 228 + }, + "15": { + "count": 60 + }, + "49": { + "count": 45 + }, + "29": { + "count": 20 + }, + "19": { + "count": 171 + }, + "59": { + "count": 52 + }, + "7": { + "count": 140 + }, + "44": { + "count": 112 + }, + "31": { + "count": 32 + }, + "47": { + "count": 8 + }, + "76": { + "count": 102 + }, + "8": { + "count": 30 + }, + "72": { + "count": 12 + }, + "71": { + "count": 8 + }, + "52": { + "count": 17 + }, + "5": { + "count": 11 + }, + "46": { + "count": 33 + }, + "28": { + "count": 4 + }, + "79": { + "count": 23 + }, + "69": { + "count": 7 + }, + "35": { + "count": 34 + }, + "30": { + "count": 4 + }, + "43": { + "count": 1 + }, + "61": { + "count": 4 + }, + "42": { + "count": 5 + }, + "60": { + "count": 11 + }, + "34": { + "count": 25 + }, + "18": { + "count": 5 + }, + "68": { + "count": 20 + }, + "40": { + 
"count": 6 + }, + "24": { + "count": 1 + }, + "56": { + "count": 1 + }, + "27": { + "count": 2 + }, + "70": { + "count": 9 + }, + "62": { + "count": 2 + }, + "58": { + "count": 1 + }, + "13": { + "count": 2 + }, + "32": { + "count": 3 + }, + "4": { + "count": 2 + }, + "53": { + "count": 7 + }, + "20": { + "count": 2 + }, + "36": { + "count": 1 + }, + "23": { + "count": 3 + }, + "75": { + "count": 2 + }, + "50": { + "count": 6 + }, + "0": { + "count": 1 + }, + "73": { + "count": 4 + }, + "66": { + "count": 2 + }, + "54": { + "count": 2 + }, + "21": { + "count": 1 + }, + "9": { + "count": 1 + }, + "38": { + "count": 1 + }, + "55": { + "count": 1 + }, + "10": { + "count": 1 + }, + "39": { + "count": 1 + }, + "45": { + "count": 1 + }, + "77": { + "count": 1 + }, + "11": { + "count": 1 + }, + "78": { + "count": 2 + }, + "25": { + "count": 1 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/MLSUMClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/MLSUMClusteringS2S.v2.json new file mode 100644 index 0000000000..312bf8d294 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/MLSUMClusteringS2S.v2.json @@ -0,0 +1,1529 @@ +{ + "validation": { + "num_samples": 6894, + "number_of_characters": 29580726, + "min_text_length": 273, + "average_text_length": 4290.792863359443, + "max_text_length": 56317, + "unique_texts": 4307, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 1129, + "unique_labels": 78, + "labels": { + "7": { + "count": 1129 + }, + "8": { + "count": 149 + }, + "6": { + "count": 152 + }, + "10": { + "count": 41 + }, + "11": { + "count": 450 + }, + "5": { + "count": 284 + }, + "9": { + "count": 466 + }, + "2": { + "count": 114 + }, + "4": { + "count": 70 + }, + "0": { + "count": 120 + }, + "1": { + "count": 378 + }, + "3": { + "count": 218 + }, + "30": { + "count": 312 + }, + "43": { + "count": 26 + }, + "31": { + "count": 15 + }, + "48": { + "count": 65 + }, + "37": { + 
"count": 51 + }, + "18": { + "count": 208 + }, + "24": { + "count": 25 + }, + "51": { + "count": 142 + }, + "56": { + "count": 167 + }, + "13": { + "count": 189 + }, + "42": { + "count": 78 + }, + "29": { + "count": 4 + }, + "53": { + "count": 8 + }, + "58": { + "count": 14 + }, + "49": { + "count": 66 + }, + "27": { + "count": 198 + }, + "16": { + "count": 23 + }, + "38": { + "count": 15 + }, + "57": { + "count": 135 + }, + "50": { + "count": 23 + }, + "21": { + "count": 16 + }, + "54": { + "count": 51 + }, + "44": { + "count": 55 + }, + "40": { + "count": 137 + }, + "19": { + "count": 176 + }, + "59": { + "count": 7 + }, + "41": { + "count": 6 + }, + "12": { + "count": 3 + }, + "52": { + "count": 4 + }, + "45": { + "count": 32 + }, + "32": { + "count": 26 + }, + "55": { + "count": 314 + }, + "14": { + "count": 61 + }, + "46": { + "count": 9 + }, + "22": { + "count": 3 + }, + "26": { + "count": 22 + }, + "17": { + "count": 2 + }, + "20": { + "count": 5 + }, + "35": { + "count": 2 + }, + "39": { + "count": 3 + }, + "60": { + "count": 1 + }, + "47": { + "count": 2 + }, + "36": { + "count": 5 + }, + "61": { + "count": 1 + }, + "33": { + "count": 2 + }, + "34": { + "count": 2 + }, + "15": { + "count": 74 + }, + "68": { + "count": 74 + }, + "65": { + "count": 32 + }, + "78": { + "count": 86 + }, + "75": { + "count": 204 + }, + "28": { + "count": 36 + }, + "69": { + "count": 28 + }, + "77": { + "count": 1 + }, + "72": { + "count": 7 + }, + "73": { + "count": 15 + }, + "70": { + "count": 2 + }, + "79": { + "count": 17 + }, + "66": { + "count": 5 + }, + "25": { + "count": 1 + }, + "74": { + "count": 7 + }, + "76": { + "count": 4 + }, + "64": { + "count": 11 + }, + "63": { + "count": 1 + }, + "71": { + "count": 6 + }, + "23": { + "count": 1 + } + }, + "hf_subset_descriptive_stats": { + "de": { + "num_samples": 2048, + "number_of_characters": 7721299, + "min_text_length": 340, + "average_text_length": 3770.16552734375, + "max_text_length": 17367, + "unique_texts": 1764, + 
"min_labels_per_text": 18, + "average_labels_per_text": 1.0, + "max_labels_per_text": 691, + "unique_labels": 12, + "labels": { + "7": { + "count": 691 + }, + "8": { + "count": 37 + }, + "6": { + "count": 130 + }, + "10": { + "count": 37 + }, + "11": { + "count": 448 + }, + "5": { + "count": 98 + }, + "9": { + "count": 457 + }, + "2": { + "count": 46 + }, + "4": { + "count": 27 + }, + "0": { + "count": 39 + }, + "1": { + "count": 20 + }, + "3": { + "count": 18 + } + } + }, + "fr": { + "num_samples": 2048, + "number_of_characters": 7798594, + "min_text_length": 340, + "average_text_length": 3807.9072265625, + "max_text_length": 56261, + "unique_texts": 1526, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 312, + "unique_labels": 58, + "labels": { + "30": { + "count": 312 + }, + "43": { + "count": 20 + }, + "31": { + "count": 2 + }, + "48": { + "count": 51 + }, + "37": { + "count": 41 + }, + "18": { + "count": 205 + }, + "24": { + "count": 23 + }, + "51": { + "count": 141 + }, + "56": { + "count": 166 + }, + "13": { + "count": 188 + }, + "42": { + "count": 62 + }, + "29": { + "count": 1 + }, + "53": { + "count": 6 + }, + "58": { + "count": 6 + }, + "49": { + "count": 66 + }, + "1": { + "count": 156 + }, + "7": { + "count": 36 + }, + "0": { + "count": 11 + }, + "27": { + "count": 193 + }, + "16": { + "count": 23 + }, + "38": { + "count": 1 + }, + "57": { + "count": 71 + }, + "2": { + "count": 28 + }, + "5": { + "count": 24 + }, + "50": { + "count": 19 + }, + "9": { + "count": 8 + }, + "21": { + "count": 15 + }, + "54": { + "count": 48 + }, + "44": { + "count": 26 + }, + "40": { + "count": 26 + }, + "19": { + "count": 7 + }, + "59": { + "count": 3 + }, + "41": { + "count": 5 + }, + "12": { + "count": 2 + }, + "4": { + "count": 7 + }, + "52": { + "count": 4 + }, + "45": { + "count": 1 + }, + "32": { + "count": 2 + }, + "55": { + "count": 3 + }, + "6": { + "count": 8 + }, + "14": { + "count": 1 + }, + "10": { + "count": 3 + }, + 
"46": { + "count": 3 + }, + "22": { + "count": 1 + }, + "26": { + "count": 6 + }, + "3": { + "count": 1 + }, + "17": { + "count": 1 + }, + "20": { + "count": 1 + }, + "35": { + "count": 1 + }, + "39": { + "count": 3 + }, + "60": { + "count": 1 + }, + "47": { + "count": 1 + }, + "36": { + "count": 2 + }, + "61": { + "count": 1 + }, + "33": { + "count": 1 + }, + "8": { + "count": 2 + }, + "34": { + "count": 1 + }, + "11": { + "count": 1 + } + } + }, + "ru": { + "num_samples": 750, + "number_of_characters": 4847491, + "min_text_length": 711, + "average_text_length": 6463.321333333333, + "max_text_length": 32833, + "unique_texts": 729, + "min_labels_per_text": 13, + "average_labels_per_text": 1.0, + "max_labels_per_text": 263, + "unique_labels": 9, + "labels": { + "7": { + "count": 263 + }, + "1": { + "count": 58 + }, + "3": { + "count": 45 + }, + "5": { + "count": 154 + }, + "8": { + "count": 83 + }, + "6": { + "count": 13 + }, + "4": { + "count": 33 + }, + "0": { + "count": 65 + }, + "2": { + "count": 36 + } + } + }, + "es": { + "num_samples": 2048, + "number_of_characters": 9213342, + "min_text_length": 273, + "average_text_length": 4498.7021484375, + "max_text_length": 56317, + "unique_texts": 1746, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 311, + "unique_labels": 71, + "labels": { + "15": { + "count": 74 + }, + "68": { + "count": 74 + }, + "37": { + "count": 10 + }, + "65": { + "count": 32 + }, + "57": { + "count": 64 + }, + "55": { + "count": 311 + }, + "78": { + "count": 86 + }, + "7": { + "count": 139 + }, + "75": { + "count": 204 + }, + "44": { + "count": 29 + }, + "32": { + "count": 24 + }, + "28": { + "count": 36 + }, + "42": { + "count": 16 + }, + "40": { + "count": 111 + }, + "19": { + "count": 169 + }, + "8": { + "count": 27 + }, + "69": { + "count": 28 + }, + "1": { + "count": 144 + }, + "2": { + "count": 4 + }, + "48": { + "count": 14 + }, + "14": { + "count": 60 + }, + "22": { + "count": 2 + }, + "77": { + 
"count": 1 + }, + "3": { + "count": 154 + }, + "72": { + "count": 7 + }, + "73": { + "count": 15 + }, + "31": { + "count": 13 + }, + "38": { + "count": 14 + }, + "20": { + "count": 4 + }, + "59": { + "count": 4 + }, + "70": { + "count": 2 + }, + "26": { + "count": 16 + }, + "45": { + "count": 31 + }, + "33": { + "count": 1 + }, + "58": { + "count": 8 + }, + "50": { + "count": 4 + }, + "43": { + "count": 6 + }, + "79": { + "count": 17 + }, + "66": { + "count": 5 + }, + "46": { + "count": 6 + }, + "25": { + "count": 1 + }, + "24": { + "count": 2 + }, + "74": { + "count": 7 + }, + "5": { + "count": 8 + }, + "13": { + "count": 1 + }, + "36": { + "count": 3 + }, + "0": { + "count": 5 + }, + "41": { + "count": 1 + }, + "54": { + "count": 3 + }, + "76": { + "count": 4 + }, + "64": { + "count": 11 + }, + "4": { + "count": 3 + }, + "53": { + "count": 2 + }, + "63": { + "count": 1 + }, + "27": { + "count": 5 + }, + "29": { + "count": 3 + }, + "56": { + "count": 1 + }, + "18": { + "count": 3 + }, + "34": { + "count": 1 + }, + "12": { + "count": 1 + }, + "71": { + "count": 6 + }, + "47": { + "count": 1 + }, + "17": { + "count": 1 + }, + "21": { + "count": 1 + }, + "10": { + "count": 1 + }, + "23": { + "count": 1 + }, + "9": { + "count": 1 + }, + "35": { + "count": 1 + }, + "51": { + "count": 1 + }, + "11": { + "count": 1 + }, + "6": { + "count": 1 + } + } + } + } + }, + "test": { + "num_samples": 6900, + "number_of_characters": 30705479, + "min_text_length": 288, + "average_text_length": 4450.069420289855, + "max_text_length": 135921, + "unique_texts": 4336, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 1040, + "unique_labels": 80, + "labels": { + "5": { + "count": 285 + }, + "11": { + "count": 453 + }, + "9": { + "count": 469 + }, + "7": { + "count": 1040 + }, + "4": { + "count": 65 + }, + "8": { + "count": 160 + }, + "2": { + "count": 132 + }, + "3": { + "count": 218 + }, + "6": { + "count": 137 + }, + "1": { + "count": 349 + }, + 
"10": { + "count": 64 + }, + "0": { + "count": 154 + }, + "33": { + "count": 160 + }, + "24": { + "count": 179 + }, + "19": { + "count": 350 + }, + "35": { + "count": 367 + }, + "59": { + "count": 53 + }, + "69": { + "count": 15 + }, + "15": { + "count": 61 + }, + "60": { + "count": 18 + }, + "56": { + "count": 2 + }, + "65": { + "count": 183 + }, + "40": { + "count": 7 + }, + "58": { + "count": 112 + }, + "55": { + "count": 86 + }, + "47": { + "count": 66 + }, + "23": { + "count": 27 + }, + "25": { + "count": 6 + }, + "66": { + "count": 109 + }, + "18": { + "count": 22 + }, + "41": { + "count": 61 + }, + "63": { + "count": 41 + }, + "29": { + "count": 66 + }, + "30": { + "count": 24 + }, + "32": { + "count": 8 + }, + "54": { + "count": 56 + }, + "49": { + "count": 82 + }, + "44": { + "count": 140 + }, + "46": { + "count": 38 + }, + "57": { + "count": 239 + }, + "48": { + "count": 49 + }, + "17": { + "count": 4 + }, + "13": { + "count": 3 + }, + "61": { + "count": 8 + }, + "68": { + "count": 22 + }, + "37": { + "count": 3 + }, + "50": { + "count": 10 + }, + "26": { + "count": 13 + }, + "43": { + "count": 4 + }, + "16": { + "count": 98 + }, + "64": { + "count": 21 + }, + "51": { + "count": 2 + }, + "38": { + "count": 2 + }, + "67": { + "count": 100 + }, + "70": { + "count": 11 + }, + "42": { + "count": 6 + }, + "14": { + "count": 3 + }, + "31": { + "count": 33 + }, + "12": { + "count": 1 + }, + "36": { + "count": 2 + }, + "27": { + "count": 3 + }, + "53": { + "count": 8 + }, + "74": { + "count": 181 + }, + "22": { + "count": 2 + }, + "76": { + "count": 102 + }, + "72": { + "count": 12 + }, + "71": { + "count": 8 + }, + "52": { + "count": 17 + }, + "28": { + "count": 4 + }, + "79": { + "count": 23 + }, + "34": { + "count": 25 + }, + "62": { + "count": 2 + }, + "20": { + "count": 2 + }, + "75": { + "count": 2 + }, + "73": { + "count": 4 + }, + "21": { + "count": 1 + }, + "39": { + "count": 1 + }, + "45": { + "count": 1 + }, + "77": { + "count": 1 + }, + "78": { + 
"count": 2 + } + }, + "hf_subset_descriptive_stats": { + "de": { + "num_samples": 2048, + "number_of_characters": 8183989, + "min_text_length": 660, + "average_text_length": 3996.08837890625, + "max_text_length": 25967, + "unique_texts": 1715, + "min_labels_per_text": 20, + "average_labels_per_text": 1.0, + "max_labels_per_text": 692, + "unique_labels": 12, + "labels": { + "5": { + "count": 106 + }, + "11": { + "count": 450 + }, + "9": { + "count": 467 + }, + "7": { + "count": 692 + }, + "4": { + "count": 24 + }, + "8": { + "count": 42 + }, + "2": { + "count": 45 + }, + "3": { + "count": 20 + }, + "6": { + "count": 103 + }, + "1": { + "count": 24 + }, + "10": { + "count": 40 + }, + "0": { + "count": 35 + } + } + }, + "fr": { + "num_samples": 2048, + "number_of_characters": 7735699, + "min_text_length": 291, + "average_text_length": 3777.19677734375, + "max_text_length": 50088, + "unique_texts": 1524, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 333, + "unique_labels": 61, + "labels": { + "33": { + "count": 160 + }, + "24": { + "count": 178 + }, + "19": { + "count": 179 + }, + "7": { + "count": 5 + }, + "35": { + "count": 333 + }, + "59": { + "count": 1 + }, + "69": { + "count": 8 + }, + "1": { + "count": 158 + }, + "15": { + "count": 1 + }, + "60": { + "count": 7 + }, + "56": { + "count": 1 + }, + "2": { + "count": 28 + }, + "65": { + "count": 154 + }, + "40": { + "count": 1 + }, + "58": { + "count": 111 + }, + "55": { + "count": 85 + }, + "47": { + "count": 58 + }, + "23": { + "count": 24 + }, + "10": { + "count": 23 + }, + "25": { + "count": 5 + }, + "66": { + "count": 107 + }, + "18": { + "count": 17 + }, + "41": { + "count": 36 + }, + "63": { + "count": 41 + }, + "29": { + "count": 46 + }, + "30": { + "count": 20 + }, + "32": { + "count": 5 + }, + "54": { + "count": 54 + }, + "49": { + "count": 37 + }, + "44": { + "count": 28 + }, + "46": { + "count": 5 + }, + "6": { + "count": 19 + }, + "57": { + "count": 11 + }, + 
"48": { + "count": 21 + }, + "0": { + "count": 10 + }, + "9": { + "count": 1 + }, + "17": { + "count": 4 + }, + "13": { + "count": 1 + }, + "61": { + "count": 4 + }, + "68": { + "count": 2 + }, + "37": { + "count": 3 + }, + "50": { + "count": 4 + }, + "26": { + "count": 13 + }, + "43": { + "count": 3 + }, + "11": { + "count": 2 + }, + "8": { + "count": 6 + }, + "5": { + "count": 7 + }, + "16": { + "count": 2 + }, + "64": { + "count": 3 + }, + "51": { + "count": 2 + }, + "38": { + "count": 1 + }, + "67": { + "count": 1 + }, + "70": { + "count": 2 + }, + "42": { + "count": 1 + }, + "14": { + "count": 3 + }, + "31": { + "count": 1 + }, + "4": { + "count": 1 + }, + "12": { + "count": 1 + }, + "36": { + "count": 1 + }, + "27": { + "count": 1 + }, + "53": { + "count": 1 + } + } + }, + "ru": { + "num_samples": 756, + "number_of_characters": 5128031, + "min_text_length": 395, + "average_text_length": 6783.109788359789, + "max_text_length": 135921, + "unique_texts": 732, + "min_labels_per_text": 15, + "average_labels_per_text": 1.0, + "max_labels_per_text": 203, + "unique_labels": 9, + "labels": { + "6": { + "count": 15 + }, + "5": { + "count": 161 + }, + "4": { + "count": 38 + }, + "0": { + "count": 108 + }, + "7": { + "count": 203 + }, + "2": { + "count": 51 + }, + "1": { + "count": 43 + }, + "8": { + "count": 82 + }, + "3": { + "count": 55 + } + } + }, + "es": { + "num_samples": 2048, + "number_of_characters": 9657760, + "min_text_length": 288, + "average_text_length": 4715.703125, + "max_text_length": 85710, + "unique_texts": 1785, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 228, + "unique_labels": 71, + "labels": { + "41": { + "count": 25 + }, + "2": { + "count": 8 + }, + "74": { + "count": 181 + }, + "64": { + "count": 18 + }, + "22": { + "count": 2 + }, + "16": { + "count": 96 + }, + "67": { + "count": 99 + }, + "1": { + "count": 124 + }, + "3": { + "count": 143 + }, + "65": { + "count": 29 + }, + "48": { + "count": 28 + }, 
+ "57": { + "count": 228 + }, + "15": { + "count": 60 + }, + "49": { + "count": 45 + }, + "29": { + "count": 20 + }, + "19": { + "count": 171 + }, + "59": { + "count": 52 + }, + "7": { + "count": 140 + }, + "44": { + "count": 112 + }, + "31": { + "count": 32 + }, + "47": { + "count": 8 + }, + "76": { + "count": 102 + }, + "8": { + "count": 30 + }, + "72": { + "count": 12 + }, + "71": { + "count": 8 + }, + "52": { + "count": 17 + }, + "5": { + "count": 11 + }, + "46": { + "count": 33 + }, + "28": { + "count": 4 + }, + "79": { + "count": 23 + }, + "69": { + "count": 7 + }, + "35": { + "count": 34 + }, + "30": { + "count": 4 + }, + "43": { + "count": 1 + }, + "61": { + "count": 4 + }, + "42": { + "count": 5 + }, + "60": { + "count": 11 + }, + "34": { + "count": 25 + }, + "18": { + "count": 5 + }, + "68": { + "count": 20 + }, + "40": { + "count": 6 + }, + "24": { + "count": 1 + }, + "56": { + "count": 1 + }, + "27": { + "count": 2 + }, + "70": { + "count": 9 + }, + "62": { + "count": 2 + }, + "58": { + "count": 1 + }, + "13": { + "count": 2 + }, + "32": { + "count": 3 + }, + "4": { + "count": 2 + }, + "53": { + "count": 7 + }, + "20": { + "count": 2 + }, + "36": { + "count": 1 + }, + "23": { + "count": 3 + }, + "75": { + "count": 2 + }, + "50": { + "count": 6 + }, + "0": { + "count": 1 + }, + "73": { + "count": 4 + }, + "66": { + "count": 2 + }, + "54": { + "count": 2 + }, + "21": { + "count": 1 + }, + "9": { + "count": 1 + }, + "38": { + "count": 1 + }, + "55": { + "count": 1 + }, + "10": { + "count": 1 + }, + "39": { + "count": 1 + }, + "45": { + "count": 1 + }, + "77": { + "count": 1 + }, + "11": { + "count": 1 + }, + "78": { + "count": 2 + }, + "25": { + "count": 1 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/MasakhaNEWSClusteringP2P.json b/mteb/descriptive_stats/Clustering/MasakhaNEWSClusteringP2P.json new file mode 100644 index 0000000000..093dd3b80a --- /dev/null +++ 
b/mteb/descriptive_stats/Clustering/MasakhaNEWSClusteringP2P.json @@ -0,0 +1,524 @@ +{ + "test": { + "num_samples": 80, + "number_of_characters": 6242, + "min_text_length": 35, + "average_text_length": 78.025, + "max_text_length": 190, + "unique_texts": 6236, + "min_labels_per_text": 286, + "average_labels_per_text": 78.025, + "max_labels_per_text": 1589, + "unique_labels": 7, + "labels": { + "0": { + "count": 785 + }, + "2": { + "count": 1258 + }, + "3": { + "count": 1589 + }, + "5": { + "count": 1265 + }, + "1": { + "count": 762 + }, + "6": { + "count": 297 + }, + "4": { + "count": 286 + } + }, + "hf_subset_descriptive_stats": { + "amh": { + "num_samples": 5, + "number_of_characters": 376, + "min_text_length": 75, + "average_text_length": 75.2, + "max_text_length": 76, + "unique_texts": 373, + "min_labels_per_text": 81, + "average_labels_per_text": 75.2, + "max_labels_per_text": 100, + "unique_labels": 4, + "labels": { + "0": { + "count": 81 + }, + "2": { + "count": 100 + }, + "3": { + "count": 100 + }, + "5": { + "count": 95 + } + } + }, + "eng": { + "num_samples": 5, + "number_of_characters": 948, + "min_text_length": 189, + "average_text_length": 189.6, + "max_text_length": 190, + "unique_texts": 948, + "min_labels_per_text": 123, + "average_labels_per_text": 189.6, + "max_labels_per_text": 200, + "unique_labels": 6, + "labels": { + "0": { + "count": 160 + }, + "1": { + "count": 150 + }, + "2": { + "count": 150 + }, + "3": { + "count": 165 + }, + "5": { + "count": 200 + }, + "6": { + "count": 123 + } + } + }, + "fra": { + "num_samples": 5, + "number_of_characters": 422, + "min_text_length": 84, + "average_text_length": 84.4, + "max_text_length": 85, + "unique_texts": 422, + "min_labels_per_text": 22, + "average_labels_per_text": 84.4, + "max_labels_per_text": 100, + "unique_labels": 5, + "labels": { + "0": { + "count": 100 + }, + "2": { + "count": 100 + }, + "3": { + "count": 100 + }, + "5": { + "count": 100 + }, + "6": { + "count": 22 + } + } + }, + "hau": { 
+ "num_samples": 5, + "number_of_characters": 637, + "min_text_length": 127, + "average_text_length": 127.4, + "max_text_length": 128, + "unique_texts": 637, + "min_labels_per_text": 59, + "average_labels_per_text": 127.4, + "max_labels_per_text": 100, + "unique_labels": 7, + "labels": { + "0": { + "count": 80 + }, + "1": { + "count": 100 + }, + "2": { + "count": 99 + }, + "3": { + "count": 100 + }, + "4": { + "count": 99 + }, + "5": { + "count": 100 + }, + "6": { + "count": 59 + } + } + }, + "ibo": { + "num_samples": 5, + "number_of_characters": 390, + "min_text_length": 78, + "average_text_length": 78.0, + "max_text_length": 78, + "unique_texts": 387, + "min_labels_per_text": 15, + "average_labels_per_text": 78.0, + "max_labels_per_text": 100, + "unique_labels": 6, + "labels": { + "0": { + "count": 59 + }, + "1": { + "count": 74 + }, + "2": { + "count": 85 + }, + "3": { + "count": 100 + }, + "4": { + "count": 15 + }, + "5": { + "count": 57 + } + } + }, + "lin": { + "num_samples": 5, + "number_of_characters": 175, + "min_text_length": 35, + "average_text_length": 35.0, + "max_text_length": 35, + "unique_texts": 175, + "min_labels_per_text": 17, + "average_labels_per_text": 35.0, + "max_labels_per_text": 100, + "unique_labels": 4, + "labels": { + "0": { + "count": 17 + }, + "2": { + "count": 39 + }, + "3": { + "count": 100 + }, + "5": { + "count": 19 + } + } + }, + "lug": { + "num_samples": 5, + "number_of_characters": 223, + "min_text_length": 44, + "average_text_length": 44.6, + "max_text_length": 45, + "unique_texts": 223, + "min_labels_per_text": 19, + "average_labels_per_text": 44.6, + "max_labels_per_text": 100, + "unique_labels": 5, + "labels": { + "0": { + "count": 34 + }, + "2": { + "count": 46 + }, + "3": { + "count": 100 + }, + "4": { + "count": 19 + }, + "5": { + "count": 24 + } + } + }, + "orm": { + "num_samples": 5, + "number_of_characters": 325, + "min_text_length": 65, + "average_text_length": 65.0, + "max_text_length": 65, + "unique_texts": 325, + 
"min_labels_per_text": 15, + "average_labels_per_text": 65.0, + "max_labels_per_text": 100, + "unique_labels": 5, + "labels": { + "1": { + "count": 32 + }, + "2": { + "count": 97 + }, + "3": { + "count": 100 + }, + "5": { + "count": 81 + }, + "6": { + "count": 15 + } + } + }, + "pcm": { + "num_samples": 5, + "number_of_characters": 305, + "min_text_length": 61, + "average_text_length": 61.0, + "max_text_length": 61, + "unique_texts": 305, + "min_labels_per_text": 20, + "average_labels_per_text": 61.0, + "max_labels_per_text": 99, + "unique_labels": 5, + "labels": { + "0": { + "count": 20 + }, + "1": { + "count": 92 + }, + "2": { + "count": 32 + }, + "3": { + "count": 62 + }, + "5": { + "count": 99 + } + } + }, + "run": { + "num_samples": 5, + "number_of_characters": 322, + "min_text_length": 64, + "average_text_length": 64.4, + "max_text_length": 65, + "unique_texts": 322, + "min_labels_per_text": 15, + "average_labels_per_text": 64.4, + "max_labels_per_text": 100, + "unique_labels": 6, + "labels": { + "0": { + "count": 16 + }, + "1": { + "count": 32 + }, + "2": { + "count": 75 + }, + "3": { + "count": 100 + }, + "4": { + "count": 15 + }, + "5": { + "count": 84 + } + } + }, + "sna": { + "num_samples": 5, + "number_of_characters": 369, + "min_text_length": 73, + "average_text_length": 73.8, + "max_text_length": 74, + "unique_texts": 369, + "min_labels_per_text": 84, + "average_labels_per_text": 73.8, + "max_labels_per_text": 100, + "unique_labels": 4, + "labels": { + "0": { + "count": 100 + }, + "2": { + "count": 85 + }, + "3": { + "count": 100 + }, + "5": { + "count": 84 + } + } + }, + "som": { + "num_samples": 5, + "number_of_characters": 294, + "min_text_length": 58, + "average_text_length": 58.8, + "max_text_length": 59, + "unique_texts": 294, + "min_labels_per_text": 15, + "average_labels_per_text": 58.8, + "max_labels_per_text": 100, + "unique_labels": 7, + "labels": { + "0": { + "count": 23 + }, + "1": { + "count": 28 + }, + "2": { + "count": 71 + }, + "3": { 
+ "count": 100 + }, + "4": { + "count": 15 + }, + "5": { + "count": 30 + }, + "6": { + "count": 27 + } + } + }, + "swa": { + "num_samples": 5, + "number_of_characters": 476, + "min_text_length": 95, + "average_text_length": 95.2, + "max_text_length": 96, + "unique_texts": 476, + "min_labels_per_text": 20, + "average_labels_per_text": 95.2, + "max_labels_per_text": 100, + "unique_labels": 7, + "labels": { + "0": { + "count": 64 + }, + "1": { + "count": 20 + }, + "2": { + "count": 100 + }, + "3": { + "count": 100 + }, + "4": { + "count": 59 + }, + "5": { + "count": 100 + }, + "6": { + "count": 33 + } + } + }, + "tir": { + "num_samples": 5, + "number_of_characters": 272, + "min_text_length": 54, + "average_text_length": 54.4, + "max_text_length": 55, + "unique_texts": 272, + "min_labels_per_text": 16, + "average_labels_per_text": 54.4, + "max_labels_per_text": 100, + "unique_labels": 6, + "labels": { + "0": { + "count": 16 + }, + "1": { + "count": 34 + }, + "2": { + "count": 79 + }, + "3": { + "count": 100 + }, + "5": { + "count": 25 + }, + "6": { + "count": 18 + } + } + }, + "xho": { + "num_samples": 5, + "number_of_characters": 297, + "min_text_length": 59, + "average_text_length": 59.4, + "max_text_length": 60, + "unique_texts": 297, + "min_labels_per_text": 15, + "average_labels_per_text": 59.4, + "max_labels_per_text": 100, + "unique_labels": 5, + "labels": { + "0": { + "count": 15 + }, + "1": { + "count": 100 + }, + "2": { + "count": 20 + }, + "3": { + "count": 62 + }, + "5": { + "count": 100 + } + } + }, + "yor": { + "num_samples": 5, + "number_of_characters": 411, + "min_text_length": 82, + "average_text_length": 82.2, + "max_text_length": 83, + "unique_texts": 411, + "min_labels_per_text": 64, + "average_labels_per_text": 82.2, + "max_labels_per_text": 100, + "unique_labels": 5, + "labels": { + "1": { + "count": 100 + }, + "2": { + "count": 80 + }, + "3": { + "count": 100 + }, + "4": { + "count": 64 + }, + "5": { + "count": 67 + } + } + } + } + } +} \ No 
newline at end of file diff --git a/mteb/descriptive_stats/Clustering/MasakhaNEWSClusteringS2S.json b/mteb/descriptive_stats/Clustering/MasakhaNEWSClusteringS2S.json new file mode 100644 index 0000000000..093dd3b80a --- /dev/null +++ b/mteb/descriptive_stats/Clustering/MasakhaNEWSClusteringS2S.json @@ -0,0 +1,524 @@ +{ + "test": { + "num_samples": 80, + "number_of_characters": 6242, + "min_text_length": 35, + "average_text_length": 78.025, + "max_text_length": 190, + "unique_texts": 6236, + "min_labels_per_text": 286, + "average_labels_per_text": 78.025, + "max_labels_per_text": 1589, + "unique_labels": 7, + "labels": { + "0": { + "count": 785 + }, + "2": { + "count": 1258 + }, + "3": { + "count": 1589 + }, + "5": { + "count": 1265 + }, + "1": { + "count": 762 + }, + "6": { + "count": 297 + }, + "4": { + "count": 286 + } + }, + "hf_subset_descriptive_stats": { + "amh": { + "num_samples": 5, + "number_of_characters": 376, + "min_text_length": 75, + "average_text_length": 75.2, + "max_text_length": 76, + "unique_texts": 373, + "min_labels_per_text": 81, + "average_labels_per_text": 75.2, + "max_labels_per_text": 100, + "unique_labels": 4, + "labels": { + "0": { + "count": 81 + }, + "2": { + "count": 100 + }, + "3": { + "count": 100 + }, + "5": { + "count": 95 + } + } + }, + "eng": { + "num_samples": 5, + "number_of_characters": 948, + "min_text_length": 189, + "average_text_length": 189.6, + "max_text_length": 190, + "unique_texts": 948, + "min_labels_per_text": 123, + "average_labels_per_text": 189.6, + "max_labels_per_text": 200, + "unique_labels": 6, + "labels": { + "0": { + "count": 160 + }, + "1": { + "count": 150 + }, + "2": { + "count": 150 + }, + "3": { + "count": 165 + }, + "5": { + "count": 200 + }, + "6": { + "count": 123 + } + } + }, + "fra": { + "num_samples": 5, + "number_of_characters": 422, + "min_text_length": 84, + "average_text_length": 84.4, + "max_text_length": 85, + "unique_texts": 422, + "min_labels_per_text": 22, + "average_labels_per_text": 
84.4, + "max_labels_per_text": 100, + "unique_labels": 5, + "labels": { + "0": { + "count": 100 + }, + "2": { + "count": 100 + }, + "3": { + "count": 100 + }, + "5": { + "count": 100 + }, + "6": { + "count": 22 + } + } + }, + "hau": { + "num_samples": 5, + "number_of_characters": 637, + "min_text_length": 127, + "average_text_length": 127.4, + "max_text_length": 128, + "unique_texts": 637, + "min_labels_per_text": 59, + "average_labels_per_text": 127.4, + "max_labels_per_text": 100, + "unique_labels": 7, + "labels": { + "0": { + "count": 80 + }, + "1": { + "count": 100 + }, + "2": { + "count": 99 + }, + "3": { + "count": 100 + }, + "4": { + "count": 99 + }, + "5": { + "count": 100 + }, + "6": { + "count": 59 + } + } + }, + "ibo": { + "num_samples": 5, + "number_of_characters": 390, + "min_text_length": 78, + "average_text_length": 78.0, + "max_text_length": 78, + "unique_texts": 387, + "min_labels_per_text": 15, + "average_labels_per_text": 78.0, + "max_labels_per_text": 100, + "unique_labels": 6, + "labels": { + "0": { + "count": 59 + }, + "1": { + "count": 74 + }, + "2": { + "count": 85 + }, + "3": { + "count": 100 + }, + "4": { + "count": 15 + }, + "5": { + "count": 57 + } + } + }, + "lin": { + "num_samples": 5, + "number_of_characters": 175, + "min_text_length": 35, + "average_text_length": 35.0, + "max_text_length": 35, + "unique_texts": 175, + "min_labels_per_text": 17, + "average_labels_per_text": 35.0, + "max_labels_per_text": 100, + "unique_labels": 4, + "labels": { + "0": { + "count": 17 + }, + "2": { + "count": 39 + }, + "3": { + "count": 100 + }, + "5": { + "count": 19 + } + } + }, + "lug": { + "num_samples": 5, + "number_of_characters": 223, + "min_text_length": 44, + "average_text_length": 44.6, + "max_text_length": 45, + "unique_texts": 223, + "min_labels_per_text": 19, + "average_labels_per_text": 44.6, + "max_labels_per_text": 100, + "unique_labels": 5, + "labels": { + "0": { + "count": 34 + }, + "2": { + "count": 46 + }, + "3": { + "count": 100 + 
}, + "4": { + "count": 19 + }, + "5": { + "count": 24 + } + } + }, + "orm": { + "num_samples": 5, + "number_of_characters": 325, + "min_text_length": 65, + "average_text_length": 65.0, + "max_text_length": 65, + "unique_texts": 325, + "min_labels_per_text": 15, + "average_labels_per_text": 65.0, + "max_labels_per_text": 100, + "unique_labels": 5, + "labels": { + "1": { + "count": 32 + }, + "2": { + "count": 97 + }, + "3": { + "count": 100 + }, + "5": { + "count": 81 + }, + "6": { + "count": 15 + } + } + }, + "pcm": { + "num_samples": 5, + "number_of_characters": 305, + "min_text_length": 61, + "average_text_length": 61.0, + "max_text_length": 61, + "unique_texts": 305, + "min_labels_per_text": 20, + "average_labels_per_text": 61.0, + "max_labels_per_text": 99, + "unique_labels": 5, + "labels": { + "0": { + "count": 20 + }, + "1": { + "count": 92 + }, + "2": { + "count": 32 + }, + "3": { + "count": 62 + }, + "5": { + "count": 99 + } + } + }, + "run": { + "num_samples": 5, + "number_of_characters": 322, + "min_text_length": 64, + "average_text_length": 64.4, + "max_text_length": 65, + "unique_texts": 322, + "min_labels_per_text": 15, + "average_labels_per_text": 64.4, + "max_labels_per_text": 100, + "unique_labels": 6, + "labels": { + "0": { + "count": 16 + }, + "1": { + "count": 32 + }, + "2": { + "count": 75 + }, + "3": { + "count": 100 + }, + "4": { + "count": 15 + }, + "5": { + "count": 84 + } + } + }, + "sna": { + "num_samples": 5, + "number_of_characters": 369, + "min_text_length": 73, + "average_text_length": 73.8, + "max_text_length": 74, + "unique_texts": 369, + "min_labels_per_text": 84, + "average_labels_per_text": 73.8, + "max_labels_per_text": 100, + "unique_labels": 4, + "labels": { + "0": { + "count": 100 + }, + "2": { + "count": 85 + }, + "3": { + "count": 100 + }, + "5": { + "count": 84 + } + } + }, + "som": { + "num_samples": 5, + "number_of_characters": 294, + "min_text_length": 58, + "average_text_length": 58.8, + "max_text_length": 59, + 
"unique_texts": 294, + "min_labels_per_text": 15, + "average_labels_per_text": 58.8, + "max_labels_per_text": 100, + "unique_labels": 7, + "labels": { + "0": { + "count": 23 + }, + "1": { + "count": 28 + }, + "2": { + "count": 71 + }, + "3": { + "count": 100 + }, + "4": { + "count": 15 + }, + "5": { + "count": 30 + }, + "6": { + "count": 27 + } + } + }, + "swa": { + "num_samples": 5, + "number_of_characters": 476, + "min_text_length": 95, + "average_text_length": 95.2, + "max_text_length": 96, + "unique_texts": 476, + "min_labels_per_text": 20, + "average_labels_per_text": 95.2, + "max_labels_per_text": 100, + "unique_labels": 7, + "labels": { + "0": { + "count": 64 + }, + "1": { + "count": 20 + }, + "2": { + "count": 100 + }, + "3": { + "count": 100 + }, + "4": { + "count": 59 + }, + "5": { + "count": 100 + }, + "6": { + "count": 33 + } + } + }, + "tir": { + "num_samples": 5, + "number_of_characters": 272, + "min_text_length": 54, + "average_text_length": 54.4, + "max_text_length": 55, + "unique_texts": 272, + "min_labels_per_text": 16, + "average_labels_per_text": 54.4, + "max_labels_per_text": 100, + "unique_labels": 6, + "labels": { + "0": { + "count": 16 + }, + "1": { + "count": 34 + }, + "2": { + "count": 79 + }, + "3": { + "count": 100 + }, + "5": { + "count": 25 + }, + "6": { + "count": 18 + } + } + }, + "xho": { + "num_samples": 5, + "number_of_characters": 297, + "min_text_length": 59, + "average_text_length": 59.4, + "max_text_length": 60, + "unique_texts": 297, + "min_labels_per_text": 15, + "average_labels_per_text": 59.4, + "max_labels_per_text": 100, + "unique_labels": 5, + "labels": { + "0": { + "count": 15 + }, + "1": { + "count": 100 + }, + "2": { + "count": 20 + }, + "3": { + "count": 62 + }, + "5": { + "count": 100 + } + } + }, + "yor": { + "num_samples": 5, + "number_of_characters": 411, + "min_text_length": 82, + "average_text_length": 82.2, + "max_text_length": 83, + "unique_texts": 411, + "min_labels_per_text": 64, + 
"average_labels_per_text": 82.2, + "max_labels_per_text": 100, + "unique_labels": 5, + "labels": { + "1": { + "count": 100 + }, + "2": { + "count": 80 + }, + "3": { + "count": 100 + }, + "4": { + "count": 64 + }, + "5": { + "count": 67 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/MewsC16JaClustering.json b/mteb/descriptive_stats/Clustering/MewsC16JaClustering.json new file mode 100644 index 0000000000..fb8fa494ca --- /dev/null +++ b/mteb/descriptive_stats/Clustering/MewsC16JaClustering.json @@ -0,0 +1,52 @@ +{ + "test": { + "num_samples": 992, + "number_of_characters": 94247, + "min_text_length": 6, + "average_text_length": 95.0070564516129, + "max_text_length": 466, + "unique_texts": 190, + "min_labels_per_text": 6, + "average_labels_per_text": 1.0, + "max_labels_per_text": 240, + "unique_labels": 12, + "labels": { + "5": { + "count": 78 + }, + "1": { + "count": 162 + }, + "7": { + "count": 180 + }, + "9": { + "count": 18 + }, + "6": { + "count": 240 + }, + "2": { + "count": 71 + }, + "0": { + "count": 106 + }, + "8": { + "count": 10 + }, + "11": { + "count": 6 + }, + "10": { + "count": 30 + }, + "4": { + "count": 85 + }, + "3": { + "count": 6 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/PlscClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/PlscClusteringP2P.v2.json new file mode 100644 index 0000000000..6592381acd --- /dev/null +++ b/mteb/descriptive_stats/Clustering/PlscClusteringP2P.v2.json @@ -0,0 +1,169 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 2104256, + "min_text_length": 60, + "average_text_length": 1027.46875, + "max_text_length": 13887, + "unique_texts": 1193, + "min_labels_per_text": 7, + "average_labels_per_text": 1.0, + "max_labels_per_text": 170, + "unique_labels": 51, + "labels": { + "12": { + "count": 25 + }, + "6": { + "count": 25 + }, + "7": { + "count": 25 + }, + "33": { + "count": 24 + }, + "23": { + "count": 146 + 
}, + "39": { + "count": 146 + }, + "26": { + "count": 146 + }, + "50": { + "count": 25 + }, + "37": { + "count": 146 + }, + "8": { + "count": 25 + }, + "24": { + "count": 25 + }, + "44": { + "count": 25 + }, + "22": { + "count": 146 + }, + "14": { + "count": 25 + }, + "17": { + "count": 9 + }, + "35": { + "count": 25 + }, + "29": { + "count": 25 + }, + "16": { + "count": 24 + }, + "43": { + "count": 25 + }, + "11": { + "count": 25 + }, + "31": { + "count": 25 + }, + "13": { + "count": 25 + }, + "21": { + "count": 25 + }, + "27": { + "count": 25 + }, + "41": { + "count": 146 + }, + "40": { + "count": 170 + }, + "47": { + "count": 12 + }, + "28": { + "count": 25 + }, + "45": { + "count": 25 + }, + "46": { + "count": 25 + }, + "9": { + "count": 25 + }, + "1": { + "count": 25 + }, + "15": { + "count": 25 + }, + "32": { + "count": 25 + }, + "52": { + "count": 24 + }, + "18": { + "count": 13 + }, + "3": { + "count": 24 + }, + "5": { + "count": 25 + }, + "4": { + "count": 25 + }, + "25": { + "count": 25 + }, + "36": { + "count": 25 + }, + "38": { + "count": 25 + }, + "2": { + "count": 25 + }, + "0": { + "count": 20 + }, + "42": { + "count": 25 + }, + "34": { + "count": 25 + }, + "20": { + "count": 7 + }, + "19": { + "count": 8 + }, + "10": { + "count": 25 + }, + "30": { + "count": 24 + }, + "49": { + "count": 13 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/PlscClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/PlscClusteringS2S.v2.json new file mode 100644 index 0000000000..fd8057deae --- /dev/null +++ b/mteb/descriptive_stats/Clustering/PlscClusteringS2S.v2.json @@ -0,0 +1,169 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 173651, + "min_text_length": 5, + "average_text_length": 84.79052734375, + "max_text_length": 300, + "unique_texts": 189, + "min_labels_per_text": 7, + "average_labels_per_text": 1.0, + "max_labels_per_text": 170, + "unique_labels": 51, + "labels": { + "12": { + "count": 25 + }, + 
"6": { + "count": 25 + }, + "7": { + "count": 25 + }, + "33": { + "count": 24 + }, + "23": { + "count": 146 + }, + "39": { + "count": 146 + }, + "26": { + "count": 146 + }, + "50": { + "count": 25 + }, + "37": { + "count": 146 + }, + "8": { + "count": 25 + }, + "24": { + "count": 25 + }, + "44": { + "count": 25 + }, + "22": { + "count": 146 + }, + "14": { + "count": 25 + }, + "17": { + "count": 9 + }, + "35": { + "count": 25 + }, + "29": { + "count": 25 + }, + "16": { + "count": 24 + }, + "43": { + "count": 25 + }, + "11": { + "count": 25 + }, + "31": { + "count": 25 + }, + "13": { + "count": 25 + }, + "21": { + "count": 25 + }, + "27": { + "count": 25 + }, + "41": { + "count": 146 + }, + "40": { + "count": 170 + }, + "47": { + "count": 12 + }, + "28": { + "count": 25 + }, + "45": { + "count": 25 + }, + "46": { + "count": 25 + }, + "9": { + "count": 25 + }, + "1": { + "count": 25 + }, + "15": { + "count": 25 + }, + "32": { + "count": 25 + }, + "52": { + "count": 24 + }, + "18": { + "count": 13 + }, + "3": { + "count": 24 + }, + "5": { + "count": 25 + }, + "4": { + "count": 25 + }, + "25": { + "count": 25 + }, + "36": { + "count": 25 + }, + "38": { + "count": 25 + }, + "2": { + "count": 25 + }, + "0": { + "count": 20 + }, + "42": { + "count": 25 + }, + "34": { + "count": 25 + }, + "20": { + "count": 7 + }, + "19": { + "count": 8 + }, + "10": { + "count": 25 + }, + "30": { + "count": 24 + }, + "49": { + "count": 13 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/RedditClustering.v2.json b/mteb/descriptive_stats/Clustering/RedditClustering.v2.json new file mode 100644 index 0000000000..fa433778f1 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/RedditClustering.v2.json @@ -0,0 +1,166 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 134119, + "min_text_length": 18, + "average_text_length": 65.48779296875, + "max_text_length": 299, + "unique_texts": 178, + "min_labels_per_text": 23, + 
"average_labels_per_text": 1.0, + "max_labels_per_text": 60, + "unique_labels": 50, + "labels": { + "17": { + "count": 48 + }, + "43": { + "count": 32 + }, + "44": { + "count": 54 + }, + "8": { + "count": 48 + }, + "15": { + "count": 42 + }, + "29": { + "count": 32 + }, + "5": { + "count": 43 + }, + "21": { + "count": 36 + }, + "14": { + "count": 42 + }, + "24": { + "count": 36 + }, + "39": { + "count": 45 + }, + "1": { + "count": 33 + }, + "32": { + "count": 36 + }, + "16": { + "count": 52 + }, + "27": { + "count": 51 + }, + "6": { + "count": 33 + }, + "36": { + "count": 45 + }, + "31": { + "count": 46 + }, + "46": { + "count": 60 + }, + "12": { + "count": 45 + }, + "34": { + "count": 37 + }, + "41": { + "count": 41 + }, + "47": { + "count": 43 + }, + "13": { + "count": 37 + }, + "25": { + "count": 36 + }, + "10": { + "count": 34 + }, + "42": { + "count": 29 + }, + "2": { + "count": 45 + }, + "48": { + "count": 38 + }, + "35": { + "count": 33 + }, + "11": { + "count": 37 + }, + "33": { + "count": 45 + }, + "40": { + "count": 37 + }, + "30": { + "count": 33 + }, + "26": { + "count": 40 + }, + "28": { + "count": 31 + }, + "0": { + "count": 34 + }, + "4": { + "count": 45 + }, + "20": { + "count": 49 + }, + "9": { + "count": 38 + }, + "18": { + "count": 38 + }, + "37": { + "count": 50 + }, + "19": { + "count": 38 + }, + "22": { + "count": 45 + }, + "49": { + "count": 55 + }, + "7": { + "count": 44 + }, + "45": { + "count": 40 + }, + "23": { + "count": 44 + }, + "38": { + "count": 23 + }, + "3": { + "count": 50 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/RomaniBibleClustering.json b/mteb/descriptive_stats/Clustering/RomaniBibleClustering.json new file mode 100644 index 0000000000..f09ef6d425 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/RomaniBibleClustering.json @@ -0,0 +1,211 @@ +{ + "test": { + "num_samples": 4, + "number_of_characters": 2048, + "min_text_length": 512, + "average_text_length": 512.0, + 
"max_text_length": 512, + "unique_texts": 2043, + "min_labels_per_text": 1, + "average_labels_per_text": 512.0, + "max_labels_per_text": 171, + "unique_labels": 65, + "labels": { + "NT,LUK": { + "count": 76 + }, + "NT,JHN": { + "count": 50 + }, + "NT,ROM": { + "count": 27 + }, + "OT,PSA": { + "count": 171 + }, + "OT,EXO": { + "count": 78 + }, + "OT,DAN": { + "count": 20 + }, + "NT,MAT": { + "count": 69 + }, + "NT,ACT": { + "count": 68 + }, + "OT,SNG": { + "count": 7 + }, + "OT,JOS": { + "count": 55 + }, + "OT,2CH": { + "count": 59 + }, + "OT,DEU": { + "count": 59 + }, + "NT,MRK": { + "count": 44 + }, + "OT,PRO": { + "count": 77 + }, + "OT,JOB": { + "count": 63 + }, + "OT,2KI": { + "count": 46 + }, + "NT,JAS": { + "count": 6 + }, + "OT,1KI": { + "count": 60 + }, + "OT,JER": { + "count": 83 + }, + "OT,EZK": { + "count": 81 + }, + "OT,NEH": { + "count": 30 + }, + "OT,1CH": { + "count": 53 + }, + "OT,ISA": { + "count": 80 + }, + "OT,NUM": { + "count": 78 + }, + "OT,LEV": { + "count": 62 + }, + "OT,1SA": { + "count": 52 + }, + "OT,GEN": { + "count": 99 + }, + "NT,JDG": { + "count": 42 + }, + "NT,HEB": { + "count": 22 + }, + "NT,2CO": { + "count": 13 + }, + "OT,EST": { + "count": 22 + }, + "OT,RUT": { + "count": 8 + }, + "NT,REV": { + "count": 18 + }, + "NT,1TI": { + "count": 7 + }, + "OT,EZR": { + "count": 24 + }, + "NT,2PE": { + "count": 4 + }, + "OT,ECC": { + "count": 16 + }, + "OT,HOS": { + "count": 16 + }, + "NT,1TH": { + "count": 2 + }, + "NT,1CO": { + "count": 35 + }, + "NT,1PE": { + "count": 5 + }, + "NT,2JN": { + "count": 1 + }, + "OT,LAM": { + "count": 14 + }, + "OT,JOL": { + "count": 8 + }, + "OT,2SA": { + "count": 38 + }, + "NT,EPH": { + "count": 11 + }, + "NT,1JN": { + "count": 9 + }, + "OT,ZEC": { + "count": 15 + }, + "NT,GAL": { + "count": 12 + }, + "NT,COL": { + "count": 3 + }, + "OT,MIC": { + "count": 8 + }, + "NT,3JN": { + "count": 1 + }, + "OT,HAG": { + "count": 4 + }, + "OT,AMO": { + "count": 7 + }, + "NT,2TI": { + "count": 4 + }, + "OT,OBA": { + 
"count": 4 + }, + "OT,NAM": { + "count": 1 + }, + "OT,HAB": { + "count": 3 + }, + "NT,PHP": { + "count": 6 + }, + "NT,2TH": { + "count": 2 + }, + "OT,ZEP": { + "count": 2 + }, + "NT,TIT": { + "count": 2 + }, + "OT,JUD": { + "count": 4 + }, + "OT,MAL": { + "count": 1 + }, + "NT,PHM": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/RuSciBenchOECDClusteringP2P.json b/mteb/descriptive_stats/Clustering/RuSciBenchOECDClusteringP2P.json new file mode 100644 index 0000000000..ef36dc31cd --- /dev/null +++ b/mteb/descriptive_stats/Clustering/RuSciBenchOECDClusteringP2P.json @@ -0,0 +1,103 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 1725558, + "min_text_length": 83, + "average_text_length": 842.5576171875, + "max_text_length": 4538, + "unique_texts": 1142, + "min_labels_per_text": 70, + "average_labels_per_text": 1.0, + "max_labels_per_text": 71, + "unique_labels": 29, + "labels": { + "13": { + "count": 70 + }, + "25": { + "count": 71 + }, + "8": { + "count": 71 + }, + "12": { + "count": 71 + }, + "6": { + "count": 71 + }, + "4": { + "count": 71 + }, + "26": { + "count": 71 + }, + "2": { + "count": 70 + }, + "0": { + "count": 71 + }, + "24": { + "count": 71 + }, + "17": { + "count": 71 + }, + "7": { + "count": 71 + }, + "14": { + "count": 71 + }, + "9": { + "count": 70 + }, + "10": { + "count": 71 + }, + "18": { + "count": 70 + }, + "21": { + "count": 71 + }, + "27": { + "count": 70 + }, + "19": { + "count": 71 + }, + "28": { + "count": 70 + }, + "1": { + "count": 70 + }, + "23": { + "count": 71 + }, + "22": { + "count": 70 + }, + "3": { + "count": 71 + }, + "16": { + "count": 71 + }, + "15": { + "count": 70 + }, + "11": { + "count": 71 + }, + "20": { + "count": 70 + }, + "5": { + "count": 70 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/SIB200ClusteringS2S.json b/mteb/descriptive_stats/Clustering/SIB200ClusteringS2S.json new file mode 100644 index 
0000000000..095eb5a46e --- /dev/null +++ b/mteb/descriptive_stats/Clustering/SIB200ClusteringS2S.json @@ -0,0 +1,6934 @@ +{ + "test": { + "num_samples": 197788, + "number_of_characters": 26633239, + "min_text_length": 10, + "average_text_length": 134.6554846603434, + "max_text_length": 597, + "unique_texts": 448, + "min_labels_per_text": 16351, + "average_labels_per_text": 1.0, + "max_labels_per_text": 49644, + "unique_labels": 7, + "labels": { + "1": { + "count": 16351 + }, + "4": { + "count": 49644 + }, + "0": { + "count": 18321 + }, + "3": { + "count": 28762 + }, + "2": { + "count": 21670 + }, + "6": { + "count": 39006 + }, + "5": { + "count": 24034 + } + }, + "hf_subset_descriptive_stats": { + "ace_Latn": { + "num_samples": 1004, + "number_of_characters": 139165, + "min_text_length": 38, + "average_text_length": 138.6105577689243, + "max_text_length": 355, + "unique_texts": 213, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "acm_Arab": { + "num_samples": 1004, + "number_of_characters": 113703, + "min_text_length": 32, + "average_text_length": 113.25, + "max_text_length": 303, + "unique_texts": 182, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "acq_Arab": { + "num_samples": 1004, + "number_of_characters": 114738, + "min_text_length": 36, + "average_text_length": 114.2808764940239, + "max_text_length": 318, + "unique_texts": 176, + "min_labels_per_text": 83, + 
"average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "aeb_Arab": { + "num_samples": 1004, + "number_of_characters": 111876, + "min_text_length": 30, + "average_text_length": 111.43027888446215, + "max_text_length": 305, + "unique_texts": 181, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "afr_Latn": { + "num_samples": 1004, + "number_of_characters": 137392, + "min_text_length": 39, + "average_text_length": 136.84462151394422, + "max_text_length": 385, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ajp_Arab": { + "num_samples": 1004, + "number_of_characters": 108312, + "min_text_length": 28, + "average_text_length": 107.88047808764941, + "max_text_length": 310, + "unique_texts": 171, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "aka_Latn": { + "num_samples": 1004, + "number_of_characters": 131381, + "min_text_length": 32, + 
"average_text_length": 130.85756972111554, + "max_text_length": 350, + "unique_texts": 197, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "als_Latn": { + "num_samples": 1004, + "number_of_characters": 146133, + "min_text_length": 36, + "average_text_length": 145.550796812749, + "max_text_length": 399, + "unique_texts": 217, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "amh_Ethi": { + "num_samples": 1004, + "number_of_characters": 87489, + "min_text_length": 21, + "average_text_length": 87.14043824701196, + "max_text_length": 219, + "unique_texts": 141, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "apc_Arab": { + "num_samples": 1004, + "number_of_characters": 107568, + "min_text_length": 24, + "average_text_length": 107.13944223107569, + "max_text_length": 306, + "unique_texts": 170, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 
122 + } + } + }, + "arb_Latn": { + "num_samples": 1004, + "number_of_characters": 151981, + "min_text_length": 42, + "average_text_length": 151.37549800796813, + "max_text_length": 420, + "unique_texts": 227, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ars_Arab": { + "num_samples": 1004, + "number_of_characters": 116201, + "min_text_length": 36, + "average_text_length": 115.73804780876495, + "max_text_length": 316, + "unique_texts": 184, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ary_Arab": { + "num_samples": 1004, + "number_of_characters": 113094, + "min_text_length": 20, + "average_text_length": 112.64342629482071, + "max_text_length": 315, + "unique_texts": 183, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "arz_Arab": { + "num_samples": 1004, + "number_of_characters": 113268, + "min_text_length": 34, + "average_text_length": 112.81673306772909, + "max_text_length": 322, + "unique_texts": 177, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + 
"count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "asm_Beng": { + "num_samples": 1004, + "number_of_characters": 124409, + "min_text_length": 30, + "average_text_length": 123.91334661354581, + "max_text_length": 329, + "unique_texts": 191, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ast_Latn": { + "num_samples": 1004, + "number_of_characters": 133493, + "min_text_length": 37, + "average_text_length": 132.96115537848607, + "max_text_length": 388, + "unique_texts": 200, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "awa_Deva": { + "num_samples": 1004, + "number_of_characters": 126547, + "min_text_length": 34, + "average_text_length": 126.04282868525897, + "max_text_length": 378, + "unique_texts": 198, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ayr_Latn": { + "num_samples": 1004, + "number_of_characters": 135974, + "min_text_length": 38, + "average_text_length": 135.43227091633466, + "max_text_length": 383, + "unique_texts": 216, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + 
"max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "azb_Arab": { + "num_samples": 1004, + "number_of_characters": 116621, + "min_text_length": 29, + "average_text_length": 116.15637450199203, + "max_text_length": 327, + "unique_texts": 187, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "azj_Latn": { + "num_samples": 1004, + "number_of_characters": 142866, + "min_text_length": 43, + "average_text_length": 142.29681274900398, + "max_text_length": 383, + "unique_texts": 214, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "bak_Cyrl": { + "num_samples": 1004, + "number_of_characters": 132097, + "min_text_length": 28, + "average_text_length": 131.5707171314741, + "max_text_length": 389, + "unique_texts": 205, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "bam_Latn": { + "num_samples": 1004, + "number_of_characters": 125165, + "min_text_length": 26, + "average_text_length": 
124.66633466135458, + "max_text_length": 372, + "unique_texts": 187, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ban_Latn": { + "num_samples": 1004, + "number_of_characters": 144371, + "min_text_length": 35, + "average_text_length": 143.79581673306774, + "max_text_length": 365, + "unique_texts": 212, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "bel_Cyrl": { + "num_samples": 1004, + "number_of_characters": 148056, + "min_text_length": 33, + "average_text_length": 147.46613545816734, + "max_text_length": 429, + "unique_texts": 228, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "bem_Latn": { + "num_samples": 1004, + "number_of_characters": 159488, + "min_text_length": 38, + "average_text_length": 158.85258964143426, + "max_text_length": 422, + "unique_texts": 239, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + 
"ben_Beng": { + "num_samples": 1004, + "number_of_characters": 127231, + "min_text_length": 30, + "average_text_length": 126.72410358565737, + "max_text_length": 333, + "unique_texts": 199, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "bho_Deva": { + "num_samples": 1004, + "number_of_characters": 126521, + "min_text_length": 39, + "average_text_length": 126.01693227091633, + "max_text_length": 352, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "bjn_Latn": { + "num_samples": 1004, + "number_of_characters": 135888, + "min_text_length": 29, + "average_text_length": 135.34661354581672, + "max_text_length": 368, + "unique_texts": 198, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "bod_Tibt": { + "num_samples": 1004, + "number_of_characters": 146695, + "min_text_length": 42, + "average_text_length": 146.1105577689243, + "max_text_length": 431, + "unique_texts": 220, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": 
{ + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "bos_Latn": { + "num_samples": 1004, + "number_of_characters": 131244, + "min_text_length": 36, + "average_text_length": 130.72111553784862, + "max_text_length": 345, + "unique_texts": 199, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "bug_Latn": { + "num_samples": 1004, + "number_of_characters": 139343, + "min_text_length": 22, + "average_text_length": 138.7878486055777, + "max_text_length": 348, + "unique_texts": 212, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "bul_Cyrl": { + "num_samples": 1004, + "number_of_characters": 136284, + "min_text_length": 34, + "average_text_length": 135.74103585657372, + "max_text_length": 369, + "unique_texts": 210, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "cat_Latn": { + "num_samples": 1004, + "number_of_characters": 143152, + "min_text_length": 36, + "average_text_length": 142.5816733067729, + "max_text_length": 388, + "unique_texts": 213, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + 
"unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ceb_Latn": { + "num_samples": 1004, + "number_of_characters": 156145, + "min_text_length": 48, + "average_text_length": 155.52290836653387, + "max_text_length": 427, + "unique_texts": 227, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ces_Latn": { + "num_samples": 1004, + "number_of_characters": 126349, + "min_text_length": 30, + "average_text_length": 125.84561752988049, + "max_text_length": 362, + "unique_texts": 192, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "cjk_Latn": { + "num_samples": 1004, + "number_of_characters": 138946, + "min_text_length": 31, + "average_text_length": 138.39243027888446, + "max_text_length": 366, + "unique_texts": 211, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ckb_Arab": { + "num_samples": 1004, + "number_of_characters": 126365, + "min_text_length": 39, + "average_text_length": 125.86155378486056, + "max_text_length": 
353, + "unique_texts": 194, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "crh_Latn": { + "num_samples": 1004, + "number_of_characters": 132712, + "min_text_length": 27, + "average_text_length": 132.18326693227093, + "max_text_length": 382, + "unique_texts": 204, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "cym_Latn": { + "num_samples": 1004, + "number_of_characters": 138205, + "min_text_length": 38, + "average_text_length": 137.65438247011951, + "max_text_length": 360, + "unique_texts": 198, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "dan_Latn": { + "num_samples": 1004, + "number_of_characters": 133329, + "min_text_length": 32, + "average_text_length": 132.79780876494024, + "max_text_length": 369, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "deu_Latn": { + "num_samples": 1004, + 
"number_of_characters": 151478, + "min_text_length": 42, + "average_text_length": 150.87450199203187, + "max_text_length": 408, + "unique_texts": 230, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "dik_Latn": { + "num_samples": 1004, + "number_of_characters": 112414, + "min_text_length": 33, + "average_text_length": 111.96613545816733, + "max_text_length": 585, + "unique_texts": 190, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "dyu_Latn": { + "num_samples": 1004, + "number_of_characters": 132169, + "min_text_length": 32, + "average_text_length": 131.64243027888446, + "max_text_length": 342, + "unique_texts": 207, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "dzo_Tibt": { + "num_samples": 1004, + "number_of_characters": 162452, + "min_text_length": 55, + "average_text_length": 161.80478087649402, + "max_text_length": 435, + "unique_texts": 237, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + 
"count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ell_Grek": { + "num_samples": 1004, + "number_of_characters": 155580, + "min_text_length": 39, + "average_text_length": 154.9601593625498, + "max_text_length": 464, + "unique_texts": 233, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "eng_Latn": { + "num_samples": 1004, + "number_of_characters": 129477, + "min_text_length": 33, + "average_text_length": 128.96115537848607, + "max_text_length": 368, + "unique_texts": 192, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "epo_Latn": { + "num_samples": 1004, + "number_of_characters": 129867, + "min_text_length": 37, + "average_text_length": 129.3496015936255, + "max_text_length": 353, + "unique_texts": 198, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "est_Latn": { + "num_samples": 1004, + "number_of_characters": 127786, + "min_text_length": 36, + "average_text_length": 127.27689243027888, + "max_text_length": 356, + "unique_texts": 200, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": 
{ + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "eus_Latn": { + "num_samples": 1004, + "number_of_characters": 138876, + "min_text_length": 39, + "average_text_length": 138.32270916334662, + "max_text_length": 421, + "unique_texts": 210, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ewe_Latn": { + "num_samples": 1004, + "number_of_characters": 127425, + "min_text_length": 29, + "average_text_length": 126.91733067729083, + "max_text_length": 351, + "unique_texts": 214, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "fao_Latn": { + "num_samples": 1004, + "number_of_characters": 132637, + "min_text_length": 37, + "average_text_length": 132.1085657370518, + "max_text_length": 361, + "unique_texts": 203, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "fij_Latn": { + "num_samples": 1004, + "number_of_characters": 153469, + "min_text_length": 39, + "average_text_length": 152.85756972111554, + "max_text_length": 522, + "unique_texts": 236, + 
"min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "fin_Latn": { + "num_samples": 1004, + "number_of_characters": 138792, + "min_text_length": 39, + "average_text_length": 138.23904382470118, + "max_text_length": 378, + "unique_texts": 213, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "fon_Latn": { + "num_samples": 1004, + "number_of_characters": 134742, + "min_text_length": 38, + "average_text_length": 134.20517928286853, + "max_text_length": 481, + "unique_texts": 219, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "fra_Latn": { + "num_samples": 1004, + "number_of_characters": 155197, + "min_text_length": 38, + "average_text_length": 154.57868525896416, + "max_text_length": 415, + "unique_texts": 230, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "fur_Latn": { + "num_samples": 1004, + "number_of_characters": 142588, 
+ "min_text_length": 40, + "average_text_length": 142.0199203187251, + "max_text_length": 399, + "unique_texts": 209, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "fuv_Latn": { + "num_samples": 1004, + "number_of_characters": 121764, + "min_text_length": 27, + "average_text_length": 121.2788844621514, + "max_text_length": 308, + "unique_texts": 189, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "gaz_Latn": { + "num_samples": 1004, + "number_of_characters": 154104, + "min_text_length": 42, + "average_text_length": 153.49003984063745, + "max_text_length": 456, + "unique_texts": 224, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "gla_Latn": { + "num_samples": 1004, + "number_of_characters": 161602, + "min_text_length": 34, + "average_text_length": 160.9581673306773, + "max_text_length": 478, + "unique_texts": 233, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 
198 + }, + "5": { + "count": 122 + } + } + }, + "gle_Latn": { + "num_samples": 1004, + "number_of_characters": 150251, + "min_text_length": 33, + "average_text_length": 149.652390438247, + "max_text_length": 435, + "unique_texts": 219, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "glg_Latn": { + "num_samples": 1004, + "number_of_characters": 143707, + "min_text_length": 30, + "average_text_length": 143.13446215139442, + "max_text_length": 368, + "unique_texts": 214, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "grn_Latn": { + "num_samples": 1004, + "number_of_characters": 131086, + "min_text_length": 41, + "average_text_length": 130.56374501992033, + "max_text_length": 331, + "unique_texts": 203, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "guj_Gujr": { + "num_samples": 1004, + "number_of_characters": 124197, + "min_text_length": 35, + "average_text_length": 123.70219123505976, + "max_text_length": 321, + "unique_texts": 192, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + 
"count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "hat_Latn": { + "num_samples": 1004, + "number_of_characters": 119784, + "min_text_length": 31, + "average_text_length": 119.30677290836654, + "max_text_length": 337, + "unique_texts": 180, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "hau_Latn": { + "num_samples": 1004, + "number_of_characters": 139489, + "min_text_length": 39, + "average_text_length": 138.93326693227093, + "max_text_length": 372, + "unique_texts": 212, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "heb_Hebr": { + "num_samples": 1004, + "number_of_characters": 101508, + "min_text_length": 20, + "average_text_length": 101.10358565737052, + "max_text_length": 281, + "unique_texts": 165, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "hin_Deva": { + "num_samples": 1004, + "number_of_characters": 129747, + "min_text_length": 38, + "average_text_length": 129.2300796812749, + "max_text_length": 381, + "unique_texts": 197, + "min_labels_per_text": 83, + 
"average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "hne_Deva": { + "num_samples": 1004, + "number_of_characters": 125269, + "min_text_length": 28, + "average_text_length": 124.7699203187251, + "max_text_length": 326, + "unique_texts": 195, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "hrv_Latn": { + "num_samples": 1004, + "number_of_characters": 128627, + "min_text_length": 31, + "average_text_length": 128.11454183266932, + "max_text_length": 346, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "hun_Latn": { + "num_samples": 1004, + "number_of_characters": 136920, + "min_text_length": 42, + "average_text_length": 136.37450199203187, + "max_text_length": 387, + "unique_texts": 214, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "hye_Armn": { + "num_samples": 1004, + "number_of_characters": 145651, + "min_text_length": 29, + 
"average_text_length": 145.0707171314741, + "max_text_length": 386, + "unique_texts": 231, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ibo_Latn": { + "num_samples": 1004, + "number_of_characters": 132658, + "min_text_length": 29, + "average_text_length": 132.12948207171314, + "max_text_length": 356, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ilo_Latn": { + "num_samples": 1004, + "number_of_characters": 158334, + "min_text_length": 44, + "average_text_length": 157.70318725099602, + "max_text_length": 432, + "unique_texts": 223, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ind_Latn": { + "num_samples": 1004, + "number_of_characters": 140851, + "min_text_length": 41, + "average_text_length": 140.2898406374502, + "max_text_length": 353, + "unique_texts": 211, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + 
"count": 122 + } + } + }, + "isl_Latn": { + "num_samples": 1004, + "number_of_characters": 128628, + "min_text_length": 30, + "average_text_length": 128.11553784860558, + "max_text_length": 359, + "unique_texts": 196, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ita_Latn": { + "num_samples": 1004, + "number_of_characters": 153850, + "min_text_length": 39, + "average_text_length": 153.23705179282868, + "max_text_length": 398, + "unique_texts": 225, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "jav_Latn": { + "num_samples": 1004, + "number_of_characters": 135094, + "min_text_length": 40, + "average_text_length": 134.5557768924303, + "max_text_length": 358, + "unique_texts": 201, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "jpn_Jpan": { + "num_samples": 1004, + "number_of_characters": 56933, + "min_text_length": 18, + "average_text_length": 56.70617529880478, + "max_text_length": 139, + "unique_texts": 103, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { 
+ "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kab_Latn": { + "num_samples": 1004, + "number_of_characters": 129233, + "min_text_length": 31, + "average_text_length": 128.71812749003985, + "max_text_length": 358, + "unique_texts": 194, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kac_Latn": { + "num_samples": 1004, + "number_of_characters": 166799, + "min_text_length": 56, + "average_text_length": 166.13446215139442, + "max_text_length": 513, + "unique_texts": 251, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kam_Latn": { + "num_samples": 1004, + "number_of_characters": 127324, + "min_text_length": 15, + "average_text_length": 126.81673306772909, + "max_text_length": 398, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kan_Knda": { + "num_samples": 1004, + "number_of_characters": 136498, + "min_text_length": 39, + "average_text_length": 135.95418326693226, + "max_text_length": 380, + "unique_texts": 205, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + 
"max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kas_Deva": { + "num_samples": 1004, + "number_of_characters": 125084, + "min_text_length": 32, + "average_text_length": 124.58565737051792, + "max_text_length": 452, + "unique_texts": 195, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kat_Geor": { + "num_samples": 1004, + "number_of_characters": 143701, + "min_text_length": 44, + "average_text_length": 143.1284860557769, + "max_text_length": 369, + "unique_texts": 223, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kaz_Cyrl": { + "num_samples": 1004, + "number_of_characters": 133882, + "min_text_length": 27, + "average_text_length": 133.34860557768926, + "max_text_length": 388, + "unique_texts": 205, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kbp_Latn": { + "num_samples": 1004, + "number_of_characters": 142001, + "min_text_length": 33, + "average_text_length": 
141.43525896414343, + "max_text_length": 377, + "unique_texts": 225, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kea_Latn": { + "num_samples": 1004, + "number_of_characters": 129189, + "min_text_length": 32, + "average_text_length": 128.6743027888446, + "max_text_length": 360, + "unique_texts": 191, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "khk_Cyrl": { + "num_samples": 1004, + "number_of_characters": 136642, + "min_text_length": 33, + "average_text_length": 136.097609561753, + "max_text_length": 355, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "khm_Khmr": { + "num_samples": 1004, + "number_of_characters": 154565, + "min_text_length": 49, + "average_text_length": 153.949203187251, + "max_text_length": 507, + "unique_texts": 234, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + 
"kik_Latn": { + "num_samples": 1004, + "number_of_characters": 152171, + "min_text_length": 34, + "average_text_length": 151.56474103585657, + "max_text_length": 515, + "unique_texts": 233, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kin_Latn": { + "num_samples": 1004, + "number_of_characters": 145536, + "min_text_length": 42, + "average_text_length": 144.9561752988048, + "max_text_length": 440, + "unique_texts": 219, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kir_Cyrl": { + "num_samples": 1004, + "number_of_characters": 133860, + "min_text_length": 33, + "average_text_length": 133.32669322709162, + "max_text_length": 401, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kmb_Latn": { + "num_samples": 1004, + "number_of_characters": 144640, + "min_text_length": 27, + "average_text_length": 144.06374501992033, + "max_text_length": 426, + "unique_texts": 222, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": 
{ + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kmr_Latn": { + "num_samples": 1004, + "number_of_characters": 130295, + "min_text_length": 31, + "average_text_length": 129.77589641434264, + "max_text_length": 359, + "unique_texts": 193, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "knc_Latn": { + "num_samples": 1004, + "number_of_characters": 137979, + "min_text_length": 38, + "average_text_length": 137.4292828685259, + "max_text_length": 427, + "unique_texts": 217, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kon_Latn": { + "num_samples": 1004, + "number_of_characters": 147392, + "min_text_length": 27, + "average_text_length": 146.80478087649402, + "max_text_length": 410, + "unique_texts": 214, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "kor_Hang": { + "num_samples": 1004, + "number_of_characters": 65215, + "min_text_length": 22, + "average_text_length": 64.95517928286853, + "max_text_length": 177, + "unique_texts": 112, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + 
"unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "lao_Laoo": { + "num_samples": 1004, + "number_of_characters": 128902, + "min_text_length": 42, + "average_text_length": 128.38844621513945, + "max_text_length": 322, + "unique_texts": 201, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "lij_Latn": { + "num_samples": 1004, + "number_of_characters": 143428, + "min_text_length": 45, + "average_text_length": 142.85657370517927, + "max_text_length": 378, + "unique_texts": 210, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "lim_Latn": { + "num_samples": 1004, + "number_of_characters": 134352, + "min_text_length": 37, + "average_text_length": 133.81673306772907, + "max_text_length": 416, + "unique_texts": 197, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "lin_Latn": { + "num_samples": 1004, + "number_of_characters": 140282, + "min_text_length": 32, + "average_text_length": 139.72310756972112, + "max_text_length": 
379, + "unique_texts": 208, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "lit_Latn": { + "num_samples": 1004, + "number_of_characters": 130796, + "min_text_length": 38, + "average_text_length": 130.27490039840637, + "max_text_length": 357, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "lmo_Latn": { + "num_samples": 1004, + "number_of_characters": 139859, + "min_text_length": 45, + "average_text_length": 139.30179282868525, + "max_text_length": 379, + "unique_texts": 210, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ltg_Latn": { + "num_samples": 1004, + "number_of_characters": 128683, + "min_text_length": 25, + "average_text_length": 128.1703187250996, + "max_text_length": 348, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ltz_Latn": { + "num_samples": 1004, + 
"number_of_characters": 145216, + "min_text_length": 42, + "average_text_length": 144.6374501992032, + "max_text_length": 404, + "unique_texts": 211, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "lua_Latn": { + "num_samples": 1004, + "number_of_characters": 139443, + "min_text_length": 30, + "average_text_length": 138.8874501992032, + "max_text_length": 407, + "unique_texts": 218, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "lug_Latn": { + "num_samples": 1004, + "number_of_characters": 133246, + "min_text_length": 31, + "average_text_length": 132.71513944223108, + "max_text_length": 396, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "luo_Latn": { + "num_samples": 1004, + "number_of_characters": 135173, + "min_text_length": 39, + "average_text_length": 134.63446215139442, + "max_text_length": 372, + "unique_texts": 200, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + 
"count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "lus_Latn": { + "num_samples": 1004, + "number_of_characters": 141519, + "min_text_length": 35, + "average_text_length": 140.95517928286853, + "max_text_length": 418, + "unique_texts": 214, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "lvs_Latn": { + "num_samples": 1004, + "number_of_characters": 132942, + "min_text_length": 26, + "average_text_length": 132.41235059760956, + "max_text_length": 380, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "mag_Deva": { + "num_samples": 1004, + "number_of_characters": 124854, + "min_text_length": 36, + "average_text_length": 124.35657370517929, + "max_text_length": 343, + "unique_texts": 191, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "mai_Deva": { + "num_samples": 1004, + "number_of_characters": 127369, + "min_text_length": 36, + "average_text_length": 126.86155378486056, + "max_text_length": 350, + "unique_texts": 195, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + 
"1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "mal_Mlym": { + "num_samples": 1004, + "number_of_characters": 147195, + "min_text_length": 31, + "average_text_length": 146.6085657370518, + "max_text_length": 367, + "unique_texts": 216, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "mar_Deva": { + "num_samples": 1004, + "number_of_characters": 130718, + "min_text_length": 42, + "average_text_length": 130.19721115537848, + "max_text_length": 355, + "unique_texts": 197, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "min_Latn": { + "num_samples": 1004, + "number_of_characters": 139020, + "min_text_length": 37, + "average_text_length": 138.46613545816734, + "max_text_length": 360, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "mkd_Cyrl": { + "num_samples": 1004, + "number_of_characters": 135860, + "min_text_length": 37, + "average_text_length": 135.3187250996016, + "max_text_length": 364, + "unique_texts": 204, + 
"min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "mlt_Latn": { + "num_samples": 1004, + "number_of_characters": 144094, + "min_text_length": 28, + "average_text_length": 143.5199203187251, + "max_text_length": 400, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "mni_Beng": { + "num_samples": 1004, + "number_of_characters": 134128, + "min_text_length": 38, + "average_text_length": 133.59362549800798, + "max_text_length": 354, + "unique_texts": 207, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "mos_Latn": { + "num_samples": 1004, + "number_of_characters": 124149, + "min_text_length": 30, + "average_text_length": 123.65438247011951, + "max_text_length": 342, + "unique_texts": 194, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "mri_Latn": { + "num_samples": 1004, + "number_of_characters": 144428, + 
"min_text_length": 33, + "average_text_length": 143.85258964143426, + "max_text_length": 392, + "unique_texts": 212, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "mya_Mymr": { + "num_samples": 1004, + "number_of_characters": 161054, + "min_text_length": 44, + "average_text_length": 160.41235059760956, + "max_text_length": 426, + "unique_texts": 237, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "nld_Latn": { + "num_samples": 1004, + "number_of_characters": 144601, + "min_text_length": 36, + "average_text_length": 144.02490039840637, + "max_text_length": 384, + "unique_texts": 216, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "nno_Latn": { + "num_samples": 1004, + "number_of_characters": 131068, + "min_text_length": 30, + "average_text_length": 130.54581673306774, + "max_text_length": 359, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 
198 + }, + "5": { + "count": 122 + } + } + }, + "nob_Latn": { + "num_samples": 1004, + "number_of_characters": 130573, + "min_text_length": 32, + "average_text_length": 130.05278884462152, + "max_text_length": 351, + "unique_texts": 203, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "npi_Deva": { + "num_samples": 1004, + "number_of_characters": 124856, + "min_text_length": 29, + "average_text_length": 124.35856573705179, + "max_text_length": 362, + "unique_texts": 187, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "nqo_Nkoo": { + "num_samples": 1004, + "number_of_characters": 166369, + "min_text_length": 38, + "average_text_length": 165.7061752988048, + "max_text_length": 479, + "unique_texts": 243, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "nso_Latn": { + "num_samples": 1004, + "number_of_characters": 149246, + "min_text_length": 38, + "average_text_length": 148.65139442231074, + "max_text_length": 423, + "unique_texts": 227, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + 
"count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "nus_Latn": { + "num_samples": 1004, + "number_of_characters": 138908, + "min_text_length": 28, + "average_text_length": 138.35458167330677, + "max_text_length": 490, + "unique_texts": 215, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "nya_Latn": { + "num_samples": 1004, + "number_of_characters": 146065, + "min_text_length": 29, + "average_text_length": 145.48306772908367, + "max_text_length": 421, + "unique_texts": 213, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "oci_Latn": { + "num_samples": 1004, + "number_of_characters": 147980, + "min_text_length": 38, + "average_text_length": 147.39043824701196, + "max_text_length": 408, + "unique_texts": 215, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ory_Orya": { + "num_samples": 1004, + "number_of_characters": 132647, + "min_text_length": 38, + "average_text_length": 132.11852589641435, + "max_text_length": 354, + "unique_texts": 204, + "min_labels_per_text": 83, + 
"average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "pag_Latn": { + "num_samples": 1004, + "number_of_characters": 130497, + "min_text_length": 33, + "average_text_length": 129.97709163346613, + "max_text_length": 347, + "unique_texts": 210, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "pan_Guru": { + "num_samples": 1004, + "number_of_characters": 131016, + "min_text_length": 33, + "average_text_length": 130.49402390438246, + "max_text_length": 380, + "unique_texts": 197, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "pap_Latn": { + "num_samples": 1004, + "number_of_characters": 136726, + "min_text_length": 39, + "average_text_length": 136.1812749003984, + "max_text_length": 388, + "unique_texts": 203, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "pbt_Arab": { + "num_samples": 1004, + "number_of_characters": 124529, + "min_text_length": 32, + 
"average_text_length": 124.03286852589642, + "max_text_length": 307, + "unique_texts": 191, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "pes_Arab": { + "num_samples": 1004, + "number_of_characters": 123528, + "min_text_length": 25, + "average_text_length": 123.03585657370517, + "max_text_length": 324, + "unique_texts": 189, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "plt_Latn": { + "num_samples": 1004, + "number_of_characters": 160300, + "min_text_length": 47, + "average_text_length": 159.6613545816733, + "max_text_length": 479, + "unique_texts": 234, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "pol_Latn": { + "num_samples": 1004, + "number_of_characters": 138511, + "min_text_length": 35, + "average_text_length": 137.95916334661354, + "max_text_length": 367, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + 
"count": 122 + } + } + }, + "por_Latn": { + "num_samples": 1004, + "number_of_characters": 141156, + "min_text_length": 38, + "average_text_length": 140.59362549800798, + "max_text_length": 391, + "unique_texts": 210, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "prs_Arab": { + "num_samples": 1004, + "number_of_characters": 119712, + "min_text_length": 30, + "average_text_length": 119.23505976095618, + "max_text_length": 297, + "unique_texts": 179, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "quy_Latn": { + "num_samples": 1004, + "number_of_characters": 139408, + "min_text_length": 35, + "average_text_length": 138.85258964143426, + "max_text_length": 434, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ron_Latn": { + "num_samples": 1004, + "number_of_characters": 146800, + "min_text_length": 34, + "average_text_length": 146.21513944223108, + "max_text_length": 397, + "unique_texts": 216, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + 
"0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "run_Latn": { + "num_samples": 1004, + "number_of_characters": 145909, + "min_text_length": 40, + "average_text_length": 145.3276892430279, + "max_text_length": 411, + "unique_texts": 219, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "rus_Cyrl": { + "num_samples": 1004, + "number_of_characters": 142650, + "min_text_length": 27, + "average_text_length": 142.0816733067729, + "max_text_length": 351, + "unique_texts": 217, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "sag_Latn": { + "num_samples": 1004, + "number_of_characters": 141259, + "min_text_length": 35, + "average_text_length": 140.69621513944224, + "max_text_length": 406, + "unique_texts": 226, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "san_Deva": { + "num_samples": 1004, + "number_of_characters": 128123, + "min_text_length": 33, + "average_text_length": 127.61254980079681, + "max_text_length": 323, + "unique_texts": 197, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + 
"max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "sat_Olck": { + "num_samples": 1004, + "number_of_characters": 137599, + "min_text_length": 37, + "average_text_length": 137.050796812749, + "max_text_length": 366, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "scn_Latn": { + "num_samples": 1004, + "number_of_characters": 137238, + "min_text_length": 36, + "average_text_length": 136.69123505976097, + "max_text_length": 359, + "unique_texts": 208, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "shn_Mymr": { + "num_samples": 1004, + "number_of_characters": 187147, + "min_text_length": 48, + "average_text_length": 186.40139442231074, + "max_text_length": 597, + "unique_texts": 266, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "sin_Sinh": { + "num_samples": 1004, + "number_of_characters": 130708, + "min_text_length": 36, + "average_text_length": 
130.18725099601593, + "max_text_length": 401, + "unique_texts": 197, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "slk_Latn": { + "num_samples": 1004, + "number_of_characters": 130453, + "min_text_length": 29, + "average_text_length": 129.93326693227093, + "max_text_length": 370, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "slv_Latn": { + "num_samples": 1004, + "number_of_characters": 130290, + "min_text_length": 41, + "average_text_length": 129.77091633466136, + "max_text_length": 361, + "unique_texts": 197, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "smo_Latn": { + "num_samples": 1004, + "number_of_characters": 151432, + "min_text_length": 37, + "average_text_length": 150.82868525896416, + "max_text_length": 412, + "unique_texts": 220, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + 
"sna_Latn": { + "num_samples": 1004, + "number_of_characters": 146555, + "min_text_length": 34, + "average_text_length": 145.97111553784862, + "max_text_length": 424, + "unique_texts": 222, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "snd_Arab": { + "num_samples": 1004, + "number_of_characters": 117608, + "min_text_length": 30, + "average_text_length": 117.13944223107569, + "max_text_length": 306, + "unique_texts": 184, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "som_Latn": { + "num_samples": 1004, + "number_of_characters": 148542, + "min_text_length": 29, + "average_text_length": 147.95019920318725, + "max_text_length": 414, + "unique_texts": 215, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "sot_Latn": { + "num_samples": 1004, + "number_of_characters": 157328, + "min_text_length": 35, + "average_text_length": 156.70119521912352, + "max_text_length": 461, + "unique_texts": 228, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + 
"3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "spa_Latn": { + "num_samples": 1004, + "number_of_characters": 154310, + "min_text_length": 37, + "average_text_length": 153.69521912350598, + "max_text_length": 397, + "unique_texts": 224, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "srd_Latn": { + "num_samples": 1004, + "number_of_characters": 150655, + "min_text_length": 39, + "average_text_length": 150.05478087649402, + "max_text_length": 418, + "unique_texts": 221, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "srp_Cyrl": { + "num_samples": 1004, + "number_of_characters": 129190, + "min_text_length": 34, + "average_text_length": 128.67529880478088, + "max_text_length": 350, + "unique_texts": 203, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ssw_Latn": { + "num_samples": 1004, + "number_of_characters": 146802, + "min_text_length": 29, + "average_text_length": 146.21713147410358, + "max_text_length": 394, + "unique_texts": 221, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + 
"unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "sun_Latn": { + "num_samples": 1004, + "number_of_characters": 135024, + "min_text_length": 41, + "average_text_length": 134.48605577689244, + "max_text_length": 354, + "unique_texts": 207, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "swe_Latn": { + "num_samples": 1004, + "number_of_characters": 130449, + "min_text_length": 35, + "average_text_length": 129.9292828685259, + "max_text_length": 382, + "unique_texts": 201, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "swh_Latn": { + "num_samples": 1004, + "number_of_characters": 136808, + "min_text_length": 37, + "average_text_length": 136.26294820717132, + "max_text_length": 384, + "unique_texts": 212, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "szl_Latn": { + "num_samples": 1004, + "number_of_characters": 135277, + "min_text_length": 32, + "average_text_length": 134.73804780876495, + "max_text_length": 
360, + "unique_texts": 210, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tam_Taml": { + "num_samples": 1004, + "number_of_characters": 151701, + "min_text_length": 34, + "average_text_length": 151.09661354581672, + "max_text_length": 404, + "unique_texts": 220, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "taq_Tfng": { + "num_samples": 1004, + "number_of_characters": 121170, + "min_text_length": 21, + "average_text_length": 120.68725099601593, + "max_text_length": 294, + "unique_texts": 194, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tat_Cyrl": { + "num_samples": 1004, + "number_of_characters": 131964, + "min_text_length": 30, + "average_text_length": 131.4382470119522, + "max_text_length": 329, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tel_Telu": { + "num_samples": 1004, + 
"number_of_characters": 132024, + "min_text_length": 39, + "average_text_length": 131.4980079681275, + "max_text_length": 359, + "unique_texts": 197, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tgk_Cyrl": { + "num_samples": 1004, + "number_of_characters": 144833, + "min_text_length": 30, + "average_text_length": 144.25597609561754, + "max_text_length": 345, + "unique_texts": 210, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tgl_Latn": { + "num_samples": 1004, + "number_of_characters": 164402, + "min_text_length": 46, + "average_text_length": 163.74701195219123, + "max_text_length": 435, + "unique_texts": 231, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tha_Thai": { + "num_samples": 1004, + "number_of_characters": 124817, + "min_text_length": 39, + "average_text_length": 124.31972111553785, + "max_text_length": 333, + "unique_texts": 198, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + 
"count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tir_Ethi": { + "num_samples": 1004, + "number_of_characters": 90356, + "min_text_length": 29, + "average_text_length": 89.99601593625498, + "max_text_length": 246, + "unique_texts": 150, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tpi_Latn": { + "num_samples": 1004, + "number_of_characters": 165333, + "min_text_length": 50, + "average_text_length": 164.6743027888446, + "max_text_length": 500, + "unique_texts": 234, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tsn_Latn": { + "num_samples": 1004, + "number_of_characters": 162131, + "min_text_length": 34, + "average_text_length": 161.48505976095618, + "max_text_length": 440, + "unique_texts": 235, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tso_Latn": { + "num_samples": 1004, + "number_of_characters": 156553, + "min_text_length": 39, + "average_text_length": 155.9292828685259, + "max_text_length": 429, + "unique_texts": 227, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { 
+ "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tuk_Latn": { + "num_samples": 1004, + "number_of_characters": 138667, + "min_text_length": 34, + "average_text_length": 138.11454183266932, + "max_text_length": 397, + "unique_texts": 218, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tum_Latn": { + "num_samples": 1004, + "number_of_characters": 171015, + "min_text_length": 38, + "average_text_length": 170.33366533864543, + "max_text_length": 542, + "unique_texts": 266, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tur_Latn": { + "num_samples": 1004, + "number_of_characters": 134149, + "min_text_length": 25, + "average_text_length": 133.61454183266932, + "max_text_length": 386, + "unique_texts": 203, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "twi_Latn": { + "num_samples": 1004, + "number_of_characters": 127091, + "min_text_length": 28, + "average_text_length": 126.58466135458167, + "max_text_length": 335, + "unique_texts": 195, + 
"min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "tzm_Tfng": { + "num_samples": 1004, + "number_of_characters": 116756, + "min_text_length": 29, + "average_text_length": 116.29083665338645, + "max_text_length": 330, + "unique_texts": 182, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "uig_Arab": { + "num_samples": 1004, + "number_of_characters": 139449, + "min_text_length": 27, + "average_text_length": 138.89342629482073, + "max_text_length": 354, + "unique_texts": 214, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ukr_Cyrl": { + "num_samples": 1004, + "number_of_characters": 133397, + "min_text_length": 37, + "average_text_length": 132.86553784860558, + "max_text_length": 343, + "unique_texts": 204, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "umb_Latn": { + "num_samples": 1004, + "number_of_characters": 131770, 
+ "min_text_length": 32, + "average_text_length": 131.24501992031873, + "max_text_length": 372, + "unique_texts": 206, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "urd_Arab": { + "num_samples": 1004, + "number_of_characters": 129038, + "min_text_length": 39, + "average_text_length": 128.5239043824701, + "max_text_length": 348, + "unique_texts": 198, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "uzn_Latn": { + "num_samples": 1004, + "number_of_characters": 147830, + "min_text_length": 38, + "average_text_length": 147.24103585657372, + "max_text_length": 381, + "unique_texts": 220, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "vec_Latn": { + "num_samples": 1004, + "number_of_characters": 130685, + "min_text_length": 34, + "average_text_length": 130.16434262948206, + "max_text_length": 374, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 
198 + }, + "5": { + "count": 122 + } + } + }, + "vie_Latn": { + "num_samples": 1004, + "number_of_characters": 136980, + "min_text_length": 39, + "average_text_length": 136.43426294820716, + "max_text_length": 357, + "unique_texts": 200, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "war_Latn": { + "num_samples": 1004, + "number_of_characters": 161845, + "min_text_length": 38, + "average_text_length": 161.20019920318725, + "max_text_length": 434, + "unique_texts": 231, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "wol_Latn": { + "num_samples": 1004, + "number_of_characters": 124873, + "min_text_length": 30, + "average_text_length": 124.37549800796813, + "max_text_length": 323, + "unique_texts": 187, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "xho_Latn": { + "num_samples": 1004, + "number_of_characters": 138583, + "min_text_length": 34, + "average_text_length": 138.03087649402391, + "max_text_length": 395, + "unique_texts": 204, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + 
"count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "ydd_Hebr": { + "num_samples": 1004, + "number_of_characters": 138957, + "min_text_length": 39, + "average_text_length": 138.40338645418328, + "max_text_length": 385, + "unique_texts": 204, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "yor_Latn": { + "num_samples": 1004, + "number_of_characters": 126067, + "min_text_length": 27, + "average_text_length": 125.56474103585657, + "max_text_length": 406, + "unique_texts": 202, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "yue_Hant": { + "num_samples": 1004, + "number_of_characters": 39659, + "min_text_length": 10, + "average_text_length": 39.50099601593625, + "max_text_length": 118, + "unique_texts": 84, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "zho_Hant": { + "num_samples": 1004, + "number_of_characters": 40610, + "min_text_length": 11, + "average_text_length": 40.44820717131474, + "max_text_length": 152, + "unique_texts": 86, + "min_labels_per_text": 83, + 
"average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "zsm_Latn": { + "num_samples": 1004, + "number_of_characters": 145060, + "min_text_length": 43, + "average_text_length": 144.4820717131474, + "max_text_length": 362, + "unique_texts": 207, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + }, + "zul_Latn": { + "num_samples": 1004, + "number_of_characters": 146106, + "min_text_length": 34, + "average_text_length": 145.5239043824701, + "max_text_length": 425, + "unique_texts": 218, + "min_labels_per_text": 83, + "average_labels_per_text": 1.0, + "max_labels_per_text": 252, + "unique_labels": 7, + "labels": { + "1": { + "count": 83 + }, + "4": { + "count": 252 + }, + "0": { + "count": 93 + }, + "3": { + "count": 146 + }, + "2": { + "count": 110 + }, + "6": { + "count": 198 + }, + "5": { + "count": 122 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/SNLHierarchicalClusteringP2P.json b/mteb/descriptive_stats/Clustering/SNLHierarchicalClusteringP2P.json new file mode 100644 index 0000000000..57df7108a0 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/SNLHierarchicalClusteringP2P.json @@ -0,0 +1,3754 @@ +{ + "test": { + "num_samples": 1300, + "number_of_characters": 2583029, + "min_text_length": 400, + "average_text_length": 1986.9453846153847, + "max_text_length": 68710, + "unique_texts": 968, + "min_labels_per_text": 1, + "average_labels_per_text": 4.136153846153846, + 
"max_labels_per_text": 201, + "unique_labels": 1246, + "labels": { + "Realfag": { + "count": 89 + }, + "Fysikk": { + "count": 32 + }, + "Varme- og stoffl\u00e6re": { + "count": 3 + }, + "Spr\u00e5k og litteratur": { + "count": 139 + }, + "Spr\u00e5kvitenskapens fagdisipliner": { + "count": 20 + }, + "Pragmatikk": { + "count": 1 + }, + "Onomastikk": { + "count": 14 + }, + "Personnavn": { + "count": 14 + }, + "Fornavn": { + "count": 14 + }, + "Guttenavn": { + "count": 9 + }, + "Kunst og estetikk": { + "count": 201 + }, + "Metall- og smykkekunst": { + "count": 1 + }, + "Biologi": { + "count": 92 + }, + "Vitenskapsgrener i biologien": { + "count": 24 + }, + "Botanikk": { + "count": 8 + }, + "Planteanatomi": { + "count": 5 + }, + "Plantenes vevsystemer": { + "count": 1 + }, + "Andre vev hos planter": { + "count": 1 + }, + "Teknologi og industri": { + "count": 97 + }, + "Transport": { + "count": 35 + }, + "Navigasjon": { + "count": 2 + }, + "Kjemi": { + "count": 28 + }, + "Polymerkjemi": { + "count": 3 + }, + "Sport og spill": { + "count": 28 + }, + "Ballsport": { + "count": 15 + }, + "Biljard": { + "count": 2 + }, + "S\u00f8m": { + "count": 8 + }, + " kl\u00e6r og mote": { + "count": 8 + }, + "Sko": { + "count": 1 + }, + "Veterin\u00e6rmedisin": { + "count": 2 + }, + "Fiskehelse": { + "count": 2 + }, + "Fotball": { + "count": 9 + }, + "Norsk fotball": { + "count": 4 + }, + "Musikk": { + "count": 69 + }, + "Klassisk musikk": { + "count": 25 + }, + "Den klassiske musikkens historie": { + "count": 12 + }, + "Klassisk musikk f\u00f8r 1600": { + "count": 1 + }, + "M\u00e5l og vekt": { + "count": 7 + }, + "Eldre m\u00e5leenheter": { + "count": 1 + }, + "Historie": { + "count": 192 + }, + "Kulturhistorie": { + "count": 6 + }, + "Folkelige skikker og tradisjoner": { + "count": 2 + }, + "Spill": { + "count": 3 + }, + "Brettspill": { + "count": 2 + }, + "Biokjemi": { + "count": 1 + }, + "Aminosyrer": { + "count": 1 + }, + "Geografi": { + "count": 178 + }, + "Norges geografi": { + 
"count": 45 + }, + "Agder": { + "count": 4 + }, + "Lindesnes": { + "count": 1 + }, + "Verdens geografi": { + "count": 128 + }, + "Afrika": { + "count": 26 + }, + "Kapp Verdes geografi": { + "count": 1 + }, + "Viken": { + "count": 9 + }, + "Halden": { + "count": 1 + }, + "Samfunn": { + "count": 97 + }, + "Samfunnsfag": { + "count": 26 + }, + "Statsvitenskap": { + "count": 7 + }, + "Valgforskning og sammenliknende politikk": { + "count": 1 + }, + "Politiske begreper": { + "count": 1 + }, + "Planteriket": { + "count": 24 + }, + "Dekkfr\u00f8ete planter": { + "count": 24 + }, + "Enfr\u00f8bladete planter": { + "count": 2 + }, + "Aspargesordenen": { + "count": 1 + }, + "Orkid\u00e9familien": { + "count": 1 + }, + "Marihandslekta": { + "count": 1 + }, + "Jus": { + "count": 34 + }, + "Formuerett": { + "count": 8 + }, + "Selskapsrett": { + "count": 5 + }, + "Telekommunikasjon og kringkasting": { + "count": 2 + }, + "Kommunikasjonsteknologi": { + "count": 2 + }, + "Oseania": { + "count": 6 + }, + "Australias geografi": { + "count": 2 + }, + "Jernbane og sporvei": { + "count": 1 + }, + "Jernbanebiografier": { + "count": 1 + }, + "Verdens historie og samtidshistorie": { + "count": 126 + }, + "Europa": { + "count": 192 + }, + "Den greske antikken": { + "count": 6 + }, + "Sj\u00f8fart og skipsindustri": { + "count": 16 + }, + "Fyrvesen og losvesen": { + "count": 5 + }, + "Fyrvesen": { + "count": 4 + }, + "Norske fyr": { + "count": 4 + }, + "Norsk og nordisk historie": { + "count": 26 + }, + "Norges historie": { + "count": 20 + }, + "Norges historie fra 1945 til 1970": { + "count": 1 + }, + "Nord- og Mellom-Amerika": { + "count": 24 + }, + "Honduras\u2019 geografi": { + "count": 1 + }, + "Historiske hjelpevitenskaper": { + "count": 18 + }, + "Slektsforskning og genealogi": { + "count": 17 + }, + "Utenlandske slekter": { + "count": 7 + }, + "Folkegrupper i Nord- og Mellom-Amerika": { + "count": 3 + }, + "Folkegrupper i Nord-Amerika": { + "count": 3 + }, + "Litteratur i verden": { 
+ "count": 88 + }, + "Nord- og Mellom-Amerikas litteratur": { + "count": 11 + }, + "USAs litteratur": { + "count": 7 + }, + "USAs litteratur p\u00e5 1600 og 1700-tallet": { + "count": 1 + }, + "Religion og filosofi": { + "count": 95 + }, + "Religioner og livssyn": { + "count": 61 + }, + "Kristendom": { + "count": 48 + }, + "Det gamle testamentet": { + "count": 2 + }, + "Personer i Det gamle testamentet": { + "count": 2 + }, + "Tofr\u00f8bladete planter": { + "count": 21 + }, + "Vierordenen": { + "count": 3 + }, + "Vierfamilien": { + "count": 1 + }, + "Vierslekta": { + "count": 1 + }, + "Afrikas litteratur": { + "count": 2 + }, + "Elfenbenskystens litteratur": { + "count": 1 + }, + "N\u00e6ringsmidler og husholdning": { + "count": 13 + }, + "Mat": { + "count": 7 + }, + "Ulike lands kj\u00f8kken": { + "count": 2 + }, + "S\u00f8r-Europas kj\u00f8kken": { + "count": 1 + }, + "Italiensk mat": { + "count": 1 + }, + "Klassisk musikk 1900\u20131950": { + "count": 3 + }, + "Norske klassiske komponister og verker 1900\u20131950": { + "count": 1 + }, + "Atferdsbiologi": { + "count": 1 + }, + "Skip og skipstyper": { + "count": 5 + }, + "Seilskip": { + "count": 2 + }, + "Demografi": { + "count": 2 + }, + "Filosofiens fagdisipliner": { + "count": 12 + }, + "Filosofisk logikk": { + "count": 1 + }, + "Historiske b\u00e5ttyper": { + "count": 2 + }, + "Vikingskip": { + "count": 1 + }, + "Bildende kunst": { + "count": 49 + }, + "Kunstvitenskap": { + "count": 5 + }, + "Termer og begreper i bildende kunst": { + "count": 4 + }, + "Rubladordenen": { + "count": 3 + }, + "Rubladfamilien": { + "count": 3 + }, + "\u00c5kersteinfr\u00f8slekta": { + "count": 1 + }, + "Familierett": { + "count": 3 + }, + "Oslo": { + "count": 1 + }, + "Atom- og kjernefysikk": { + "count": 3 + }, + "Teknologi": { + "count": 2 + }, + "Sjakk": { + "count": 1 + }, + "Frankrikes historie": { + "count": 11 + }, + "Frankrikes samtidshistorie": { + "count": 3 + }, + "Italias geografi": { + "count": 4 + }, + 
"Storbritannia og Nord-Irlands geografi": { + "count": 13 + }, + "Katolisisme": { + "count": 8 + }, + "Klostervesenet": { + "count": 4 + }, + "Verdens historie": { + "count": 5 + }, + "Den kalde krigen": { + "count": 2 + }, + "Cellens organeller og mikrostruktur": { + "count": 1 + }, + "Vekst og vekstanlegg": { + "count": 1 + }, + "Analytisk kjemi": { + "count": 2 + }, + "Spektroskopi": { + "count": 2 + }, + "Grammatikk": { + "count": 2 + }, + "Morfologi": { + "count": 2 + }, + "Finlands litteratur": { + "count": 4 + }, + "Utdanning og forskning": { + "count": 12 + }, + "Pedagogikk": { + "count": 3 + }, + "Biografier innen pedagogisk teori": { + "count": 2 + }, + "Frankrikes litteratur": { + "count": 14 + }, + "Frankrikes litteratur p\u00e5 1700-tallet": { + "count": 2 + }, + "Psykologi": { + "count": 4 + }, + "Personlighetspsykologi": { + "count": 3 + }, + "Musikkbransje": { + "count": 1 + }, + "Plateselskaper": { + "count": 1 + }, + "Romanias historie": { + "count": 3 + }, + "IT": { + "count": 4 + }, + "Internett": { + "count": 2 + }, + "Antigua og Barbudas geografi": { + "count": 1 + }, + "Norges historie fra 1660 til 1814": { + "count": 3 + }, + "Nordland": { + "count": 6 + }, + "Bindal": { + "count": 1 + }, + "Storbritannia og Nord-Irlands historie": { + "count": 7 + }, + "Italias litteratur": { + "count": 4 + }, + "Italias litteratur p\u00e5 1500-tallet": { + "count": 2 + }, + "Arbeid og velferd": { + "count": 4 + }, + "Arbeidsliv": { + "count": 2 + }, + "Arbeidslivsorganisasjoner": { + "count": 1 + }, + "Europas geografi generelt": { + "count": 2 + }, + "Internasjonale dirigenter": { + "count": 3 + }, + "Popul\u00e6rmusikk": { + "count": 20 + }, + "Pop og rock": { + "count": 11 + }, + "Internasjonal pop og rock": { + "count": 10 + }, + "Afrikas historie": { + "count": 2 + }, + "USAs historie": { + "count": 5 + }, + "USAs historie fra 1960": { + "count": 2 + }, + "Skipstyper": { + "count": 1 + }, + "De ortodokse kirker": { + "count": 2 + }, + "Sveriges 
litteratur": { + "count": 5 + }, + "Sveriges litteratur 1900-1960": { + "count": 2 + }, + "Avtalerett": { + "count": 2 + }, + "Jentenavn": { + "count": 5 + }, + "Energi": { + "count": 6 + }, + "Belysning": { + "count": 2 + }, + "Asia": { + "count": 44 + }, + "Indias geografi": { + "count": 3 + }, + "Br\u00f8nn\u00f8y": { + "count": 1 + }, + "Anatomi": { + "count": 3 + }, + "Insektenes anatomi og fysiologi": { + "count": 1 + }, + "Pakistans geografi": { + "count": 2 + }, + "Historiske religioner": { + "count": 8 + }, + "Gresk religion": { + "count": 2 + }, + "\u00d8konomi og n\u00e6ringsliv": { + "count": 25 + }, + "N\u00e6ringsliv": { + "count": 10 + }, + "Bedrifter og personer": { + "count": 9 + }, + "Bedrifter i internasjonal n\u00e6ringslivshistorie": { + "count": 2 + }, + "Politikk og offentlig forvaltning": { + "count": 12 + }, + "Norsk politikk og offentlig forvaltning": { + "count": 9 + }, + "Partier og politikere": { + "count": 7 + }, + "Norske politikere": { + "count": 7 + }, + "Tidligere politikere": { + "count": 5 + }, + "Belarus\u2019 geografi": { + "count": 1 + }, + "Matematikk": { + "count": 6 + }, + "Sannsynlighet og statistikk": { + "count": 2 + }, + "Statistikk": { + "count": 2 + }, + "Film": { + "count": 23 + }, + " TV og teater": { + "count": 23 + }, + "Film og TV": { + "count": 18 + }, + "Norske filmer og TV-serier": { + "count": 1 + }, + "Arkitektur og landskap": { + "count": 29 + }, + "Arkitekturhistorie": { + "count": 9 + }, + "Islamsk arkitekturhistorie": { + "count": 2 + }, + "Organisk kjemi": { + "count": 7 + }, + "Organiske forbindelser": { + "count": 7 + }, + "Historievitenskap": { + "count": 8 + }, + "Historikere": { + "count": 4 + }, + "Storbritannia og Nord-Irlands litteratur": { + "count": 8 + }, + "Storbritannia og Nord-Irlands litteratur p\u00e5 1900-tallet": { + "count": 3 + }, + "Folkegrupper i Afrika": { + "count": 3 + }, + "Spr\u00e5kvitenskapens grunnbegreper": { + "count": 1 + }, + "Luftfart": { + "count": 6 + }, + 
"Luftfart\u00f8y": { + "count": 1 + }, + "Sivilflytyper": { + "count": 1 + }, + "Passasjerfly": { + "count": 1 + }, + "Bygg og anlegg": { + "count": 6 + }, + "Betong": { + "count": 2 + }, + " sement og m\u00f8rtel": { + "count": 2 + }, + "S\u00f8r-Amerikas geografi": { + "count": 9 + }, + "Argentinas geografi": { + "count": 2 + }, + "Bosnia-Hercegovinas geografi": { + "count": 2 + }, + "Folkemusikk": { + "count": 1 + }, + "Kristendommens historie": { + "count": 6 + }, + "Tidlig kristendom": { + "count": 4 + }, + "Den norske kirke": { + "count": 5 + }, + "Buddhisme": { + "count": 2 + }, + "Nyreligi\u00f8sitet": { + "count": 2 + }, + "\u00d8stlig innflytelse": { + "count": 1 + }, + "Medier": { + "count": 17 + }, + "Forlagsdrift": { + "count": 1 + }, + "Amt og len i Norge": { + "count": 1 + }, + "Oster": { + "count": 3 + }, + "Tekstiler": { + "count": 4 + }, + "Flyteknikk": { + "count": 1 + }, + "Flygeteori": { + "count": 1 + }, + "Kirker": { + "count": 7 + }, + " slott og g\u00e5rder": { + "count": 7 + }, + "Slott og palasser": { + "count": 3 + }, + "Klassisk musikk i barokken": { + "count": 4 + }, + "Storbritannia og Nord-Irlands litteratur p\u00e5 1700-tallet": { + "count": 3 + }, + "Drikke": { + "count": 3 + }, + "Vin": { + "count": 2 + }, + "Vinterminologi": { + "count": 1 + }, + "Jazz": { + "count": 17 + }, + "Nord-Amerikanske jazzut\u00f8vere og -ensembler": { + "count": 10 + }, + " 1920 \u2013 1960": { + "count": 2 + }, + "Danmarks geografi": { + "count": 5 + }, + "Filippinenes geografi": { + "count": 1 + }, + "Sj\u00f8merker": { + "count": 1 + }, + "Elektrokjemi og metallurgi": { + "count": 3 + }, + "Legeringer": { + "count": 1 + }, + "St\u00e5l og andre jernlegeringer": { + "count": 1 + }, + "Penger": { + "count": 4 + }, + "Numismatikk": { + "count": 2 + }, + "Taiwans geografi": { + "count": 1 + }, + "Burkina Fasos geografi": { + "count": 3 + }, + "Blues": { + "count": 2 + }, + "Internasjonale bluesmusikere": { + "count": 2 + }, + "Journalistikk": { + 
"count": 2 + }, + "Journalistiske begreper": { + "count": 2 + }, + "Middelalderens arkitekturhistorie": { + "count": 4 + }, + "Frankrikes geografi": { + "count": 8 + }, + "Jazzens historie": { + "count": 1 + }, + "Tysk-romerske rike": { + "count": 1 + }, + "Biografier i Det tysk-romerske riket": { + "count": 1 + }, + "Geometri og topologi": { + "count": 3 + }, + "Algebraisk geometri": { + "count": 3 + }, + "USAs geografi": { + "count": 7 + }, + "Delstater og omr\u00e5der i USA": { + "count": 1 + }, + "Meteorologi": { + "count": 2 + }, + "Klima": { + "count": 1 + }, + "Klima i Norge": { + "count": 1 + }, + "Norske tradisjonsb\u00e5ter": { + "count": 1 + }, + "Nordnorske tradisjonsb\u00e5ter": { + "count": 1 + }, + "Politi og kriminalitet": { + "count": 4 + }, + "Politivesen": { + "count": 2 + }, + "Politifaglige begreper": { + "count": 2 + }, + "Kulturinstitusjoner": { + "count": 6 + }, + "Museum": { + "count": 3 + }, + "Filosofiens historie": { + "count": 9 + }, + "Middelalderens og renessansens filosofi": { + "count": 4 + }, + "Forsvar og sikkerhet": { + "count": 18 + }, + "Forsvar i verden": { + "count": 3 + }, + "Bokkunst og skrift": { + "count": 2 + }, + "Skriftdesign og typografi": { + "count": 1 + }, + "Typografer og skriftdesignere": { + "count": 1 + }, + "Frankrikes nyere litteratur": { + "count": 3 + }, + "Romerrikets litteratur": { + "count": 2 + }, + "Verk i Romerrikets litteratur": { + "count": 1 + }, + "Dyreriket": { + "count": 30 + }, + "Ryggstrengdyr": { + "count": 23 + }, + "Virveldyr": { + "count": 26 + }, + "Fugler": { + "count": 8 + }, + "Dykkere": { + "count": 1 + }, + "Monarki": { + "count": 1 + }, + "Maskiner": { + "count": 10 + }, + "Maskinelementer": { + "count": 1 + }, + "Jus generelt": { + "count": 2 + }, + "Petroleum": { + "count": 6 + }, + "Petroleumsutvinning": { + "count": 4 + }, + "Boreteknologi": { + "count": 1 + }, + "Landbruk": { + "count": 8 + }, + "Husdyr": { + "count": 7 + }, + "Hund": { + "count": 5 + }, + "Hunderaser": { + 
"count": 5 + }, + "Offentlig politikk og administrasjon": { + "count": 3 + }, + "Politisk og \u00f8konomisk filosofi": { + "count": 5 + }, + "Internasjonal tekstilindustri": { + "count": 1 + }, + "Spanias geografi": { + "count": 6 + }, + "Multippelenheter i SI-systemet": { + "count": 1 + }, + " 1980 til i dag": { + "count": 7 + }, + "Salmesang": { + "count": 2 + }, + "Milit\u00e6re biografier": { + "count": 4 + }, + "Norske milit\u00e6re biografier": { + "count": 2 + }, + "Kristne trossamfunn": { + "count": 9 + }, + "Spr\u00e5kforskere": { + "count": 2 + }, + "Norske spr\u00e5kforskere og spr\u00e5kfolk": { + "count": 1 + }, + "Romerrikets historie": { + "count": 10 + }, + "Filmteknikk": { + "count": 2 + }, + "Europeisk film og TV": { + "count": 6 + }, + "Europeiske filmer og tv-serier": { + "count": 1 + }, + "Bil": { + "count": 8 + }, + "Bilfag": { + "count": 4 + }, + "Bilens sikkerhetssystemer": { + "count": 2 + }, + "Bygningstyper": { + "count": 2 + }, + "Norges historie fra 1814 til 1884": { + "count": 2 + }, + "Europeiske jazzut\u00f8vere og -ensembler": { + "count": 2 + }, + " 1920 \u2013 1980": { + "count": 4 + }, + "S\u00f8r-Amerika": { + "count": 7 + }, + "Bolivias historie": { + "count": 2 + }, + "Internasjonalt milit\u00e6rt samarbeid": { + "count": 2 + }, + "Internasjonale milit\u00e6re organisasjoner": { + "count": 2 + }, + "NATO": { + "count": 2 + }, + "Kristen teologi og trosl\u00e6re": { + "count": 2 + }, + "Leddyr": { + "count": 4 + }, + "Insekter": { + "count": 4 + }, + "Tovinger": { + "count": 1 + }, + "Gallmygg": { + "count": 1 + }, + "Dasineura": { + "count": 1 + }, + "Mauritius\u2019 historie": { + "count": 1 + }, + "Mauritius\u2019 samtidshistorie": { + "count": 1 + }, + "Historiske riker i Sentral-Afrika": { + "count": 1 + }, + "Politikkomr\u00e5der": { + "count": 2 + }, + "Milj\u00f8vern": { + "count": 1 + }, + "Milj\u00f8bevegelsen": { + "count": 1 + }, + "Programmering": { + "count": 1 + }, + "Polens historie": { + "count": 1 + }, + 
"Fotografi": { + "count": 2 + }, + "Fotohistoriske biografier": { + "count": 1 + }, + "Skipstekniske og nautiske begrep": { + "count": 5 + }, + "Immaterialrett": { + "count": 1 + }, + "Opphavsrett": { + "count": 1 + }, + "Kontormaskiner": { + "count": 1 + }, + "Sosiologi": { + "count": 6 + }, + "Sosiologer": { + "count": 1 + }, + "Plantenes form": { + "count": 3 + }, + "Fr\u00f8 og formeringsorganer": { + "count": 1 + }, + "Genealoger": { + "count": 1 + }, + "Storbritannias samtidshistorie": { + "count": 1 + }, + "Samtidskunst": { + "count": 10 + }, + "Skulptur og mosaikk i samtidskunsten": { + "count": 4 + }, + "Bolig og eiendom": { + "count": 3 + }, + "Volleyball": { + "count": 1 + }, + "V\u00e6r": { + "count": 1 + }, + "Skyer og nedb\u00f8r": { + "count": 1 + }, + "Bangladesh' historie": { + "count": 1 + }, + "Bangladesh' samtidshistorie": { + "count": 1 + }, + "Spurvefugler": { + "count": 2 + }, + "Kinas geografi": { + "count": 1 + }, + "V\u00e5pen og milit\u00e6rt materiell": { + "count": 3 + }, + "Marinefart\u00f8y": { + "count": 1 + }, + "Undervannsb\u00e5ter": { + "count": 1 + }, + "Belgias franskspr\u00e5klige litteratur": { + "count": 4 + }, + "Motorkj\u00f8ret\u00f8yer": { + "count": 1 + }, + "Israels historie": { + "count": 1 + }, + "Astronomi": { + "count": 17 + }, + "Solsystemet": { + "count": 2 + }, + "Colombias geografi": { + "count": 2 + }, + "Norges historie fra 1050 til 1300": { + "count": 3 + }, + "\u00d8vrig vintersport": { + "count": 2 + }, + "Skiskyting": { + "count": 1 + }, + "Vestland": { + "count": 8 + }, + "B\u00f8mlo": { + "count": 1 + }, + "Moderne okkultisme": { + "count": 1 + }, + "Frankrikes litteratur p\u00e5 1800-tallet": { + "count": 3 + }, + "Antropologi": { + "count": 6 + }, + "Antropologer": { + "count": 3 + }, + "Musikkteori": { + "count": 2 + }, + "Tonalitet": { + "count": 1 + }, + "Internasjonale bedrifter og varemerker": { + "count": 4 + }, + "Religionsvitenskap": { + "count": 6 + }, + "Spr\u00e5ktypologi": { + "count": 1 + 
}, + "Ungarns historie": { + "count": 2 + }, + "Vade-": { + "count": 2 + }, + " m\u00e5ke- og alkefugler": { + "count": 2 + }, + "Oseanias litteratur": { + "count": 3 + }, + "New Zealands litteratur": { + "count": 1 + }, + "Pattedyr": { + "count": 10 + }, + "Spissmusdyr": { + "count": 1 + }, + "Spissmusfamilien": { + "count": 1 + }, + "Soricinae": { + "count": 1 + }, + "Soricini": { + "count": 1 + }, + "Sorex": { + "count": 1 + }, + "Fysikalsk kjemi": { + "count": 1 + }, + "Litteraturvitenskap": { + "count": 7 + }, + "Litteraturforskere og kritikere": { + "count": 3 + }, + "Aserbajdsjans historie": { + "count": 1 + }, + "Rettssosiologi": { + "count": 1 + }, + "Chiles geografi": { + "count": 1 + }, + "Norske bedrifter og varemerker": { + "count": 3 + }, + "Klassisk musikk p\u00e5 1800-tallet": { + "count": 4 + }, + "Internasjonale klassiske komponister og verker p\u00e5 1800-tallet": { + "count": 3 + }, + "Flyindustri": { + "count": 4 + }, + "Flyselskaper": { + "count": 2 + }, + "Racketsport": { + "count": 2 + }, + "Tennis": { + "count": 2 + }, + "Prosessmetallurgi": { + "count": 1 + }, + "Tysklands litteratur": { + "count": 6 + }, + "Tysklands litteratur 1900-1950": { + "count": 1 + }, + "Norske klassiske instrumentalister": { + "count": 2 + }, + "Innlandet": { + "count": 2 + }, + "Eidskog": { + "count": 1 + }, + "Nord-amerikansk film og TV": { + "count": 7 + }, + "Nord-amerikanske skuespillere": { + "count": 5 + }, + "Internasjonal politikk": { + "count": 2 + }, + "Troms og Finnmark": { + "count": 3 + }, + "Bardu": { + "count": 1 + }, + "Hinduisme": { + "count": 3 + }, + "Aust-Agder": { + "count": 1 + }, + "Kirkemusikk": { + "count": 1 + }, + "Australias litteratur": { + "count": 2 + }, + "Kunsthistorie": { + "count": 31 + }, + "Ekspresjonisme og fauvisme": { + "count": 1 + }, + "Vannforsyning": { + "count": 1 + }, + "\u00d8vrige sporter": { + "count": 2 + }, + "\u00d8vrig konkurranseidrett": { + "count": 1 + }, + "Norsk pop og rock": { + "count": 1 + }, + 
"Frankrikes litteratur p\u00e5 1600-tallet": { + "count": 1 + }, + "Arkitekter og arkitektur-institusjoner": { + "count": 10 + }, + "Internasjonale arkitekter fra 1600 til 1900": { + "count": 2 + }, + "Dyrenes anatomi": { + "count": 1 + }, + "Bangladesh' geografi": { + "count": 3 + }, + "Benins historie": { + "count": 1 + }, + "Benins samtidshistorie": { + "count": 1 + }, + "Elektromagnetisme": { + "count": 4 + }, + "Neo-ekspresjonisme og figurativ billedkunst etter 1945": { + "count": 4 + }, + "N\u00e6ringsmiddelteknologi": { + "count": 2 + }, + "Tr\u00f8ndelag": { + "count": 3 + }, + "Skaun": { + "count": 1 + }, + "Sigdal": { + "count": 1 + }, + "Tysklands historie": { + "count": 4 + }, + "Tysklands historie fra 1933 til 1945": { + "count": 1 + }, + "Spr\u00e5kfilosofi": { + "count": 1 + }, + "Kjerneenergi": { + "count": 1 + }, + "Kjernekraftverk": { + "count": 1 + }, + "Reiseliv": { + "count": 3 + }, + "Bedrifter og organisasjoner i internasjonalt reiseliv": { + "count": 2 + }, + "Norske dirigenter": { + "count": 1 + }, + "Soppriket": { + "count": 10 + }, + "Stilksporesopper": { + "count": 9 + }, + "Hymeniesopper": { + "count": 7 + }, + "Ekte hymeniesopper": { + "count": 7 + }, + "Agaricomycetidae": { + "count": 5 + }, + "Skivesoppordenen": { + "count": 5 + }, + "Tricholomataceae": { + "count": 1 + }, + "Ridderhatter": { + "count": 1 + }, + "Indonesias geografi": { + "count": 4 + }, + "Forskningsmetode": { + "count": 5 + }, + "Frankrikes litteratur p\u00e5 1900-tallet": { + "count": 4 + }, + "Aviser": { + "count": 9 + }, + "Avisredakt\u00f8rer": { + "count": 2 + }, + "Portugals litteratur": { + "count": 1 + }, + "Utdanningshistorie": { + "count": 1 + }, + "Pumper og turbiner": { + "count": 3 + }, + "Byer i USA": { + "count": 4 + }, + "USA-assosierte stater og territorer i Oseania": { + "count": 1 + }, + "Korsblomstordenen": { + "count": 3 + }, + "Korsblomstfamilien": { + "count": 3 + }, + "Rublomslekta": { + "count": 1 + }, + "Lyngordenen": { + "count": 1 + }, + 
"Lyngfamilien": { + "count": 1 + }, + "B\u00e6rlyngslekta": { + "count": 1 + }, + " 1960 \u2013 1980": { + "count": 1 + }, + "Geologi": { + "count": 11 + }, + "Bergarter": { + "count": 3 + }, + "Metamorfe bergarter": { + "count": 1 + }, + "Nederlandsk og flamsk billedkunst 1400-1750": { + "count": 2 + }, + "Kristen misjonsvirksomhet": { + "count": 6 + }, + "Benins geografi": { + "count": 1 + }, + "Romanias geografi": { + "count": 2 + }, + "Molekyl\u00e6rbiologi": { + "count": 1 + }, + "Metoder i celle- og molekyl\u00e6rbiologien": { + "count": 1 + }, + "Skrinneblomslekta": { + "count": 1 + }, + "Norsk bildende kunst fra 1800-1945": { + "count": 3 + }, + "Englands historie": { + "count": 1 + }, + "Kinas historie": { + "count": 1 + }, + "Republikken Kinas historie (1912\u20131949)": { + "count": 1 + }, + "Bunad": { + "count": 1 + }, + " kofter og folkedrakt": { + "count": 1 + }, + "Islam": { + "count": 3 + }, + "Retninger i islam": { + "count": 3 + }, + "Postimpresjonisme": { + "count": 2 + }, + " symbolisme og art nouveau": { + "count": 2 + }, + "Dyrevern": { + "count": 1 + }, + "Tidsregning": { + "count": 2 + }, + "Temabasert reiseliv": { + "count": 1 + }, + "Turisthytter": { + "count": 1 + }, + "Geomorfologi": { + "count": 1 + }, + "M\u00f8re og Romsdal": { + "count": 3 + }, + "Fjord": { + "count": 1 + }, + "Formering": { + "count": 1 + }, + "Design": { + "count": 4 + }, + "M\u00f8bler og interi\u00f8rtyper": { + "count": 1 + }, + "Ordforklaringer": { + "count": 10 + }, + "Uttrykk og ordtak": { + "count": 3 + }, + "Petroleumsfelter": { + "count": 2 + }, + "Brasils historie": { + "count": 2 + }, + "Brasils samtidshistorie": { + "count": 2 + }, + "Kildeskrifter": { + "count": 3 + }, + "Norske slekter": { + "count": 9 + }, + "Kamp- og kontaktsport": { + "count": 2 + }, + "Asiatiske kampsporter": { + "count": 1 + }, + "Stormfugler": { + "count": 1 + }, + "Stormfuglfamilien": { + "count": 1 + }, + "Thalassoica": { + "count": 1 + }, + "Danmarks historie": { + "count": 4 
+ }, + "Danmarks samtidshistorie": { + "count": 1 + }, + "Personer innen astronomi": { + "count": 3 + }, + "Romvirksomhet": { + "count": 4 + }, + "Personer": { + "count": 1 + }, + " bedrifter og organisasjoner innen romvirksomhet": { + "count": 1 + }, + "Prefikser og suffikser": { + "count": 1 + }, + "Kraftselskaper og kraftverk": { + "count": 2 + }, + "Arkivfag": { + "count": 2 + }, + "Vestfold og Telemark": { + "count": 2 + }, + "Midt-Telemark": { + "count": 1 + }, + "B\u00f8": { + "count": 2 + }, + "Russlands geografi": { + "count": 1 + }, + "Teknologi- og industrihistorie": { + "count": 4 + }, + "Norsk teknologi- og industrihistorie": { + "count": 4 + }, + "Bergverkshistorie": { + "count": 1 + }, + "Statsvitere og institusjoner": { + "count": 1 + }, + "Verdensmusikk": { + "count": 2 + }, + "Country og bluegrass": { + "count": 1 + }, + "Amerikanske country- og bluegrassartister": { + "count": 1 + }, + "Barnehage": { + "count": 2 + }, + "Internasjonale klassiske ensembler": { + "count": 1 + }, + "Antikkens filosofi": { + "count": 2 + }, + "Leppeblomstordenen": { + "count": 3 + }, + "Kjempefamilien": { + "count": 1 + }, + "Kjempeslekta": { + "count": 1 + }, + "Industri": { + "count": 4 + }, + "Metallarbeid": { + "count": 2 + }, + "Geologer": { + "count": 2 + }, + "Canadas historie": { + "count": 1 + }, + "Canadas samtidshistorie": { + "count": 1 + }, + "Sosiologisk teori": { + "count": 4 + }, + "Palestinas historie": { + "count": 2 + }, + "Palestinas samtidshistorie": { + "count": 2 + }, + "Canadas franskspr\u00e5klige litteratur": { + "count": 1 + }, + "Internasjonale tegnekunstnere": { + "count": 2 + }, + "\u00d8sterrikes litteratur": { + "count": 2 + }, + "\u00d8sterrikes litteratur f\u00f8r 1700-tallet": { + "count": 1 + }, + "Str\u00e5lefinnefisker": { + "count": 2 + }, + "Karpefisker": { + "count": 1 + }, + "Karpefamilien": { + "count": 1 + }, + "Rhodeus": { + "count": 1 + }, + "Indonesias historie": { + "count": 1 + }, + "Indonesias samtidshistorie": { + 
"count": 1 + }, + "Samfunnssikkerhet og beredskap": { + "count": 1 + }, + "Beredskap og kriseh\u00e5ndtering": { + "count": 1 + }, + "Designteori og -historie": { + "count": 1 + }, + "Grunnstoffer": { + "count": 5 + }, + "Nyklassisisme og romantikk i bildende kunst": { + "count": 1 + }, + "Rot hos planter": { + "count": 1 + }, + "Kroatias litteratur": { + "count": 1 + }, + "Flaggermus": { + "count": 1 + }, + "Glattnesefamilien": { + "count": 1 + }, + "Vespertilioninae": { + "count": 1 + }, + "Plecotini": { + "count": 1 + }, + "Barbastella": { + "count": 1 + }, + "J\u00f8dedom": { + "count": 2 + }, + "Tsjekkias geografi": { + "count": 1 + }, + "Danmarks litteratur": { + "count": 5 + }, + "Danmarks litteratur p\u00e5 1900-tallet": { + "count": 4 + }, + "Fransk politikk": { + "count": 1 + }, + "IT-historie": { + "count": 1 + }, + "Personer i IT-historie": { + "count": 1 + }, + "USAs litteratur 1900-1950": { + "count": 1 + }, + "Internasjonal rett": { + "count": 3 + }, + "Alminnelig folkerett": { + "count": 3 + }, + "Lydteknologi": { + "count": 1 + }, + "Flatormer": { + "count": 1 + }, + "Bendelormer": { + "count": 1 + }, + "Pseudophyllidea": { + "count": 1 + }, + "Diphyllobothriidae": { + "count": 1 + }, + "Asias litteratur": { + "count": 3 + }, + "Irans litteratur": { + "count": 1 + }, + "Realisme og impresjonisme": { + "count": 5 + }, + "Kjernekjemi": { + "count": 1 + }, + "Hellas\u2019 historie": { + "count": 1 + }, + "Norske aviser": { + "count": 2 + }, + "Ord og uttrykk p\u00e5 andre spr\u00e5k": { + "count": 3 + }, + "Tyske ord og uttrykk": { + "count": 2 + }, + "Storbritannia og Nord-Irlands litteratur p\u00e5 1600-tallet": { + "count": 1 + }, + "Haier og skater": { + "count": 1 + }, + "H\u00e5branner": { + "count": 1 + }, + "Brugdefamilien": { + "count": 1 + }, + "Cetorhinus": { + "count": 1 + }, + "Rettskildel\u00e6re": { + "count": 1 + }, + "Belgias historie": { + "count": 1 + }, + "Usbekistans geografi": { + "count": 1 + }, + "Elektronikk": { + "count": 6 + 
}, + "Fjernsynsteknikk": { + "count": 1 + }, + "Frankrikes litteratur f\u00f8r 1500-tallet": { + "count": 1 + }, + "Begreper i botanikken": { + "count": 2 + }, + "Costa Ricas litteratur": { + "count": 1 + }, + "Macrocystidiaceae": { + "count": 1 + }, + "Macrocystidia": { + "count": 1 + }, + "Flagg og nasjonalsymboler": { + "count": 2 + }, + "Flagg og v\u00e5pen": { + "count": 2 + }, + "Milit\u00e6r teori": { + "count": 1 + }, + "Arendal": { + "count": 1 + }, + "Austrheim": { + "count": 1 + }, + "Cubas historie": { + "count": 1 + }, + "Cubas samtidshistorie": { + "count": 1 + }, + "Motiver i bildende kunst": { + "count": 1 + }, + "Strophariaceae": { + "count": 1 + }, + "Skulptur og mosaikk i nyere tid": { + "count": 3 + }, + "Europas \u00f8vrige historie": { + "count": 3 + }, + "Europa p\u00e5 1900-tallet": { + "count": 1 + }, + "Kjemiens historie": { + "count": 1 + }, + "S\u00f8tvierordenen": { + "count": 3 + }, + "S\u00f8tvierfamilien": { + "count": 3 + }, + "S\u00f8tvierslekta": { + "count": 2 + }, + "Andre verdenskrig": { + "count": 2 + }, + "Tysklands litteratur p\u00e5 1800-tallet": { + "count": 3 + }, + "Norske klassiske ensembler": { + "count": 1 + }, + "Drammen": { + "count": 1 + }, + "Perus geografi": { + "count": 2 + }, + "Romobservatorier": { + "count": 2 + }, + "Fagbegreper i antropologi": { + "count": 3 + }, + "Botswanas historie": { + "count": 1 + }, + "Botswanas samtidshistorie": { + "count": 1 + }, + "Vortemelkfamilien": { + "count": 2 + }, + "Fly- og helikopterprodusenter": { + "count": 2 + }, + "Heraldikk": { + "count": 1 + }, + "Grafikk": { + "count": 1 + }, + "Genetikk": { + "count": 2 + }, + "Bemannet romfart": { + "count": 1 + }, + "Tyrkias geografi": { + "count": 2 + }, + "Den katolske kirke": { + "count": 1 + }, + "Erkebiskoper": { + "count": 1 + }, + "Biografier i norsk teknologi- og industrihistorie": { + "count": 2 + }, + "Vepser": { + "count": 3 + }, + "Arverett og skifterett": { + "count": 1 + }, + "\u00d8rland": { + "count": 1 + }, + 
"Engelsk fotball": { + "count": 2 + }, + "Engelske fotballspillere": { + "count": 1 + }, + "Bibliotek": { + "count": 1 + }, + "S\u00f8r-Amerikas litteratur": { + "count": 2 + }, + "Colombias litteratur": { + "count": 1 + }, + "Protestantisme": { + "count": 4 + }, + "Samferdsel": { + "count": 2 + }, + "Broer": { + "count": 1 + }, + "Broer i Norge": { + "count": 1 + }, + "Stilkvepser": { + "count": 1 + }, + "Broddvepser": { + "count": 1 + }, + "Apoidea": { + "count": 1 + }, + "Grafisk industri": { + "count": 3 + }, + "Teknologi og materialer i grafisk industri": { + "count": 2 + }, + "Norske skuespillere": { + "count": 4 + }, + "Aviser i Vestland": { + "count": 1 + }, + "Magnoliids": { + "count": 1 + }, + "Magnoliaordenen": { + "count": 1 + }, + "Annonaceae": { + "count": 1 + }, + "Den franske revolusjon": { + "count": 1 + }, + "Teater": { + "count": 1 + }, + "Internasjonale teaterinstitusjoner": { + "count": 1 + }, + "Arkeologi": { + "count": 9 + }, + "Nordisk arkeologi": { + "count": 2 + }, + "Hvaler": { + "count": 1 + }, + "Bardehvaler": { + "count": 1 + }, + "Finnhvalfamilien": { + "count": 1 + }, + "Balaenoptera": { + "count": 1 + }, + "Folketro": { + "count": 2 + }, + "Russland og Sovjetunionens historie": { + "count": 5 + }, + "Russlands eldre historie": { + "count": 3 + }, + "Blad": { + "count": 1 + }, + "Europeiske filmskuespillere": { + "count": 4 + }, + "Internasjonale klassiske komponister og verker i barokken": { + "count": 2 + }, + "Samfunns\u00f8konomi": { + "count": 1 + }, + "\u00d8konomisk politikk": { + "count": 1 + }, + "Motorsport": { + "count": 1 + }, + "Bilsport": { + "count": 1 + }, + "Perus historie": { + "count": 1 + }, + "Skipstekniske begrep": { + "count": 2 + }, + "Glass og keramikk": { + "count": 1 + }, + "Glass og keramikk \u2013 ut\u00f8vere": { + "count": 1 + }, + "Brunrotfamilien": { + "count": 1 + }, + "Bokkunst": { + "count": 1 + }, + "Internasjonale klassiske instrumentalister": { + "count": 1 + }, + "Dagblomordenen": { + "count": 
1 + }, + "Dagblomfamilien": { + "count": 1 + }, + "Roseordenen": { + "count": 1 + }, + "Rosefamilien": { + "count": 1 + }, + "Epleslekta": { + "count": 1 + }, + "USAs litteratur 1950-2000": { + "count": 3 + }, + "Plantevepser": { + "count": 1 + }, + "Tenthredinoidea": { + "count": 1 + }, + "Bladvepser": { + "count": 1 + }, + "Blennocampinae": { + "count": 1 + }, + "Tomostethus": { + "count": 1 + }, + "Franske s\u00f8rterritorier": { + "count": 1 + }, + "Musikkinstrumenter": { + "count": 1 + }, + "Elektriske musikkinstrumenter": { + "count": 1 + }, + "Arbeidsrett": { + "count": 1 + }, + "Optikk": { + "count": 3 + }, + "Animasjonsfilm": { + "count": 1 + }, + "Snipefamilien": { + "count": 1 + }, + "Rogaland": { + "count": 3 + }, + "Eigersund": { + "count": 2 + }, + "Albanias historie": { + "count": 2 + }, + "Sirenia": { + "count": 1 + }, + "Dugongidae": { + "count": 1 + }, + "Dugong": { + "count": 1 + }, + "Dans": { + "count": 5 + }, + "Scenisk dans": { + "count": 2 + }, + "Moderne- og samtidsdans": { + "count": 1 + }, + "Moderne- og samtidsdans: norske kunstnere og kompanier": { + "count": 1 + }, + "Raketter": { + "count": 2 + }, + "Obligasjonsrett": { + "count": 4 + }, + "Myanmars historie": { + "count": 1 + }, + "Myanmars samtidshistorie": { + "count": 1 + }, + "Biografier i Romerrikets historie": { + "count": 3 + }, + "Kantarellordenen": { + "count": 1 + }, + "Clavulinaceae": { + "count": 1 + }, + "Ringerike": { + "count": 1 + }, + "Litteraturtyper": { + "count": 7 + }, + "Barne- og ungdomslitteratur": { + "count": 3 + }, + "Internasjonal barne- og ungdomslitteratur": { + "count": 2 + }, + "Russland og Sovjetunionens litteratur": { + "count": 1 + }, + "Russiske litteraturforskere og kritikere": { + "count": 1 + }, + "Filippinenes historie": { + "count": 1 + }, + "Filippinenes samtidshistorie": { + "count": 1 + }, + "Norske jazzut\u00f8vere og -ensembler": { + "count": 2 + }, + "Bedrifts\u00f8konomi": { + "count": 5 + }, + "Norske tegnekunstnere": { + "count": 2 + 
}, + "Argentinas litteratur": { + "count": 1 + }, + "Islands geografi": { + "count": 1 + }, + "Gnagere": { + "count": 2 + }, + "Sciuromorpha": { + "count": 1 + }, + "Ekornfamilien": { + "count": 1 + }, + "Myosciurus": { + "count": 1 + }, + "Paleontologi": { + "count": 5 + }, + "Tokke": { + "count": 1 + }, + "Soul og R&B": { + "count": 1 + }, + "Internasjonal soul og R&B": { + "count": 1 + }, + "Utenlandske spr\u00e5kforskere": { + "count": 1 + }, + "Forglemmegeislekta": { + "count": 2 + }, + "Slimriket": { + "count": 1 + }, + "Strafferett": { + "count": 1 + }, + "Mineraler": { + "count": 4 + }, + "Silikatmineraler": { + "count": 3 + }, + "Norges forsvar": { + "count": 2 + }, + "Norske festninger og milit\u00e6ranlegg": { + "count": 1 + }, + "Katolske ritualer og symboler": { + "count": 2 + }, + "Irlands geografi": { + "count": 1 + }, + "Jurister": { + "count": 2 + }, + "Ulike lands musikk og musikktradisjoner": { + "count": 1 + }, + "Albanias samtidshistorie": { + "count": 1 + }, + "Varehandel": { + "count": 1 + }, + "Psykologiens historie": { + "count": 1 + }, + "Utenlandske aviser": { + "count": 2 + }, + "Verdenshavene": { + "count": 3 + }, + "Dyphavsgroper": { + "count": 1 + }, + "Ulike lands arkitekturhistorie": { + "count": 1 + }, + "Islands litteratur": { + "count": 4 + }, + "Ulike lands kunst og kunsttradisjoner": { + "count": 1 + }, + "Konstruksjonsteknikk": { + "count": 1 + }, + "Canadas geografi": { + "count": 1 + }, + "Bakterier": { + "count": 1 + }, + "H\u00e5ndverk": { + "count": 1 + }, + "Redskaper og festemidler": { + "count": 1 + }, + "Utforsking av solsystemet": { + "count": 1 + }, + "Vin i Europa": { + "count": 1 + }, + "Vin i Frankrike": { + "count": 1 + }, + "Myndigheter i Norge": { + "count": 2 + }, + "Embets- og tjenestepersoner": { + "count": 1 + }, + "Det 19. 
\u00e5rhundres filosofi": { + "count": 1 + }, + "Nederlands historie": { + "count": 1 + }, + "Norges historie fram til 1050": { + "count": 1 + }, + "Barnevern": { + "count": 1 + }, + "Rovpattedyr": { + "count": 2 + }, + "Caniformia": { + "count": 2 + }, + "Bj\u00f8rnefamilien": { + "count": 1 + }, + "Tremarctos": { + "count": 1 + }, + "Kirgisistans historie": { + "count": 1 + }, + "Kirgisistans samtidshistorie": { + "count": 1 + }, + "Krypdyr": { + "count": 1 + }, + "Dinosaurer": { + "count": 1 + }, + "\u00d8vrig europeisk malerkunst 1400-1750": { + "count": 2 + }, + "Mesopotamia": { + "count": 2 + }, + "Storg\u00e5rder og herreg\u00e5rder": { + "count": 2 + }, + "Norske storg\u00e5rder": { + "count": 2 + }, + "Internasjonale arkitekter og arkitektkontorer fra 1900 til i dag": { + "count": 4 + }, + "Skriftspr\u00e5k": { + "count": 1 + }, + "Alfabeter og skriftsystemer": { + "count": 1 + }, + "Skrifttegn": { + "count": 1 + }, + "Latinske bokstaver": { + "count": 1 + }, + "Diakritiske tegn": { + "count": 1 + }, + "Det 17. og 18. 
\u00e5rhundres filosofi": { + "count": 2 + }, + "Norske arkitekter og arkitektkontorer fra 1900 til i dag": { + "count": 3 + }, + "Asias kj\u00f8kken": { + "count": 1 + }, + "Avisjournalister": { + "count": 1 + }, + "V\u00e5pen": { + "count": 2 + }, + "Artilleri": { + "count": 1 + }, + "TV og radio": { + "count": 1 + }, + "TV- og radiopersoner": { + "count": 1 + }, + "Uorganisk kjemi": { + "count": 3 + }, + "Myomorpha": { + "count": 1 + }, + "Muroidea": { + "count": 1 + }, + "Hamsterfamilien": { + "count": 1 + }, + "Arvicolinae": { + "count": 1 + }, + "Lemmini": { + "count": 1 + }, + "Lemmus": { + "count": 1 + }, + "Bilens drivlinje": { + "count": 1 + }, + "Britisk Indiahav": { + "count": 1 + }, + "Nyrealisme": { + "count": 1 + }, + " nysaklighet og tendenskunst": { + "count": 1 + }, + "Cellebiologi": { + "count": 1 + }, + "Cellestrukturer": { + "count": 1 + }, + "Tysklands geografi": { + "count": 3 + }, + "Armenias geografi": { + "count": 1 + }, + "Elektronisk navigasjon": { + "count": 1 + }, + "Kinematografi": { + "count": 1 + }, + "Spr\u00e5kfamilier": { + "count": 3 + }, + "Indoeuropeiske spr\u00e5k": { + "count": 3 + }, + "Romansk": { + "count": 2 + }, + "Latin": { + "count": 2 + }, + "Latinske verk": { + "count": 2 + }, + "Folkegrupper i Asia": { + "count": 1 + }, + "Folkegrupper i Sentral-Asia": { + "count": 1 + }, + "Bilmerker": { + "count": 2 + }, + "Eritreas geografi": { + "count": 1 + }, + "Norr\u00f8n litteratur": { + "count": 2 + }, + "Algebra": { + "count": 1 + }, + "Kommutative algebraer og ringer": { + "count": 1 + }, + "Israels geografi": { + "count": 1 + }, + "Nellikordenen": { + "count": 1 + }, + "Slireknefamilien": { + "count": 1 + }, + "Syreslekta": { + "count": 1 + }, + "And\u00f8y": { + "count": 1 + }, + "Gulen": { + "count": 1 + }, + "Rettshistorie": { + "count": 2 + }, + "Kvinnherad": { + "count": 1 + }, + "Bilhistoriske biografier": { + "count": 1 + }, + "Jordans historie": { + "count": 1 + }, + "Friidrett": { + "count": 1 + }, + 
"L\u00f8p": { + "count": 1 + }, + "Europeiske filmskapere": { + "count": 1 + }, + "Oseania generelt": { + "count": 1 + }, + "Industridesign": { + "count": 1 + }, + "Fast eiendom": { + "count": 1 + }, + "Milit\u00e6rvesen": { + "count": 2 + }, + "Bank og finans": { + "count": 2 + }, + "B\u00f8rs og finans": { + "count": 2 + }, + "Internasjonale klassiske sangere": { + "count": 2 + }, + "Nattravn- og seilerfugler": { + "count": 1 + }, + "Seilerfamilien": { + "count": 1 + }, + "Tachymarptis": { + "count": 1 + }, + "Zoologi": { + "count": 1 + }, + "Pedagogisk teori": { + "count": 1 + }, + "Midt\u00f8sten": { + "count": 4 + }, + "Midt\u00f8stens gamle kulturer": { + "count": 4 + }, + "Oppdagelsesreiser": { + "count": 3 + }, + "Urstilksporesopper": { + "count": 1 + }, + "Ekte rustsopper": { + "count": 1 + }, + "Rustsoppordenen": { + "count": 1 + }, + "Pucciniaceae": { + "count": 1 + }, + "Slektskap": { + "count": 1 + }, + " familie og ekteskap": { + "count": 1 + }, + "Akershus": { + "count": 1 + }, + "Sk\u00f8ytesport": { + "count": 1 + }, + "Eldre mynter og pengeenheter": { + "count": 2 + }, + "Elektronisk musikk": { + "count": 1 + }, + "Kunstinstitusjoner": { + "count": 1 + }, + "Internasjonale kunstinstitusjoner": { + "count": 1 + }, + "Norges historie fra 1884 til 1940": { + "count": 5 + }, + "Plantefysiologi": { + "count": 1 + }, + "Serbias historie": { + "count": 1 + }, + "Ulike lands dansehistorie": { + "count": 1 + }, + "Spanias litteratur": { + "count": 1 + }, + "Spanias litteratur p\u00e5 1900-tallet": { + "count": 1 + }, + "Israels litteratur": { + "count": 1 + }, + "Skytesport": { + "count": 1 + }, + "Viltm\u00e5l": { + "count": 1 + }, + "H\u00e6r og landforsvar": { + "count": 1 + }, + "Norsk og samisk litteratur": { + "count": 11 + }, + "Nyere norsk litteratur": { + "count": 5 + }, + "Italienske ord og uttrykk": { + "count": 1 + }, + "USAs nyere litteratur": { + "count": 2 + }, + "Nyklassisisme og historisme i arkitekturhistorien": { + "count": 2 + }, + 
"Hygrophoraceae": { + "count": 1 + }, + "Cuphophyllus": { + "count": 1 + }, + "Ustabile grunnstoffer": { + "count": 3 + }, + "Kubisme og futurisme": { + "count": 1 + }, + "Krim": { + "count": 1 + }, + "Krim i Norge": { + "count": 1 + }, + "Namibias historie": { + "count": 1 + }, + "Skifer": { + "count": 1 + }, + "Rytme og tempo": { + "count": 1 + }, + "Bl\u00f8tdyr": { + "count": 2 + }, + "Snegler": { + "count": 1 + }, + "Albuesnegler": { + "count": 1 + }, + "Patelloidea": { + "count": 1 + }, + "Patellidae": { + "count": 1 + }, + "Patella": { + "count": 1 + }, + "Angolas historie": { + "count": 1 + }, + "Angolas samtidshistorie": { + "count": 1 + }, + "Grunnoppl\u00e6ring": { + "count": 2 + }, + "Yrkesfaglig utdanning": { + "count": 2 + }, + "Bygg- og anleggsteknikk": { + "count": 1 + }, + "Sveriges geografi": { + "count": 3 + }, + "Andefugler": { + "count": 1 + }, + "Andefamilien": { + "count": 1 + }, + "Somateria": { + "count": 1 + }, + "Rugby og amerikansk fotball": { + "count": 1 + }, + "Broddsoppordenen": { + "count": 1 + }, + "Hymenochaetaceae": { + "count": 1 + }, + "Jordbruk": { + "count": 1 + }, + "Trelast og treforedling": { + "count": 2 + }, + "Papirindustri": { + "count": 1 + }, + "Venezuelas geografi": { + "count": 1 + }, + "Mystikere": { + "count": 1 + }, + "Prosess": { + "count": 1 + }, + "Straffeprosess": { + "count": 1 + }, + "Institusjoner i klassisk musikk": { + "count": 1 + }, + "Aviseiere og direkt\u00f8rer": { + "count": 2 + }, + "Nord-amerikanske filmskapere": { + "count": 2 + }, + "Norske kirkebygg": { + "count": 2 + }, + "Utdanning i verden": { + "count": 2 + }, + "Utdanning i Europa": { + "count": 1 + }, + "Marokkos geografi": { + "count": 1 + }, + "Trolldomsprosesser": { + "count": 1 + }, + "Etiopias geografi": { + "count": 1 + }, + "Petroleumshistorie": { + "count": 1 + }, + "Ulykker i petroleumsbransjen": { + "count": 1 + }, + "Anvendt elektromagnetisme": { + "count": 1 + }, + "Tyrkiskspr\u00e5klig litteratur": { + "count": 1 + }, + 
"Tyrkias litteratur": { + "count": 1 + }, + "Indre \u00d8stfold": { + "count": 1 + }, + "Eidsberg": { + "count": 1 + }, + "Grafisk industrihistorie": { + "count": 1 + }, + "Tingvoll": { + "count": 1 + }, + "\u00d8kologi": { + "count": 1 + }, + "Utviklingsbiologi": { + "count": 1 + }, + "Fiskeri og havbruk": { + "count": 1 + }, + "Norske fiskerier": { + "count": 1 + }, + "Fiskeredskaper og fart\u00f8y": { + "count": 1 + }, + "Bulgarias geografi": { + "count": 2 + }, + "Nyere norsk sakprosa": { + "count": 1 + }, + "Tremellomycetes": { + "count": 1 + }, + "Tremellales": { + "count": 1 + }, + "Tremellaceae": { + "count": 1 + }, + "Cryptococcus": { + "count": 1 + }, + "Norges litteratur 1800-tallet": { + "count": 1 + }, + "Forskning og h\u00f8yere utdanning": { + "count": 2 + }, + "Forskningsinstitutter": { + "count": 1 + }, + "Transportrett": { + "count": 2 + }, + "Sj\u00f8rett": { + "count": 2 + }, + "Italias historie": { + "count": 2 + }, + "Italias samtidshistorie": { + "count": 1 + }, + "Internasjonale milit\u00e6re biografier": { + "count": 2 + }, + "Portugisisk fotball": { + "count": 1 + }, + "Medisin": { + "count": 3 + }, + "Helse og samfunn": { + "count": 2 + }, + "Medisinsk historie": { + "count": 2 + }, + "Algeries geografi": { + "count": 1 + }, + "Sveits\u2019 geografi": { + "count": 2 + }, + "Klovdyr": { + "count": 1 + }, + "Kvegfamilien": { + "count": 1 + }, + "Bos": { + "count": 1 + }, + "Norges litteratur 1900-1960": { + "count": 1 + }, + "Mel\u00f8y": { + "count": 1 + }, + "Religionsfilosofi": { + "count": 1 + }, + "Sveits\u2019 litteratur": { + "count": 2 + }, + "Sveits\u2019 franskspr\u00e5klige litteratur": { + "count": 1 + }, + "Materialfysikk": { + "count": 1 + }, + "Systematikk i biologien": { + "count": 1 + }, + "Terrorisme": { + "count": 1 + }, + "Norsk barne- og ungdomslitteratur f\u00f8r 2000": { + "count": 1 + }, + "Krystallografi": { + "count": 1 + }, + "Litter\u00e6re perioder og stiler": { + "count": 1 + }, + "Ungarns litteratur": { + 
"count": 1 + }, + "Semantikk": { + "count": 1 + }, + "Norsk samtidskunst": { + "count": 2 + }, + "Enebakk": { + "count": 1 + }, + "Europeisk arkeologi": { + "count": 1 + }, + "Tidlig abstrakt billedkunst": { + "count": 1 + }, + "Cortinariaceae": { + "count": 1 + }, + "Sl\u00f8rsopper": { + "count": 1 + }, + "Sentralafrikanske republikks historie": { + "count": 1 + }, + "Nord-Makedonias historie": { + "count": 1 + }, + "Nord-Makedonias samtidshistorie": { + "count": 1 + }, + "Ford\u00f8yelsessystemet": { + "count": 1 + }, + "Ern\u00e6ring": { + "count": 1 + }, + "Sveits\u2019 historie": { + "count": 1 + }, + "Sveits\u2019 samtidshistorie": { + "count": 1 + }, + "Grader og titler": { + "count": 1 + }, + "Akademiske titler": { + "count": 1 + }, + "New Zealands geografi": { + "count": 1 + }, + "Sveriges historie": { + "count": 1 + }, + "Sveriges kongehus": { + "count": 1 + }, + "Norges historie fra 1940 til 1945": { + "count": 2 + }, + "Klassisk religion": { + "count": 1 + }, + "Astronomisk teknologi": { + "count": 1 + }, + "Spanias historie": { + "count": 1 + }, + "Spanias samtidshistorie": { + "count": 1 + }, + "Sjangerl\u00e6re": { + "count": 2 + }, + "epikk": { + "count": 1 + }, + "Eidfjord": { + "count": 1 + }, + "Orientering": { + "count": 1 + }, + "Skotsk fotball": { + "count": 1 + }, + "Leppeblomstfamilien": { + "count": 1 + }, + "D\u00e5slekta": { + "count": 1 + }, + "Estetikk": { + "count": 2 + }, + "Nigerias historie": { + "count": 1 + }, + "Nigerias samtidshistorie": { + "count": 1 + }, + "Musikaler": { + "count": 2 + }, + "Angolas geografi": { + "count": 1 + }, + "Kristne ritualer og symboler": { + "count": 2 + }, + "Radioteknikk": { + "count": 1 + }, + "Internasjonale fotografer og fotokunstnere": { + "count": 1 + }, + "Internasjonale klassiske komponister og verker 1900\u20131950": { + "count": 2 + }, + "Norsk bildende kunst fra 1945 til 1990": { + "count": 2 + }, + "Serbia og det gamle Jugoslavias litteratur": { + "count": 1 + }, + "Polare ekspedisjoner 
og oppdagere": { + "count": 1 + }, + "Italiensk fotball": { + "count": 1 + }, + "Petroleumsselskaper": { + "count": 1 + }, + "Datoartikler": { + "count": 3 + }, + "Tysklands litteratur 1950-2000": { + "count": 1 + }, + "M\u00e5ltider": { + "count": 1 + }, + "Iraks historie": { + "count": 1 + }, + "Iraks samtidshistorie": { + "count": 1 + }, + "Irans historie": { + "count": 1 + }, + "Irans samtidshistorie": { + "count": 1 + }, + "Bod\u00f8": { + "count": 1 + }, + "Nordiske jazzut\u00f8vere og -ensembler": { + "count": 2 + }, + " 1980 til idag": { + "count": 2 + }, + "Trelast- og treforedlingsindustri \u2013 historie": { + "count": 1 + }, + "Det gamle Egypt": { + "count": 1 + }, + "Norske ord og uttrykk": { + "count": 1 + }, + "Papua Ny-Guineas geografi": { + "count": 1 + }, + "Asker": { + "count": 1 + }, + "Amfibier": { + "count": 2 + }, + "Springpadder": { + "count": 1 + }, + "Eleutherodactylidae": { + "count": 1 + }, + "Eleutherodactylus": { + "count": 1 + }, + "Tunneler": { + "count": 1 + }, + "Tunneler i Norge": { + "count": 1 + }, + "Salamandere": { + "count": 1 + }, + "Plethodontidae": { + "count": 1 + }, + "Aneides": { + "count": 1 + }, + "H\u00e6ren": { + "count": 1 + }, + "Kardeborreordenen": { + "count": 1 + }, + "Kaprifolfamilien": { + "count": 1 + }, + "Bl\u00e5knappslekta": { + "count": 1 + }, + "Italias litteratur p\u00e5 1700-tallet": { + "count": 1 + }, + "Skipsbygging": { + "count": 1 + }, + "Skipskonstrukt\u00f8rer": { + "count": 1 + }, + "Piggepleslekta": { + "count": 1 + }, + "Sediment\u00e6re bergarter": { + "count": 1 + }, + "Samfunnstyper": { + "count": 1 + }, + "Boksing": { + "count": 1 + }, + "Designinstitusjoner": { + "count": 1 + }, + "De forente arabiske emiraters historie": { + "count": 1 + }, + "De forente arabiske emiraters samtidshistorie": { + "count": 1 + }, + "Folketrygd og pensjon": { + "count": 1 + }, + "Forbrukerelektronikk": { + "count": 1 + }, + "Utdanning i S\u00f8r-Amerika": { + "count": 1 + }, + "Asias geografi generelt": { 
+ "count": 1 + }, + "Urverk og klokker": { + "count": 1 + }, + "Standardisering": { + "count": 1 + }, + "Oppdal": { + "count": 1 + }, + "Norske arkitekturinstitusjoner": { + "count": 1 + }, + "Sparta": { + "count": 1 + }, + "Kunsth\u00e5ndverk": { + "count": 2 + }, + "Treskurd": { + "count": 1 + }, + "Treskj\u00e6rere": { + "count": 1 + }, + "Tysklands historie fra 1806 til 1918": { + "count": 1 + }, + "Mexicos geografi": { + "count": 1 + }, + "Selfamilien": { + "count": 1 + }, + "Cystophora": { + "count": 1 + }, + "Burundis geografi": { + "count": 1 + }, + "Islands historie": { + "count": 1 + }, + "Tegneserier": { + "count": 1 + }, + "Birkenes": { + "count": 1 + }, + "Humanetikk": { + "count": 1 + }, + "Matretter": { + "count": 2 + }, + "Supper": { + "count": 2 + }, + " sauser og kraft": { + "count": 2 + }, + "Ugandas historie": { + "count": 1 + }, + "Erkjennelsesteori": { + "count": 1 + }, + "Sveits\u2019 tyskspr\u00e5klig litteratur": { + "count": 1 + }, + "Sveits\u2019 tyskspr\u00e5klig litteratur p\u00e5 1900-tallet": { + "count": 1 + }, + "Popkunst og minimalisme": { + "count": 1 + }, + "Klassisk ballett": { + "count": 1 + }, + "Klassisk ballett: internasjonale kunstnere og kompanier": { + "count": 1 + }, + "Inkludering og rettigheter": { + "count": 1 + }, + "Likestilling og diskriminering": { + "count": 1 + }, + "Kvinnebevegelse": { + "count": 1 + }, + "Kvinnebevegelsen i Norge": { + "count": 1 + }, + "Arkeologi i Asia": { + "count": 1 + }, + "Cubas litteratur": { + "count": 1 + }, + "Elektrokjemi": { + "count": 1 + }, + "Norges litteratur 1960-2000": { + "count": 3 + }, + "Nederlands litteratur": { + "count": 1 + }, + "Forfattere i romerrikets litteratur": { + "count": 1 + }, + "Forbrenningsmotorer": { + "count": 2 + }, + "Vitenskapsteori": { + "count": 1 + }, + "Vitenskapsteoretikere": { + "count": 1 + }, + "Norsk lyrikk 1960-2000": { + "count": 1 + }, + "Helserett": { + "count": 1 + }, + "Klesplagg": { + "count": 1 + }, + "Belgias geografi": { + "count": 
1 + }, + "Antarktis": { + "count": 1 + }, + "Danseinstitusjoner": { + "count": 1 + }, + "Irans geografi": { + "count": 1 + }, + "V\u00e5rskrinneblomslekta": { + "count": 1 + }, + "Motstandskampen i Norge": { + "count": 1 + }, + "Skjell": { + "count": 1 + }, + "Unionoida": { + "count": 1 + }, + "Unionoidea": { + "count": 1 + }, + "Elvemuslinger": { + "count": 1 + }, + "Margaritifera": { + "count": 2 + }, + "Klassisk arkeologi": { + "count": 1 + }, + "Gresk arkeologi": { + "count": 1 + }, + "Teknologi- og industrifag": { + "count": 1 + }, + "Afrosoricida": { + "count": 1 + }, + "Tenrecidae": { + "count": 1 + }, + "Tysklands historie fra 1918 til 1933": { + "count": 1 + }, + "Erstatningsrett": { + "count": 1 + }, + "Trevirke": { + "count": 1 + }, + "Sovjetunionens historie": { + "count": 2 + }, + "Gjenstanders kulturhistorie": { + "count": 2 + }, + "Dominicas geografi": { + "count": 2 + }, + "B\u00f8keordenen": { + "count": 1 + }, + "Bj\u00f8rkefamilien": { + "count": 1 + }, + "Bj\u00f8rkeslekta": { + "count": 1 + }, + "Metrikk": { + "count": 1 + }, + "USAs historie 1920\u20131960": { + "count": 2 + }, + "Energi og ressurs": { + "count": 1 + }, + "Tysklands nyere litteratur": { + "count": 1 + }, + "Libanons historie": { + "count": 1 + }, + "B\u00e6rum": { + "count": 1 + }, + "Brasils geografi": { + "count": 1 + }, + "Norges historie fra 1300 til 1660": { + "count": 1 + }, + "Natur og landemerker i USA": { + "count": 2 + }, + "Kv\u00e6fjord": { + "count": 1 + }, + "Fremmedord og l\u00e5nord": { + "count": 2 + }, + "Lokalforvaltning": { + "count": 1 + }, + "Brennevin": { + "count": 1 + }, + "Drinker": { + "count": 1 + }, + "Hest": { + "count": 1 + }, + "Erteblomstordenen": { + "count": 1 + }, + "Erteblomstfamilien": { + "count": 1 + }, + "Acacia": { + "count": 1 + }, + "Historiske biografier i norsk politikk": { + "count": 1 + }, + "Fremmedord med latinsk og gresk opphav": { + "count": 1 + }, + "B\u00e5tsfjord": { + "count": 1 + }, + "Tidsskrifter": { + "count": 1 + }, 
+ "Romanias samtidshistorie": { + "count": 1 + }, + "Fagretninger i sosiologi": { + "count": 1 + }, + "Folkegrupper i S\u00f8r-Amerika": { + "count": 1 + }, + "Norges historie generelt": { + "count": 1 + }, + "Norsk polarhistorie": { + "count": 1 + }, + "Tekstilkunst": { + "count": 1 + }, + "Tekstilkunstnere": { + "count": 1 + }, + "Arkeologer": { + "count": 2 + }, + "Stabile grunnstoffer": { + "count": 1 + }, + "Fiskenes anatomi og fysiologi": { + "count": 1 + }, + "Vang": { + "count": 1 + }, + "Sunnfjord": { + "count": 1 + }, + "Samv\u00e6rsdans": { + "count": 1 + }, + "Nicaraguas litteratur": { + "count": 1 + }, + "Sulfid- og sulfatmineraler": { + "count": 1 + }, + "Hardrock og heavy metal": { + "count": 1 + }, + "Norsk hardrock og heavy metal": { + "count": 1 + }, + "Aver\u00f8y": { + "count": 1 + }, + "Sol og stjerner": { + "count": 1 + }, + "Gloppen": { + "count": 1 + }, + "Italiensk manierisme": { + "count": 1 + }, + " barokk- og renessansekunst": { + "count": 1 + }, + "Sakprosa": { + "count": 1 + }, + "Sekksporesopper": { + "count": 1 + }, + "Ursekksporesopper": { + "count": 1 + }, + "Heksekostsopper": { + "count": 1 + }, + "Religion i ulike land": { + "count": 1 + }, + "Religion i Latin-Amerika": { + "count": 1 + }, + "Italias nyere litteratur": { + "count": 1 + }, + "Arkitekturteori og -metode": { + "count": 1 + }, + "Bahrains geografi": { + "count": 1 + }, + "Biografier i nyere norsk historie": { + "count": 1 + }, + "Askvoll": { + "count": 1 + }, + "\u00d8sterrikes litteratur p\u00e5 1900-tallet": { + "count": 1 + }, + "Katt": { + "count": 1 + }, + "Piggfinnefisker": { + "count": 1 + }, + "Leppefiskfamilien": { + "count": 1 + }, + "Ctenolabrus": { + "count": 1 + }, + "Mykologi": { + "count": 1 + }, + "Innholdsstoffer hos sopp": { + "count": 1 + }, + "Fluesnapperfamilien": { + "count": 1 + }, + "Luscinia": { + "count": 1 + }, + "Colombias historie": { + "count": 1 + }, + "\u00d8vrige indo-europeiske spr\u00e5k": { + "count": 1 + }, + "Dannmarks historie 
1814-1945": { + "count": 1 + }, + "Arbeiderbevegelsens historie": { + "count": 1 + }, + "Kameruns litteratur": { + "count": 1 + }, + "Vannsport": { + "count": 1 + }, + "Sv\u00f8mmesport": { + "count": 1 + }, + "Abstrakt ekspresjonisme": { + "count": 1 + }, + "Bulgarias historie": { + "count": 1 + }, + "Storbritannia og Nord-Irlands litteratur p\u00e5 1800-tallet": { + "count": 1 + }, + "Norske klassiske komponister og verker f\u00f8r 1900": { + "count": 1 + }, + "Kirgisistans geografi": { + "count": 1 + }, + "Panamas historie": { + "count": 1 + }, + "Finlands geografi": { + "count": 1 + }, + "Petroleumsprodukter": { + "count": 1 + }, + "Teknisk kybernetikk": { + "count": 1 + }, + "Dyregeografi": { + "count": 1 + }, + "Verdens dyreliv": { + "count": 1 + }, + "Minev\u00e5pen": { + "count": 1 + }, + "Det bysantinske rike": { + "count": 1 + }, + "Element\u00e6rpartikkelfysikk": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/SNLHierarchicalClusteringS2S.json b/mteb/descriptive_stats/Clustering/SNLHierarchicalClusteringS2S.json new file mode 100644 index 0000000000..9e6fec105a --- /dev/null +++ b/mteb/descriptive_stats/Clustering/SNLHierarchicalClusteringS2S.json @@ -0,0 +1,3754 @@ +{ + "test": { + "num_samples": 1300, + "number_of_characters": 314891, + "min_text_length": 100, + "average_text_length": 242.22384615384615, + "max_text_length": 908, + "unique_texts": 382, + "min_labels_per_text": 1, + "average_labels_per_text": 4.136153846153846, + "max_labels_per_text": 201, + "unique_labels": 1246, + "labels": { + "Realfag": { + "count": 89 + }, + "Fysikk": { + "count": 32 + }, + "Varme- og stoffl\u00e6re": { + "count": 3 + }, + "Spr\u00e5k og litteratur": { + "count": 139 + }, + "Spr\u00e5kvitenskapens fagdisipliner": { + "count": 20 + }, + "Pragmatikk": { + "count": 1 + }, + "Onomastikk": { + "count": 14 + }, + "Personnavn": { + "count": 14 + }, + "Fornavn": { + "count": 14 + }, + "Guttenavn": { + "count": 9 + 
}, + "Kunst og estetikk": { + "count": 201 + }, + "Metall- og smykkekunst": { + "count": 1 + }, + "Biologi": { + "count": 92 + }, + "Vitenskapsgrener i biologien": { + "count": 24 + }, + "Botanikk": { + "count": 8 + }, + "Planteanatomi": { + "count": 5 + }, + "Plantenes vevsystemer": { + "count": 1 + }, + "Andre vev hos planter": { + "count": 1 + }, + "Teknologi og industri": { + "count": 97 + }, + "Transport": { + "count": 35 + }, + "Navigasjon": { + "count": 2 + }, + "Kjemi": { + "count": 28 + }, + "Polymerkjemi": { + "count": 3 + }, + "Sport og spill": { + "count": 28 + }, + "Ballsport": { + "count": 15 + }, + "Biljard": { + "count": 2 + }, + "S\u00f8m": { + "count": 8 + }, + " kl\u00e6r og mote": { + "count": 8 + }, + "Sko": { + "count": 1 + }, + "Veterin\u00e6rmedisin": { + "count": 2 + }, + "Fiskehelse": { + "count": 2 + }, + "Fotball": { + "count": 9 + }, + "Norsk fotball": { + "count": 4 + }, + "Musikk": { + "count": 69 + }, + "Klassisk musikk": { + "count": 25 + }, + "Den klassiske musikkens historie": { + "count": 12 + }, + "Klassisk musikk f\u00f8r 1600": { + "count": 1 + }, + "M\u00e5l og vekt": { + "count": 7 + }, + "Eldre m\u00e5leenheter": { + "count": 1 + }, + "Historie": { + "count": 192 + }, + "Kulturhistorie": { + "count": 6 + }, + "Folkelige skikker og tradisjoner": { + "count": 2 + }, + "Spill": { + "count": 3 + }, + "Brettspill": { + "count": 2 + }, + "Biokjemi": { + "count": 1 + }, + "Aminosyrer": { + "count": 1 + }, + "Geografi": { + "count": 178 + }, + "Norges geografi": { + "count": 45 + }, + "Agder": { + "count": 4 + }, + "Lindesnes": { + "count": 1 + }, + "Verdens geografi": { + "count": 128 + }, + "Afrika": { + "count": 26 + }, + "Kapp Verdes geografi": { + "count": 1 + }, + "Viken": { + "count": 9 + }, + "Halden": { + "count": 1 + }, + "Samfunn": { + "count": 97 + }, + "Samfunnsfag": { + "count": 26 + }, + "Statsvitenskap": { + "count": 7 + }, + "Valgforskning og sammenliknende politikk": { + "count": 1 + }, + "Politiske begreper": { + 
"count": 1 + }, + "Planteriket": { + "count": 24 + }, + "Dekkfr\u00f8ete planter": { + "count": 24 + }, + "Enfr\u00f8bladete planter": { + "count": 2 + }, + "Aspargesordenen": { + "count": 1 + }, + "Orkid\u00e9familien": { + "count": 1 + }, + "Marihandslekta": { + "count": 1 + }, + "Jus": { + "count": 34 + }, + "Formuerett": { + "count": 8 + }, + "Selskapsrett": { + "count": 5 + }, + "Telekommunikasjon og kringkasting": { + "count": 2 + }, + "Kommunikasjonsteknologi": { + "count": 2 + }, + "Oseania": { + "count": 6 + }, + "Australias geografi": { + "count": 2 + }, + "Jernbane og sporvei": { + "count": 1 + }, + "Jernbanebiografier": { + "count": 1 + }, + "Verdens historie og samtidshistorie": { + "count": 126 + }, + "Europa": { + "count": 192 + }, + "Den greske antikken": { + "count": 6 + }, + "Sj\u00f8fart og skipsindustri": { + "count": 16 + }, + "Fyrvesen og losvesen": { + "count": 5 + }, + "Fyrvesen": { + "count": 4 + }, + "Norske fyr": { + "count": 4 + }, + "Norsk og nordisk historie": { + "count": 26 + }, + "Norges historie": { + "count": 20 + }, + "Norges historie fra 1945 til 1970": { + "count": 1 + }, + "Nord- og Mellom-Amerika": { + "count": 24 + }, + "Honduras\u2019 geografi": { + "count": 1 + }, + "Historiske hjelpevitenskaper": { + "count": 18 + }, + "Slektsforskning og genealogi": { + "count": 17 + }, + "Utenlandske slekter": { + "count": 7 + }, + "Folkegrupper i Nord- og Mellom-Amerika": { + "count": 3 + }, + "Folkegrupper i Nord-Amerika": { + "count": 3 + }, + "Litteratur i verden": { + "count": 88 + }, + "Nord- og Mellom-Amerikas litteratur": { + "count": 11 + }, + "USAs litteratur": { + "count": 7 + }, + "USAs litteratur p\u00e5 1600 og 1700-tallet": { + "count": 1 + }, + "Religion og filosofi": { + "count": 95 + }, + "Religioner og livssyn": { + "count": 61 + }, + "Kristendom": { + "count": 48 + }, + "Det gamle testamentet": { + "count": 2 + }, + "Personer i Det gamle testamentet": { + "count": 2 + }, + "Tofr\u00f8bladete planter": { + "count": 21 
+ }, + "Vierordenen": { + "count": 3 + }, + "Vierfamilien": { + "count": 1 + }, + "Vierslekta": { + "count": 1 + }, + "Afrikas litteratur": { + "count": 2 + }, + "Elfenbenskystens litteratur": { + "count": 1 + }, + "N\u00e6ringsmidler og husholdning": { + "count": 13 + }, + "Mat": { + "count": 7 + }, + "Ulike lands kj\u00f8kken": { + "count": 2 + }, + "S\u00f8r-Europas kj\u00f8kken": { + "count": 1 + }, + "Italiensk mat": { + "count": 1 + }, + "Klassisk musikk 1900\u20131950": { + "count": 3 + }, + "Norske klassiske komponister og verker 1900\u20131950": { + "count": 1 + }, + "Atferdsbiologi": { + "count": 1 + }, + "Skip og skipstyper": { + "count": 5 + }, + "Seilskip": { + "count": 2 + }, + "Demografi": { + "count": 2 + }, + "Filosofiens fagdisipliner": { + "count": 12 + }, + "Filosofisk logikk": { + "count": 1 + }, + "Historiske b\u00e5ttyper": { + "count": 2 + }, + "Vikingskip": { + "count": 1 + }, + "Bildende kunst": { + "count": 49 + }, + "Kunstvitenskap": { + "count": 5 + }, + "Termer og begreper i bildende kunst": { + "count": 4 + }, + "Rubladordenen": { + "count": 3 + }, + "Rubladfamilien": { + "count": 3 + }, + "\u00c5kersteinfr\u00f8slekta": { + "count": 1 + }, + "Familierett": { + "count": 3 + }, + "Oslo": { + "count": 1 + }, + "Atom- og kjernefysikk": { + "count": 3 + }, + "Teknologi": { + "count": 2 + }, + "Sjakk": { + "count": 1 + }, + "Frankrikes historie": { + "count": 11 + }, + "Frankrikes samtidshistorie": { + "count": 3 + }, + "Italias geografi": { + "count": 4 + }, + "Storbritannia og Nord-Irlands geografi": { + "count": 13 + }, + "Katolisisme": { + "count": 8 + }, + "Klostervesenet": { + "count": 4 + }, + "Verdens historie": { + "count": 5 + }, + "Den kalde krigen": { + "count": 2 + }, + "Cellens organeller og mikrostruktur": { + "count": 1 + }, + "Vekst og vekstanlegg": { + "count": 1 + }, + "Analytisk kjemi": { + "count": 2 + }, + "Spektroskopi": { + "count": 2 + }, + "Grammatikk": { + "count": 2 + }, + "Morfologi": { + "count": 2 + }, + 
"Finlands litteratur": { + "count": 4 + }, + "Utdanning og forskning": { + "count": 12 + }, + "Pedagogikk": { + "count": 3 + }, + "Biografier innen pedagogisk teori": { + "count": 2 + }, + "Frankrikes litteratur": { + "count": 14 + }, + "Frankrikes litteratur p\u00e5 1700-tallet": { + "count": 2 + }, + "Psykologi": { + "count": 4 + }, + "Personlighetspsykologi": { + "count": 3 + }, + "Musikkbransje": { + "count": 1 + }, + "Plateselskaper": { + "count": 1 + }, + "Romanias historie": { + "count": 3 + }, + "IT": { + "count": 4 + }, + "Internett": { + "count": 2 + }, + "Antigua og Barbudas geografi": { + "count": 1 + }, + "Norges historie fra 1660 til 1814": { + "count": 3 + }, + "Nordland": { + "count": 6 + }, + "Bindal": { + "count": 1 + }, + "Storbritannia og Nord-Irlands historie": { + "count": 7 + }, + "Italias litteratur": { + "count": 4 + }, + "Italias litteratur p\u00e5 1500-tallet": { + "count": 2 + }, + "Arbeid og velferd": { + "count": 4 + }, + "Arbeidsliv": { + "count": 2 + }, + "Arbeidslivsorganisasjoner": { + "count": 1 + }, + "Europas geografi generelt": { + "count": 2 + }, + "Internasjonale dirigenter": { + "count": 3 + }, + "Popul\u00e6rmusikk": { + "count": 20 + }, + "Pop og rock": { + "count": 11 + }, + "Internasjonal pop og rock": { + "count": 10 + }, + "Afrikas historie": { + "count": 2 + }, + "USAs historie": { + "count": 5 + }, + "USAs historie fra 1960": { + "count": 2 + }, + "Skipstyper": { + "count": 1 + }, + "De ortodokse kirker": { + "count": 2 + }, + "Sveriges litteratur": { + "count": 5 + }, + "Sveriges litteratur 1900-1960": { + "count": 2 + }, + "Avtalerett": { + "count": 2 + }, + "Jentenavn": { + "count": 5 + }, + "Energi": { + "count": 6 + }, + "Belysning": { + "count": 2 + }, + "Asia": { + "count": 44 + }, + "Indias geografi": { + "count": 3 + }, + "Br\u00f8nn\u00f8y": { + "count": 1 + }, + "Anatomi": { + "count": 3 + }, + "Insektenes anatomi og fysiologi": { + "count": 1 + }, + "Pakistans geografi": { + "count": 2 + }, + "Historiske 
religioner": { + "count": 8 + }, + "Gresk religion": { + "count": 2 + }, + "\u00d8konomi og n\u00e6ringsliv": { + "count": 25 + }, + "N\u00e6ringsliv": { + "count": 10 + }, + "Bedrifter og personer": { + "count": 9 + }, + "Bedrifter i internasjonal n\u00e6ringslivshistorie": { + "count": 2 + }, + "Politikk og offentlig forvaltning": { + "count": 12 + }, + "Norsk politikk og offentlig forvaltning": { + "count": 9 + }, + "Partier og politikere": { + "count": 7 + }, + "Norske politikere": { + "count": 7 + }, + "Tidligere politikere": { + "count": 5 + }, + "Belarus\u2019 geografi": { + "count": 1 + }, + "Matematikk": { + "count": 6 + }, + "Sannsynlighet og statistikk": { + "count": 2 + }, + "Statistikk": { + "count": 2 + }, + "Film": { + "count": 23 + }, + " TV og teater": { + "count": 23 + }, + "Film og TV": { + "count": 18 + }, + "Norske filmer og TV-serier": { + "count": 1 + }, + "Arkitektur og landskap": { + "count": 29 + }, + "Arkitekturhistorie": { + "count": 9 + }, + "Islamsk arkitekturhistorie": { + "count": 2 + }, + "Organisk kjemi": { + "count": 7 + }, + "Organiske forbindelser": { + "count": 7 + }, + "Historievitenskap": { + "count": 8 + }, + "Historikere": { + "count": 4 + }, + "Storbritannia og Nord-Irlands litteratur": { + "count": 8 + }, + "Storbritannia og Nord-Irlands litteratur p\u00e5 1900-tallet": { + "count": 3 + }, + "Folkegrupper i Afrika": { + "count": 3 + }, + "Spr\u00e5kvitenskapens grunnbegreper": { + "count": 1 + }, + "Luftfart": { + "count": 6 + }, + "Luftfart\u00f8y": { + "count": 1 + }, + "Sivilflytyper": { + "count": 1 + }, + "Passasjerfly": { + "count": 1 + }, + "Bygg og anlegg": { + "count": 6 + }, + "Betong": { + "count": 2 + }, + " sement og m\u00f8rtel": { + "count": 2 + }, + "S\u00f8r-Amerikas geografi": { + "count": 9 + }, + "Argentinas geografi": { + "count": 2 + }, + "Bosnia-Hercegovinas geografi": { + "count": 2 + }, + "Folkemusikk": { + "count": 1 + }, + "Kristendommens historie": { + "count": 6 + }, + "Tidlig kristendom": { + 
"count": 4 + }, + "Den norske kirke": { + "count": 5 + }, + "Buddhisme": { + "count": 2 + }, + "Nyreligi\u00f8sitet": { + "count": 2 + }, + "\u00d8stlig innflytelse": { + "count": 1 + }, + "Medier": { + "count": 17 + }, + "Forlagsdrift": { + "count": 1 + }, + "Amt og len i Norge": { + "count": 1 + }, + "Oster": { + "count": 3 + }, + "Tekstiler": { + "count": 4 + }, + "Flyteknikk": { + "count": 1 + }, + "Flygeteori": { + "count": 1 + }, + "Kirker": { + "count": 7 + }, + " slott og g\u00e5rder": { + "count": 7 + }, + "Slott og palasser": { + "count": 3 + }, + "Klassisk musikk i barokken": { + "count": 4 + }, + "Storbritannia og Nord-Irlands litteratur p\u00e5 1700-tallet": { + "count": 3 + }, + "Drikke": { + "count": 3 + }, + "Vin": { + "count": 2 + }, + "Vinterminologi": { + "count": 1 + }, + "Jazz": { + "count": 17 + }, + "Nord-Amerikanske jazzut\u00f8vere og -ensembler": { + "count": 10 + }, + " 1920 \u2013 1960": { + "count": 2 + }, + "Danmarks geografi": { + "count": 5 + }, + "Filippinenes geografi": { + "count": 1 + }, + "Sj\u00f8merker": { + "count": 1 + }, + "Elektrokjemi og metallurgi": { + "count": 3 + }, + "Legeringer": { + "count": 1 + }, + "St\u00e5l og andre jernlegeringer": { + "count": 1 + }, + "Penger": { + "count": 4 + }, + "Numismatikk": { + "count": 2 + }, + "Taiwans geografi": { + "count": 1 + }, + "Burkina Fasos geografi": { + "count": 3 + }, + "Blues": { + "count": 2 + }, + "Internasjonale bluesmusikere": { + "count": 2 + }, + "Journalistikk": { + "count": 2 + }, + "Journalistiske begreper": { + "count": 2 + }, + "Middelalderens arkitekturhistorie": { + "count": 4 + }, + "Frankrikes geografi": { + "count": 8 + }, + "Jazzens historie": { + "count": 1 + }, + "Tysk-romerske rike": { + "count": 1 + }, + "Biografier i Det tysk-romerske riket": { + "count": 1 + }, + "Geometri og topologi": { + "count": 3 + }, + "Algebraisk geometri": { + "count": 3 + }, + "USAs geografi": { + "count": 7 + }, + "Delstater og omr\u00e5der i USA": { + "count": 1 + }, + 
"Meteorologi": { + "count": 2 + }, + "Klima": { + "count": 1 + }, + "Klima i Norge": { + "count": 1 + }, + "Norske tradisjonsb\u00e5ter": { + "count": 1 + }, + "Nordnorske tradisjonsb\u00e5ter": { + "count": 1 + }, + "Politi og kriminalitet": { + "count": 4 + }, + "Politivesen": { + "count": 2 + }, + "Politifaglige begreper": { + "count": 2 + }, + "Kulturinstitusjoner": { + "count": 6 + }, + "Museum": { + "count": 3 + }, + "Filosofiens historie": { + "count": 9 + }, + "Middelalderens og renessansens filosofi": { + "count": 4 + }, + "Forsvar og sikkerhet": { + "count": 18 + }, + "Forsvar i verden": { + "count": 3 + }, + "Bokkunst og skrift": { + "count": 2 + }, + "Skriftdesign og typografi": { + "count": 1 + }, + "Typografer og skriftdesignere": { + "count": 1 + }, + "Frankrikes nyere litteratur": { + "count": 3 + }, + "Romerrikets litteratur": { + "count": 2 + }, + "Verk i Romerrikets litteratur": { + "count": 1 + }, + "Dyreriket": { + "count": 30 + }, + "Ryggstrengdyr": { + "count": 23 + }, + "Virveldyr": { + "count": 26 + }, + "Fugler": { + "count": 8 + }, + "Dykkere": { + "count": 1 + }, + "Monarki": { + "count": 1 + }, + "Maskiner": { + "count": 10 + }, + "Maskinelementer": { + "count": 1 + }, + "Jus generelt": { + "count": 2 + }, + "Petroleum": { + "count": 6 + }, + "Petroleumsutvinning": { + "count": 4 + }, + "Boreteknologi": { + "count": 1 + }, + "Landbruk": { + "count": 8 + }, + "Husdyr": { + "count": 7 + }, + "Hund": { + "count": 5 + }, + "Hunderaser": { + "count": 5 + }, + "Offentlig politikk og administrasjon": { + "count": 3 + }, + "Politisk og \u00f8konomisk filosofi": { + "count": 5 + }, + "Internasjonal tekstilindustri": { + "count": 1 + }, + "Spanias geografi": { + "count": 6 + }, + "Multippelenheter i SI-systemet": { + "count": 1 + }, + " 1980 til i dag": { + "count": 7 + }, + "Salmesang": { + "count": 2 + }, + "Milit\u00e6re biografier": { + "count": 4 + }, + "Norske milit\u00e6re biografier": { + "count": 2 + }, + "Kristne trossamfunn": { + 
"count": 9 + }, + "Spr\u00e5kforskere": { + "count": 2 + }, + "Norske spr\u00e5kforskere og spr\u00e5kfolk": { + "count": 1 + }, + "Romerrikets historie": { + "count": 10 + }, + "Filmteknikk": { + "count": 2 + }, + "Europeisk film og TV": { + "count": 6 + }, + "Europeiske filmer og tv-serier": { + "count": 1 + }, + "Bil": { + "count": 8 + }, + "Bilfag": { + "count": 4 + }, + "Bilens sikkerhetssystemer": { + "count": 2 + }, + "Bygningstyper": { + "count": 2 + }, + "Norges historie fra 1814 til 1884": { + "count": 2 + }, + "Europeiske jazzut\u00f8vere og -ensembler": { + "count": 2 + }, + " 1920 \u2013 1980": { + "count": 4 + }, + "S\u00f8r-Amerika": { + "count": 7 + }, + "Bolivias historie": { + "count": 2 + }, + "Internasjonalt milit\u00e6rt samarbeid": { + "count": 2 + }, + "Internasjonale milit\u00e6re organisasjoner": { + "count": 2 + }, + "NATO": { + "count": 2 + }, + "Kristen teologi og trosl\u00e6re": { + "count": 2 + }, + "Leddyr": { + "count": 4 + }, + "Insekter": { + "count": 4 + }, + "Tovinger": { + "count": 1 + }, + "Gallmygg": { + "count": 1 + }, + "Dasineura": { + "count": 1 + }, + "Mauritius\u2019 historie": { + "count": 1 + }, + "Mauritius\u2019 samtidshistorie": { + "count": 1 + }, + "Historiske riker i Sentral-Afrika": { + "count": 1 + }, + "Politikkomr\u00e5der": { + "count": 2 + }, + "Milj\u00f8vern": { + "count": 1 + }, + "Milj\u00f8bevegelsen": { + "count": 1 + }, + "Programmering": { + "count": 1 + }, + "Polens historie": { + "count": 1 + }, + "Fotografi": { + "count": 2 + }, + "Fotohistoriske biografier": { + "count": 1 + }, + "Skipstekniske og nautiske begrep": { + "count": 5 + }, + "Immaterialrett": { + "count": 1 + }, + "Opphavsrett": { + "count": 1 + }, + "Kontormaskiner": { + "count": 1 + }, + "Sosiologi": { + "count": 6 + }, + "Sosiologer": { + "count": 1 + }, + "Plantenes form": { + "count": 3 + }, + "Fr\u00f8 og formeringsorganer": { + "count": 1 + }, + "Genealoger": { + "count": 1 + }, + "Storbritannias samtidshistorie": { + "count": 
1 + }, + "Samtidskunst": { + "count": 10 + }, + "Skulptur og mosaikk i samtidskunsten": { + "count": 4 + }, + "Bolig og eiendom": { + "count": 3 + }, + "Volleyball": { + "count": 1 + }, + "V\u00e6r": { + "count": 1 + }, + "Skyer og nedb\u00f8r": { + "count": 1 + }, + "Bangladesh' historie": { + "count": 1 + }, + "Bangladesh' samtidshistorie": { + "count": 1 + }, + "Spurvefugler": { + "count": 2 + }, + "Kinas geografi": { + "count": 1 + }, + "V\u00e5pen og milit\u00e6rt materiell": { + "count": 3 + }, + "Marinefart\u00f8y": { + "count": 1 + }, + "Undervannsb\u00e5ter": { + "count": 1 + }, + "Belgias franskspr\u00e5klige litteratur": { + "count": 4 + }, + "Motorkj\u00f8ret\u00f8yer": { + "count": 1 + }, + "Israels historie": { + "count": 1 + }, + "Astronomi": { + "count": 17 + }, + "Solsystemet": { + "count": 2 + }, + "Colombias geografi": { + "count": 2 + }, + "Norges historie fra 1050 til 1300": { + "count": 3 + }, + "\u00d8vrig vintersport": { + "count": 2 + }, + "Skiskyting": { + "count": 1 + }, + "Vestland": { + "count": 8 + }, + "B\u00f8mlo": { + "count": 1 + }, + "Moderne okkultisme": { + "count": 1 + }, + "Frankrikes litteratur p\u00e5 1800-tallet": { + "count": 3 + }, + "Antropologi": { + "count": 6 + }, + "Antropologer": { + "count": 3 + }, + "Musikkteori": { + "count": 2 + }, + "Tonalitet": { + "count": 1 + }, + "Internasjonale bedrifter og varemerker": { + "count": 4 + }, + "Religionsvitenskap": { + "count": 6 + }, + "Spr\u00e5ktypologi": { + "count": 1 + }, + "Ungarns historie": { + "count": 2 + }, + "Vade-": { + "count": 2 + }, + " m\u00e5ke- og alkefugler": { + "count": 2 + }, + "Oseanias litteratur": { + "count": 3 + }, + "New Zealands litteratur": { + "count": 1 + }, + "Pattedyr": { + "count": 10 + }, + "Spissmusdyr": { + "count": 1 + }, + "Spissmusfamilien": { + "count": 1 + }, + "Soricinae": { + "count": 1 + }, + "Soricini": { + "count": 1 + }, + "Sorex": { + "count": 1 + }, + "Fysikalsk kjemi": { + "count": 1 + }, + "Litteraturvitenskap": { + 
"count": 7 + }, + "Litteraturforskere og kritikere": { + "count": 3 + }, + "Aserbajdsjans historie": { + "count": 1 + }, + "Rettssosiologi": { + "count": 1 + }, + "Chiles geografi": { + "count": 1 + }, + "Norske bedrifter og varemerker": { + "count": 3 + }, + "Klassisk musikk p\u00e5 1800-tallet": { + "count": 4 + }, + "Internasjonale klassiske komponister og verker p\u00e5 1800-tallet": { + "count": 3 + }, + "Flyindustri": { + "count": 4 + }, + "Flyselskaper": { + "count": 2 + }, + "Racketsport": { + "count": 2 + }, + "Tennis": { + "count": 2 + }, + "Prosessmetallurgi": { + "count": 1 + }, + "Tysklands litteratur": { + "count": 6 + }, + "Tysklands litteratur 1900-1950": { + "count": 1 + }, + "Norske klassiske instrumentalister": { + "count": 2 + }, + "Innlandet": { + "count": 2 + }, + "Eidskog": { + "count": 1 + }, + "Nord-amerikansk film og TV": { + "count": 7 + }, + "Nord-amerikanske skuespillere": { + "count": 5 + }, + "Internasjonal politikk": { + "count": 2 + }, + "Troms og Finnmark": { + "count": 3 + }, + "Bardu": { + "count": 1 + }, + "Hinduisme": { + "count": 3 + }, + "Aust-Agder": { + "count": 1 + }, + "Kirkemusikk": { + "count": 1 + }, + "Australias litteratur": { + "count": 2 + }, + "Kunsthistorie": { + "count": 31 + }, + "Ekspresjonisme og fauvisme": { + "count": 1 + }, + "Vannforsyning": { + "count": 1 + }, + "\u00d8vrige sporter": { + "count": 2 + }, + "\u00d8vrig konkurranseidrett": { + "count": 1 + }, + "Norsk pop og rock": { + "count": 1 + }, + "Frankrikes litteratur p\u00e5 1600-tallet": { + "count": 1 + }, + "Arkitekter og arkitektur-institusjoner": { + "count": 10 + }, + "Internasjonale arkitekter fra 1600 til 1900": { + "count": 2 + }, + "Dyrenes anatomi": { + "count": 1 + }, + "Bangladesh' geografi": { + "count": 3 + }, + "Benins historie": { + "count": 1 + }, + "Benins samtidshistorie": { + "count": 1 + }, + "Elektromagnetisme": { + "count": 4 + }, + "Neo-ekspresjonisme og figurativ billedkunst etter 1945": { + "count": 4 + }, + 
"N\u00e6ringsmiddelteknologi": { + "count": 2 + }, + "Tr\u00f8ndelag": { + "count": 3 + }, + "Skaun": { + "count": 1 + }, + "Sigdal": { + "count": 1 + }, + "Tysklands historie": { + "count": 4 + }, + "Tysklands historie fra 1933 til 1945": { + "count": 1 + }, + "Spr\u00e5kfilosofi": { + "count": 1 + }, + "Kjerneenergi": { + "count": 1 + }, + "Kjernekraftverk": { + "count": 1 + }, + "Reiseliv": { + "count": 3 + }, + "Bedrifter og organisasjoner i internasjonalt reiseliv": { + "count": 2 + }, + "Norske dirigenter": { + "count": 1 + }, + "Soppriket": { + "count": 10 + }, + "Stilksporesopper": { + "count": 9 + }, + "Hymeniesopper": { + "count": 7 + }, + "Ekte hymeniesopper": { + "count": 7 + }, + "Agaricomycetidae": { + "count": 5 + }, + "Skivesoppordenen": { + "count": 5 + }, + "Tricholomataceae": { + "count": 1 + }, + "Ridderhatter": { + "count": 1 + }, + "Indonesias geografi": { + "count": 4 + }, + "Forskningsmetode": { + "count": 5 + }, + "Frankrikes litteratur p\u00e5 1900-tallet": { + "count": 4 + }, + "Aviser": { + "count": 9 + }, + "Avisredakt\u00f8rer": { + "count": 2 + }, + "Portugals litteratur": { + "count": 1 + }, + "Utdanningshistorie": { + "count": 1 + }, + "Pumper og turbiner": { + "count": 3 + }, + "Byer i USA": { + "count": 4 + }, + "USA-assosierte stater og territorer i Oseania": { + "count": 1 + }, + "Korsblomstordenen": { + "count": 3 + }, + "Korsblomstfamilien": { + "count": 3 + }, + "Rublomslekta": { + "count": 1 + }, + "Lyngordenen": { + "count": 1 + }, + "Lyngfamilien": { + "count": 1 + }, + "B\u00e6rlyngslekta": { + "count": 1 + }, + " 1960 \u2013 1980": { + "count": 1 + }, + "Geologi": { + "count": 11 + }, + "Bergarter": { + "count": 3 + }, + "Metamorfe bergarter": { + "count": 1 + }, + "Nederlandsk og flamsk billedkunst 1400-1750": { + "count": 2 + }, + "Kristen misjonsvirksomhet": { + "count": 6 + }, + "Benins geografi": { + "count": 1 + }, + "Romanias geografi": { + "count": 2 + }, + "Molekyl\u00e6rbiologi": { + "count": 1 + }, + "Metoder 
i celle- og molekyl\u00e6rbiologien": { + "count": 1 + }, + "Skrinneblomslekta": { + "count": 1 + }, + "Norsk bildende kunst fra 1800-1945": { + "count": 3 + }, + "Englands historie": { + "count": 1 + }, + "Kinas historie": { + "count": 1 + }, + "Republikken Kinas historie (1912\u20131949)": { + "count": 1 + }, + "Bunad": { + "count": 1 + }, + " kofter og folkedrakt": { + "count": 1 + }, + "Islam": { + "count": 3 + }, + "Retninger i islam": { + "count": 3 + }, + "Postimpresjonisme": { + "count": 2 + }, + " symbolisme og art nouveau": { + "count": 2 + }, + "Dyrevern": { + "count": 1 + }, + "Tidsregning": { + "count": 2 + }, + "Temabasert reiseliv": { + "count": 1 + }, + "Turisthytter": { + "count": 1 + }, + "Geomorfologi": { + "count": 1 + }, + "M\u00f8re og Romsdal": { + "count": 3 + }, + "Fjord": { + "count": 1 + }, + "Formering": { + "count": 1 + }, + "Design": { + "count": 4 + }, + "M\u00f8bler og interi\u00f8rtyper": { + "count": 1 + }, + "Ordforklaringer": { + "count": 10 + }, + "Uttrykk og ordtak": { + "count": 3 + }, + "Petroleumsfelter": { + "count": 2 + }, + "Brasils historie": { + "count": 2 + }, + "Brasils samtidshistorie": { + "count": 2 + }, + "Kildeskrifter": { + "count": 3 + }, + "Norske slekter": { + "count": 9 + }, + "Kamp- og kontaktsport": { + "count": 2 + }, + "Asiatiske kampsporter": { + "count": 1 + }, + "Stormfugler": { + "count": 1 + }, + "Stormfuglfamilien": { + "count": 1 + }, + "Thalassoica": { + "count": 1 + }, + "Danmarks historie": { + "count": 4 + }, + "Danmarks samtidshistorie": { + "count": 1 + }, + "Personer innen astronomi": { + "count": 3 + }, + "Romvirksomhet": { + "count": 4 + }, + "Personer": { + "count": 1 + }, + " bedrifter og organisasjoner innen romvirksomhet": { + "count": 1 + }, + "Prefikser og suffikser": { + "count": 1 + }, + "Kraftselskaper og kraftverk": { + "count": 2 + }, + "Arkivfag": { + "count": 2 + }, + "Vestfold og Telemark": { + "count": 2 + }, + "Midt-Telemark": { + "count": 1 + }, + "B\u00f8": { + "count": 
2 + }, + "Russlands geografi": { + "count": 1 + }, + "Teknologi- og industrihistorie": { + "count": 4 + }, + "Norsk teknologi- og industrihistorie": { + "count": 4 + }, + "Bergverkshistorie": { + "count": 1 + }, + "Statsvitere og institusjoner": { + "count": 1 + }, + "Verdensmusikk": { + "count": 2 + }, + "Country og bluegrass": { + "count": 1 + }, + "Amerikanske country- og bluegrassartister": { + "count": 1 + }, + "Barnehage": { + "count": 2 + }, + "Internasjonale klassiske ensembler": { + "count": 1 + }, + "Antikkens filosofi": { + "count": 2 + }, + "Leppeblomstordenen": { + "count": 3 + }, + "Kjempefamilien": { + "count": 1 + }, + "Kjempeslekta": { + "count": 1 + }, + "Industri": { + "count": 4 + }, + "Metallarbeid": { + "count": 2 + }, + "Geologer": { + "count": 2 + }, + "Canadas historie": { + "count": 1 + }, + "Canadas samtidshistorie": { + "count": 1 + }, + "Sosiologisk teori": { + "count": 4 + }, + "Palestinas historie": { + "count": 2 + }, + "Palestinas samtidshistorie": { + "count": 2 + }, + "Canadas franskspr\u00e5klige litteratur": { + "count": 1 + }, + "Internasjonale tegnekunstnere": { + "count": 2 + }, + "\u00d8sterrikes litteratur": { + "count": 2 + }, + "\u00d8sterrikes litteratur f\u00f8r 1700-tallet": { + "count": 1 + }, + "Str\u00e5lefinnefisker": { + "count": 2 + }, + "Karpefisker": { + "count": 1 + }, + "Karpefamilien": { + "count": 1 + }, + "Rhodeus": { + "count": 1 + }, + "Indonesias historie": { + "count": 1 + }, + "Indonesias samtidshistorie": { + "count": 1 + }, + "Samfunnssikkerhet og beredskap": { + "count": 1 + }, + "Beredskap og kriseh\u00e5ndtering": { + "count": 1 + }, + "Designteori og -historie": { + "count": 1 + }, + "Grunnstoffer": { + "count": 5 + }, + "Nyklassisisme og romantikk i bildende kunst": { + "count": 1 + }, + "Rot hos planter": { + "count": 1 + }, + "Kroatias litteratur": { + "count": 1 + }, + "Flaggermus": { + "count": 1 + }, + "Glattnesefamilien": { + "count": 1 + }, + "Vespertilioninae": { + "count": 1 + }, + 
"Plecotini": { + "count": 1 + }, + "Barbastella": { + "count": 1 + }, + "J\u00f8dedom": { + "count": 2 + }, + "Tsjekkias geografi": { + "count": 1 + }, + "Danmarks litteratur": { + "count": 5 + }, + "Danmarks litteratur p\u00e5 1900-tallet": { + "count": 4 + }, + "Fransk politikk": { + "count": 1 + }, + "IT-historie": { + "count": 1 + }, + "Personer i IT-historie": { + "count": 1 + }, + "USAs litteratur 1900-1950": { + "count": 1 + }, + "Internasjonal rett": { + "count": 3 + }, + "Alminnelig folkerett": { + "count": 3 + }, + "Lydteknologi": { + "count": 1 + }, + "Flatormer": { + "count": 1 + }, + "Bendelormer": { + "count": 1 + }, + "Pseudophyllidea": { + "count": 1 + }, + "Diphyllobothriidae": { + "count": 1 + }, + "Asias litteratur": { + "count": 3 + }, + "Irans litteratur": { + "count": 1 + }, + "Realisme og impresjonisme": { + "count": 5 + }, + "Kjernekjemi": { + "count": 1 + }, + "Hellas\u2019 historie": { + "count": 1 + }, + "Norske aviser": { + "count": 2 + }, + "Ord og uttrykk p\u00e5 andre spr\u00e5k": { + "count": 3 + }, + "Tyske ord og uttrykk": { + "count": 2 + }, + "Storbritannia og Nord-Irlands litteratur p\u00e5 1600-tallet": { + "count": 1 + }, + "Haier og skater": { + "count": 1 + }, + "H\u00e5branner": { + "count": 1 + }, + "Brugdefamilien": { + "count": 1 + }, + "Cetorhinus": { + "count": 1 + }, + "Rettskildel\u00e6re": { + "count": 1 + }, + "Belgias historie": { + "count": 1 + }, + "Usbekistans geografi": { + "count": 1 + }, + "Elektronikk": { + "count": 6 + }, + "Fjernsynsteknikk": { + "count": 1 + }, + "Frankrikes litteratur f\u00f8r 1500-tallet": { + "count": 1 + }, + "Begreper i botanikken": { + "count": 2 + }, + "Costa Ricas litteratur": { + "count": 1 + }, + "Macrocystidiaceae": { + "count": 1 + }, + "Macrocystidia": { + "count": 1 + }, + "Flagg og nasjonalsymboler": { + "count": 2 + }, + "Flagg og v\u00e5pen": { + "count": 2 + }, + "Milit\u00e6r teori": { + "count": 1 + }, + "Arendal": { + "count": 1 + }, + "Austrheim": { + "count": 1 + 
}, + "Cubas historie": { + "count": 1 + }, + "Cubas samtidshistorie": { + "count": 1 + }, + "Motiver i bildende kunst": { + "count": 1 + }, + "Strophariaceae": { + "count": 1 + }, + "Skulptur og mosaikk i nyere tid": { + "count": 3 + }, + "Europas \u00f8vrige historie": { + "count": 3 + }, + "Europa p\u00e5 1900-tallet": { + "count": 1 + }, + "Kjemiens historie": { + "count": 1 + }, + "S\u00f8tvierordenen": { + "count": 3 + }, + "S\u00f8tvierfamilien": { + "count": 3 + }, + "S\u00f8tvierslekta": { + "count": 2 + }, + "Andre verdenskrig": { + "count": 2 + }, + "Tysklands litteratur p\u00e5 1800-tallet": { + "count": 3 + }, + "Norske klassiske ensembler": { + "count": 1 + }, + "Drammen": { + "count": 1 + }, + "Perus geografi": { + "count": 2 + }, + "Romobservatorier": { + "count": 2 + }, + "Fagbegreper i antropologi": { + "count": 3 + }, + "Botswanas historie": { + "count": 1 + }, + "Botswanas samtidshistorie": { + "count": 1 + }, + "Vortemelkfamilien": { + "count": 2 + }, + "Fly- og helikopterprodusenter": { + "count": 2 + }, + "Heraldikk": { + "count": 1 + }, + "Grafikk": { + "count": 1 + }, + "Genetikk": { + "count": 2 + }, + "Bemannet romfart": { + "count": 1 + }, + "Tyrkias geografi": { + "count": 2 + }, + "Den katolske kirke": { + "count": 1 + }, + "Erkebiskoper": { + "count": 1 + }, + "Biografier i norsk teknologi- og industrihistorie": { + "count": 2 + }, + "Vepser": { + "count": 3 + }, + "Arverett og skifterett": { + "count": 1 + }, + "\u00d8rland": { + "count": 1 + }, + "Engelsk fotball": { + "count": 2 + }, + "Engelske fotballspillere": { + "count": 1 + }, + "Bibliotek": { + "count": 1 + }, + "S\u00f8r-Amerikas litteratur": { + "count": 2 + }, + "Colombias litteratur": { + "count": 1 + }, + "Protestantisme": { + "count": 4 + }, + "Samferdsel": { + "count": 2 + }, + "Broer": { + "count": 1 + }, + "Broer i Norge": { + "count": 1 + }, + "Stilkvepser": { + "count": 1 + }, + "Broddvepser": { + "count": 1 + }, + "Apoidea": { + "count": 1 + }, + "Grafisk 
industri": { + "count": 3 + }, + "Teknologi og materialer i grafisk industri": { + "count": 2 + }, + "Norske skuespillere": { + "count": 4 + }, + "Aviser i Vestland": { + "count": 1 + }, + "Magnoliids": { + "count": 1 + }, + "Magnoliaordenen": { + "count": 1 + }, + "Annonaceae": { + "count": 1 + }, + "Den franske revolusjon": { + "count": 1 + }, + "Teater": { + "count": 1 + }, + "Internasjonale teaterinstitusjoner": { + "count": 1 + }, + "Arkeologi": { + "count": 9 + }, + "Nordisk arkeologi": { + "count": 2 + }, + "Hvaler": { + "count": 1 + }, + "Bardehvaler": { + "count": 1 + }, + "Finnhvalfamilien": { + "count": 1 + }, + "Balaenoptera": { + "count": 1 + }, + "Folketro": { + "count": 2 + }, + "Russland og Sovjetunionens historie": { + "count": 5 + }, + "Russlands eldre historie": { + "count": 3 + }, + "Blad": { + "count": 1 + }, + "Europeiske filmskuespillere": { + "count": 4 + }, + "Internasjonale klassiske komponister og verker i barokken": { + "count": 2 + }, + "Samfunns\u00f8konomi": { + "count": 1 + }, + "\u00d8konomisk politikk": { + "count": 1 + }, + "Motorsport": { + "count": 1 + }, + "Bilsport": { + "count": 1 + }, + "Perus historie": { + "count": 1 + }, + "Skipstekniske begrep": { + "count": 2 + }, + "Glass og keramikk": { + "count": 1 + }, + "Glass og keramikk \u2013 ut\u00f8vere": { + "count": 1 + }, + "Brunrotfamilien": { + "count": 1 + }, + "Bokkunst": { + "count": 1 + }, + "Internasjonale klassiske instrumentalister": { + "count": 1 + }, + "Dagblomordenen": { + "count": 1 + }, + "Dagblomfamilien": { + "count": 1 + }, + "Roseordenen": { + "count": 1 + }, + "Rosefamilien": { + "count": 1 + }, + "Epleslekta": { + "count": 1 + }, + "USAs litteratur 1950-2000": { + "count": 3 + }, + "Plantevepser": { + "count": 1 + }, + "Tenthredinoidea": { + "count": 1 + }, + "Bladvepser": { + "count": 1 + }, + "Blennocampinae": { + "count": 1 + }, + "Tomostethus": { + "count": 1 + }, + "Franske s\u00f8rterritorier": { + "count": 1 + }, + "Musikkinstrumenter": { + 
"count": 1 + }, + "Elektriske musikkinstrumenter": { + "count": 1 + }, + "Arbeidsrett": { + "count": 1 + }, + "Optikk": { + "count": 3 + }, + "Animasjonsfilm": { + "count": 1 + }, + "Snipefamilien": { + "count": 1 + }, + "Rogaland": { + "count": 3 + }, + "Eigersund": { + "count": 2 + }, + "Albanias historie": { + "count": 2 + }, + "Sirenia": { + "count": 1 + }, + "Dugongidae": { + "count": 1 + }, + "Dugong": { + "count": 1 + }, + "Dans": { + "count": 5 + }, + "Scenisk dans": { + "count": 2 + }, + "Moderne- og samtidsdans": { + "count": 1 + }, + "Moderne- og samtidsdans: norske kunstnere og kompanier": { + "count": 1 + }, + "Raketter": { + "count": 2 + }, + "Obligasjonsrett": { + "count": 4 + }, + "Myanmars historie": { + "count": 1 + }, + "Myanmars samtidshistorie": { + "count": 1 + }, + "Biografier i Romerrikets historie": { + "count": 3 + }, + "Kantarellordenen": { + "count": 1 + }, + "Clavulinaceae": { + "count": 1 + }, + "Ringerike": { + "count": 1 + }, + "Litteraturtyper": { + "count": 7 + }, + "Barne- og ungdomslitteratur": { + "count": 3 + }, + "Internasjonal barne- og ungdomslitteratur": { + "count": 2 + }, + "Russland og Sovjetunionens litteratur": { + "count": 1 + }, + "Russiske litteraturforskere og kritikere": { + "count": 1 + }, + "Filippinenes historie": { + "count": 1 + }, + "Filippinenes samtidshistorie": { + "count": 1 + }, + "Norske jazzut\u00f8vere og -ensembler": { + "count": 2 + }, + "Bedrifts\u00f8konomi": { + "count": 5 + }, + "Norske tegnekunstnere": { + "count": 2 + }, + "Argentinas litteratur": { + "count": 1 + }, + "Islands geografi": { + "count": 1 + }, + "Gnagere": { + "count": 2 + }, + "Sciuromorpha": { + "count": 1 + }, + "Ekornfamilien": { + "count": 1 + }, + "Myosciurus": { + "count": 1 + }, + "Paleontologi": { + "count": 5 + }, + "Tokke": { + "count": 1 + }, + "Soul og R&B": { + "count": 1 + }, + "Internasjonal soul og R&B": { + "count": 1 + }, + "Utenlandske spr\u00e5kforskere": { + "count": 1 + }, + "Forglemmegeislekta": { + 
"count": 2 + }, + "Slimriket": { + "count": 1 + }, + "Strafferett": { + "count": 1 + }, + "Mineraler": { + "count": 4 + }, + "Silikatmineraler": { + "count": 3 + }, + "Norges forsvar": { + "count": 2 + }, + "Norske festninger og milit\u00e6ranlegg": { + "count": 1 + }, + "Katolske ritualer og symboler": { + "count": 2 + }, + "Irlands geografi": { + "count": 1 + }, + "Jurister": { + "count": 2 + }, + "Ulike lands musikk og musikktradisjoner": { + "count": 1 + }, + "Albanias samtidshistorie": { + "count": 1 + }, + "Varehandel": { + "count": 1 + }, + "Psykologiens historie": { + "count": 1 + }, + "Utenlandske aviser": { + "count": 2 + }, + "Verdenshavene": { + "count": 3 + }, + "Dyphavsgroper": { + "count": 1 + }, + "Ulike lands arkitekturhistorie": { + "count": 1 + }, + "Islands litteratur": { + "count": 4 + }, + "Ulike lands kunst og kunsttradisjoner": { + "count": 1 + }, + "Konstruksjonsteknikk": { + "count": 1 + }, + "Canadas geografi": { + "count": 1 + }, + "Bakterier": { + "count": 1 + }, + "H\u00e5ndverk": { + "count": 1 + }, + "Redskaper og festemidler": { + "count": 1 + }, + "Utforsking av solsystemet": { + "count": 1 + }, + "Vin i Europa": { + "count": 1 + }, + "Vin i Frankrike": { + "count": 1 + }, + "Myndigheter i Norge": { + "count": 2 + }, + "Embets- og tjenestepersoner": { + "count": 1 + }, + "Det 19. 
\u00e5rhundres filosofi": { + "count": 1 + }, + "Nederlands historie": { + "count": 1 + }, + "Norges historie fram til 1050": { + "count": 1 + }, + "Barnevern": { + "count": 1 + }, + "Rovpattedyr": { + "count": 2 + }, + "Caniformia": { + "count": 2 + }, + "Bj\u00f8rnefamilien": { + "count": 1 + }, + "Tremarctos": { + "count": 1 + }, + "Kirgisistans historie": { + "count": 1 + }, + "Kirgisistans samtidshistorie": { + "count": 1 + }, + "Krypdyr": { + "count": 1 + }, + "Dinosaurer": { + "count": 1 + }, + "\u00d8vrig europeisk malerkunst 1400-1750": { + "count": 2 + }, + "Mesopotamia": { + "count": 2 + }, + "Storg\u00e5rder og herreg\u00e5rder": { + "count": 2 + }, + "Norske storg\u00e5rder": { + "count": 2 + }, + "Internasjonale arkitekter og arkitektkontorer fra 1900 til i dag": { + "count": 4 + }, + "Skriftspr\u00e5k": { + "count": 1 + }, + "Alfabeter og skriftsystemer": { + "count": 1 + }, + "Skrifttegn": { + "count": 1 + }, + "Latinske bokstaver": { + "count": 1 + }, + "Diakritiske tegn": { + "count": 1 + }, + "Det 17. og 18. 
\u00e5rhundres filosofi": { + "count": 2 + }, + "Norske arkitekter og arkitektkontorer fra 1900 til i dag": { + "count": 3 + }, + "Asias kj\u00f8kken": { + "count": 1 + }, + "Avisjournalister": { + "count": 1 + }, + "V\u00e5pen": { + "count": 2 + }, + "Artilleri": { + "count": 1 + }, + "TV og radio": { + "count": 1 + }, + "TV- og radiopersoner": { + "count": 1 + }, + "Uorganisk kjemi": { + "count": 3 + }, + "Myomorpha": { + "count": 1 + }, + "Muroidea": { + "count": 1 + }, + "Hamsterfamilien": { + "count": 1 + }, + "Arvicolinae": { + "count": 1 + }, + "Lemmini": { + "count": 1 + }, + "Lemmus": { + "count": 1 + }, + "Bilens drivlinje": { + "count": 1 + }, + "Britisk Indiahav": { + "count": 1 + }, + "Nyrealisme": { + "count": 1 + }, + " nysaklighet og tendenskunst": { + "count": 1 + }, + "Cellebiologi": { + "count": 1 + }, + "Cellestrukturer": { + "count": 1 + }, + "Tysklands geografi": { + "count": 3 + }, + "Armenias geografi": { + "count": 1 + }, + "Elektronisk navigasjon": { + "count": 1 + }, + "Kinematografi": { + "count": 1 + }, + "Spr\u00e5kfamilier": { + "count": 3 + }, + "Indoeuropeiske spr\u00e5k": { + "count": 3 + }, + "Romansk": { + "count": 2 + }, + "Latin": { + "count": 2 + }, + "Latinske verk": { + "count": 2 + }, + "Folkegrupper i Asia": { + "count": 1 + }, + "Folkegrupper i Sentral-Asia": { + "count": 1 + }, + "Bilmerker": { + "count": 2 + }, + "Eritreas geografi": { + "count": 1 + }, + "Norr\u00f8n litteratur": { + "count": 2 + }, + "Algebra": { + "count": 1 + }, + "Kommutative algebraer og ringer": { + "count": 1 + }, + "Israels geografi": { + "count": 1 + }, + "Nellikordenen": { + "count": 1 + }, + "Slireknefamilien": { + "count": 1 + }, + "Syreslekta": { + "count": 1 + }, + "And\u00f8y": { + "count": 1 + }, + "Gulen": { + "count": 1 + }, + "Rettshistorie": { + "count": 2 + }, + "Kvinnherad": { + "count": 1 + }, + "Bilhistoriske biografier": { + "count": 1 + }, + "Jordans historie": { + "count": 1 + }, + "Friidrett": { + "count": 1 + }, + 
"L\u00f8p": { + "count": 1 + }, + "Europeiske filmskapere": { + "count": 1 + }, + "Oseania generelt": { + "count": 1 + }, + "Industridesign": { + "count": 1 + }, + "Fast eiendom": { + "count": 1 + }, + "Milit\u00e6rvesen": { + "count": 2 + }, + "Bank og finans": { + "count": 2 + }, + "B\u00f8rs og finans": { + "count": 2 + }, + "Internasjonale klassiske sangere": { + "count": 2 + }, + "Nattravn- og seilerfugler": { + "count": 1 + }, + "Seilerfamilien": { + "count": 1 + }, + "Tachymarptis": { + "count": 1 + }, + "Zoologi": { + "count": 1 + }, + "Pedagogisk teori": { + "count": 1 + }, + "Midt\u00f8sten": { + "count": 4 + }, + "Midt\u00f8stens gamle kulturer": { + "count": 4 + }, + "Oppdagelsesreiser": { + "count": 3 + }, + "Urstilksporesopper": { + "count": 1 + }, + "Ekte rustsopper": { + "count": 1 + }, + "Rustsoppordenen": { + "count": 1 + }, + "Pucciniaceae": { + "count": 1 + }, + "Slektskap": { + "count": 1 + }, + " familie og ekteskap": { + "count": 1 + }, + "Akershus": { + "count": 1 + }, + "Sk\u00f8ytesport": { + "count": 1 + }, + "Eldre mynter og pengeenheter": { + "count": 2 + }, + "Elektronisk musikk": { + "count": 1 + }, + "Kunstinstitusjoner": { + "count": 1 + }, + "Internasjonale kunstinstitusjoner": { + "count": 1 + }, + "Norges historie fra 1884 til 1940": { + "count": 5 + }, + "Plantefysiologi": { + "count": 1 + }, + "Serbias historie": { + "count": 1 + }, + "Ulike lands dansehistorie": { + "count": 1 + }, + "Spanias litteratur": { + "count": 1 + }, + "Spanias litteratur p\u00e5 1900-tallet": { + "count": 1 + }, + "Israels litteratur": { + "count": 1 + }, + "Skytesport": { + "count": 1 + }, + "Viltm\u00e5l": { + "count": 1 + }, + "H\u00e6r og landforsvar": { + "count": 1 + }, + "Norsk og samisk litteratur": { + "count": 11 + }, + "Nyere norsk litteratur": { + "count": 5 + }, + "Italienske ord og uttrykk": { + "count": 1 + }, + "USAs nyere litteratur": { + "count": 2 + }, + "Nyklassisisme og historisme i arkitekturhistorien": { + "count": 2 + }, + 
"Hygrophoraceae": { + "count": 1 + }, + "Cuphophyllus": { + "count": 1 + }, + "Ustabile grunnstoffer": { + "count": 3 + }, + "Kubisme og futurisme": { + "count": 1 + }, + "Krim": { + "count": 1 + }, + "Krim i Norge": { + "count": 1 + }, + "Namibias historie": { + "count": 1 + }, + "Skifer": { + "count": 1 + }, + "Rytme og tempo": { + "count": 1 + }, + "Bl\u00f8tdyr": { + "count": 2 + }, + "Snegler": { + "count": 1 + }, + "Albuesnegler": { + "count": 1 + }, + "Patelloidea": { + "count": 1 + }, + "Patellidae": { + "count": 1 + }, + "Patella": { + "count": 1 + }, + "Angolas historie": { + "count": 1 + }, + "Angolas samtidshistorie": { + "count": 1 + }, + "Grunnoppl\u00e6ring": { + "count": 2 + }, + "Yrkesfaglig utdanning": { + "count": 2 + }, + "Bygg- og anleggsteknikk": { + "count": 1 + }, + "Sveriges geografi": { + "count": 3 + }, + "Andefugler": { + "count": 1 + }, + "Andefamilien": { + "count": 1 + }, + "Somateria": { + "count": 1 + }, + "Rugby og amerikansk fotball": { + "count": 1 + }, + "Broddsoppordenen": { + "count": 1 + }, + "Hymenochaetaceae": { + "count": 1 + }, + "Jordbruk": { + "count": 1 + }, + "Trelast og treforedling": { + "count": 2 + }, + "Papirindustri": { + "count": 1 + }, + "Venezuelas geografi": { + "count": 1 + }, + "Mystikere": { + "count": 1 + }, + "Prosess": { + "count": 1 + }, + "Straffeprosess": { + "count": 1 + }, + "Institusjoner i klassisk musikk": { + "count": 1 + }, + "Aviseiere og direkt\u00f8rer": { + "count": 2 + }, + "Nord-amerikanske filmskapere": { + "count": 2 + }, + "Norske kirkebygg": { + "count": 2 + }, + "Utdanning i verden": { + "count": 2 + }, + "Utdanning i Europa": { + "count": 1 + }, + "Marokkos geografi": { + "count": 1 + }, + "Trolldomsprosesser": { + "count": 1 + }, + "Etiopias geografi": { + "count": 1 + }, + "Petroleumshistorie": { + "count": 1 + }, + "Ulykker i petroleumsbransjen": { + "count": 1 + }, + "Anvendt elektromagnetisme": { + "count": 1 + }, + "Tyrkiskspr\u00e5klig litteratur": { + "count": 1 + }, + 
"Tyrkias litteratur": { + "count": 1 + }, + "Indre \u00d8stfold": { + "count": 1 + }, + "Eidsberg": { + "count": 1 + }, + "Grafisk industrihistorie": { + "count": 1 + }, + "Tingvoll": { + "count": 1 + }, + "\u00d8kologi": { + "count": 1 + }, + "Utviklingsbiologi": { + "count": 1 + }, + "Fiskeri og havbruk": { + "count": 1 + }, + "Norske fiskerier": { + "count": 1 + }, + "Fiskeredskaper og fart\u00f8y": { + "count": 1 + }, + "Bulgarias geografi": { + "count": 2 + }, + "Nyere norsk sakprosa": { + "count": 1 + }, + "Tremellomycetes": { + "count": 1 + }, + "Tremellales": { + "count": 1 + }, + "Tremellaceae": { + "count": 1 + }, + "Cryptococcus": { + "count": 1 + }, + "Norges litteratur 1800-tallet": { + "count": 1 + }, + "Forskning og h\u00f8yere utdanning": { + "count": 2 + }, + "Forskningsinstitutter": { + "count": 1 + }, + "Transportrett": { + "count": 2 + }, + "Sj\u00f8rett": { + "count": 2 + }, + "Italias historie": { + "count": 2 + }, + "Italias samtidshistorie": { + "count": 1 + }, + "Internasjonale milit\u00e6re biografier": { + "count": 2 + }, + "Portugisisk fotball": { + "count": 1 + }, + "Medisin": { + "count": 3 + }, + "Helse og samfunn": { + "count": 2 + }, + "Medisinsk historie": { + "count": 2 + }, + "Algeries geografi": { + "count": 1 + }, + "Sveits\u2019 geografi": { + "count": 2 + }, + "Klovdyr": { + "count": 1 + }, + "Kvegfamilien": { + "count": 1 + }, + "Bos": { + "count": 1 + }, + "Norges litteratur 1900-1960": { + "count": 1 + }, + "Mel\u00f8y": { + "count": 1 + }, + "Religionsfilosofi": { + "count": 1 + }, + "Sveits\u2019 litteratur": { + "count": 2 + }, + "Sveits\u2019 franskspr\u00e5klige litteratur": { + "count": 1 + }, + "Materialfysikk": { + "count": 1 + }, + "Systematikk i biologien": { + "count": 1 + }, + "Terrorisme": { + "count": 1 + }, + "Norsk barne- og ungdomslitteratur f\u00f8r 2000": { + "count": 1 + }, + "Krystallografi": { + "count": 1 + }, + "Litter\u00e6re perioder og stiler": { + "count": 1 + }, + "Ungarns litteratur": { + 
"count": 1 + }, + "Semantikk": { + "count": 1 + }, + "Norsk samtidskunst": { + "count": 2 + }, + "Enebakk": { + "count": 1 + }, + "Europeisk arkeologi": { + "count": 1 + }, + "Tidlig abstrakt billedkunst": { + "count": 1 + }, + "Cortinariaceae": { + "count": 1 + }, + "Sl\u00f8rsopper": { + "count": 1 + }, + "Sentralafrikanske republikks historie": { + "count": 1 + }, + "Nord-Makedonias historie": { + "count": 1 + }, + "Nord-Makedonias samtidshistorie": { + "count": 1 + }, + "Ford\u00f8yelsessystemet": { + "count": 1 + }, + "Ern\u00e6ring": { + "count": 1 + }, + "Sveits\u2019 historie": { + "count": 1 + }, + "Sveits\u2019 samtidshistorie": { + "count": 1 + }, + "Grader og titler": { + "count": 1 + }, + "Akademiske titler": { + "count": 1 + }, + "New Zealands geografi": { + "count": 1 + }, + "Sveriges historie": { + "count": 1 + }, + "Sveriges kongehus": { + "count": 1 + }, + "Norges historie fra 1940 til 1945": { + "count": 2 + }, + "Klassisk religion": { + "count": 1 + }, + "Astronomisk teknologi": { + "count": 1 + }, + "Spanias historie": { + "count": 1 + }, + "Spanias samtidshistorie": { + "count": 1 + }, + "Sjangerl\u00e6re": { + "count": 2 + }, + "epikk": { + "count": 1 + }, + "Eidfjord": { + "count": 1 + }, + "Orientering": { + "count": 1 + }, + "Skotsk fotball": { + "count": 1 + }, + "Leppeblomstfamilien": { + "count": 1 + }, + "D\u00e5slekta": { + "count": 1 + }, + "Estetikk": { + "count": 2 + }, + "Nigerias historie": { + "count": 1 + }, + "Nigerias samtidshistorie": { + "count": 1 + }, + "Musikaler": { + "count": 2 + }, + "Angolas geografi": { + "count": 1 + }, + "Kristne ritualer og symboler": { + "count": 2 + }, + "Radioteknikk": { + "count": 1 + }, + "Internasjonale fotografer og fotokunstnere": { + "count": 1 + }, + "Internasjonale klassiske komponister og verker 1900\u20131950": { + "count": 2 + }, + "Norsk bildende kunst fra 1945 til 1990": { + "count": 2 + }, + "Serbia og det gamle Jugoslavias litteratur": { + "count": 1 + }, + "Polare ekspedisjoner 
og oppdagere": { + "count": 1 + }, + "Italiensk fotball": { + "count": 1 + }, + "Petroleumsselskaper": { + "count": 1 + }, + "Datoartikler": { + "count": 3 + }, + "Tysklands litteratur 1950-2000": { + "count": 1 + }, + "M\u00e5ltider": { + "count": 1 + }, + "Iraks historie": { + "count": 1 + }, + "Iraks samtidshistorie": { + "count": 1 + }, + "Irans historie": { + "count": 1 + }, + "Irans samtidshistorie": { + "count": 1 + }, + "Bod\u00f8": { + "count": 1 + }, + "Nordiske jazzut\u00f8vere og -ensembler": { + "count": 2 + }, + " 1980 til idag": { + "count": 2 + }, + "Trelast- og treforedlingsindustri \u2013 historie": { + "count": 1 + }, + "Det gamle Egypt": { + "count": 1 + }, + "Norske ord og uttrykk": { + "count": 1 + }, + "Papua Ny-Guineas geografi": { + "count": 1 + }, + "Asker": { + "count": 1 + }, + "Amfibier": { + "count": 2 + }, + "Springpadder": { + "count": 1 + }, + "Eleutherodactylidae": { + "count": 1 + }, + "Eleutherodactylus": { + "count": 1 + }, + "Tunneler": { + "count": 1 + }, + "Tunneler i Norge": { + "count": 1 + }, + "Salamandere": { + "count": 1 + }, + "Plethodontidae": { + "count": 1 + }, + "Aneides": { + "count": 1 + }, + "H\u00e6ren": { + "count": 1 + }, + "Kardeborreordenen": { + "count": 1 + }, + "Kaprifolfamilien": { + "count": 1 + }, + "Bl\u00e5knappslekta": { + "count": 1 + }, + "Italias litteratur p\u00e5 1700-tallet": { + "count": 1 + }, + "Skipsbygging": { + "count": 1 + }, + "Skipskonstrukt\u00f8rer": { + "count": 1 + }, + "Piggepleslekta": { + "count": 1 + }, + "Sediment\u00e6re bergarter": { + "count": 1 + }, + "Samfunnstyper": { + "count": 1 + }, + "Boksing": { + "count": 1 + }, + "Designinstitusjoner": { + "count": 1 + }, + "De forente arabiske emiraters historie": { + "count": 1 + }, + "De forente arabiske emiraters samtidshistorie": { + "count": 1 + }, + "Folketrygd og pensjon": { + "count": 1 + }, + "Forbrukerelektronikk": { + "count": 1 + }, + "Utdanning i S\u00f8r-Amerika": { + "count": 1 + }, + "Asias geografi generelt": { 
+ "count": 1 + }, + "Urverk og klokker": { + "count": 1 + }, + "Standardisering": { + "count": 1 + }, + "Oppdal": { + "count": 1 + }, + "Norske arkitekturinstitusjoner": { + "count": 1 + }, + "Sparta": { + "count": 1 + }, + "Kunsth\u00e5ndverk": { + "count": 2 + }, + "Treskurd": { + "count": 1 + }, + "Treskj\u00e6rere": { + "count": 1 + }, + "Tysklands historie fra 1806 til 1918": { + "count": 1 + }, + "Mexicos geografi": { + "count": 1 + }, + "Selfamilien": { + "count": 1 + }, + "Cystophora": { + "count": 1 + }, + "Burundis geografi": { + "count": 1 + }, + "Islands historie": { + "count": 1 + }, + "Tegneserier": { + "count": 1 + }, + "Birkenes": { + "count": 1 + }, + "Humanetikk": { + "count": 1 + }, + "Matretter": { + "count": 2 + }, + "Supper": { + "count": 2 + }, + " sauser og kraft": { + "count": 2 + }, + "Ugandas historie": { + "count": 1 + }, + "Erkjennelsesteori": { + "count": 1 + }, + "Sveits\u2019 tyskspr\u00e5klig litteratur": { + "count": 1 + }, + "Sveits\u2019 tyskspr\u00e5klig litteratur p\u00e5 1900-tallet": { + "count": 1 + }, + "Popkunst og minimalisme": { + "count": 1 + }, + "Klassisk ballett": { + "count": 1 + }, + "Klassisk ballett: internasjonale kunstnere og kompanier": { + "count": 1 + }, + "Inkludering og rettigheter": { + "count": 1 + }, + "Likestilling og diskriminering": { + "count": 1 + }, + "Kvinnebevegelse": { + "count": 1 + }, + "Kvinnebevegelsen i Norge": { + "count": 1 + }, + "Arkeologi i Asia": { + "count": 1 + }, + "Cubas litteratur": { + "count": 1 + }, + "Elektrokjemi": { + "count": 1 + }, + "Norges litteratur 1960-2000": { + "count": 3 + }, + "Nederlands litteratur": { + "count": 1 + }, + "Forfattere i romerrikets litteratur": { + "count": 1 + }, + "Forbrenningsmotorer": { + "count": 2 + }, + "Vitenskapsteori": { + "count": 1 + }, + "Vitenskapsteoretikere": { + "count": 1 + }, + "Norsk lyrikk 1960-2000": { + "count": 1 + }, + "Helserett": { + "count": 1 + }, + "Klesplagg": { + "count": 1 + }, + "Belgias geografi": { + "count": 
1 + }, + "Antarktis": { + "count": 1 + }, + "Danseinstitusjoner": { + "count": 1 + }, + "Irans geografi": { + "count": 1 + }, + "V\u00e5rskrinneblomslekta": { + "count": 1 + }, + "Motstandskampen i Norge": { + "count": 1 + }, + "Skjell": { + "count": 1 + }, + "Unionoida": { + "count": 1 + }, + "Unionoidea": { + "count": 1 + }, + "Elvemuslinger": { + "count": 1 + }, + "Margaritifera": { + "count": 2 + }, + "Klassisk arkeologi": { + "count": 1 + }, + "Gresk arkeologi": { + "count": 1 + }, + "Teknologi- og industrifag": { + "count": 1 + }, + "Afrosoricida": { + "count": 1 + }, + "Tenrecidae": { + "count": 1 + }, + "Tysklands historie fra 1918 til 1933": { + "count": 1 + }, + "Erstatningsrett": { + "count": 1 + }, + "Trevirke": { + "count": 1 + }, + "Sovjetunionens historie": { + "count": 2 + }, + "Gjenstanders kulturhistorie": { + "count": 2 + }, + "Dominicas geografi": { + "count": 2 + }, + "B\u00f8keordenen": { + "count": 1 + }, + "Bj\u00f8rkefamilien": { + "count": 1 + }, + "Bj\u00f8rkeslekta": { + "count": 1 + }, + "Metrikk": { + "count": 1 + }, + "USAs historie 1920\u20131960": { + "count": 2 + }, + "Energi og ressurs": { + "count": 1 + }, + "Tysklands nyere litteratur": { + "count": 1 + }, + "Libanons historie": { + "count": 1 + }, + "B\u00e6rum": { + "count": 1 + }, + "Brasils geografi": { + "count": 1 + }, + "Norges historie fra 1300 til 1660": { + "count": 1 + }, + "Natur og landemerker i USA": { + "count": 2 + }, + "Kv\u00e6fjord": { + "count": 1 + }, + "Fremmedord og l\u00e5nord": { + "count": 2 + }, + "Lokalforvaltning": { + "count": 1 + }, + "Brennevin": { + "count": 1 + }, + "Drinker": { + "count": 1 + }, + "Hest": { + "count": 1 + }, + "Erteblomstordenen": { + "count": 1 + }, + "Erteblomstfamilien": { + "count": 1 + }, + "Acacia": { + "count": 1 + }, + "Historiske biografier i norsk politikk": { + "count": 1 + }, + "Fremmedord med latinsk og gresk opphav": { + "count": 1 + }, + "B\u00e5tsfjord": { + "count": 1 + }, + "Tidsskrifter": { + "count": 1 + }, 
+ "Romanias samtidshistorie": { + "count": 1 + }, + "Fagretninger i sosiologi": { + "count": 1 + }, + "Folkegrupper i S\u00f8r-Amerika": { + "count": 1 + }, + "Norges historie generelt": { + "count": 1 + }, + "Norsk polarhistorie": { + "count": 1 + }, + "Tekstilkunst": { + "count": 1 + }, + "Tekstilkunstnere": { + "count": 1 + }, + "Arkeologer": { + "count": 2 + }, + "Stabile grunnstoffer": { + "count": 1 + }, + "Fiskenes anatomi og fysiologi": { + "count": 1 + }, + "Vang": { + "count": 1 + }, + "Sunnfjord": { + "count": 1 + }, + "Samv\u00e6rsdans": { + "count": 1 + }, + "Nicaraguas litteratur": { + "count": 1 + }, + "Sulfid- og sulfatmineraler": { + "count": 1 + }, + "Hardrock og heavy metal": { + "count": 1 + }, + "Norsk hardrock og heavy metal": { + "count": 1 + }, + "Aver\u00f8y": { + "count": 1 + }, + "Sol og stjerner": { + "count": 1 + }, + "Gloppen": { + "count": 1 + }, + "Italiensk manierisme": { + "count": 1 + }, + " barokk- og renessansekunst": { + "count": 1 + }, + "Sakprosa": { + "count": 1 + }, + "Sekksporesopper": { + "count": 1 + }, + "Ursekksporesopper": { + "count": 1 + }, + "Heksekostsopper": { + "count": 1 + }, + "Religion i ulike land": { + "count": 1 + }, + "Religion i Latin-Amerika": { + "count": 1 + }, + "Italias nyere litteratur": { + "count": 1 + }, + "Arkitekturteori og -metode": { + "count": 1 + }, + "Bahrains geografi": { + "count": 1 + }, + "Biografier i nyere norsk historie": { + "count": 1 + }, + "Askvoll": { + "count": 1 + }, + "\u00d8sterrikes litteratur p\u00e5 1900-tallet": { + "count": 1 + }, + "Katt": { + "count": 1 + }, + "Piggfinnefisker": { + "count": 1 + }, + "Leppefiskfamilien": { + "count": 1 + }, + "Ctenolabrus": { + "count": 1 + }, + "Mykologi": { + "count": 1 + }, + "Innholdsstoffer hos sopp": { + "count": 1 + }, + "Fluesnapperfamilien": { + "count": 1 + }, + "Luscinia": { + "count": 1 + }, + "Colombias historie": { + "count": 1 + }, + "\u00d8vrige indo-europeiske spr\u00e5k": { + "count": 1 + }, + "Dannmarks historie 
1814-1945": { + "count": 1 + }, + "Arbeiderbevegelsens historie": { + "count": 1 + }, + "Kameruns litteratur": { + "count": 1 + }, + "Vannsport": { + "count": 1 + }, + "Sv\u00f8mmesport": { + "count": 1 + }, + "Abstrakt ekspresjonisme": { + "count": 1 + }, + "Bulgarias historie": { + "count": 1 + }, + "Storbritannia og Nord-Irlands litteratur p\u00e5 1800-tallet": { + "count": 1 + }, + "Norske klassiske komponister og verker f\u00f8r 1900": { + "count": 1 + }, + "Kirgisistans geografi": { + "count": 1 + }, + "Panamas historie": { + "count": 1 + }, + "Finlands geografi": { + "count": 1 + }, + "Petroleumsprodukter": { + "count": 1 + }, + "Teknisk kybernetikk": { + "count": 1 + }, + "Dyregeografi": { + "count": 1 + }, + "Verdens dyreliv": { + "count": 1 + }, + "Minev\u00e5pen": { + "count": 1 + }, + "Det bysantinske rike": { + "count": 1 + }, + "Element\u00e6rpartikkelfysikk": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/SpanishNewsClusteringP2P.json b/mteb/descriptive_stats/Clustering/SpanishNewsClusteringP2P.json new file mode 100644 index 0000000000..63f1a4d6fc --- /dev/null +++ b/mteb/descriptive_stats/Clustering/SpanishNewsClusteringP2P.json @@ -0,0 +1,37 @@ +{ + "test": { + "num_samples": 1, + "number_of_characters": 1217, + "min_text_length": 1217, + "average_text_length": 1217.0, + "max_text_length": 1217, + "unique_texts": 1138, + "min_labels_per_text": 26, + "average_labels_per_text": 1217.0, + "max_labels_per_text": 340, + "unique_labels": 7, + "labels": { + "Otra": { + "count": 130 + }, + "Regulaciones": { + "count": 142 + }, + "Alianzas": { + "count": 247 + }, + "Macroeconomia": { + "count": 340 + }, + "Innovacion": { + "count": 195 + }, + "Sostenibilidad": { + "count": 137 + }, + "Reputacion": { + "count": 26 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/StackExchangeClustering.v2.json b/mteb/descriptive_stats/Clustering/StackExchangeClustering.v2.json 
new file mode 100644 index 0000000000..3be2e5a998 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/StackExchangeClustering.v2.json @@ -0,0 +1,379 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 117782, + "min_text_length": 19, + "average_text_length": 57.5107421875, + "max_text_length": 148, + "unique_texts": 116, + "min_labels_per_text": 7, + "average_labels_per_text": 1.0, + "max_labels_per_text": 37, + "unique_labels": 121, + "labels": { + "53": { + "count": 24 + }, + "26": { + "count": 15 + }, + "108": { + "count": 10 + }, + "4": { + "count": 24 + }, + "12": { + "count": 27 + }, + "68": { + "count": 11 + }, + "86": { + "count": 28 + }, + "37": { + "count": 32 + }, + "47": { + "count": 30 + }, + "7": { + "count": 16 + }, + "15": { + "count": 26 + }, + "119": { + "count": 18 + }, + "88": { + "count": 20 + }, + "102": { + "count": 18 + }, + "49": { + "count": 15 + }, + "2": { + "count": 36 + }, + "1": { + "count": 29 + }, + "40": { + "count": 8 + }, + "101": { + "count": 19 + }, + "91": { + "count": 37 + }, + "79": { + "count": 21 + }, + "5": { + "count": 13 + }, + "35": { + "count": 16 + }, + "41": { + "count": 14 + }, + "63": { + "count": 23 + }, + "73": { + "count": 8 + }, + "99": { + "count": 17 + }, + "42": { + "count": 15 + }, + "110": { + "count": 27 + }, + "64": { + "count": 21 + }, + "0": { + "count": 16 + }, + "18": { + "count": 12 + }, + "55": { + "count": 13 + }, + "34": { + "count": 16 + }, + "90": { + "count": 27 + }, + "114": { + "count": 17 + }, + "77": { + "count": 8 + }, + "32": { + "count": 26 + }, + "109": { + "count": 17 + }, + "78": { + "count": 12 + }, + "104": { + "count": 11 + }, + "96": { + "count": 19 + }, + "70": { + "count": 26 + }, + "105": { + "count": 19 + }, + "80": { + "count": 11 + }, + "38": { + "count": 17 + }, + "50": { + "count": 14 + }, + "30": { + "count": 18 + }, + "83": { + "count": 18 + }, + "52": { + "count": 10 + }, + "93": { + "count": 15 + }, + "58": { + "count": 9 + }, + "71": { + "count": 19 
+ }, + "16": { + "count": 14 + }, + "6": { + "count": 18 + }, + "89": { + "count": 16 + }, + "87": { + "count": 18 + }, + "14": { + "count": 8 + }, + "117": { + "count": 9 + }, + "66": { + "count": 28 + }, + "29": { + "count": 22 + }, + "82": { + "count": 13 + }, + "100": { + "count": 26 + }, + "45": { + "count": 19 + }, + "51": { + "count": 12 + }, + "60": { + "count": 24 + }, + "81": { + "count": 27 + }, + "17": { + "count": 28 + }, + "103": { + "count": 10 + }, + "33": { + "count": 15 + }, + "95": { + "count": 20 + }, + "3": { + "count": 8 + }, + "113": { + "count": 10 + }, + "21": { + "count": 12 + }, + "39": { + "count": 27 + }, + "112": { + "count": 9 + }, + "85": { + "count": 17 + }, + "65": { + "count": 13 + }, + "24": { + "count": 20 + }, + "75": { + "count": 16 + }, + "111": { + "count": 8 + }, + "36": { + "count": 22 + }, + "74": { + "count": 23 + }, + "84": { + "count": 15 + }, + "94": { + "count": 21 + }, + "44": { + "count": 13 + }, + "61": { + "count": 25 + }, + "56": { + "count": 19 + }, + "107": { + "count": 8 + }, + "28": { + "count": 19 + }, + "11": { + "count": 14 + }, + "10": { + "count": 13 + }, + "92": { + "count": 8 + }, + "43": { + "count": 20 + }, + "48": { + "count": 7 + }, + "106": { + "count": 19 + }, + "120": { + "count": 14 + }, + "25": { + "count": 19 + }, + "46": { + "count": 14 + }, + "116": { + "count": 13 + }, + "54": { + "count": 14 + }, + "20": { + "count": 25 + }, + "13": { + "count": 10 + }, + "19": { + "count": 15 + }, + "22": { + "count": 20 + }, + "23": { + "count": 12 + }, + "72": { + "count": 18 + }, + "8": { + "count": 33 + }, + "27": { + "count": 9 + }, + "67": { + "count": 7 + }, + "97": { + "count": 11 + }, + "62": { + "count": 17 + }, + "69": { + "count": 11 + }, + "118": { + "count": 13 + }, + "31": { + "count": 8 + }, + "76": { + "count": 12 + }, + "59": { + "count": 11 + }, + "98": { + "count": 17 + }, + "115": { + "count": 10 + }, + "9": { + "count": 7 + }, + "57": { + "count": 7 + } + } + } +} \ No newline at 
end of file diff --git a/mteb/descriptive_stats/Clustering/StackExchangeClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/StackExchangeClusteringP2P.v2.json new file mode 100644 index 0000000000..fb6b238b28 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/StackExchangeClusteringP2P.v2.json @@ -0,0 +1,1588 @@ +{ + "test": { + "num_samples": 74914, + "number_of_characters": 81735679, + "min_text_length": 71, + "average_text_length": 1091.06013562218, + "max_text_length": 4225, + "unique_texts": 3720, + "min_labels_per_text": 2, + "average_labels_per_text": 1.0, + "max_labels_per_text": 18476, + "unique_labels": 524, + "labels": { + "unity": { + "count": 18476 + }, + "directx": { + "count": 589 + }, + "movement": { + "count": 214 + }, + "opengl": { + "count": 5190 + }, + "javascript": { + "count": 1141 + }, + "c#": { + "count": 3223 + }, + "2d": { + "count": 2367 + }, + "animation": { + "count": 582 + }, + "game-design": { + "count": 1735 + }, + "procedural-generation": { + "count": 348 + }, + "physics": { + "count": 1048 + }, + "c++": { + "count": 4496 + }, + "sdl2": { + "count": 90 + }, + "terminology": { + "count": 81 + }, + "android": { + "count": 1644 + }, + "graphics": { + "count": 482 + }, + "octree": { + "count": 11 + }, + "java": { + "count": 3833 + }, + "ios": { + "count": 385 + }, + "cocos2d-iphone": { + "count": 184 + }, + "tools": { + "count": 91 + }, + "terrain": { + "count": 99 + }, + "xna": { + "count": 4059 + }, + "html5": { + "count": 138 + }, + "rendering": { + "count": 400 + }, + "mathematics": { + "count": 1249 + }, + "publishing": { + "count": 52 + }, + "architecture": { + "count": 1067 + }, + "matrix": { + "count": 115 + }, + "lighting": { + "count": 131 + }, + "shaders": { + "count": 707 + }, + "libgdx": { + "count": 1310 + }, + "path-finding": { + "count": 307 + }, + "pc": { + "count": 7 + }, + "mmo": { + "count": 182 + }, + "collision-detection": { + "count": 1389 + }, + "graphic-effects": { + "count": 25 + }, + "sdl": { + 
"count": 119 + }, + "3d": { + "count": 1331 + }, + "blender": { + "count": 133 + }, + "software-engineering": { + "count": 300 + }, + "gpu": { + "count": 42 + }, + "design-patterns": { + "count": 57 + }, + "box2d": { + "count": 264 + }, + "color": { + "count": 31 + }, + "udk": { + "count": 121 + }, + "transformation": { + "count": 39 + }, + "tiles": { + "count": 188 + }, + "camera": { + "count": 302 + }, + "legal": { + "count": 311 + }, + "raycasting": { + "count": 37 + }, + "python": { + "count": 329 + }, + "multiplayer": { + "count": 320 + }, + "andengine": { + "count": 77 + }, + "distribution": { + "count": 6 + }, + "lua": { + "count": 102 + }, + "algorithm": { + "count": 766 + }, + "tilemap": { + "count": 116 + }, + "game-maker": { + "count": 324 + }, + "ai": { + "count": 464 + }, + "pygame": { + "count": 56 + }, + "networking": { + "count": 547 + }, + "flash": { + "count": 336 + }, + "sprites": { + "count": 293 + }, + "simulations": { + "count": 32 + }, + "virtual-reality": { + "count": 14 + }, + "hlsl": { + "count": 121 + }, + "waypoints": { + "count": 5 + }, + "audio": { + "count": 55 + }, + "textures": { + "count": 656 + }, + "game-mechanics": { + "count": 201 + }, + "intersection": { + "count": 9 + }, + "facebook": { + "count": 30 + }, + "manuals": { + "count": 2 + }, + "level-design": { + "count": 56 + }, + "rigging": { + "count": 14 + }, + "open-source": { + "count": 45 + }, + "software-rendering": { + "count": 9 + }, + "books": { + "count": 23 + }, + "objects": { + "count": 7 + }, + "minecraft-modding": { + "count": 109 + }, + "rotation": { + "count": 195 + }, + "storyboard": { + "count": 3 + }, + "iphone": { + "count": 349 + }, + "sharpdx": { + "count": 23 + }, + "glsl": { + "count": 139 + }, + "opengl-es": { + "count": 149 + }, + "loading": { + "count": 21 + }, + "effects": { + "count": 3 + }, + "accessibility": { + "count": 5 + }, + "performance": { + "count": 184 + }, + "physics-engine": { + "count": 44 + }, + "browser-based-games": { + "count": 68 
+ }, + "fund-raising": { + "count": 4 + }, + "xna-4.0": { + "count": 241 + }, + "monogame": { + "count": 120 + }, + "game-loop": { + "count": 93 + }, + "music": { + "count": 35 + }, + "board-game": { + "count": 3 + }, + "opengl-es2": { + "count": 67 + }, + "game-industry": { + "count": 102 + }, + "physx": { + "count": 11 + }, + "input": { + "count": 155 + }, + "unreal": { + "count": 81 + }, + "licensing": { + "count": 50 + }, + "starcraft-2": { + "count": 8 + }, + "geometry": { + "count": 112 + }, + "directx11": { + "count": 284 + }, + "smartfox": { + "count": 2 + }, + "jobs": { + "count": 20 + }, + "quaternion": { + "count": 22 + }, + "actionscript-3": { + "count": 118 + }, + "education": { + "count": 55 + }, + "art": { + "count": 55 + }, + "copyright": { + "count": 69 + }, + "roguelikes": { + "count": 7 + }, + "orientation": { + "count": 3 + }, + "projection": { + "count": 12 + }, + "steam": { + "count": 66 + }, + "phaser": { + "count": 46 + }, + "antialiasing": { + "count": 10 + }, + "3dsmax": { + "count": 70 + }, + "isometric": { + "count": 40 + }, + "mouse": { + "count": 14 + }, + "unreal-4": { + "count": 378 + }, + "spritesheet": { + "count": 22 + }, + "cocos2d": { + "count": 59 + }, + "assets": { + "count": 110 + }, + "nintendo": { + "count": 22 + }, + "asset-management": { + "count": 5 + }, + "mobile": { + "count": 66 + }, + "graphics-programming": { + "count": 73 + }, + "maya": { + "count": 30 + }, + "control": { + "count": 9 + }, + "directx9": { + "count": 56 + }, + "windows": { + "count": 71 + }, + "component-based": { + "count": 70 + }, + "gui": { + "count": 106 + }, + "adventure-game-studio": { + "count": 5 + }, + "roblox": { + "count": 15 + }, + "planning": { + "count": 12 + }, + "uv-mapping": { + "count": 9 + }, + "cross-platform": { + "count": 37 + }, + "c": { + "count": 46 + }, + "vector": { + "count": 124 + }, + "terrain-rendering": { + "count": 31 + }, + "screen": { + "count": 5 + }, + "project-management": { + "count": 39 + }, + 
"data-structure": { + "count": 64 + }, + "rpg": { + "count": 86 + }, + "balance": { + "count": 12 + }, + "management": { + "count": 3 + }, + "source-engine": { + "count": 12 + }, + "windows-phone-7": { + "count": 16 + }, + "testing": { + "count": 25 + }, + "controllers": { + "count": 15 + }, + "projectile-physics": { + "count": 21 + }, + "godot": { + "count": 163 + }, + "collider": { + "count": 3 + }, + "user-experience": { + "count": 22 + }, + "maps": { + "count": 90 + }, + "free-to-play": { + "count": 5 + }, + "keyboard": { + "count": 15 + }, + "random": { + "count": 38 + }, + "fragment-shader": { + "count": 16 + }, + "file-format": { + "count": 20 + }, + "bullet-physics": { + "count": 40 + }, + "shadow-mapping": { + "count": 10 + }, + "server": { + "count": 92 + }, + "racing": { + "count": 9 + }, + "unityscript": { + "count": 14 + }, + "directx10": { + "count": 15 + }, + "first-person-shooter": { + "count": 27 + }, + "xbox360": { + "count": 26 + }, + "multithreading": { + "count": 52 + }, + "point-cloud": { + "count": 3 + }, + "sfml": { + "count": 54 + }, + "modding": { + "count": 87 + }, + "logic": { + "count": 16 + }, + "vulkan": { + "count": 19 + }, + "adventure-games": { + "count": 10 + }, + "development-speed": { + "count": 8 + }, + "savegame": { + "count": 34 + }, + "competition": { + "count": 10 + }, + "voxels": { + "count": 44 + }, + "obj": { + "count": 6 + }, + "business": { + "count": 36 + }, + "entity-system": { + "count": 131 + }, + "3d-meshes": { + "count": 90 + }, + "databases": { + "count": 44 + }, + "html-canvas": { + "count": 28 + }, + "php": { + "count": 38 + }, + "cocos2d-x-js": { + "count": 7 + }, + "hardware": { + "count": 21 + }, + "trigonometry": { + "count": 7 + }, + "levels": { + "count": 15 + }, + "blender-game-engine": { + "count": 5 + }, + "touch": { + "count": 4 + }, + "marketing": { + "count": 99 + }, + "events": { + "count": 24 + }, + "spatial-partitioning": { + "count": 4 + }, + "macos": { + "count": 7 + }, + "leaderboards": { + 
"count": 30 + }, + "webgl": { + "count": 30 + }, + "web": { + "count": 31 + }, + "monetization": { + "count": 16 + }, + "frustum-culling": { + "count": 5 + }, + "entity-component-system": { + "count": 11 + }, + "resolution": { + "count": 11 + }, + "particles": { + "count": 22 + }, + "steering-behaviors": { + "count": 10 + }, + "dev-groups": { + "count": 2 + }, + "special-effects": { + "count": 9 + }, + "client-server": { + "count": 50 + }, + "scene-graph": { + "count": 12 + }, + "models": { + "count": 121 + }, + "angelscript": { + "count": 2 + }, + "timer": { + "count": 21 + }, + "corona-sdk": { + "count": 24 + }, + "node.js": { + "count": 21 + }, + "security": { + "count": 14 + }, + "jmonkeyengine": { + "count": 14 + }, + "sky": { + "count": 5 + }, + "deferred-rendering": { + "count": 10 + }, + "game-recording": { + "count": 5 + }, + "optimization": { + "count": 63 + }, + "efficiency": { + "count": 8 + }, + "character": { + "count": 26 + }, + "appstore": { + "count": 10 + }, + "selection": { + "count": 5 + }, + "rigidbody": { + "count": 13 + }, + "collision-resolution": { + "count": 40 + }, + "objective-c": { + "count": 21 + }, + "transparency": { + "count": 8 + }, + "ranking": { + "count": 14 + }, + "mesh": { + "count": 18 + }, + "inventory": { + "count": 3 + }, + "cocos2d-x": { + "count": 68 + }, + "timestep": { + "count": 5 + }, + "coordinates": { + "count": 34 + }, + "ogre": { + "count": 39 + }, + "wpf": { + "count": 2 + }, + "floating-point": { + "count": 6 + }, + "voice": { + "count": 9 + }, + "space-partitioning": { + "count": 8 + }, + "sound": { + "count": 41 + }, + "deployment": { + "count": 12 + }, + "game-maker-dnd": { + "count": 4 + }, + "console": { + "count": 21 + }, + "behavior-tree": { + "count": 5 + }, + "shadows": { + "count": 17 + }, + "havok": { + "count": 8 + }, + "three.js": { + "count": 15 + }, + "scripting": { + "count": 76 + }, + "joystick": { + "count": 3 + }, + "raytracing": { + "count": 19 + }, + "level-of-detail": { + "count": 12 + }, 
+ "quake3": { + "count": 4 + }, + "normals": { + "count": 19 + }, + "interpolation": { + "count": 23 + }, + "frame-rate": { + "count": 33 + }, + "spritekit": { + "count": 24 + }, + "file": { + "count": 6 + }, + "sales": { + "count": 3 + }, + "sound-effects": { + "count": 7 + }, + "google-play": { + "count": 33 + }, + "tetris": { + "count": 2 + }, + "separating-axis-theorem": { + "count": 8 + }, + "ruby": { + "count": 14 + }, + "gimp": { + "count": 2 + }, + "grid": { + "count": 13 + }, + "graphics-design": { + "count": 10 + }, + "heuristics": { + "count": 3 + }, + "google-play-services": { + "count": 7 + }, + "lwjgl": { + "count": 41 + }, + "visual-studio": { + "count": 7 + }, + "playstation4": { + "count": 15 + }, + "line-of-sight": { + "count": 5 + }, + "probability": { + "count": 3 + }, + "refactoring": { + "count": 2 + }, + "interactive-fiction": { + "count": 7 + }, + "scoring": { + "count": 11 + }, + "game-state": { + "count": 3 + }, + "fluid-dynamics": { + "count": 5 + }, + "anti-cheat": { + "count": 9 + }, + "rpg-maker": { + "count": 8 + }, + "direct3d12": { + "count": 13 + }, + "parallax-scrolling": { + "count": 4 + }, + "oculus": { + "count": 3 + }, + "memory": { + "count": 3 + }, + "state": { + "count": 17 + }, + "resource-management": { + "count": 13 + }, + "2.5d": { + "count": 2 + }, + "collada": { + "count": 4 + }, + "construct-2": { + "count": 4 + }, + "platformer": { + "count": 22 + }, + "replays": { + "count": 2 + }, + "curves": { + "count": 4 + }, + "scala": { + "count": 2 + }, + "aabb": { + "count": 4 + }, + "porting": { + "count": 4 + }, + "cryengine": { + "count": 10 + }, + "glfw": { + "count": 11 + }, + ".net": { + "count": 8 + }, + "fmod": { + "count": 3 + }, + "fixed-timestep": { + "count": 5 + }, + "flixel": { + "count": 6 + }, + "hdr": { + "count": 3 + }, + "platform": { + "count": 11 + }, + "community-management": { + "count": 11 + }, + "hexagonal-grid": { + "count": 11 + }, + "materials": { + "count": 9 + }, + "career": { + "count": 39 + 
}, + "installer": { + "count": 4 + }, + "playstation3": { + "count": 4 + }, + "pyglet": { + "count": 7 + }, + "slick": { + "count": 12 + }, + "image": { + "count": 23 + }, + "allegro": { + "count": 8 + }, + "sql": { + "count": 4 + }, + "rts": { + "count": 31 + }, + "localization": { + "count": 7 + }, + "text": { + "count": 19 + }, + "map-editor": { + "count": 6 + }, + "viewport": { + "count": 3 + }, + "face": { + "count": 4 + }, + "noise": { + "count": 2 + }, + "timing": { + "count": 6 + }, + "tower-defense": { + "count": 5 + }, + "editors": { + "count": 6 + }, + "debugging": { + "count": 10 + }, + "normal-mapping": { + "count": 14 + }, + "jbox2d": { + "count": 3 + }, + "fsm": { + "count": 14 + }, + "content-rating": { + "count": 11 + }, + "glut": { + "count": 3 + }, + "turn-based": { + "count": 23 + }, + "compression": { + "count": 8 + }, + "vertex": { + "count": 12 + }, + "teamwork": { + "count": 4 + }, + "slimdx": { + "count": 3 + }, + "version-control": { + "count": 9 + }, + "plugin": { + "count": 3 + }, + "shading": { + "count": 5 + }, + "entity-component": { + "count": 10 + }, + "fonts": { + "count": 23 + }, + "spherical-harmonics": { + "count": 3 + }, + "vb.net": { + "count": 5 + }, + "psm": { + "count": 6 + }, + "actionscript": { + "count": 7 + }, + "microsoft": { + "count": 7 + }, + "puzzle": { + "count": 18 + }, + "car": { + "count": 2 + }, + "love2d": { + "count": 10 + }, + "beta": { + "count": 4 + }, + "quadtree": { + "count": 7 + }, + "linear-algebra": { + "count": 12 + }, + "jumping": { + "count": 5 + }, + "scale": { + "count": 2 + }, + "ipad": { + "count": 6 + }, + "linux": { + "count": 20 + }, + "global-illumination": { + "count": 4 + }, + "unity-networking": { + "count": 11 + }, + "crusader-kings-2-modding": { + "count": 5 + }, + "playtesting": { + "count": 5 + }, + "heightmap": { + "count": 9 + }, + "kinect": { + "count": 4 + }, + "websocket": { + "count": 6 + }, + "statistics": { + "count": 9 + }, + "fbx": { + "count": 9 + }, + 
"world-of-warcraft-modding": { + "count": 3 + }, + "source-code": { + "count": 2 + }, + "perlin-noise": { + "count": 5 + }, + "skyrim-modding": { + "count": 9 + }, + "skeletal-animation": { + "count": 24 + }, + "economy": { + "count": 5 + }, + "assembly": { + "count": 3 + }, + "geolocation": { + "count": 2 + }, + "splash-screen": { + "count": 2 + }, + "pixel": { + "count": 10 + }, + "card-game": { + "count": 7 + }, + "swift": { + "count": 5 + }, + "visualization": { + "count": 3 + }, + "vsync": { + "count": 5 + }, + "eclipse": { + "count": 3 + }, + "pixel-art": { + "count": 13 + }, + "tournament": { + "count": 2 + }, + "zbrush": { + "count": 2 + }, + "motivation": { + "count": 6 + }, + "online": { + "count": 8 + }, + "oop": { + "count": 6 + }, + "video": { + "count": 21 + }, + "index-buffer": { + "count": 5 + }, + "methodology": { + "count": 10 + }, + "impactjs": { + "count": 3 + }, + "code-reflection": { + "count": 4 + }, + "depth-buffer": { + "count": 11 + }, + "interface": { + "count": 6 + }, + "turn-based-strategy": { + "count": 7 + }, + "go": { + "count": 3 + }, + "wii": { + "count": 3 + }, + "path": { + "count": 3 + }, + "process": { + "count": 4 + }, + "trademark": { + "count": 10 + }, + "soya3d": { + "count": 2 + }, + "mission-design": { + "count": 3 + }, + "side-scroller": { + "count": 5 + }, + "marching-cubes": { + "count": 6 + }, + "skybox": { + "count": 8 + }, + "scene": { + "count": 7 + }, + "haxe": { + "count": 3 + }, + "openscenegraph": { + "count": 2 + }, + "culling": { + "count": 8 + }, + "encryption": { + "count": 4 + }, + "mvc": { + "count": 4 + }, + "advertisements": { + "count": 5 + }, + "npc": { + "count": 3 + }, + "effect": { + "count": 4 + }, + "game-maker-studio-2": { + "count": 3 + }, + "3d-modeling": { + "count": 9 + }, + "udp": { + "count": 3 + }, + "jquery": { + "count": 4 + }, + "hiring": { + "count": 2 + }, + "cloud-computing": { + "count": 2 + }, + "commodore-64": { + "count": 3 + }, + "storage": { + "count": 3 + }, + "ui-design": { 
+ "count": 7 + }, + "release": { + "count": 4 + }, + "demo": { + "count": 4 + }, + "navmesh": { + "count": 2 + }, + "strategy": { + "count": 7 + }, + "social": { + "count": 3 + }, + "data-driven": { + "count": 4 + }, + "revenue": { + "count": 4 + }, + "openal": { + "count": 2 + }, + "window-management": { + "count": 4 + }, + "magicavoxel": { + "count": 2 + }, + "twine": { + "count": 6 + }, + "pipeline": { + "count": 4 + }, + "css": { + "count": 2 + }, + "game-center": { + "count": 3 + }, + "text-based": { + "count": 7 + }, + "torque-x": { + "count": 2 + }, + "signed-distance-field": { + "count": 3 + }, + "tessellation": { + "count": 4 + }, + "pico-8": { + "count": 4 + }, + "spritebatch": { + "count": 3 + }, + "business-model": { + "count": 2 + }, + "opentk": { + "count": 4 + }, + "assimp": { + "count": 7 + }, + "trailer": { + "count": 3 + }, + "texture-atlas": { + "count": 2 + }, + "unity-webgl": { + "count": 2 + }, + "tiled": { + "count": 3 + }, + "glm": { + "count": 2 + }, + "splines": { + "count": 6 + }, + "unit-testing": { + "count": 3 + }, + "compatibility": { + "count": 3 + }, + "matchmaking": { + "count": 3 + }, + "analytics": { + "count": 4 + }, + "irrlicht": { + "count": 2 + }, + "configuration": { + "count": 3 + }, + "filesystem": { + "count": 2 + }, + "piracy": { + "count": 3 + }, + "difficulty": { + "count": 3 + }, + "water": { + "count": 3 + }, + "rope-physics": { + "count": 2 + }, + "teaching": { + "count": 4 + }, + "memory-efficiency": { + "count": 9 + }, + "modelling-techniques": { + "count": 2 + }, + "sketchup": { + "count": 2 + }, + "portals": { + "count": 3 + }, + "windows-forms": { + "count": 5 + }, + "farseer-physics-engine": { + "count": 2 + }, + "hammer": { + "count": 2 + }, + "alpha-blending": { + "count": 5 + }, + "normal-generation": { + "count": 3 + }, + "human-resources": { + "count": 2 + }, + "quake2": { + "count": 2 + }, + "emulation": { + "count": 5 + }, + "metrics": { + "count": 2 + }, + "image-processing": { + "count": 2 + }, + 
"hacks": { + "count": 3 + }, + "angles": { + "count": 4 + }, + "streaming": { + "count": 2 + }, + "jogl": { + "count": 3 + }, + "chess": { + "count": 2 + }, + "ide": { + "count": 2 + }, + "augmented-reality": { + "count": 3 + }, + "productivity": { + "count": 2 + }, + "google-app-engine": { + "count": 2 + }, + "bukkit": { + "count": 3 + }, + "authentication": { + "count": 3 + }, + "spawning": { + "count": 2 + }, + "user-interface": { + "count": 3 + }, + "gameobject": { + "count": 3 + }, + "silverlight": { + "count": 3 + }, + "htc-vive": { + "count": 2 + }, + "synchronization": { + "count": 3 + }, + "spaces": { + "count": 2 + }, + "profiling": { + "count": 2 + }, + "lumberyard-engine": { + "count": 3 + }, + "unity-ads": { + "count": 2 + }, + "ajax": { + "count": 2 + }, + "bot": { + "count": 2 + }, + "outsourcing": { + "count": 2 + }, + "functional": { + "count": 2 + }, + "user-generated-content": { + "count": 3 + }, + "rpg-maker-mv": { + "count": 2 + }, + "javafx": { + "count": 2 + }, + "data": { + "count": 2 + }, + "computational-geometry": { + "count": 5 + }, + "harlowe": { + "count": 2 + }, + "hud": { + "count": 2 + }, + "bounding-spheres": { + "count": 2 + }, + "stencyl": { + "count": 2 + }, + "bounding-boxes": { + "count": 2 + }, + "rpg-maker-xp": { + "count": 3 + }, + "stage3d": { + "count": 2 + }, + "renpy": { + "count": 2 + }, + "documentation": { + "count": 2 + }, + "content-generation": { + "count": 2 + }, + "babylonjs": { + "count": 2 + }, + "palette": { + "count": 2 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/SwednClusteringP2P.json b/mteb/descriptive_stats/Clustering/SwednClusteringP2P.json new file mode 100644 index 0000000000..550d57b63b --- /dev/null +++ b/mteb/descriptive_stats/Clustering/SwednClusteringP2P.json @@ -0,0 +1,54 @@ +{ + "summaries": { + "num_samples": 34376, + "number_of_characters": 7541052, + "min_text_length": 48, + "average_text_length": 219.36967651850128, + "max_text_length": 904, + 
"unique_texts": 484, + "min_labels_per_text": 4529, + "average_labels_per_text": 1.0, + "max_labels_per_text": 19155, + "unique_labels": 4, + "labels": { + "domestic news": { + "count": 19155 + }, + "economy": { + "count": 4582 + }, + "sports": { + "count": 6110 + }, + "culture": { + "count": 4529 + } + } + }, + "articles": { + "num_samples": 34376, + "number_of_characters": 103343288, + "min_text_length": 191, + "average_text_length": 3006.2627414475214, + "max_text_length": 140461, + "unique_texts": 7393, + "min_labels_per_text": 4529, + "average_labels_per_text": 1.0, + "max_labels_per_text": 19155, + "unique_labels": 4, + "labels": { + "domestic news": { + "count": 19155 + }, + "economy": { + "count": 4582 + }, + "sports": { + "count": 6110 + }, + "culture": { + "count": 4529 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/SwednClusteringS2S.json b/mteb/descriptive_stats/Clustering/SwednClusteringS2S.json new file mode 100644 index 0000000000..b070e06c31 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/SwednClusteringS2S.json @@ -0,0 +1,28 @@ +{ + "headlines": { + "num_samples": 34376, + "number_of_characters": 1427388, + "min_text_length": 2, + "average_text_length": 41.522806609262275, + "max_text_length": 496, + "unique_texts": 126, + "min_labels_per_text": 4529, + "average_labels_per_text": 1.0, + "max_labels_per_text": 19155, + "unique_labels": 4, + "labels": { + "domestic news": { + "count": 19155 + }, + "economy": { + "count": 4582 + }, + "sports": { + "count": 6110 + }, + "culture": { + "count": 4529 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/TenKGnadClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/TenKGnadClusteringP2P.v2.json new file mode 100644 index 0000000000..81ec2e0b9d --- /dev/null +++ b/mteb/descriptive_stats/Clustering/TenKGnadClusteringP2P.v2.json @@ -0,0 +1,43 @@ +{ + "test": { + "num_samples": 10275, + "number_of_characters": 27212877, + 
"min_text_length": 45, + "average_text_length": 2648.4551824817518, + "max_text_length": 32702, + "unique_texts": 4554, + "min_labels_per_text": 539, + "average_labels_per_text": 1.0, + "max_labels_per_text": 1679, + "unique_labels": 9, + "labels": { + "Panorama": { + "count": 1679 + }, + "Inland": { + "count": 1016 + }, + "International": { + "count": 1513 + }, + "Wirtschaft": { + "count": 1410 + }, + "Web": { + "count": 1676 + }, + "Wissenschaft": { + "count": 573 + }, + "Sport": { + "count": 1201 + }, + "Kultur": { + "count": 539 + }, + "Etat": { + "count": 668 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/TenKGnadClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/TenKGnadClusteringS2S.v2.json new file mode 100644 index 0000000000..3391275fd7 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/TenKGnadClusteringS2S.v2.json @@ -0,0 +1,43 @@ +{ + "test": { + "num_samples": 10267, + "number_of_characters": 523331, + "min_text_length": 13, + "average_text_length": 50.97214376156618, + "max_text_length": 108, + "unique_texts": 85, + "min_labels_per_text": 535, + "average_labels_per_text": 1.0, + "max_labels_per_text": 1679, + "unique_labels": 9, + "labels": { + "Sport": { + "count": 1201 + }, + "International": { + "count": 1512 + }, + "Web": { + "count": 1677 + }, + "Kultur": { + "count": 535 + }, + "Panorama": { + "count": 1679 + }, + "Etat": { + "count": 668 + }, + "Wissenschaft": { + "count": 572 + }, + "Inland": { + "count": 1014 + }, + "Wirtschaft": { + "count": 1409 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/ThuNewsClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/ThuNewsClusteringP2P.v2.json new file mode 100644 index 0000000000..9fb9b65379 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/ThuNewsClusteringP2P.v2.json @@ -0,0 +1,58 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 221799, + "min_text_length": 11, + 
"average_text_length": 108.30029296875, + "max_text_length": 389, + "unique_texts": 265, + "min_labels_per_text": 9, + "average_labels_per_text": 1.0, + "max_labels_per_text": 401, + "unique_labels": 14, + "labels": { + "12": { + "count": 380 + }, + "11": { + "count": 401 + }, + "1": { + "count": 223 + }, + "7": { + "count": 154 + }, + "10": { + "count": 123 + }, + "13": { + "count": 92 + }, + "4": { + "count": 50 + }, + "0": { + "count": 323 + }, + "2": { + "count": 80 + }, + "6": { + "count": 33 + }, + "9": { + "count": 60 + }, + "5": { + "count": 102 + }, + "8": { + "count": 9 + }, + "3": { + "count": 18 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/ThuNewsClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/ThuNewsClusteringS2S.v2.json new file mode 100644 index 0000000000..cf4404df70 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/ThuNewsClusteringS2S.v2.json @@ -0,0 +1,58 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 40107, + "min_text_length": 7, + "average_text_length": 19.58349609375, + "max_text_length": 31, + "unique_texts": 25, + "min_labels_per_text": 9, + "average_labels_per_text": 1.0, + "max_labels_per_text": 401, + "unique_labels": 14, + "labels": { + "12": { + "count": 380 + }, + "11": { + "count": 401 + }, + "1": { + "count": 223 + }, + "7": { + "count": 154 + }, + "10": { + "count": 123 + }, + "13": { + "count": 92 + }, + "4": { + "count": 50 + }, + "0": { + "count": 323 + }, + "2": { + "count": 80 + }, + "6": { + "count": 33 + }, + "9": { + "count": 60 + }, + "5": { + "count": 102 + }, + "8": { + "count": 9 + }, + "3": { + "count": 18 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/VGHierarchicalClusteringP2P.json b/mteb/descriptive_stats/Clustering/VGHierarchicalClusteringP2P.json new file mode 100644 index 0000000000..6d8fba6dc9 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/VGHierarchicalClusteringP2P.json @@ -0,0 
+1,229 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 5493923, + "min_text_length": 80, + "average_text_length": 2682.57958984375, + "max_text_length": 20478, + "unique_texts": 1677, + "min_labels_per_text": 1, + "average_labels_per_text": 1.86279296875, + "max_labels_per_text": 1014, + "unique_labels": 71, + "labels": { + "nyheter": { + "count": 1014 + }, + "utenriks": { + "count": 371 + }, + "innenriks": { + "count": 527 + }, + "rampelys": { + "count": 258 + }, + "musikk": { + "count": 80 + }, + "sport": { + "count": 650 + }, + "fotball": { + "count": 341 + }, + "meninger": { + "count": 100 + }, + "haandball": { + "count": 23 + }, + "tv": { + "count": 57 + }, + "forbruker": { + "count": 32 + }, + "teknologi": { + "count": 16 + }, + "kongelige": { + "count": 1 + }, + "ishockey": { + "count": 28 + }, + "kampsport": { + "count": 8 + }, + "doping": { + "count": 3 + }, + "helse": { + "count": 6 + }, + "spill": { + "count": 7 + }, + "ski": { + "count": 15 + }, + "bil-og-motor": { + "count": 9 + }, + "reise": { + "count": 21 + }, + "film": { + "count": 42 + }, + "tennis": { + "count": 9 + }, + "vaer": { + "count": 7 + }, + "mat-og-drikke": { + "count": 5 + }, + "langrenn": { + "count": 30 + }, + "alpint": { + "count": 14 + }, + "moter-og-modeller": { + "count": 1 + }, + "bok": { + "count": 10 + }, + "hollywood": { + "count": 1 + }, + "friidrett": { + "count": 12 + }, + "basketball": { + "count": 1 + }, + "grand-prix": { + "count": 2 + }, + "sykkel": { + "count": 21 + }, + "ungdoms-ol-2016": { + "count": 1 + }, + "vintersport": { + "count": 1 + }, + "skiskyting": { + "count": 7 + }, + "golf": { + "count": 3 + }, + "jernbanen": { + "count": 1 + }, + "motorsport": { + "count": 8 + }, + "travtips": { + "count": 4 + }, + "hopp": { + "count": 8 + }, + "ol": { + "count": 5 + }, + "ski-vm-2015": { + "count": 2 + }, + "teateranmeldelser": { + "count": 1 + }, + "skoyter": { + "count": 1 + }, + "dinepenger": { + "count": 3 + }, + "storbritannia": { + "count": 2 
+ }, + "utrolige-historier": { + "count": 3 + }, + "idol": { + "count": 2 + }, + "farmen": { + "count": 1 + }, + "skoeyter": { + "count": 1 + }, + "motor": { + "count": 3 + }, + "musikal": { + "count": 1 + }, + "sjakk": { + "count": 3 + }, + "boksing": { + "count": 5 + }, + "bil-baat-og-motor": { + "count": 10 + }, + "justin-bieber": { + "count": 2 + }, + "kongehuset": { + "count": 1 + }, + "internasjonal-fotball": { + "count": 1 + }, + "vg-lista-topp-20": { + "count": 1 + }, + "roing": { + "count": 1 + }, + "ol-2016": { + "count": 1 + }, + "skotsk-fotball": { + "count": 1 + }, + "leonard-cohen": { + "count": 1 + }, + "skuespiller": { + "count": 1 + }, + "william-og-kate": { + "count": 1 + }, + "idrettspolitikk": { + "count": 3 + }, + "norsk-film": { + "count": 1 + }, + "game-of-thrones": { + "count": 1 + }, + "snowboard": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/VGHierarchicalClusteringS2S.json b/mteb/descriptive_stats/Clustering/VGHierarchicalClusteringS2S.json new file mode 100644 index 0000000000..79608a6885 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/VGHierarchicalClusteringS2S.json @@ -0,0 +1,229 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 284776, + "min_text_length": 13, + "average_text_length": 139.05078125, + "max_text_length": 463, + "unique_texts": 249, + "min_labels_per_text": 1, + "average_labels_per_text": 1.86279296875, + "max_labels_per_text": 1014, + "unique_labels": 71, + "labels": { + "nyheter": { + "count": 1014 + }, + "utenriks": { + "count": 371 + }, + "innenriks": { + "count": 527 + }, + "rampelys": { + "count": 258 + }, + "musikk": { + "count": 80 + }, + "sport": { + "count": 650 + }, + "fotball": { + "count": 341 + }, + "meninger": { + "count": 100 + }, + "haandball": { + "count": 23 + }, + "tv": { + "count": 57 + }, + "forbruker": { + "count": 32 + }, + "teknologi": { + "count": 16 + }, + "kongelige": { + "count": 1 + }, + "ishockey": { + 
"count": 28 + }, + "kampsport": { + "count": 8 + }, + "doping": { + "count": 3 + }, + "helse": { + "count": 6 + }, + "spill": { + "count": 7 + }, + "ski": { + "count": 15 + }, + "bil-og-motor": { + "count": 9 + }, + "reise": { + "count": 21 + }, + "film": { + "count": 42 + }, + "tennis": { + "count": 9 + }, + "vaer": { + "count": 7 + }, + "mat-og-drikke": { + "count": 5 + }, + "langrenn": { + "count": 30 + }, + "alpint": { + "count": 14 + }, + "moter-og-modeller": { + "count": 1 + }, + "bok": { + "count": 10 + }, + "hollywood": { + "count": 1 + }, + "friidrett": { + "count": 12 + }, + "basketball": { + "count": 1 + }, + "grand-prix": { + "count": 2 + }, + "sykkel": { + "count": 21 + }, + "ungdoms-ol-2016": { + "count": 1 + }, + "vintersport": { + "count": 1 + }, + "skiskyting": { + "count": 7 + }, + "golf": { + "count": 3 + }, + "jernbanen": { + "count": 1 + }, + "motorsport": { + "count": 8 + }, + "travtips": { + "count": 4 + }, + "hopp": { + "count": 8 + }, + "ol": { + "count": 5 + }, + "ski-vm-2015": { + "count": 2 + }, + "teateranmeldelser": { + "count": 1 + }, + "skoyter": { + "count": 1 + }, + "dinepenger": { + "count": 3 + }, + "storbritannia": { + "count": 2 + }, + "utrolige-historier": { + "count": 3 + }, + "idol": { + "count": 2 + }, + "farmen": { + "count": 1 + }, + "skoeyter": { + "count": 1 + }, + "motor": { + "count": 3 + }, + "musikal": { + "count": 1 + }, + "sjakk": { + "count": 3 + }, + "boksing": { + "count": 5 + }, + "bil-baat-og-motor": { + "count": 10 + }, + "justin-bieber": { + "count": 2 + }, + "kongehuset": { + "count": 1 + }, + "internasjonal-fotball": { + "count": 1 + }, + "vg-lista-topp-20": { + "count": 1 + }, + "roing": { + "count": 1 + }, + "ol-2016": { + "count": 1 + }, + "skotsk-fotball": { + "count": 1 + }, + "leonard-cohen": { + "count": 1 + }, + "skuespiller": { + "count": 1 + }, + "william-og-kate": { + "count": 1 + }, + "idrettspolitikk": { + "count": 3 + }, + "norsk-film": { + "count": 1 + }, + "game-of-thrones": { + "count": 1 
+ }, + "snowboard": { + "count": 1 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/WikiCitiesClustering.json b/mteb/descriptive_stats/Clustering/WikiCitiesClustering.json new file mode 100644 index 0000000000..6f1e381d6e --- /dev/null +++ b/mteb/descriptive_stats/Clustering/WikiCitiesClustering.json @@ -0,0 +1,394 @@ +{ + "test": { + "num_samples": 1, + "number_of_characters": 3531, + "min_text_length": 3531, + "average_text_length": 3531.0, + "max_text_length": 3531, + "unique_texts": 3530, + "min_labels_per_text": 5, + "average_labels_per_text": 3531.0, + "max_labels_per_text": 200, + "unique_labels": 126, + "labels": { + "United Kingdom": { + "count": 56 + }, + "Poland": { + "count": 200 + }, + "Mexico": { + "count": 47 + }, + "Argentina": { + "count": 45 + }, + "Egypt": { + "count": 60 + }, + "Germany": { + "count": 51 + }, + "Bolivia": { + "count": 66 + }, + "Ethiopia": { + "count": 71 + }, + "Luxembourg": { + "count": 6 + }, + "Iraq": { + "count": 50 + }, + "Democratic Republic of the Congo": { + "count": 46 + }, + "India": { + "count": 200 + }, + "Angola": { + "count": 19 + }, + "North Macedonia": { + "count": 19 + }, + "Albania": { + "count": 41 + }, + "Madagascar": { + "count": 24 + }, + "Montenegro": { + "count": 14 + }, + "Algeria": { + "count": 33 + }, + "Myanmar": { + "count": 30 + }, + "Finland": { + "count": 97 + }, + "Greece": { + "count": 45 + }, + "Serbia": { + "count": 21 + }, + "Mozambique": { + "count": 26 + }, + "Ecuador": { + "count": 30 + }, + "Malawi": { + "count": 17 + }, + "Haiti": { + "count": 9 + }, + "Guyana": { + "count": 6 + }, + "Iceland": { + "count": 36 + }, + "Morocco": { + "count": 50 + }, + "Sierra Leone": { + "count": 9 + }, + "Lithuania": { + "count": 79 + }, + "Nigeria": { + "count": 50 + }, + "Yemen": { + "count": 22 + }, + "France": { + "count": 33 + }, + "Cuba": { + "count": 38 + }, + "Jordan": { + "count": 17 + }, + "Turkey": { + "count": 111 + }, + "Greenland": { + "count": 15 + 
}, + "Oman": { + "count": 11 + }, + "Cameroon": { + "count": 48 + }, + "Taiwan": { + "count": 7 + }, + "Spain": { + "count": 46 + }, + "Senegal": { + "count": 48 + }, + "Peru": { + "count": 107 + }, + "Botswana": { + "count": 23 + }, + "Namibia": { + "count": 24 + }, + "Uzbekistan": { + "count": 39 + }, + "Russia": { + "count": 11 + }, + "Lebanon": { + "count": 9 + }, + "Belgium": { + "count": 7 + }, + "Turkmenistan": { + "count": 14 + }, + "Afghanistan": { + "count": 17 + }, + "Venezuela": { + "count": 50 + }, + "Pakistan": { + "count": 105 + }, + "Guinea-Bissau": { + "count": 11 + }, + "Italy": { + "count": 85 + }, + "Syria": { + "count": 51 + }, + "Ukraine": { + "count": 6 + }, + "Bangladesh": { + "count": 38 + }, + "Moldova": { + "count": 38 + }, + "Sudan": { + "count": 9 + }, + "Mali": { + "count": 5 + }, + "Libya": { + "count": 16 + }, + "Somalia": { + "count": 21 + }, + "Burundi": { + "count": 12 + }, + "Kazakhstan": { + "count": 25 + }, + "Slovenia": { + "count": 32 + }, + "Nicaragua": { + "count": 7 + }, + "Indonesia": { + "count": 10 + }, + "Chad": { + "count": 16 + }, + "Paraguay": { + "count": 5 + }, + "Uganda": { + "count": 11 + }, + "Tanzania": { + "count": 32 + }, + "Eritrea": { + "count": 11 + }, + "Israel": { + "count": 37 + }, + "Honduras": { + "count": 6 + }, + "State of Palestine": { + "count": 25 + }, + "Norway": { + "count": 18 + }, + "Tajikistan": { + "count": 9 + }, + "Togo": { + "count": 12 + }, + "Azerbaijan": { + "count": 32 + }, + "Kenya": { + "count": 25 + }, + "Kyrgyzstan": { + "count": 20 + }, + "Malaysia": { + "count": 11 + }, + "Ghana": { + "count": 8 + }, + "Australia": { + "count": 32 + }, + "Netherlands": { + "count": 43 + }, + "Canada": { + "count": 30 + }, + "Sri Lanka": { + "count": 15 + }, + "Central African Republic": { + "count": 13 + }, + "Zimbabwe": { + "count": 15 + }, + "Lesotho": { + "count": 8 + }, + "Colombia": { + "count": 6 + }, + "Rwanda": { + "count": 11 + }, + "Papua New Guinea": { + "count": 17 + }, + "Zambia": 
{ + "count": 16 + }, + "Equatorial Guinea": { + "count": 16 + }, + "Tunisia": { + "count": 7 + }, + "Denmark": { + "count": 43 + }, + "New Zealand": { + "count": 9 + }, + "Laos": { + "count": 6 + }, + "Guatemala": { + "count": 11 + }, + "Burkina Faso": { + "count": 20 + }, + "Bhutan": { + "count": 6 + }, + "Czech Republic": { + "count": 11 + }, + "Hungary": { + "count": 5 + }, + "South Africa": { + "count": 11 + }, + "Slovakia": { + "count": 9 + }, + "Romania": { + "count": 8 + }, + "El Salvador": { + "count": 6 + }, + "Bosnia and Herzegovina": { + "count": 18 + }, + "Nepal": { + "count": 25 + }, + "Sweden": { + "count": 7 + }, + "South Korea": { + "count": 7 + }, + "Mongolia": { + "count": 17 + }, + "Jamaica": { + "count": 6 + }, + "Uruguay": { + "count": 9 + }, + "Guinea": { + "count": 8 + }, + "South Sudan": { + "count": 7 + }, + "Croatia": { + "count": 6 + }, + "Austria": { + "count": 7 + }, + "Gabon": { + "count": 12 + }, + "Chile": { + "count": 8 + }, + "Saudi Arabia": { + "count": 9 + }, + "Brazil": { + "count": 5 + }, + "Cambodia": { + "count": 7 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/WikiClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/WikiClusteringP2P.v2.json new file mode 100644 index 0000000000..c4055c008f --- /dev/null +++ b/mteb/descriptive_stats/Clustering/WikiClusteringP2P.v2.json @@ -0,0 +1,1201 @@ +{ + "test": { + "num_samples": 28672, + "number_of_characters": 18055981, + "min_text_length": 1, + "average_text_length": 629.7426409040179, + "max_text_length": 69506, + "unique_texts": 2829, + "min_labels_per_text": 3, + "average_labels_per_text": 1.0, + "max_labels_per_text": 2942, + "unique_labels": 39, + "labels": { + "16": { + "count": 541 + }, + "3": { + "count": 1607 + }, + "12": { + "count": 846 + }, + "0": { + "count": 2410 + }, + "15": { + "count": 878 + }, + "11": { + "count": 864 + }, + "6": { + "count": 787 + }, + "9": { + "count": 654 + }, + "14": { + "count": 966 + }, + "8": { 
+ "count": 1389 + }, + "2": { + "count": 2428 + }, + "10": { + "count": 839 + }, + "1": { + "count": 1370 + }, + "4": { + "count": 2942 + }, + "7": { + "count": 2514 + }, + "5": { + "count": 1490 + }, + "13": { + "count": 918 + }, + "19": { + "count": 315 + }, + "17": { + "count": 711 + }, + "20": { + "count": 345 + }, + "18": { + "count": 800 + }, + "24": { + "count": 467 + }, + "25": { + "count": 928 + }, + "21": { + "count": 62 + }, + "26": { + "count": 270 + }, + "22": { + "count": 186 + }, + "23": { + "count": 36 + }, + "27": { + "count": 465 + }, + "28": { + "count": 62 + }, + "36": { + "count": 139 + }, + "32": { + "count": 57 + }, + "38": { + "count": 43 + }, + "30": { + "count": 52 + }, + "34": { + "count": 80 + }, + "33": { + "count": 75 + }, + "35": { + "count": 62 + }, + "31": { + "count": 63 + }, + "37": { + "count": 8 + }, + "29": { + "count": 3 + } + }, + "hf_subset_descriptive_stats": { + "bs": { + "num_samples": 2048, + "number_of_characters": 2142735, + "min_text_length": 2, + "average_text_length": 1046.25732421875, + "max_text_length": 37514, + "unique_texts": 1151, + "min_labels_per_text": 2, + "average_labels_per_text": 1.0, + "max_labels_per_text": 597, + "unique_labels": 17, + "labels": { + "16": { + "count": 268 + }, + "3": { + "count": 89 + }, + "12": { + "count": 597 + }, + "0": { + "count": 202 + }, + "15": { + "count": 113 + }, + "11": { + "count": 11 + }, + "6": { + "count": 142 + }, + "9": { + "count": 181 + }, + "14": { + "count": 179 + }, + "8": { + "count": 33 + }, + "2": { + "count": 172 + }, + "10": { + "count": 12 + }, + "1": { + "count": 7 + }, + "4": { + "count": 25 + }, + "7": { + "count": 6 + }, + "5": { + "count": 9 + }, + "13": { + "count": 2 + } + } + }, + "ca": { + "num_samples": 2048, + "number_of_characters": 1230301, + "min_text_length": 2, + "average_text_length": 600.73291015625, + "max_text_length": 12074, + "unique_texts": 1011, + "min_labels_per_text": 8, + "average_labels_per_text": 1.0, + "max_labels_per_text": 
737, + "unique_labels": 8, + "labels": { + "6": { + "count": 257 + }, + "1": { + "count": 737 + }, + "2": { + "count": 284 + }, + "4": { + "count": 394 + }, + "0": { + "count": 162 + }, + "7": { + "count": 151 + }, + "5": { + "count": 55 + }, + "3": { + "count": 8 + } + } + }, + "cs": { + "num_samples": 2048, + "number_of_characters": 1350102, + "min_text_length": 2, + "average_text_length": 659.2294921875, + "max_text_length": 6874, + "unique_texts": 1100, + "min_labels_per_text": 8, + "average_labels_per_text": 1.0, + "max_labels_per_text": 624, + "unique_labels": 21, + "labels": { + "19": { + "count": 35 + }, + "5": { + "count": 624 + }, + "17": { + "count": 126 + }, + "10": { + "count": 155 + }, + "1": { + "count": 231 + }, + "7": { + "count": 215 + }, + "11": { + "count": 128 + }, + "0": { + "count": 57 + }, + "13": { + "count": 75 + }, + "2": { + "count": 83 + }, + "3": { + "count": 38 + }, + "9": { + "count": 8 + }, + "6": { + "count": 14 + }, + "12": { + "count": 9 + }, + "16": { + "count": 16 + }, + "20": { + "count": 73 + }, + "18": { + "count": 38 + }, + "4": { + "count": 60 + }, + "15": { + "count": 14 + }, + "14": { + "count": 38 + }, + "8": { + "count": 11 + } + } + }, + "da": { + "num_samples": 2048, + "number_of_characters": 1572023, + "min_text_length": 2, + "average_text_length": 767.58935546875, + "max_text_length": 25249, + "unique_texts": 1149, + "min_labels_per_text": 14, + "average_labels_per_text": 1.0, + "max_labels_per_text": 364, + "unique_labels": 20, + "labels": { + "14": { + "count": 212 + }, + "4": { + "count": 74 + }, + "15": { + "count": 16 + }, + "8": { + "count": 165 + }, + "13": { + "count": 115 + }, + "0": { + "count": 79 + }, + "1": { + "count": 34 + }, + "9": { + "count": 114 + }, + "7": { + "count": 364 + }, + "10": { + "count": 32 + }, + "17": { + "count": 66 + }, + "18": { + "count": 32 + }, + "12": { + "count": 129 + }, + "11": { + "count": 159 + }, + "2": { + "count": 66 + }, + "3": { + "count": 185 + }, + "19": { + 
"count": 103 + }, + "16": { + "count": 33 + }, + "5": { + "count": 56 + }, + "6": { + "count": 14 + } + } + }, + "eu": { + "num_samples": 2048, + "number_of_characters": 829768, + "min_text_length": 4, + "average_text_length": 405.16015625, + "max_text_length": 6324, + "unique_texts": 672, + "min_labels_per_text": 44, + "average_labels_per_text": 1.0, + "max_labels_per_text": 995, + "unique_labels": 5, + "labels": { + "4": { + "count": 383 + }, + "0": { + "count": 995 + }, + "3": { + "count": 282 + }, + "2": { + "count": 344 + }, + "1": { + "count": 44 + } + } + }, + "gv": { + "num_samples": 2048, + "number_of_characters": 753687, + "min_text_length": 2, + "average_text_length": 368.01123046875, + "max_text_length": 5185, + "unique_texts": 548, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 534, + "unique_labels": 28, + "labels": { + "6": { + "count": 32 + }, + "1": { + "count": 83 + }, + "24": { + "count": 13 + }, + "17": { + "count": 152 + }, + "2": { + "count": 534 + }, + "25": { + "count": 76 + }, + "5": { + "count": 198 + }, + "15": { + "count": 100 + }, + "21": { + "count": 22 + }, + "26": { + "count": 188 + }, + "13": { + "count": 230 + }, + "20": { + "count": 11 + }, + "3": { + "count": 107 + }, + "19": { + "count": 88 + }, + "16": { + "count": 55 + }, + "22": { + "count": 29 + }, + "14": { + "count": 12 + }, + "8": { + "count": 61 + }, + "0": { + "count": 5 + }, + "10": { + "count": 4 + }, + "4": { + "count": 9 + }, + "23": { + "count": 6 + }, + "7": { + "count": 3 + }, + "9": { + "count": 20 + }, + "18": { + "count": 4 + }, + "12": { + "count": 3 + }, + "27": { + "count": 1 + }, + "11": { + "count": 2 + } + } + }, + "ilo": { + "num_samples": 2048, + "number_of_characters": 1265475, + "min_text_length": 2, + "average_text_length": 617.90771484375, + "max_text_length": 13214, + "unique_texts": 612, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 562, + "unique_labels": 29, + 
"labels": { + "3": { + "count": 562 + }, + "0": { + "count": 373 + }, + "18": { + "count": 521 + }, + "8": { + "count": 129 + }, + "13": { + "count": 123 + }, + "11": { + "count": 54 + }, + "25": { + "count": 8 + }, + "27": { + "count": 5 + }, + "17": { + "count": 13 + }, + "15": { + "count": 4 + }, + "4": { + "count": 28 + }, + "7": { + "count": 83 + }, + "10": { + "count": 15 + }, + "1": { + "count": 11 + }, + "24": { + "count": 15 + }, + "14": { + "count": 8 + }, + "16": { + "count": 4 + }, + "19": { + "count": 9 + }, + "23": { + "count": 10 + }, + "26": { + "count": 4 + }, + "28": { + "count": 8 + }, + "12": { + "count": 29 + }, + "21": { + "count": 12 + }, + "6": { + "count": 5 + }, + "20": { + "count": 6 + }, + "5": { + "count": 4 + }, + "22": { + "count": 2 + }, + "9": { + "count": 2 + }, + "2": { + "count": 1 + } + } + }, + "ku": { + "num_samples": 2048, + "number_of_characters": 862563, + "min_text_length": 2, + "average_text_length": 421.17333984375, + "max_text_length": 9379, + "unique_texts": 793, + "min_labels_per_text": 2, + "average_labels_per_text": 1.0, + "max_labels_per_text": 431, + "unique_labels": 39, + "labels": { + "14": { + "count": 14 + }, + "36": { + "count": 139 + }, + "20": { + "count": 108 + }, + "22": { + "count": 27 + }, + "15": { + "count": 102 + }, + "32": { + "count": 55 + }, + "8": { + "count": 431 + }, + "17": { + "count": 210 + }, + "38": { + "count": 43 + }, + "30": { + "count": 51 + }, + "4": { + "count": 60 + }, + "2": { + "count": 111 + }, + "6": { + "count": 95 + }, + "34": { + "count": 70 + }, + "27": { + "count": 15 + }, + "5": { + "count": 174 + }, + "26": { + "count": 37 + }, + "0": { + "count": 11 + }, + "25": { + "count": 50 + }, + "16": { + "count": 2 + }, + "12": { + "count": 16 + }, + "24": { + "count": 2 + }, + "11": { + "count": 17 + }, + "21": { + "count": 9 + }, + "13": { + "count": 20 + }, + "1": { + "count": 7 + }, + "33": { + "count": 33 + }, + "35": { + "count": 28 + }, + "10": { + "count": 11 + }, + "31": 
{ + "count": 51 + }, + "18": { + "count": 4 + }, + "3": { + "count": 4 + }, + "28": { + "count": 8 + }, + "37": { + "count": 8 + }, + "23": { + "count": 2 + }, + "19": { + "count": 7 + }, + "7": { + "count": 6 + }, + "9": { + "count": 8 + }, + "29": { + "count": 2 + } + } + }, + "lv": { + "num_samples": 2048, + "number_of_characters": 1578335, + "min_text_length": 2, + "average_text_length": 770.67138671875, + "max_text_length": 69506, + "unique_texts": 1125, + "min_labels_per_text": 5, + "average_labels_per_text": 1.0, + "max_labels_per_text": 351, + "unique_labels": 16, + "labels": { + "15": { + "count": 288 + }, + "2": { + "count": 110 + }, + "6": { + "count": 74 + }, + "12": { + "count": 50 + }, + "0": { + "count": 171 + }, + "14": { + "count": 188 + }, + "10": { + "count": 351 + }, + "5": { + "count": 142 + }, + "4": { + "count": 300 + }, + "13": { + "count": 60 + }, + "11": { + "count": 48 + }, + "1": { + "count": 165 + }, + "8": { + "count": 53 + }, + "7": { + "count": 5 + }, + "3": { + "count": 9 + }, + "9": { + "count": 34 + } + } + }, + "min": { + "num_samples": 2048, + "number_of_characters": 1293805, + "min_text_length": 2, + "average_text_length": 631.74072265625, + "max_text_length": 23070, + "unique_texts": 495, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 1595, + "unique_labels": 15, + "labels": { + "7": { + "count": 1595 + }, + "9": { + "count": 9 + }, + "4": { + "count": 48 + }, + "3": { + "count": 83 + }, + "2": { + "count": 160 + }, + "0": { + "count": 19 + }, + "5": { + "count": 74 + }, + "6": { + "count": 12 + }, + "10": { + "count": 12 + }, + "13": { + "count": 10 + }, + "8": { + "count": 5 + }, + "11": { + "count": 13 + }, + "12": { + "count": 2 + }, + "1": { + "count": 5 + }, + "14": { + "count": 1 + } + } + }, + "mt": { + "num_samples": 2048, + "number_of_characters": 1681864, + "min_text_length": 2, + "average_text_length": 821.22265625, + "max_text_length": 28371, + "unique_texts": 872, + 
"min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 654, + "unique_labels": 27, + "labels": { + "12": { + "count": 8 + }, + "10": { + "count": 147 + }, + "14": { + "count": 180 + }, + "17": { + "count": 117 + }, + "25": { + "count": 654 + }, + "19": { + "count": 35 + }, + "0": { + "count": 77 + }, + "3": { + "count": 12 + }, + "16": { + "count": 44 + }, + "15": { + "count": 108 + }, + "24": { + "count": 267 + }, + "6": { + "count": 43 + }, + "26": { + "count": 32 + }, + "4": { + "count": 79 + }, + "22": { + "count": 67 + }, + "9": { + "count": 16 + }, + "8": { + "count": 16 + }, + "2": { + "count": 55 + }, + "5": { + "count": 6 + }, + "11": { + "count": 30 + }, + "18": { + "count": 12 + }, + "21": { + "count": 12 + }, + "20": { + "count": 15 + }, + "23": { + "count": 7 + }, + "13": { + "count": 6 + }, + "7": { + "count": 1 + }, + "1": { + "count": 2 + } + } + }, + "sco": { + "num_samples": 2048, + "number_of_characters": 2181551, + "min_text_length": 2, + "average_text_length": 1065.21044921875, + "max_text_length": 13617, + "unique_texts": 1046, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 432, + "unique_labels": 23, + "labels": { + "18": { + "count": 178 + }, + "6": { + "count": 92 + }, + "9": { + "count": 28 + }, + "15": { + "count": 106 + }, + "8": { + "count": 432 + }, + "2": { + "count": 95 + }, + "11": { + "count": 104 + }, + "1": { + "count": 42 + }, + "13": { + "count": 248 + }, + "16": { + "count": 118 + }, + "20": { + "count": 130 + }, + "3": { + "count": 171 + }, + "22": { + "count": 57 + }, + "7": { + "count": 83 + }, + "10": { + "count": 74 + }, + "5": { + "count": 6 + }, + "4": { + "count": 17 + }, + "17": { + "count": 24 + }, + "14": { + "count": 14 + }, + "0": { + "count": 7 + }, + "19": { + "count": 18 + }, + "21": { + "count": 3 + }, + "12": { + "count": 1 + } + } + }, + "sq": { + "num_samples": 2048, + "number_of_characters": 871396, + "min_text_length": 1, + 
"average_text_length": 425.486328125, + "max_text_length": 18132, + "unique_texts": 789, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 444, + "unique_labels": 36, + "labels": { + "27": { + "count": 444 + }, + "9": { + "count": 234 + }, + "14": { + "count": 120 + }, + "0": { + "count": 128 + }, + "15": { + "count": 27 + }, + "11": { + "count": 298 + }, + "24": { + "count": 170 + }, + "28": { + "count": 46 + }, + "19": { + "count": 20 + }, + "25": { + "count": 140 + }, + "3": { + "count": 47 + }, + "2": { + "count": 87 + }, + "35": { + "count": 34 + }, + "8": { + "count": 53 + }, + "31": { + "count": 12 + }, + "17": { + "count": 3 + }, + "23": { + "count": 11 + }, + "20": { + "count": 2 + }, + "33": { + "count": 42 + }, + "10": { + "count": 26 + }, + "34": { + "count": 10 + }, + "7": { + "count": 2 + }, + "13": { + "count": 29 + }, + "4": { + "count": 4 + }, + "6": { + "count": 7 + }, + "26": { + "count": 9 + }, + "5": { + "count": 16 + }, + "30": { + "count": 1 + }, + "21": { + "count": 4 + }, + "22": { + "count": 4 + }, + "18": { + "count": 11 + }, + "32": { + "count": 2 + }, + "12": { + "count": 2 + }, + "16": { + "count": 1 + }, + "1": { + "count": 1 + }, + "29": { + "count": 1 + } + } + }, + "wa": { + "num_samples": 2048, + "number_of_characters": 442376, + "min_text_length": 3, + "average_text_length": 216.00390625, + "max_text_length": 1326, + "unique_texts": 475, + "min_labels_per_text": 1, + "average_labels_per_text": 1.0, + "max_labels_per_text": 1461, + "unique_labels": 6, + "labels": { + "5": { + "count": 126 + }, + "4": { + "count": 1461 + }, + "0": { + "count": 124 + }, + "2": { + "count": 326 + }, + "3": { + "count": 10 + }, + "1": { + "count": 1 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/InstructionReranking/Core17InstructionRetrieval.json b/mteb/descriptive_stats/InstructionReranking/Core17InstructionRetrieval.json new file mode 100644 index 0000000000..6f2a7d170e 
--- /dev/null +++ b/mteb/descriptive_stats/InstructionReranking/Core17InstructionRetrieval.json @@ -0,0 +1,29 @@ +{ + "test": { + "number_of_characters": 44459412, + "num_samples": 19939, + "num_queries": 40, + "num_documents": 19899, + "min_document_length": 55, + "average_document_length": 0.22061410121111613, + "max_document_length": 278, + "unique_documents": 19899, + "min_query_length": 8, + "average_query_length": 1111375.55, + "max_query_length": 2960, + "unique_queries": 40, + "none_queries": 0, + "min_relevant_docs_per_query": 135, + "average_relevant_docs_per_query": 43.6, + "max_relevant_docs_per_query": 379, + "unique_relevant_docs": 4739, + "num_instructions": 40, + "min_instruction_length": 102, + "average_instruction_length": 13015, + "max_instruction_length": 837, + "unique_instructions": 40, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/InstructionReranking/News21InstructionRetrieval.json b/mteb/descriptive_stats/InstructionReranking/News21InstructionRetrieval.json new file mode 100644 index 0000000000..5c4f7a6ba6 --- /dev/null +++ b/mteb/descriptive_stats/InstructionReranking/News21InstructionRetrieval.json @@ -0,0 +1,29 @@ +{ + "test": { + "number_of_characters": 72717436, + "num_samples": 30985, + "num_queries": 64, + "num_documents": 30921, + "min_document_length": 35, + "average_document_length": 0.15413473044209436, + "max_document_length": 159, + "unique_documents": 30921, + "min_query_length": 3, + "average_query_length": 1136135.46875, + "max_query_length": 5056, + "unique_queries": 64, + "none_queries": 0, + "min_relevant_docs_per_query": 83, + "average_relevant_docs_per_query": 26.796875, + "max_relevant_docs_per_query": 217, + "unique_relevant_docs": 4248, + "num_instructions": 64, + "min_instruction_length": 162, + "average_instruction_length": 29015, + "max_instruction_length": 879, + 
"unique_instructions": 64, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/InstructionReranking/Robust04InstructionRetrieval.json b/mteb/descriptive_stats/InstructionReranking/Robust04InstructionRetrieval.json new file mode 100644 index 0000000000..b765c4dc0d --- /dev/null +++ b/mteb/descriptive_stats/InstructionReranking/Robust04InstructionRetrieval.json @@ -0,0 +1,29 @@ +{ + "test": { + "number_of_characters": 109145153, + "num_samples": 47596, + "num_queries": 104, + "num_documents": 47492, + "min_document_length": 43, + "average_document_length": 0.21224627305651478, + "max_document_length": 425, + "unique_documents": 47492, + "min_query_length": 48, + "average_query_length": 1049375.701923077, + "max_query_length": 6597, + "unique_queries": 104, + "none_queries": 0, + "min_relevant_docs_per_query": 195, + "average_relevant_docs_per_query": 26.75, + "max_relevant_docs_per_query": 570, + "unique_relevant_docs": 17643, + "num_instructions": 104, + "min_instruction_length": 75, + "average_instruction_length": 29957, + "max_instruction_length": 923, + "unique_instructions": 104, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/MultilabelClassification/BrazilianToxicTweetsClassification.json b/mteb/descriptive_stats/MultilabelClassification/BrazilianToxicTweetsClassification.json new file mode 100644 index 0000000000..c058f40bae --- /dev/null +++ b/mteb/descriptive_stats/MultilabelClassification/BrazilianToxicTweetsClassification.json @@ -0,0 +1,74 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 172708, + "number_texts_intersect_with_train": 23, + "min_text_length": 5, + "average_text_length": 84.330078125, + "max_text_length": 304, + "unique_texts": 2046, + "min_labels_per_text": 
0, + "average_label_per_text": 0.57958984375, + "max_labels_per_text": 4, + "unique_labels": 7, + "labels": { + "obscene": { + "count": 653 + }, + "insult": { + "count": 430 + }, + "misogyny": { + "count": 46 + }, + "racism": { + "count": 13 + }, + "xenophobia": { + "count": 13 + }, + "homophobia": { + "count": 32 + }, + "None": { + "count": 1145 + } + } + }, + "train": { + "num_samples": 8192, + "number_of_characters": 714281, + "number_texts_intersect_with_train": null, + "min_text_length": 4, + "average_text_length": 87.1925048828125, + "max_text_length": 322, + "unique_texts": 8172, + "min_labels_per_text": 0, + "average_label_per_text": 0.5751953125, + "max_labels_per_text": 4, + "unique_labels": 7, + "labels": { + "None": { + "count": 4580 + }, + "obscene": { + "count": 2576 + }, + "insult": { + "count": 1700 + }, + "homophobia": { + "count": 139 + }, + "misogyny": { + "count": 179 + }, + "racism": { + "count": 54 + }, + "xenophobia": { + "count": 64 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/MultilabelClassification/KorHateSpeechMLClassification.json b/mteb/descriptive_stats/MultilabelClassification/KorHateSpeechMLClassification.json new file mode 100644 index 0000000000..e156ad2ac8 --- /dev/null +++ b/mteb/descriptive_stats/MultilabelClassification/KorHateSpeechMLClassification.json @@ -0,0 +1,86 @@ +{ + "test": { + "num_samples": 2037, + "number_of_characters": 70625, + "number_texts_intersect_with_train": 2, + "min_text_length": 1, + "average_text_length": 34.67108492881689, + "max_text_length": 300, + "unique_texts": 2037, + "min_labels_per_text": 1, + "average_label_per_text": 1.1467844869906725, + "max_labels_per_text": 3, + "unique_labels": 9, + "labels": { + "8": { + "count": 1103 + }, + "0": { + "count": 202 + }, + "5": { + "count": 148 + }, + "1": { + "count": 163 + }, + "2": { + "count": 229 + }, + "4": { + "count": 139 + }, + "7": { + "count": 46 + }, + "3": { + "count": 301 + }, + "6": { + "count": 5 + } + } + 
}, + "train": { + "num_samples": 8200, + "number_of_characters": 276145, + "number_texts_intersect_with_train": null, + "min_text_length": 1, + "average_text_length": 33.676219512195125, + "max_text_length": 302, + "unique_texts": 8192, + "min_labels_per_text": 1, + "average_label_per_text": 1.138170731707317, + "max_labels_per_text": 4, + "unique_labels": 9, + "labels": { + "8": { + "count": 4451 + }, + "2": { + "count": 886 + }, + "4": { + "count": 553 + }, + "3": { + "count": 1223 + }, + "1": { + "count": 658 + }, + "5": { + "count": 602 + }, + "0": { + "count": 754 + }, + "7": { + "count": 181 + }, + "6": { + "count": 25 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/MultilabelClassification/MalteseNewsClassification.json b/mteb/descriptive_stats/MultilabelClassification/MalteseNewsClassification.json new file mode 100644 index 0000000000..474314be05 --- /dev/null +++ b/mteb/descriptive_stats/MultilabelClassification/MalteseNewsClassification.json @@ -0,0 +1,134 @@ +{ + "test": { + "num_samples": 2297, + "number_of_characters": 4024570, + "number_texts_intersect_with_train": 0, + "min_text_length": 104, + "average_text_length": 1752.0983892033087, + "max_text_length": 17875, + "unique_texts": 2297, + "min_labels_per_text": 1, + "average_label_per_text": 1.2259468872442316, + "max_labels_per_text": 4, + "unique_labels": 17, + "labels": { + "15": { + "count": 460 + }, + "10": { + "count": 593 + }, + "7": { + "count": 22 + }, + "0": { + "count": 129 + }, + "1": { + "count": 261 + }, + "12": { + "count": 194 + }, + "4": { + "count": 48 + }, + "3": { + "count": 36 + }, + "8": { + "count": 43 + }, + "5": { + "count": 28 + }, + "13": { + "count": 70 + }, + "2": { + "count": 328 + }, + "16": { + "count": 36 + }, + "6": { + "count": 472 + }, + "11": { + "count": 48 + }, + "14": { + "count": 31 + }, + "9": { + "count": 17 + } + } + }, + "train": { + "num_samples": 10784, + "number_of_characters": 17207270, + 
"number_texts_intersect_with_train": null, + "min_text_length": 80, + "average_text_length": 1595.6296364985162, + "max_text_length": 21442, + "unique_texts": 10784, + "min_labels_per_text": 1, + "average_label_per_text": 1.2194918397626113, + "max_labels_per_text": 5, + "unique_labels": 17, + "labels": { + "12": { + "count": 906 + }, + "10": { + "count": 2770 + }, + "7": { + "count": 103 + }, + "15": { + "count": 2146 + }, + "8": { + "count": 203 + }, + "0": { + "count": 602 + }, + "5": { + "count": 134 + }, + "4": { + "count": 225 + }, + "2": { + "count": 1530 + }, + "1": { + "count": 1214 + }, + "13": { + "count": 326 + }, + "3": { + "count": 168 + }, + "16": { + "count": 169 + }, + "6": { + "count": 2203 + }, + "11": { + "count": 225 + }, + "14": { + "count": 142 + }, + "9": { + "count": 85 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/MultilabelClassification/SensitiveTopicsClassification.json b/mteb/descriptive_stats/MultilabelClassification/SensitiveTopicsClassification.json new file mode 100644 index 0000000000..1739f12d59 --- /dev/null +++ b/mteb/descriptive_stats/MultilabelClassification/SensitiveTopicsClassification.json @@ -0,0 +1,146 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 195229, + "number_texts_intersect_with_train": 0, + "min_text_length": 18, + "average_text_length": 95.32666015625, + "max_text_length": 248, + "unique_texts": 2048, + "min_labels_per_text": 0, + "average_label_per_text": 1.26416015625, + "max_labels_per_text": 4, + "unique_labels": 19, + "labels": { + "None": { + "count": 373 + }, + "0": { + "count": 163 + }, + "13": { + "count": 248 + }, + "12": { + "count": 315 + }, + "16": { + "count": 166 + }, + "4": { + "count": 256 + }, + "15": { + "count": 119 + }, + "2": { + "count": 133 + }, + "10": { + "count": 148 + }, + "1": { + "count": 52 + }, + "8": { + "count": 53 + }, + "17": { + "count": 238 + }, + "9": { + "count": 221 + }, + "14": { + "count": 138 + }, + "6": { + 
"count": 46 + }, + "5": { + "count": 110 + }, + "11": { + "count": 163 + }, + "7": { + "count": 11 + }, + "3": { + "count": 9 + } + } + }, + "train": { + "num_samples": 29177, + "number_of_characters": 3484328, + "number_texts_intersect_with_train": null, + "min_text_length": 16, + "average_text_length": 119.42036535627378, + "max_text_length": 249, + "unique_texts": 29177, + "min_labels_per_text": 0, + "average_label_per_text": 0.9729581519690167, + "max_labels_per_text": 6, + "unique_labels": 19, + "labels": { + "5": { + "count": 1002 + }, + "None": { + "count": 2867 + }, + "7": { + "count": 1371 + }, + "2": { + "count": 3604 + }, + "9": { + "count": 1888 + }, + "10": { + "count": 902 + }, + "11": { + "count": 1453 + }, + "14": { + "count": 3814 + }, + "15": { + "count": 1647 + }, + "1": { + "count": 901 + }, + "0": { + "count": 1404 + }, + "13": { + "count": 983 + }, + "17": { + "count": 1371 + }, + "6": { + "count": 905 + }, + "3": { + "count": 2020 + }, + "16": { + "count": 1181 + }, + "4": { + "count": 1372 + }, + "12": { + "count": 1423 + }, + "8": { + "count": 1147 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/ArEntail.json b/mteb/descriptive_stats/PairClassification/ArEntail.json new file mode 100644 index 0000000000..aa20bd6fe3 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/ArEntail.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 1000, + "number_of_characters": 131547, + "unique_pairs": 1000, + "min_sentence1_length": 33, + "avg_sentence1_length": 73.529, + "max_sentence1_length": 129, + "unique_sentence1": 507, + "min_sentence2_length": 26, + "avg_sentence2_length": 58.018, + "max_sentence2_length": 160, + "unique_sentence2": 998, + "unique_labels": 2, + "labels": { + "1": { + "count": 500 + }, + "0": { + "count": 500 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/ArmenianParaphrasePC.json 
b/mteb/descriptive_stats/PairClassification/ArmenianParaphrasePC.json new file mode 100644 index 0000000000..dde4d3d8da --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/ArmenianParaphrasePC.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 1470, + "number_of_characters": 354826, + "unique_pairs": 1470, + "min_sentence1_length": 28, + "avg_sentence1_length": 123.76054421768707, + "max_sentence1_length": 268, + "unique_sentence1": 1465, + "min_sentence2_length": 29, + "avg_sentence2_length": 117.61768707482993, + "max_sentence2_length": 271, + "unique_sentence2": 1469, + "unique_labels": 2, + "labels": { + "1": { + "count": 1021 + }, + "0": { + "count": 449 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/Assin2RTE.json b/mteb/descriptive_stats/PairClassification/Assin2RTE.json new file mode 100644 index 0000000000..e6ce178d01 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/Assin2RTE.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 219609, + "unique_pairs": 2043, + "min_sentence1_length": 20, + "avg_sentence1_length": 55.2802734375, + "max_sentence1_length": 159, + "unique_sentence1": 1776, + "min_sentence2_length": 19, + "avg_sentence2_length": 51.95068359375, + "max_sentence2_length": 158, + "unique_sentence2": 1776, + "unique_labels": 2, + "labels": { + "0": { + "count": 1024 + }, + "1": { + "count": 1024 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/CDSC-E.json b/mteb/descriptive_stats/PairClassification/CDSC-E.json new file mode 100644 index 0000000000..3849ce7e2c --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/CDSC-E.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 1000, + "number_of_characters": 149941, + "unique_pairs": 998, + "min_sentence1_length": 14, + "avg_sentence1_length": 75.237, + "max_sentence1_length": 190, + "unique_sentence1": 510, + "min_sentence2_length": 17, 
+ "avg_sentence2_length": 74.704, + "max_sentence2_length": 190, + "unique_sentence2": 509, + "unique_labels": 2, + "labels": { + "0": { + "count": 810 + }, + "1": { + "count": 190 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/CTKFactsNLI.json b/mteb/descriptive_stats/PairClassification/CTKFactsNLI.json new file mode 100644 index 0000000000..3eae4dfb1c --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/CTKFactsNLI.json @@ -0,0 +1,46 @@ +{ + "validation": { + "num_samples": 305, + "number_of_characters": 133788, + "unique_pairs": 305, + "min_sentence1_length": 50, + "avg_sentence1_length": 375.0655737704918, + "max_sentence1_length": 2383, + "unique_sentence1": 115, + "min_sentence2_length": 15, + "avg_sentence2_length": 63.58360655737705, + "max_sentence2_length": 189, + "unique_sentence2": 224, + "unique_labels": 2, + "labels": { + "1": { + "count": 191 + }, + "0": { + "count": 114 + } + } + }, + "test": { + "num_samples": 375, + "number_of_characters": 169218, + "unique_pairs": 374, + "min_sentence1_length": 196, + "avg_sentence1_length": 391.26666666666665, + "max_sentence1_length": 1676, + "unique_sentence1": 123, + "min_sentence2_length": 22, + "avg_sentence2_length": 59.98133333333333, + "max_sentence2_length": 131, + "unique_sentence2": 251, + "unique_labels": 2, + "labels": { + "0": { + "count": 115 + }, + "1": { + "count": 260 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/Cmnli.json b/mteb/descriptive_stats/PairClassification/Cmnli.json new file mode 100644 index 0000000000..71b9f3f175 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/Cmnli.json @@ -0,0 +1,24 @@ +{ + "validation": { + "num_samples": 8315, + "number_of_characters": 426122, + "unique_pairs": 8312, + "min_sentence1_length": 2, + "avg_sentence1_length": 34.50847865303668, + "max_sentence1_length": 135, + "unique_sentence1": 4132, + "min_sentence2_length": 2, + 
"avg_sentence2_length": 16.738905592303066, + "max_sentence2_length": 89, + "unique_sentence2": 8305, + "unique_labels": 2, + "labels": { + "1": { + "count": 4277 + }, + "0": { + "count": 4038 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/FalseFriendsGermanEnglish.json b/mteb/descriptive_stats/PairClassification/FalseFriendsGermanEnglish.json new file mode 100644 index 0000000000..e15d1e7ea8 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/FalseFriendsGermanEnglish.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 1524, + "number_of_characters": 61254, + "unique_pairs": 1502, + "min_sentence1_length": 3, + "avg_sentence1_length": 14.548556430446194, + "max_sentence1_length": 63, + "unique_sentence1": 489, + "min_sentence2_length": 3, + "avg_sentence2_length": 25.644356955380577, + "max_sentence2_length": 72, + "unique_sentence2": 986, + "unique_labels": 2, + "labels": { + "1": { + "count": 762 + }, + "0": { + "count": 762 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/FarsTail.json b/mteb/descriptive_stats/PairClassification/FarsTail.json new file mode 100644 index 0000000000..76e6192e23 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/FarsTail.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 1029, + "number_of_characters": 258990, + "unique_pairs": 1029, + "min_sentence1_length": 36, + "avg_sentence1_length": 181.20408163265307, + "max_sentence1_length": 549, + "unique_sentence1": 530, + "min_sentence2_length": 23, + "avg_sentence2_length": 70.4868804664723, + "max_sentence2_length": 196, + "unique_sentence2": 1026, + "unique_labels": 2, + "labels": { + "0": { + "count": 510 + }, + "1": { + "count": 519 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/KLUE-NLI.json b/mteb/descriptive_stats/PairClassification/KLUE-NLI.json new file mode 100644 index 0000000000..5ed74555bf --- 
/dev/null +++ b/mteb/descriptive_stats/PairClassification/KLUE-NLI.json @@ -0,0 +1,24 @@ +{ + "validation": { + "num_samples": 2000, + "number_of_characters": 140030, + "unique_pairs": 2000, + "min_sentence1_length": 19, + "avg_sentence1_length": 44.689, + "max_sentence1_length": 90, + "unique_sentence1": 1000, + "min_sentence2_length": 8, + "avg_sentence2_length": 25.326, + "max_sentence2_length": 85, + "unique_sentence2": 2000, + "unique_labels": 2, + "labels": { + "0": { + "count": 1000 + }, + "1": { + "count": 1000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/LegalBenchPC.json b/mteb/descriptive_stats/PairClassification/LegalBenchPC.json new file mode 100644 index 0000000000..40f445596d --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/LegalBenchPC.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 872437, + "unique_pairs": 2048, + "min_sentence1_length": 12, + "avg_sentence1_length": 288.84423828125, + "max_sentence1_length": 3671, + "unique_sentence1": 1296, + "min_sentence2_length": 15, + "avg_sentence2_length": 137.150390625, + "max_sentence2_length": 8094, + "unique_sentence2": 474, + "unique_labels": 2, + "labels": { + "0": { + "count": 827 + }, + "1": { + "count": 1221 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/Ocnli.json b/mteb/descriptive_stats/PairClassification/Ocnli.json new file mode 100644 index 0000000000..baafb55c07 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/Ocnli.json @@ -0,0 +1,24 @@ +{ + "validation": { + "num_samples": 1847, + "number_of_characters": 66809, + "unique_pairs": 1847, + "min_sentence1_length": 8, + "avg_sentence1_length": 24.3183540877098, + "max_sentence1_length": 50, + "unique_sentence1": 1522, + "min_sentence2_length": 3, + "avg_sentence2_length": 11.853275582024905, + "max_sentence2_length": 55, + "unique_sentence2": 1846, + "unique_labels": 2, + 
"labels": { + "1": { + "count": 947 + }, + "0": { + "count": 900 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/OpusparcusPC.json b/mteb/descriptive_stats/PairClassification/OpusparcusPC.json new file mode 100644 index 0000000000..4bb80c5358 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/OpusparcusPC.json @@ -0,0 +1,314 @@ +{ + "test.full": { + "num_samples": 9155, + "number_of_characters": 436535, + "unique_pairs": 9155, + "min_sentence1_length": 10, + "avg_sentence1_length": 23.896559257236483, + "max_sentence1_length": 122, + "unique_sentence1": 9155, + "min_sentence2_length": 10, + "avg_sentence2_length": 23.78612779901693, + "max_sentence2_length": 121, + "unique_sentence2": 9155, + "unique_labels": 2, + "labels": { + "1": { + "count": 6009 + }, + "0": { + "count": 3146 + } + }, + "hf_subset_descriptive_stats": { + "de": { + "num_samples": 1409, + "number_of_characters": 70269, + "unique_pairs": 1409, + "min_sentence1_length": 10, + "avg_sentence1_length": 24.828246983676365, + "max_sentence1_length": 98, + "unique_sentence1": 1409, + "min_sentence2_length": 10, + "avg_sentence2_length": 25.043293115684882, + "max_sentence2_length": 110, + "unique_sentence2": 1409, + "unique_labels": 2, + "labels": { + "1": { + "count": 1047 + }, + "0": { + "count": 362 + } + } + }, + "en": { + "num_samples": 1348, + "number_of_characters": 63924, + "unique_pairs": 1348, + "min_sentence1_length": 10, + "avg_sentence1_length": 23.98145400593472, + "max_sentence1_length": 82, + "unique_sentence1": 1348, + "min_sentence2_length": 10, + "avg_sentence2_length": 23.439910979228486, + "max_sentence2_length": 111, + "unique_sentence2": 1348, + "unique_labels": 2, + "labels": { + "1": { + "count": 982 + }, + "0": { + "count": 366 + } + } + }, + "fi": { + "num_samples": 1570, + "number_of_characters": 69983, + "unique_pairs": 1570, + "min_sentence1_length": 10, + "avg_sentence1_length": 22.2171974522293, + 
"max_sentence1_length": 98, + "unique_sentence1": 1570, + "min_sentence2_length": 10, + "avg_sentence2_length": 22.35796178343949, + "max_sentence2_length": 108, + "unique_sentence2": 1570, + "unique_labels": 2, + "labels": { + "1": { + "count": 958 + }, + "0": { + "count": 612 + } + } + }, + "fr": { + "num_samples": 1468, + "number_of_characters": 82094, + "unique_pairs": 1468, + "min_sentence1_length": 11, + "avg_sentence1_length": 28.242506811989102, + "max_sentence1_length": 122, + "unique_sentence1": 1468, + "min_sentence2_length": 10, + "avg_sentence2_length": 27.67983651226158, + "max_sentence2_length": 121, + "unique_sentence2": 1468, + "unique_labels": 2, + "labels": { + "1": { + "count": 1007 + }, + "0": { + "count": 461 + } + } + }, + "ru": { + "num_samples": 1632, + "number_of_characters": 71040, + "unique_pairs": 1632, + "min_sentence1_length": 11, + "avg_sentence1_length": 21.72610294117647, + "max_sentence1_length": 106, + "unique_sentence1": 1632, + "min_sentence2_length": 10, + "avg_sentence2_length": 21.803308823529413, + "max_sentence2_length": 94, + "unique_sentence2": 1632, + "unique_labels": 2, + "labels": { + "1": { + "count": 1068 + }, + "0": { + "count": 564 + } + } + }, + "sv": { + "num_samples": 1728, + "number_of_characters": 79225, + "unique_pairs": 1728, + "min_sentence1_length": 10, + "avg_sentence1_length": 22.95428240740741, + "max_sentence1_length": 79, + "unique_sentence1": 1728, + "min_sentence2_length": 10, + "avg_sentence2_length": 22.89351851851852, + "max_sentence2_length": 106, + "unique_sentence2": 1728, + "unique_labels": 2, + "labels": { + "1": { + "count": 947 + }, + "0": { + "count": 781 + } + } + } + } + }, + "validation.full": { + "num_samples": 9052, + "number_of_characters": 441614, + "unique_pairs": 9052, + "min_sentence1_length": 10, + "avg_sentence1_length": 24.41427308882015, + "max_sentence1_length": 140, + "unique_sentence1": 9052, + "min_sentence2_length": 10, + "avg_sentence2_length": 24.372072470172338, + 
"max_sentence2_length": 155, + "unique_sentence2": 9052, + "unique_labels": 2, + "labels": { + "1": { + "count": 5992 + }, + "0": { + "count": 3060 + } + }, + "hf_subset_descriptive_stats": { + "de": { + "num_samples": 1393, + "number_of_characters": 69379, + "unique_pairs": 1393, + "min_sentence1_length": 11, + "avg_sentence1_length": 24.728643216080403, + "max_sentence1_length": 108, + "unique_sentence1": 1393, + "min_sentence2_length": 10, + "avg_sentence2_length": 25.07681263460158, + "max_sentence2_length": 122, + "unique_sentence2": 1393, + "unique_labels": 2, + "labels": { + "1": { + "count": 1013 + }, + "0": { + "count": 380 + } + } + }, + "en": { + "num_samples": 1350, + "number_of_characters": 63869, + "unique_pairs": 1350, + "min_sentence1_length": 10, + "avg_sentence1_length": 23.950370370370372, + "max_sentence1_length": 91, + "unique_sentence1": 1350, + "min_sentence2_length": 10, + "avg_sentence2_length": 23.36, + "max_sentence2_length": 76, + "unique_sentence2": 1350, + "unique_labels": 2, + "labels": { + "0": { + "count": 335 + }, + "1": { + "count": 1015 + } + } + }, + "fi": { + "num_samples": 1575, + "number_of_characters": 71790, + "unique_pairs": 1575, + "min_sentence1_length": 11, + "avg_sentence1_length": 22.70095238095238, + "max_sentence1_length": 99, + "unique_sentence1": 1575, + "min_sentence2_length": 10, + "avg_sentence2_length": 22.88, + "max_sentence2_length": 155, + "unique_sentence2": 1575, + "unique_labels": 2, + "labels": { + "1": { + "count": 963 + }, + "0": { + "count": 612 + } + } + }, + "fr": { + "num_samples": 1404, + "number_of_characters": 81660, + "unique_pairs": 1404, + "min_sentence1_length": 11, + "avg_sentence1_length": 29.03988603988604, + "max_sentence1_length": 140, + "unique_sentence1": 1404, + "min_sentence2_length": 10, + "avg_sentence2_length": 29.122507122507123, + "max_sentence2_length": 139, + "unique_sentence2": 1404, + "unique_labels": 2, + "labels": { + "1": { + "count": 997 + }, + "0": { + "count": 407 + 
} + } + }, + "ru": { + "num_samples": 1598, + "number_of_characters": 77436, + "unique_pairs": 1598, + "min_sentence1_length": 10, + "avg_sentence1_length": 24.303504380475594, + "max_sentence1_length": 100, + "unique_sentence1": 1598, + "min_sentence2_length": 11, + "avg_sentence2_length": 24.154568210262827, + "max_sentence2_length": 106, + "unique_sentence2": 1598, + "unique_labels": 2, + "labels": { + "1": { + "count": 1020 + }, + "0": { + "count": 578 + } + } + }, + "sv": { + "num_samples": 1732, + "number_of_characters": 77480, + "unique_pairs": 1732, + "min_sentence1_length": 10, + "avg_sentence1_length": 22.433602771362587, + "max_sentence1_length": 101, + "unique_sentence1": 1732, + "min_sentence2_length": 10, + "avg_sentence2_length": 22.30080831408776, + "max_sentence2_length": 104, + "unique_sentence2": 1732, + "unique_labels": 2, + "labels": { + "1": { + "count": 984 + }, + "0": { + "count": 748 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/PSC.json b/mteb/descriptive_stats/PairClassification/PSC.json new file mode 100644 index 0000000000..9245c6cf7d --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/PSC.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 1078, + "number_of_characters": 1206570, + "unique_pairs": 1074, + "min_sentence1_length": 314, + "avg_sentence1_length": 549.2820037105752, + "max_sentence1_length": 1445, + "unique_sentence1": 507, + "min_sentence2_length": 293, + "avg_sentence2_length": 569.9851576994434, + "max_sentence2_length": 1534, + "unique_sentence2": 406, + "unique_labels": 2, + "labels": { + "0": { + "count": 750 + }, + "1": { + "count": 328 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/PpcPC.json b/mteb/descriptive_stats/PairClassification/PpcPC.json new file mode 100644 index 0000000000..2d037cc033 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/PpcPC.json @@ -0,0 +1,24 @@ +{ + 
"test": { + "num_samples": 1000, + "number_of_characters": 84963, + "unique_pairs": 1000, + "min_sentence1_length": 8, + "avg_sentence1_length": 40.214, + "max_sentence1_length": 226, + "unique_sentence1": 984, + "min_sentence2_length": 8, + "avg_sentence2_length": 44.749, + "max_sentence2_length": 249, + "unique_sentence2": 996, + "unique_labels": 2, + "labels": { + "1": { + "count": 604 + }, + "0": { + "count": 396 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/RTE3.json b/mteb/descriptive_stats/PairClassification/RTE3.json new file mode 100644 index 0000000000..b702407e9d --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/RTE3.json @@ -0,0 +1,114 @@ +{ + "test": { + "num_samples": 1923, + "number_of_characters": 479937, + "unique_pairs": 1923, + "min_sentence1_length": 45, + "avg_sentence1_length": 196.5340613624545, + "max_sentence1_length": 661, + "unique_sentence1": 1832, + "min_sentence2_length": 16, + "avg_sentence2_length": 53.04316172646906, + "max_sentence2_length": 166, + "unique_sentence2": 1651, + "unique_labels": 2, + "labels": { + "1": { + "count": 1634 + }, + "0": { + "count": 289 + } + }, + "hf_subset_descriptive_stats": { + "de": { + "num_samples": 481, + "number_of_characters": 124552, + "unique_pairs": 481, + "min_sentence1_length": 45, + "avg_sentence1_length": 204.0852390852391, + "max_sentence1_length": 640, + "unique_sentence1": 454, + "min_sentence2_length": 23, + "avg_sentence2_length": 54.858627858627855, + "max_sentence2_length": 161, + "unique_sentence2": 409, + "unique_labels": 2, + "labels": { + "1": { + "count": 409 + }, + "0": { + "count": 72 + } + } + }, + "en": { + "num_samples": 482, + "number_of_characters": 108730, + "unique_pairs": 482, + "min_sentence1_length": 50, + "avg_sentence1_length": 178.0643153526971, + "max_sentence1_length": 595, + "unique_sentence1": 457, + "min_sentence2_length": 16, + "avg_sentence2_length": 47.516597510373444, + 
"max_sentence2_length": 142, + "unique_sentence2": 408, + "unique_labels": 2, + "labels": { + "1": { + "count": 410 + }, + "0": { + "count": 72 + } + } + }, + "fr": { + "num_samples": 482, + "number_of_characters": 126128, + "unique_pairs": 482, + "min_sentence1_length": 56, + "avg_sentence1_length": 205.60995850622407, + "max_sentence1_length": 641, + "unique_sentence1": 467, + "min_sentence2_length": 24, + "avg_sentence2_length": 56.066390041493776, + "max_sentence2_length": 166, + "unique_sentence2": 428, + "unique_labels": 2, + "labels": { + "1": { + "count": 409 + }, + "0": { + "count": 73 + } + } + }, + "it": { + "num_samples": 478, + "number_of_characters": 120527, + "unique_pairs": 478, + "min_sentence1_length": 63, + "avg_sentence1_length": 198.407949790795, + "max_sentence1_length": 661, + "unique_sentence1": 454, + "min_sentence2_length": 21, + "avg_sentence2_length": 53.74058577405858, + "max_sentence2_length": 151, + "unique_sentence2": 406, + "unique_labels": 2, + "labels": { + "1": { + "count": 406 + }, + "0": { + "count": 72 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/SICK-BR-PC.json b/mteb/descriptive_stats/PairClassification/SICK-BR-PC.json new file mode 100644 index 0000000000..66ffbfa87f --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/SICK-BR-PC.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 1462, + "number_of_characters": 159837, + "unique_pairs": 1462, + "min_sentence1_length": 18, + "avg_sentence1_length": 54.23734610123119, + "max_sentence1_length": 158, + "unique_sentence1": 1285, + "min_sentence2_length": 20, + "avg_sentence2_length": 55.090287277701776, + "max_sentence2_length": 145, + "unique_sentence2": 1292, + "unique_labels": 2, + "labels": { + "0": { + "count": 1165 + }, + "1": { + "count": 297 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/SICK-E-PL.json 
b/mteb/descriptive_stats/PairClassification/SICK-E-PL.json new file mode 100644 index 0000000000..a0e3548aa4 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/SICK-E-PL.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 4906, + "number_of_characters": 419806, + "unique_pairs": 4892, + "min_sentence1_length": 8, + "avg_sentence1_length": 43.16102731349368, + "max_sentence1_length": 169, + "unique_sentence1": 3332, + "min_sentence2_length": 8, + "avg_sentence2_length": 42.40888707704851, + "max_sentence2_length": 144, + "unique_sentence2": 3278, + "unique_labels": 2, + "labels": { + "0": { + "count": 3502 + }, + "1": { + "count": 1404 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/SprintDuplicateQuestions.json b/mteb/descriptive_stats/PairClassification/SprintDuplicateQuestions.json new file mode 100644 index 0000000000..babc095081 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/SprintDuplicateQuestions.json @@ -0,0 +1,46 @@ +{ + "validation": { + "num_samples": 101000, + "number_of_characters": 12006640, + "unique_pairs": 101000, + "min_sentence1_length": 22, + "avg_sentence1_length": 65.159, + "max_sentence1_length": 139, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "avg_sentence2_length": 53.71862376237624, + "max_sentence2_length": 223, + "unique_sentence2": 7932, + "unique_labels": 2, + "labels": { + "1": { + "count": 1000 + }, + "0": { + "count": 100000 + } + } + }, + "test": { + "num_samples": 101000, + "number_of_characters": 12292709, + "unique_pairs": 101000, + "min_sentence1_length": 19, + "avg_sentence1_length": 67.944, + "max_sentence1_length": 157, + "unique_sentence1": 1000, + "min_sentence2_length": 12, + "avg_sentence2_length": 53.7659900990099, + "max_sentence2_length": 223, + "unique_sentence2": 7932, + "unique_labels": 2, + "labels": { + "1": { + "count": 1000 + }, + "0": { + "count": 100000 + } + } + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/PairClassification/TERRa.json b/mteb/descriptive_stats/PairClassification/TERRa.json new file mode 100644 index 0000000000..cd97c967bc --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/TERRa.json @@ -0,0 +1,24 @@ +{ + "dev": { + "num_samples": 307, + "number_of_characters": 84848, + "unique_pairs": 307, + "min_sentence1_length": 39, + "avg_sentence1_length": 230.76221498371336, + "max_sentence1_length": 717, + "unique_sentence1": 282, + "min_sentence2_length": 12, + "avg_sentence2_length": 45.615635179153095, + "max_sentence2_length": 129, + "unique_sentence2": 307, + "unique_labels": 2, + "labels": { + "1": { + "count": 153 + }, + "0": { + "count": 154 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/TwitterSemEval2015.json b/mteb/descriptive_stats/PairClassification/TwitterSemEval2015.json new file mode 100644 index 0000000000..5e89bb032d --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/TwitterSemEval2015.json @@ -0,0 +1,24 @@ +{ + "test": { + "num_samples": 16777, + "number_of_characters": 1424986, + "unique_pairs": 16767, + "min_sentence1_length": 14, + "avg_sentence1_length": 38.319365798414495, + "max_sentence1_length": 98, + "unique_sentence1": 2073, + "min_sentence2_length": 13, + "avg_sentence2_length": 46.61751207009596, + "max_sentence2_length": 119, + "unique_sentence2": 15755, + "unique_labels": 2, + "labels": { + "0": { + "count": 12987 + }, + "1": { + "count": 3790 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/XNLIV2.json b/mteb/descriptive_stats/PairClassification/XNLIV2.json new file mode 100644 index 0000000000..d092e07c3f --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/XNLIV2.json @@ -0,0 +1,312 @@ +{ + "test": { + "num_samples": 17745, + "number_of_characters": 2778287, + "unique_pairs": 17745, + "min_sentence1_length": 5, + "avg_sentence1_length": 105.99329388560157, + 
"max_sentence1_length": 339, + "unique_sentence1": 14234, + "min_sentence2_length": 8, + "avg_sentence2_length": 50.57402085094393, + "max_sentence2_length": 162, + "unique_sentence2": 17745, + "unique_labels": 2, + "labels": { + "0": { + "count": 8879 + }, + "1": { + "count": 8866 + } + }, + "hf_subset_descriptive_stats": { + "punjabi": { + "num_samples": 1365, + "number_of_characters": 211060, + "unique_pairs": 1365, + "min_sentence1_length": 16, + "avg_sentence1_length": 104.71721611721611, + "max_sentence1_length": 275, + "unique_sentence1": 1095, + "min_sentence2_length": 8, + "avg_sentence2_length": 49.9054945054945, + "max_sentence2_length": 138, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "gujrati": { + "num_samples": 1365, + "number_of_characters": 202647, + "unique_pairs": 1365, + "min_sentence1_length": 15, + "avg_sentence1_length": 100.43589743589743, + "max_sentence1_length": 270, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + "avg_sentence2_length": 48.02344322344322, + "max_sentence2_length": 141, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "kannada": { + "num_samples": 1365, + "number_of_characters": 220182, + "unique_pairs": 1365, + "min_sentence1_length": 16, + "avg_sentence1_length": 108.54212454212454, + "max_sentence1_length": 283, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + "avg_sentence2_length": 52.76336996336996, + "max_sentence2_length": 158, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "assamese": { + "num_samples": 1365, + "number_of_characters": 202808, + "unique_pairs": 1365, + "min_sentence1_length": 17, + "avg_sentence1_length": 100.45860805860806, + "max_sentence1_length": 271, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + 
"avg_sentence2_length": 48.11868131868132, + "max_sentence2_length": 153, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "bengali": { + "num_samples": 1365, + "number_of_characters": 204859, + "unique_pairs": 1365, + "min_sentence1_length": 5, + "avg_sentence1_length": 102.37582417582418, + "max_sentence1_length": 282, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + "avg_sentence2_length": 47.704029304029305, + "max_sentence2_length": 139, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "marathi": { + "num_samples": 1365, + "number_of_characters": 205734, + "unique_pairs": 1365, + "min_sentence1_length": 18, + "avg_sentence1_length": 102.1985347985348, + "max_sentence1_length": 272, + "unique_sentence1": 1095, + "min_sentence2_length": 10, + "avg_sentence2_length": 48.522344322344324, + "max_sentence2_length": 156, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "bhojpuri": { + "num_samples": 1365, + "number_of_characters": 205063, + "unique_pairs": 1365, + "min_sentence1_length": 5, + "avg_sentence1_length": 101.88864468864469, + "max_sentence1_length": 266, + "unique_sentence1": 1095, + "min_sentence2_length": 12, + "avg_sentence2_length": 48.34065934065934, + "max_sentence2_length": 154, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "odiya": { + "num_samples": 1365, + "number_of_characters": 210923, + "unique_pairs": 1365, + "min_sentence1_length": 17, + "avg_sentence1_length": 103.77509157509158, + "max_sentence1_length": 259, + "unique_sentence1": 1094, + "min_sentence2_length": 11, + "avg_sentence2_length": 50.747252747252745, + "max_sentence2_length": 149, + "unique_sentence2": 1365, + "unique_labels": 2, + 
"labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "sanskrit": { + "num_samples": 1365, + "number_of_characters": 214093, + "unique_pairs": 1365, + "min_sentence1_length": 17, + "avg_sentence1_length": 105.03516483516484, + "max_sentence1_length": 272, + "unique_sentence1": 1095, + "min_sentence2_length": 8, + "avg_sentence2_length": 51.80952380952381, + "max_sentence2_length": 148, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "tamil": { + "num_samples": 1365, + "number_of_characters": 239561, + "unique_pairs": 1365, + "min_sentence1_length": 15, + "avg_sentence1_length": 118.0923076923077, + "max_sentence1_length": 337, + "unique_sentence1": 1095, + "min_sentence2_length": 10, + "avg_sentence2_length": 57.41025641025641, + "max_sentence2_length": 156, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "turkish": { + "num_samples": 1365, + "number_of_characters": 206729, + "unique_pairs": 1365, + "min_sentence1_length": 15, + "avg_sentence1_length": 103.18021978021979, + "max_sentence1_length": 266, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + "avg_sentence2_length": 48.26959706959707, + "max_sentence2_length": 145, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "greek": { + "num_samples": 1365, + "number_of_characters": 236520, + "unique_pairs": 1365, + "min_sentence1_length": 13, + "avg_sentence1_length": 117.69377289377289, + "max_sentence1_length": 339, + "unique_sentence1": 1095, + "min_sentence2_length": 10, + "avg_sentence2_length": 55.58095238095238, + "max_sentence2_length": 162, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + }, + "russian": { + "num_samples": 1365, + 
"number_of_characters": 218108, + "unique_pairs": 1365, + "min_sentence1_length": 11, + "avg_sentence1_length": 109.51941391941392, + "max_sentence1_length": 306, + "unique_sentence1": 1095, + "min_sentence2_length": 11, + "avg_sentence2_length": 50.266666666666666, + "max_sentence2_length": 150, + "unique_sentence2": 1365, + "unique_labels": 2, + "labels": { + "0": { + "count": 683 + }, + "1": { + "count": 682 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/XStance.json b/mteb/descriptive_stats/PairClassification/XStance.json new file mode 100644 index 0000000000..8f074734e6 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/XStance.json @@ -0,0 +1,92 @@ +{ + "test": { + "num_samples": 5542, + "number_of_characters": 1645233, + "unique_pairs": 5542, + "min_sentence1_length": 48, + "avg_sentence1_length": 123.79538072897871, + "max_sentence1_length": 403, + "unique_sentence1": 418, + "min_sentence2_length": 50, + "avg_sentence2_length": 173.0709130277878, + "max_sentence2_length": 500, + "unique_sentence2": 5542, + "unique_labels": 2, + "labels": { + "0": { + "count": 2561 + }, + "1": { + "count": 2981 + } + }, + "hf_subset_descriptive_stats": { + "de": { + "num_samples": 2048, + "number_of_characters": 564191, + "unique_pairs": 2048, + "min_sentence1_length": 48, + "avg_sentence1_length": 112.8359375, + "max_sentence1_length": 208, + "unique_sentence1": 96, + "min_sentence2_length": 50, + "avg_sentence2_length": 162.64794921875, + "max_sentence2_length": 500, + "unique_sentence2": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 1015 + }, + "1": { + "count": 1033 + } + } + }, + "fr": { + "num_samples": 2048, + "number_of_characters": 656746, + "unique_pairs": 2048, + "min_sentence1_length": 56, + "avg_sentence1_length": 127.8798828125, + "max_sentence1_length": 351, + "unique_sentence1": 156, + "min_sentence2_length": 50, + "avg_sentence2_length": 192.796875, + 
"max_sentence2_length": 500, + "unique_sentence2": 2048, + "unique_labels": 2, + "labels": { + "0": { + "count": 880 + }, + "1": { + "count": 1168 + } + } + }, + "it": { + "num_samples": 1446, + "number_of_characters": 424296, + "unique_pairs": 1446, + "min_sentence1_length": 60, + "avg_sentence1_length": 133.53250345781467, + "max_sentence1_length": 403, + "unique_sentence1": 166, + "min_sentence2_length": 50, + "avg_sentence2_length": 159.8948824343015, + "max_sentence2_length": 500, + "unique_sentence2": 1446, + "unique_labels": 2, + "labels": { + "1": { + "count": 780 + }, + "0": { + "count": 666 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/indonli.json b/mteb/descriptive_stats/PairClassification/indonli.json new file mode 100644 index 0000000000..9edc097358 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/indonli.json @@ -0,0 +1,24 @@ +{ + "test_expert": { + "num_samples": 2040, + "number_of_characters": 595178, + "unique_pairs": 2040, + "min_sentence1_length": 32, + "avg_sentence1_length": 226.04166666666666, + "max_sentence1_length": 916, + "unique_sentence1": 514, + "min_sentence2_length": 15, + "avg_sentence2_length": 65.71225490196079, + "max_sentence2_length": 267, + "unique_sentence2": 2035, + "unique_labels": 2, + "labels": { + "0": { + "count": 999 + }, + "1": { + "count": 1041 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/AlloprofReranking.json b/mteb/descriptive_stats/Reranking/AlloprofReranking.json new file mode 100644 index 0000000000..8e39a283ce --- /dev/null +++ b/mteb/descriptive_stats/Reranking/AlloprofReranking.json @@ -0,0 +1,29 @@ +{ + "test": { + "number_of_characters": 102329333, + "num_samples": 27355, + "num_queries": 2316, + "num_documents": 25039, + "min_document_length": 8, + "average_document_length": 15.79020727664843, + "max_document_length": 2863, + "unique_documents": 25039, + "min_query_length": 42, + 
"average_query_length": 44012.93696027634, + "max_query_length": 47972, + "unique_queries": 2316, + "none_queries": 0, + "min_relevant_docs_per_query": 10, + "average_relevant_docs_per_query": 1.2845423143350605, + "max_relevant_docs_per_query": 37, + "unique_relevant_docs": 25039, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 10, + "average_top_ranked_per_query": 10.811312607944732, + "max_top_ranked_per_query": 37 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json index a0ced7def7..27103ee3cc 100644 --- a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json +++ b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json @@ -1,20 +1,29 @@ { "test": { - "num_samples": 375, - "number_of_characters": 413674, - "num_positive": 2255, - "num_negative": 5245, - "min_query_length": 17, - "avg_query_length": 50.205333333333336, + "number_of_characters": 397127, + "num_samples": 7581, + "num_queries": 361, + "num_documents": 7220, + "min_document_length": 17, + "average_document_length": 2.5065096952908585, + "max_document_length": 148, + "unique_documents": 7220, + "min_query_length": 15, + "average_query_length": 1049.94459833795, "max_query_length": 148, - "unique_query": 374, - "min_positive_length": 15, - "avg_positive_length": 52.54013303769401, - "max_positive_length": 152, - "unique_positive": 2165, - "min_negative_length": 15, - "avg_negative_length": 52.69189704480458, - "max_negative_length": 148, - "unique_negative": 5002 + "unique_queries": 361, + "none_queries": 0, + "min_relevant_docs_per_query": 20, + "average_relevant_docs_per_query": 5.470914127423823, + "max_relevant_docs_per_query": 20, + "unique_relevant_docs": 7220, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 20, + "average_top_ranked_per_query": 20.0, + "max_top_ranked_per_query": 20 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/CMedQAv1-reranking.json b/mteb/descriptive_stats/Reranking/CMedQAv1-reranking.json new file mode 100644 index 0000000000..5dee6893ad --- /dev/null +++ b/mteb/descriptive_stats/Reranking/CMedQAv1-reranking.json @@ -0,0 +1,29 @@ +{ + "test": { + "number_of_characters": 10719709, + "num_samples": 101000, + "num_queries": 1000, + "num_documents": 100000, + "min_document_length": 3, + "average_document_length": 0.55717, + "max_document_length": 505, + "unique_documents": 100000, + "min_query_length": 5, + "average_query_length": 10663.992, + "max_query_length": 265, + "unique_queries": 1000, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.931, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 100000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/CMedQAv2-reranking.json b/mteb/descriptive_stats/Reranking/CMedQAv2-reranking.json new file mode 100644 index 0000000000..58a7b0efcd --- /dev/null +++ b/mteb/descriptive_stats/Reranking/CMedQAv2-reranking.json @@ -0,0 +1,29 @@ +{ + "test": { + "number_of_characters": 10110234, + "num_samples": 101000, + "num_queries": 1000, + "num_documents": 100000, + "min_document_length": 11, + "average_document_length": 0.48848, + "max_document_length": 153, + "unique_documents": 100000, + "min_query_length": 11, + "average_query_length": 10061.386, + "max_query_length": 264, + 
"unique_queries": 1000, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.91, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 100000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/ESCIReranking.json b/mteb/descriptive_stats/Reranking/ESCIReranking.json index 9c9556be9d..3d052c309e 100644 --- a/mteb/descriptive_stats/Reranking/ESCIReranking.json +++ b/mteb/descriptive_stats/Reranking/ESCIReranking.json @@ -1,75 +1,111 @@ { "test": { - "num_samples": 29285, - "number_of_characters": 254538331, - "num_positive": 271416, - "num_negative": 44235, + "number_of_characters": 117894609, + "num_samples": 158627, + "num_queries": 10395, + "num_documents": 148232, + "min_document_length": 1, + "average_document_length": 1.4161921852231636, + "max_document_length": 143, + "unique_documents": 148232, "min_query_length": 1, - "avg_query_length": 19.691890046098685, - "max_query_length": 151, - "unique_query": 29269, - "min_positive_length": 1, - "avg_positive_length": 803.9230995961918, - "max_positive_length": 8640, - "unique_positive": 217712, - "min_negative_length": 1, - "avg_negative_length": 808.501458121397, - "max_negative_length": 4441, - "unique_negative": 39551, + "average_query_length": 11321.277922077921, + "max_query_length": 8640, + "unique_queries": 10395, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 10.277825877825878, + "max_relevant_docs_per_query": 74, + "unique_relevant_docs": 148232, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": 
null, + "min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 14.25993265993266, + "max_top_ranked_per_query": 74, "hf_subset_descriptive_stats": { "us": { - "num_samples": 21296, - "number_of_characters": 186915609, - "num_positive": 189375, - "num_negative": 25463, + "number_of_characters": 74993786, + "num_samples": 93896, + "num_queries": 6694, + "num_documents": 87202, + "min_document_length": 1, + "average_document_length": 1.7313823077452353, + "max_document_length": 143, + "unique_documents": 87202, "min_query_length": 1, - "avg_query_length": 21.440833959429, - "max_query_length": 151, - "unique_query": 21296, - "min_positive_length": 1, - "avg_positive_length": 868.3698006600661, - "max_positive_length": 5545, - "unique_positive": 150734, - "min_negative_length": 1, - "avg_negative_length": 864.4493578918431, - "max_negative_length": 3779, - "unique_negative": 23073 + "average_query_length": 11180.58051986854, + "max_query_length": 4608, + "unique_queries": 6694, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 9.446519270988945, + "max_relevant_docs_per_query": 74, + "unique_relevant_docs": 87202, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 13.026889752016732, + "max_top_ranked_per_query": 74 }, "es": { - "num_samples": 3703, - "number_of_characters": 48861389, - "num_positive": 39110, - "num_negative": 10183, - "min_query_length": 3, - "avg_query_length": 20.681609505806104, - "max_query_length": 59, - "unique_query": 3703, - "min_positive_length": 1, - "avg_positive_length": 980.9613142418818, - "max_positive_length": 8640, - "unique_positive": 32921, - "min_negative_length": 1, - "avg_negative_length": 1023.2159481488756, - "max_negative_length": 4441, - "unique_negative": 9285 + "number_of_characters": 
32170187, + "num_samples": 33785, + "num_queries": 1851, + "num_documents": 31934, + "min_document_length": 3, + "average_document_length": 1.2324481743596167, + "max_document_length": 59, + "unique_documents": 31934, + "min_query_length": 1, + "average_query_length": 17358.633171258778, + "max_query_length": 8640, + "unique_queries": 1851, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 12.038357644516477, + "max_relevant_docs_per_query": 41, + "unique_relevant_docs": 31934, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 17.252296056185845, + "max_top_ranked_per_query": 41 }, "jp": { - "num_samples": 4286, - "number_of_characters": 18761333, - "num_positive": 42931, - "num_negative": 8589, + "number_of_characters": 10730636, + "num_samples": 30946, + "num_queries": 1850, + "num_documents": 29096, + "min_document_length": 1, + "average_document_length": 0.6732196865548529, + "max_document_length": 60, + "unique_documents": 29096, "min_query_length": 1, - "avg_query_length": 10.146756882874476, - "max_query_length": 60, - "unique_query": 4286, - "min_positive_length": 1, - "avg_positive_length": 358.35792317905475, - "max_positive_length": 3488, - "unique_positive": 35165, - "min_negative_length": 1, - "avg_negative_length": 388.075445337059, - "max_negative_length": 3940, - "unique_negative": 7289 + "average_query_length": 5789.755675675676, + "max_query_length": 3940, + "unique_queries": 1850, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 11.524324324324324, + "max_relevant_docs_per_query": 50, + "unique_relevant_docs": 29096, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 15.727567567567567, + "max_top_ranked_per_query": 50 } } } diff --git a/mteb/descriptive_stats/Reranking/InstructIR.json b/mteb/descriptive_stats/Reranking/InstructIR.json new file mode 100644 index 0000000000..a2a5bf6dce --- /dev/null +++ b/mteb/descriptive_stats/Reranking/InstructIR.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 10114013, + "num_samples": 25978, + "num_queries": 9906, + "num_documents": 16072, + "min_document_length": 24, + "average_document_length": 19.262008461921354, + "max_document_length": 40, + "unique_documents": 16072, + "min_query_length": 36, + "average_query_length": 989.7470220068645, + "max_query_length": 1700, + "unique_queries": 9906, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 9906, + "num_instructions": 9906, + "min_instruction_length": 146, + "average_instruction_length": 3063330, + "max_instruction_length": 616, + "unique_instructions": 9906, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/MMarcoReranking.json b/mteb/descriptive_stats/Reranking/MMarcoReranking.json new file mode 100644 index 0000000000..7658ad1fea --- /dev/null +++ b/mteb/descriptive_stats/Reranking/MMarcoReranking.json @@ -0,0 +1,29 @@ +{ + "dev": { + "number_of_characters": 12381331, + "num_samples": 100126, + "num_queries": 100, + "num_documents": 100026, + "min_document_length": 4, + "average_document_length": 0.011437026373142983, + "max_document_length": 61, + "unique_documents": 100026, + "min_query_length": 13, + "average_query_length": 123801.87, + "max_query_length": 803, + "unique_queries": 100, + "none_queries": 0, + "min_relevant_docs_per_query": 1000, + "average_relevant_docs_per_query": 1.07, + "max_relevant_docs_per_query": 1002, 
+ "unique_relevant_docs": 100026, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.26, + "max_top_ranked_per_query": 1002 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/NevIR.json b/mteb/descriptive_stats/Reranking/NevIR.json new file mode 100644 index 0000000000..4a5eddde01 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/NevIR.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 3829988, + "num_samples": 7878, + "num_queries": 2766, + "num_documents": 5112, + "min_document_length": 19, + "average_document_length": 36.754890453834115, + "max_document_length": 168, + "unique_documents": 5112, + "min_query_length": 95, + "average_query_length": 1316.737888647867, + "max_query_length": 1317, + "unique_queries": 2766, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2766, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 2.0, + "max_top_ranked_per_query": 2 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/RuBQReranking.json b/mteb/descriptive_stats/Reranking/RuBQReranking.json new file mode 100644 index 0000000000..d4f56b1f17 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/RuBQReranking.json @@ -0,0 +1,29 @@ +{ + "test": { + "number_of_characters": 17186357, + "num_samples": 38998, + "num_queries": 1551, + "num_documents": 37447, + "min_document_length": 13, + "average_document_length": 1.7734932037279354, + "max_document_length": 142, + "unique_documents": 37447, + "min_query_length": 1, + "average_query_length": 
11038.00451321728, + "max_query_length": 11010, + "unique_queries": 1551, + "none_queries": 0, + "min_relevant_docs_per_query": 10, + "average_relevant_docs_per_query": 1.6776273372018053, + "max_relevant_docs_per_query": 29, + "unique_relevant_docs": 37447, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 10, + "average_top_ranked_per_query": 24.143778207607994, + "max_top_ranked_per_query": 29 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/SciDocsRR.json b/mteb/descriptive_stats/Reranking/SciDocsRR.json new file mode 100644 index 0000000000..f706e7e175 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/SciDocsRR.json @@ -0,0 +1,29 @@ +{ + "test": { + "number_of_characters": 8761260, + "num_samples": 122578, + "num_queries": 3978, + "num_documents": 118600, + "min_document_length": 13, + "average_document_length": 2.343684654300169, + "max_document_length": 300, + "unique_documents": 118600, + "min_query_length": 8, + "average_query_length": 2132.5537958773252, + "max_query_length": 300, + "unique_queries": 3978, + "none_queries": 0, + "min_relevant_docs_per_query": 26, + "average_relevant_docs_per_query": 4.92684766214178, + "max_relevant_docs_per_query": 60, + "unique_relevant_docs": 118600, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 26, + "average_top_ranked_per_query": 29.813976872800403, + "max_top_ranked_per_query": 60 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/StackOverflowDupQuestions.json b/mteb/descriptive_stats/Reranking/StackOverflowDupQuestions.json new file mode 100644 index 0000000000..15d02e7ae8 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/StackOverflowDupQuestions.json @@ 
-0,0 +1,29 @@ +{ + "test": { + "number_of_characters": 4138870, + "num_samples": 92462, + "num_queries": 2992, + "num_documents": 89470, + "min_document_length": 13, + "average_document_length": 1.7777690846093663, + "max_document_length": 149, + "unique_documents": 89470, + "min_query_length": 10, + "average_query_length": 1330.1514037433155, + "max_query_length": 150, + "unique_queries": 2992, + "none_queries": 0, + "min_relevant_docs_per_query": 20, + "average_relevant_docs_per_query": 1.1587566844919786, + "max_relevant_docs_per_query": 30, + "unique_relevant_docs": 89470, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 20, + "average_top_ranked_per_query": 29.90307486631016, + "max_top_ranked_per_query": 30 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/SyntecReranking.json b/mteb/descriptive_stats/Reranking/SyntecReranking.json new file mode 100644 index 0000000000..6ce98070b7 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/SyntecReranking.json @@ -0,0 +1,29 @@ +{ + "test": { + "number_of_characters": 1753367, + "num_samples": 1117, + "num_queries": 100, + "num_documents": 1017, + "min_document_length": 18, + "average_document_length": 7.160275319567355, + "max_document_length": 175, + "unique_documents": 1017, + "min_query_length": 100, + "average_query_length": 17460.85, + "max_query_length": 6947, + "unique_queries": 100, + "none_queries": 0, + "min_relevant_docs_per_query": 10, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 11, + "unique_relevant_docs": 1017, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 10, + "average_top_ranked_per_query": 10.17, + "max_top_ranked_per_query": 11 + } +} \ No 
newline at end of file diff --git a/mteb/descriptive_stats/Reranking/T2Reranking.json b/mteb/descriptive_stats/Reranking/T2Reranking.json new file mode 100644 index 0000000000..f880ef6f79 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/T2Reranking.json @@ -0,0 +1,29 @@ +{ + "dev": { + "number_of_characters": 81980036, + "num_samples": 103330, + "num_queries": 5908, + "num_documents": 97422, + "min_document_length": 4, + "average_document_length": 0.6639465418488637, + "max_document_length": 29, + "unique_documents": 97422, + "min_query_length": 1, + "average_query_length": 13865.157921462423, + "max_query_length": 120026, + "unique_queries": 5908, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 7.522681110358835, + "max_relevant_docs_per_query": 335, + "unique_relevant_docs": 97422, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 16.489844278943806, + "max_top_ranked_per_query": 335 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json index 0506ff39e5..18eed2cfca 100644 --- a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json +++ b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json @@ -1,309 +1,462 @@ { "test": { - "num_samples": 24000, "number_of_characters": 83866932, - "num_positive": 24000, - "num_negative": 192000, - "min_query_length": 7, - "avg_query_length": 59.091208333333334, - "max_query_length": 180, - "unique_query": 23997, - "min_positive_length": 100, - "avg_positive_length": 385.45120833333334, - "max_positive_length": 3515, - "unique_positive": 23993, - "min_negative_length": 100, - "avg_negative_length": 381.23913541666667, - "max_negative_length": 
9461, - "unique_negative": 191783, + "num_samples": 240000, + "num_queries": 24000, + "num_documents": 216000, + "min_document_length": 7, + "average_document_length": 6.565689814814815, + "max_document_length": 180, + "unique_documents": 216000, + "min_query_length": 100, + "average_query_length": 3435.3642916666668, + "max_query_length": 9461, + "unique_queries": 24000, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 216000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9, "hf_subset_descriptive_stats": { "bg": { - "num_samples": 1500, "number_of_characters": 5145316, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 18, - "avg_query_length": 60.82666666666667, - "max_query_length": 166, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 375.88866666666667, - "max_positive_length": 2241, - "unique_positive": 1500, - "min_negative_length": 100, - "avg_negative_length": 374.18691666666666, - "max_negative_length": 4869, - "unique_negative": 11996 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 18, + "average_document_length": 6.758518518518518, + "max_document_length": 166, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3369.384, + "max_query_length": 4869, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": 
null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "bn": { - "num_samples": 1500, "number_of_characters": 5390581, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 7, - "avg_query_length": 47.266666666666666, - "max_query_length": 123, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 394.5946666666667, - "max_positive_length": 2338, - "unique_positive": 1499, - "min_negative_length": 100, - "avg_negative_length": 393.98241666666667, - "max_negative_length": 5104, - "unique_negative": 11996 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 7, + "average_document_length": 5.2518518518518515, + "max_document_length": 123, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3546.454, + "max_query_length": 5104, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "cs": { - "num_samples": 1500, "number_of_characters": 5079180, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 17, - "avg_query_length": 56.272, - "max_query_length": 137, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 383.8446666666667, - "max_positive_length": 2300, - "unique_positive": 1499, - "min_negative_length": 100, - "avg_negative_length": 368.2504166666667, - "max_negative_length": 3487, - "unique_negative": 11982 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 17, + "average_document_length": 
6.2524444444444445, + "max_document_length": 137, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3329.848, + "max_query_length": 3487, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "da": { - "num_samples": 1500, "number_of_characters": 4746132, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 17, - "avg_query_length": 56.75066666666667, - "max_query_length": 137, - "unique_query": 1499, - "min_positive_length": 100, - "avg_positive_length": 351.6813333333333, - "max_positive_length": 2159, - "unique_positive": 1500, - "min_negative_length": 100, - "avg_negative_length": 344.457, - "max_negative_length": 2563, - "unique_negative": 11972 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 17, + "average_document_length": 6.30562962962963, + "max_document_length": 137, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3107.3373333333334, + "max_query_length": 2563, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "de": { - "num_samples": 1500, "number_of_characters": 5483592, - "num_positive": 1500, - 
"num_negative": 12000, - "min_query_length": 20, - "avg_query_length": 70.004, - "max_query_length": 180, - "unique_query": 1499, - "min_positive_length": 100, - "avg_positive_length": 391.5366666666667, - "max_positive_length": 2674, - "unique_positive": 1500, - "min_negative_length": 100, - "avg_negative_length": 399.27341666666666, - "max_negative_length": 3083, - "unique_negative": 12000 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 20, + "average_document_length": 7.778222222222222, + "max_document_length": 180, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3585.724, + "max_query_length": 3083, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "en": { - "num_samples": 1500, "number_of_characters": 6217884, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 18, - "avg_query_length": 68.372, - "max_query_length": 162, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 451.72866666666664, - "max_positive_length": 3515, - "unique_positive": 1500, - "min_negative_length": 100, - "avg_negative_length": 453.14441666666664, - "max_negative_length": 3662, - "unique_negative": 12000 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 18, + "average_document_length": 7.596888888888889, + "max_document_length": 162, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 4076.884, + "max_query_length": 3662, + "unique_queries": 1500, + "none_queries": 0, + 
"min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "fa": { - "num_samples": 1500, "number_of_characters": 4732619, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 12, - "avg_query_length": 48.66733333333333, - "max_query_length": 119, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 347.704, - "max_positive_length": 2571, - "unique_positive": 1500, - "min_negative_length": 100, - "avg_negative_length": 344.8385, - "max_negative_length": 4707, - "unique_negative": 11978 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 12, + "average_document_length": 5.407481481481481, + "max_document_length": 119, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3106.412, + "max_query_length": 4707, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "fi": { - "num_samples": 1500, "number_of_characters": 5209132, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 14, - "avg_query_length": 55.343333333333334, - "max_query_length": 132, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 394.7126666666667, - "max_positive_length": 2129, - 
"unique_positive": 1498, - "min_negative_length": 100, - "avg_negative_length": 377.83733333333333, - "max_negative_length": 2574, - "unique_negative": 11972 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 14, + "average_document_length": 6.149259259259259, + "max_document_length": 132, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3417.4113333333335, + "max_query_length": 2574, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "hi": { - "num_samples": 1500, "number_of_characters": 5620959, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 13, - "avg_query_length": 50.77733333333333, - "max_query_length": 125, - "unique_query": 1499, - "min_positive_length": 100, - "avg_positive_length": 420.3786666666667, - "max_positive_length": 2361, - "unique_positive": 1500, - "min_negative_length": 100, - "avg_negative_length": 409.51875, - "max_negative_length": 5912, - "unique_negative": 11996 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 13, + "average_document_length": 5.641925925925926, + "max_document_length": 125, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3696.5286666666666, + "max_query_length": 5912, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "it": { - "num_samples": 1500, "number_of_characters": 5420496, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 23, - "avg_query_length": 70.05466666666666, - "max_query_length": 156, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 396.97333333333336, - "max_positive_length": 2082, - "unique_positive": 1500, - "min_negative_length": 100, - "avg_negative_length": 393.3295, - "max_negative_length": 9461, - "unique_negative": 11993 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 23, + "average_document_length": 7.783851851851852, + "max_document_length": 156, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3543.6093333333333, + "max_query_length": 9461, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "nl": { - "num_samples": 1500, "number_of_characters": 5169556, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 18, - "avg_query_length": 65.34466666666667, - "max_query_length": 136, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 380.79133333333334, - "max_positive_length": 1864, - "unique_positive": 1500, - "min_negative_length": 100, - "avg_negative_length": 375.02933333333334, - "max_negative_length": 3641, - "unique_negative": 11985 + "num_samples": 15000, + 
"num_queries": 1500, + "num_documents": 13500, + "min_document_length": 18, + "average_document_length": 7.260518518518518, + "max_document_length": 136, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3381.026, + "max_query_length": 3641, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "pt": { - "num_samples": 1500, "number_of_characters": 5474356, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 18, - "avg_query_length": 65.11933333333333, - "max_query_length": 176, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 404.01933333333335, - "max_positive_length": 3057, - "unique_positive": 1499, - "min_negative_length": 100, - "avg_negative_length": 397.554, - "max_negative_length": 2877, - "unique_negative": 11991 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 18, + "average_document_length": 7.235481481481481, + "max_document_length": 176, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3584.4513333333334, + "max_query_length": 3057, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 
}, "ro": { - "num_samples": 1500, "number_of_characters": 4796113, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 14, - "avg_query_length": 61.973333333333336, - "max_query_length": 169, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 346.70866666666666, - "max_positive_length": 1917, - "unique_positive": 1499, - "min_negative_length": 100, - "avg_negative_length": 348.5908333333333, - "max_negative_length": 4213, - "unique_negative": 11971 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 14, + "average_document_length": 6.885925925925926, + "max_document_length": 169, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3135.4353333333333, + "max_query_length": 4213, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "sr": { - "num_samples": 1500, "number_of_characters": 5271732, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 15, - "avg_query_length": 55.669333333333334, - "max_query_length": 146, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 386.34933333333333, - "max_positive_length": 2421, - "unique_positive": 1499, - "min_negative_length": 100, - "avg_negative_length": 384.0586666666667, - "max_negative_length": 3668, - "unique_negative": 11974 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 15, + "average_document_length": 6.185481481481482, + "max_document_length": 146, + "unique_documents": 13500, + 
"min_query_length": 100, + "average_query_length": 3458.8186666666666, + "max_query_length": 3668, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "no": { - "num_samples": 1500, "number_of_characters": 5036586, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 14, - "avg_query_length": 55.288, - "max_query_length": 129, - "unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 367.72, - "max_positive_length": 1450, - "unique_positive": 1500, - "min_negative_length": 100, - "avg_negative_length": 366.8395, - "max_negative_length": 2841, - "unique_negative": 11996 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 14, + "average_document_length": 6.143111111111111, + "max_document_length": 129, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3302.436, + "max_query_length": 2841, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 }, "sv": { - "num_samples": 1500, "number_of_characters": 5072698, - "num_positive": 1500, - "num_negative": 12000, - "min_query_length": 17, - "avg_query_length": 57.73, - "max_query_length": 133, - 
"unique_query": 1500, - "min_positive_length": 100, - "avg_positive_length": 372.58733333333333, - "max_positive_length": 2493, - "unique_positive": 1500, - "min_negative_length": 100, - "avg_negative_length": 368.93516666666665, - "max_negative_length": 3680, - "unique_negative": 11999 + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 17, + "average_document_length": 6.414444444444444, + "max_document_length": 133, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3324.0686666666666, + "max_query_length": 3680, + "unique_queries": 1500, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 13500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 9, + "average_top_ranked_per_query": 9.0, + "max_top_ranked_per_query": 9 } } } diff --git a/mteb/descriptive_stats/Retrieval/AILACasedocs.json b/mteb/descriptive_stats/Retrieval/AILACasedocs.json new file mode 100644 index 0000000000..dec3b677d4 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/AILACasedocs.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 5164499, + "num_samples": 236, + "num_queries": 50, + "num_documents": 186, + "min_document_length": 1174, + "average_document_length": 816.7795698924731, + "max_document_length": 5936, + "unique_documents": 186, + "min_query_length": 1014, + "average_query_length": 100251.56, + "max_query_length": 222891, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.9, + "max_relevant_docs_per_query": 22, + "unique_relevant_docs": 186, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": 
null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/AILAStatutes.json b/mteb/descriptive_stats/Retrieval/AILAStatutes.json new file mode 100644 index 0000000000..9ced1b695f --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/AILAStatutes.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 313841, + "num_samples": 132, + "num_queries": 50, + "num_documents": 82, + "min_document_length": 1174, + "average_document_length": 1852.6951219512196, + "max_document_length": 5936, + "unique_documents": 82, + "min_query_length": 164, + "average_query_length": 3238.4, + "max_query_length": 26039, + "unique_queries": 50, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 4.34, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 82, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/ARCChallenge.json b/mteb/descriptive_stats/Retrieval/ARCChallenge.json new file mode 100644 index 0000000000..1634911bd4 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/ARCChallenge.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 452856, + "num_samples": 10522, + "num_queries": 1172, + "num_documents": 9350, + "min_document_length": 13, + "average_document_length": 16.49144385026738, + "max_document_length": 831, + "unique_documents": 9350, + "min_query_length": 2, + "average_query_length": 254.830204778157, + "max_query_length": 193, + "unique_queries": 1172, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 1143, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json b/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json new file mode 100644 index 0000000000..2899e4e4d7 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 9355954, + "num_samples": 4872, + "num_queries": 2316, + "num_documents": 2556, + "min_document_length": 8, + "average_document_length": 154.68348982785602, + "max_document_length": 2863, + "unique_documents": 2556, + "min_query_length": 9, + "average_query_length": 3868.990932642487, + "max_query_length": 47930, + "unique_queries": 2316, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 988, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/AlphaNLI.json b/mteb/descriptive_stats/Retrieval/AlphaNLI.json new file mode 100644 index 0000000000..3c3751f9c7 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/AlphaNLI.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 10880076, + "num_samples": 242879, + "num_queries": 1532, + "num_documents": 241347, + "min_document_length": 51, + "average_document_length": 0.6541618499504862, + "max_document_length": 153, + "unique_documents": 241347, + "min_query_length": 2, + 
"average_query_length": 6998.822454308094, + "max_query_length": 185, + "unique_queries": 1532, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1532, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/ArguAna-PL.json b/mteb/descriptive_stats/Retrieval/ArguAna-PL.json new file mode 100644 index 0000000000..c72e0a2944 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/ArguAna-PL.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 10931281, + "num_samples": 10080, + "num_queries": 1406, + "num_documents": 8674, + "min_document_length": 229, + "average_document_length": 198.53262623933594, + "max_document_length": 5541, + "unique_documents": 8674, + "min_query_length": 3, + "average_query_length": 6549.935277382646, + "max_query_length": 6506, + "unique_queries": 1406, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1406, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/ArguAna.json b/mteb/descriptive_stats/Retrieval/ArguAna.json new file mode 100644 index 0000000000..5eb59cda5e --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/ArguAna.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 10613204, + "num_samples": 10080, + "num_queries": 1406, + 
"num_documents": 8674, + "min_document_length": 251, + "average_document_length": 193.33237260779342, + "max_document_length": 5500, + "unique_documents": 8674, + "min_query_length": 3, + "average_query_length": 6355.7887624466575, + "max_query_length": 6674, + "unique_queries": 1406, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1406, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/AutoRAGRetrieval.json b/mteb/descriptive_stats/Retrieval/AutoRAGRetrieval.json index 62c9124a48..9685cec463 100644 --- a/mteb/descriptive_stats/Retrieval/AutoRAGRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/AutoRAGRetrieval.json @@ -1,11 +1,28 @@ { "test": { - "number_of_characters": 894.2168128654971, + "number_of_characters": 601650, "num_samples": 834, "num_queries": 114, "num_documents": 720, - "average_document_length": 1.1452816358024691, - "average_query_length": 0.610649430594029, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 34, + "average_document_length": 11.022222222222222, + "max_document_length": 157, + "unique_documents": 720, + "min_query_length": 8, + "average_query_length": 5208.017543859649, + "max_query_length": 2485, + "unique_queries": 114, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 114, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/BSARDRetrieval.json b/mteb/descriptive_stats/Retrieval/BSARDRetrieval.json new file mode 100644 index 0000000000..155754d26c --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BSARDRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 19955744, + "num_samples": 22855, + "num_queries": 222, + "num_documents": 22633, + "min_document_length": 36, + "average_document_length": 1.4200061856581099, + "max_document_length": 293, + "unique_documents": 22633, + "min_query_length": 1, + "average_query_length": 89745.96846846846, + "max_query_length": 39566, + "unique_queries": 222, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 148, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackAndroidRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackAndroidRetrieval.json new file mode 100644 index 0000000000..5295fd2e3a --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackAndroidRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 13713141, + "num_samples": 23697, + "num_queries": 699, + "num_documents": 22998, + "min_document_length": 16, + "average_document_length": 1.57339768675537, + "max_document_length": 127, + "unique_documents": 22998, + "min_query_length": 57, + "average_query_length": 19566.460658082975, + "max_query_length": 27831, + "unique_queries": 699, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.4263233190271816, + "max_relevant_docs_per_query": 262, + 
"unique_relevant_docs": 1696, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackEnglishRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackEnglishRetrieval.json new file mode 100644 index 0000000000..fd788671ee --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackEnglishRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 19521569, + "num_samples": 41791, + "num_queries": 1570, + "num_documents": 40221, + "min_document_length": 15, + "average_document_length": 1.886526938663882, + "max_document_length": 149, + "unique_documents": 40221, + "min_query_length": 41, + "average_query_length": 12385.790445859873, + "max_query_length": 6511, + "unique_queries": 1570, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.3980891719745223, + "max_relevant_docs_per_query": 79, + "unique_relevant_docs": 3765, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackGamingRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackGamingRetrieval.json new file mode 100644 index 0000000000..607914d9b9 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackGamingRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 22263573, + "num_samples": 46896, + "num_queries": 1595, + "num_documents": 45301, + "min_document_length": 15, + "average_document_length": 1.7172247853248273, 
+ "max_document_length": 149, + "unique_documents": 45301, + "min_query_length": 46, + "average_query_length": 13909.580564263322, + "max_query_length": 28835, + "unique_queries": 1595, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.418808777429467, + "max_relevant_docs_per_query": 30, + "unique_relevant_docs": 2263, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackGisRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackGisRetrieval.json new file mode 100644 index 0000000000..9cd07a4019 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackGisRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 38178794, + "num_samples": 38522, + "num_queries": 885, + "num_documents": 37637, + "min_document_length": 15, + "average_document_length": 1.227435767994261, + "max_document_length": 140, + "unique_documents": 37637, + "min_query_length": 52, + "average_query_length": 43087.6802259887, + "max_query_length": 28938, + "unique_queries": 885, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.2587570621468926, + "max_relevant_docs_per_query": 22, + "unique_relevant_docs": 1114, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackMathematicaRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackMathematicaRetrieval.json new file mode 100644 index 
0000000000..9c821c8621 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackMathematicaRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 19325188, + "num_samples": 17509, + "num_queries": 804, + "num_documents": 16705, + "min_document_length": 15, + "average_document_length": 2.3537862915294823, + "max_document_length": 137, + "unique_documents": 16705, + "min_query_length": 75, + "average_query_length": 23987.398009950248, + "max_query_length": 28907, + "unique_queries": 804, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.6890547263681592, + "max_relevant_docs_per_query": 56, + "unique_relevant_docs": 1358, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackPhysicsRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackPhysicsRetrieval.json new file mode 100644 index 0000000000..0c7464758d --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackPhysicsRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 31461064, + "num_samples": 39355, + "num_queries": 1039, + "num_documents": 38316, + "min_document_length": 15, + "average_document_length": 1.4470717193861573, + "max_document_length": 149, + "unique_documents": 38316, + "min_query_length": 60, + "average_query_length": 30226.773820981714, + "max_query_length": 20177, + "unique_queries": 1039, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.8604427333974976, + "max_relevant_docs_per_query": 72, + "unique_relevant_docs": 1933, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": 
null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackProgrammersRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackProgrammersRetrieval.json new file mode 100644 index 0000000000..9be488355e --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackProgrammersRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 34048829, + "num_samples": 33052, + "num_queries": 876, + "num_documents": 32176, + "min_document_length": 15, + "average_document_length": 1.5023930880159124, + "max_document_length": 149, + "unique_documents": 32176, + "min_query_length": 61, + "average_query_length": 38813.34246575343, + "max_query_length": 21955, + "unique_queries": 876, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9121004566210045, + "max_relevant_docs_per_query": 149, + "unique_relevant_docs": 1675, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackStatsRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackStatsRetrieval.json new file mode 100644 index 0000000000..c95f3f7c46 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackStatsRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 44679836, + "num_samples": 42921, + "num_queries": 652, + "num_documents": 42269, + "min_document_length": 15, + "average_document_length": 0.8686981002626037, + "max_document_length": 138, + "unique_documents": 42269, + "min_query_length": 78, + "average_query_length": 68471.03834355828, + "max_query_length": 43874, + "unique_queries": 
652, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.4003067484662577, + "max_relevant_docs_per_query": 18, + "unique_relevant_docs": 913, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackTexRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackTexRetrieval.json new file mode 100644 index 0000000000..6f0c7b206b --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackTexRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 88645392, + "num_samples": 71090, + "num_queries": 2906, + "num_documents": 68184, + "min_document_length": 15, + "average_document_length": 2.0003813211310573, + "max_document_length": 133, + "unique_documents": 68184, + "min_query_length": 61, + "average_query_length": 30457.328974535445, + "max_query_length": 31204, + "unique_queries": 2906, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.7735719201651754, + "max_relevant_docs_per_query": 146, + "unique_relevant_docs": 5154, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackUnixRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackUnixRetrieval.json new file mode 100644 index 0000000000..2169ef7f37 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackUnixRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 47711333, + "num_samples": 48454, + 
"num_queries": 1072, + "num_documents": 47382, + "min_document_length": 15, + "average_document_length": 1.1385547254231565, + "max_document_length": 124, + "unique_documents": 47382, + "min_query_length": 56, + "average_query_length": 44456.51679104478, + "max_query_length": 32623, + "unique_queries": 1072, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.5792910447761195, + "max_relevant_docs_per_query": 22, + "unique_relevant_docs": 1693, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackWebmastersRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackWebmastersRetrieval.json new file mode 100644 index 0000000000..84cd5016ea --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackWebmastersRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 12355347, + "num_samples": 17911, + "num_queries": 506, + "num_documents": 17405, + "min_document_length": 15, + "average_document_length": 1.5098534903763285, + "max_document_length": 135, + "unique_documents": 17405, + "min_query_length": 49, + "average_query_length": 24365.747035573124, + "max_query_length": 24968, + "unique_queries": 506, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.7569169960474307, + "max_relevant_docs_per_query": 207, + "unique_relevant_docs": 1395, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Retrieval/CQADupstackWordpressRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackWordpressRetrieval.json new file mode 100644 index 0000000000..e4fe2a3fc6 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CQADupstackWordpressRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 54647154, + "num_samples": 49146, + "num_queries": 541, + "num_documents": 48605, + "min_document_length": 15, + "average_document_length": 0.5423516099166752, + "max_document_length": 121, + "unique_documents": 48605, + "min_query_length": 65, + "average_query_length": 100962.64879852126, + "max_query_length": 32392, + "unique_queries": 541, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.3752310536044363, + "max_relevant_docs_per_query": 62, + "unique_relevant_docs": 744, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/ClimateFEVER.json b/mteb/descriptive_stats/Retrieval/ClimateFEVER.json new file mode 100644 index 0000000000..0b68ede31f --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/ClimateFEVER.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 2921043175, + "num_samples": 5418128, + "num_queries": 1535, + "num_documents": 5416593, + "min_document_length": 26, + "average_document_length": 0.03496995251443112, + "max_document_length": 406, + "unique_documents": 5416593, + "min_query_length": 2, + "average_query_length": 1902836.3237785017, + "max_query_length": 374597, + "unique_queries": 1535, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.0495114006514656, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1344, + "num_instructions": null, + 
"min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.json b/mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.json new file mode 100644 index 0000000000..5a74183b2a --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 59222302, + "num_samples": 48416, + "num_queries": 1000, + "num_documents": 47416, + "min_document_length": 29, + "average_document_length": 2.570419267757719, + "max_document_length": 406, + "unique_documents": 47416, + "min_query_length": 2, + "average_query_length": 59100.423, + "max_query_length": 36320, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.048, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1042, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CmedqaRetrieval.json b/mteb/descriptive_stats/Retrieval/CmedqaRetrieval.json new file mode 100644 index 0000000000..3d6aef70b4 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CmedqaRetrieval.json @@ -0,0 +1,28 @@ +{ + "dev": { + "number_of_characters": 30971243, + "num_samples": 104000, + "num_queries": 3999, + "num_documents": 100001, + "min_document_length": 11, + "average_document_length": 1.938310616893831, + "max_document_length": 153, + "unique_documents": 100001, + "min_query_length": 1, + "average_query_length": 
7696.276569142286, + "max_query_length": 60975, + "unique_queries": 3999, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.86271567891973, + "max_relevant_docs_per_query": 19, + "unique_relevant_docs": 7321, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CovidRetrieval.json b/mteb/descriptive_stats/Retrieval/CovidRetrieval.json new file mode 100644 index 0000000000..86bc2cee79 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CovidRetrieval.json @@ -0,0 +1,28 @@ +{ + "dev": { + "number_of_characters": 33266467, + "num_samples": 100950, + "num_queries": 949, + "num_documents": 100001, + "min_document_length": 8, + "average_document_length": 0.24607753922460776, + "max_document_length": 91, + "unique_documents": 100001, + "min_query_length": 1, + "average_query_length": 35028.302423603796, + "max_query_length": 60975, + "unique_queries": 949, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0105374077976819, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 830, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT19.json b/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT19.json new file mode 100644 index 0000000000..e42129467e --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT19.json @@ -0,0 +1,82 @@ 
+{ + "test": { + "number_of_characters": 2662337, + "num_samples": 17676, + "num_queries": 2946, + "num_documents": 14730, + "min_document_length": 43, + "average_document_length": 29.88336727766463, + "max_document_length": 613, + "unique_documents": 14730, + "min_query_length": 38, + "average_query_length": 754.2956551255941, + "max_query_length": 648, + "unique_queries": 2946, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2946, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "deu-fra": { + "number_of_characters": 1311631, + "num_samples": 8838, + "num_queries": 1473, + "num_documents": 7365, + "min_document_length": 43, + "average_document_length": 30.591174473862864, + "max_document_length": 588, + "unique_documents": 7365, + "min_query_length": 38, + "average_query_length": 737.4928716904277, + "max_query_length": 648, + "unique_queries": 1473, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1473, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fra-deu": { + "number_of_characters": 1350706, + "num_samples": 8838, + "num_queries": 1473, + "num_documents": 7365, + "min_document_length": 44, + "average_document_length": 29.175560081466394, + "max_document_length": 613, + "unique_documents": 7365, + "min_query_length": 43, + "average_query_length": 771.0984385607603, + "max_query_length": 
627, + "unique_queries": 1473, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1473, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT21.json b/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT21.json new file mode 100644 index 0000000000..42807d55d3 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT21.json @@ -0,0 +1,82 @@ +{ + "test": { + "number_of_characters": 1881809, + "num_samples": 10716, + "num_queries": 1786, + "num_documents": 8930, + "min_document_length": 40, + "average_document_length": 34.872228443449046, + "max_document_length": 567, + "unique_documents": 8930, + "min_query_length": 40, + "average_query_length": 879.2833146696529, + "max_query_length": 572, + "unique_queries": 1786, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1786, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "deu-fra": { + "number_of_characters": 944833, + "num_samples": 5358, + "num_queries": 893, + "num_documents": 4465, + "min_document_length": 40, + "average_document_length": 34.34602463605823, + "max_document_length": 543, + "unique_documents": 4465, + "min_query_length": 45, + "average_query_length": 886.3135498320269, + 
"max_query_length": 567, + "unique_queries": 893, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 893, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fra-deu": { + "number_of_characters": 936976, + "num_samples": 5358, + "num_queries": 893, + "num_documents": 4465, + "min_document_length": 50, + "average_document_length": 35.39843225083987, + "max_document_length": 567, + "unique_documents": 4465, + "min_query_length": 40, + "average_query_length": 872.2530795072788, + "max_query_length": 572, + "unique_queries": 893, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 893, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/DBPedia-PL.json b/mteb/descriptive_stats/Retrieval/DBPedia-PL.json new file mode 100644 index 0000000000..3d86e72c82 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/DBPedia-PL.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 1449670678, + "num_samples": 4636322, + "num_queries": 400, + "num_documents": 4635922, + "min_document_length": 2, + "average_document_length": 0.003058722730882875, + "max_document_length": 90, + "unique_documents": 4635922, + "min_query_length": 7, + "average_query_length": 3624141.245, + "max_query_length": 42899, + "unique_queries": 400, + "min_relevant_docs_per_query": 21, + 
"average_relevant_docs_per_query": 38.215, + "max_relevant_docs_per_query": 1499, + "unique_relevant_docs": 40724, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/DBPedia-PLHardNegatives.json b/mteb/descriptive_stats/Retrieval/DBPedia-PLHardNegatives.json new file mode 100644 index 0000000000..26ce134325 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/DBPedia-PLHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 32284954, + "num_samples": 88942, + "num_queries": 400, + "num_documents": 88542, + "min_document_length": 2, + "average_document_length": 0.16014998531770233, + "max_document_length": 90, + "unique_documents": 88542, + "min_query_length": 7, + "average_query_length": 80676.935, + "max_query_length": 12359, + "unique_queries": 400, + "min_relevant_docs_per_query": 21, + "average_relevant_docs_per_query": 38.215, + "max_relevant_docs_per_query": 1499, + "unique_relevant_docs": 40724, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/DBPedia.json b/mteb/descriptive_stats/Retrieval/DBPedia.json new file mode 100644 index 0000000000..3eebad06db --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/DBPedia.json @@ -0,0 +1,54 @@ +{ + "dev": { + "number_of_characters": 1437939026, + "num_samples": 4635989, + "num_queries": 67, + "num_documents": 4635922, + "min_document_length": 5, + "average_document_length": 
0.0004038031701137336, + "max_document_length": 79, + "unique_documents": 4635922, + "min_query_length": 8, + "average_query_length": 21461748.56716418, + "max_query_length": 42899, + "unique_queries": 67, + "min_relevant_docs_per_query": 36, + "average_relevant_docs_per_query": 20.970149253731343, + "max_relevant_docs_per_query": 164, + "unique_relevant_docs": 5658, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test": { + "number_of_characters": 1437950788, + "num_samples": 4636322, + "num_queries": 400, + "num_documents": 4635922, + "min_document_length": 6, + "average_document_length": 0.0029409468062663695, + "max_document_length": 88, + "unique_documents": 4635922, + "min_query_length": 8, + "average_query_length": 3594842.885, + "max_query_length": 42899, + "unique_queries": 400, + "min_relevant_docs_per_query": 21, + "average_relevant_docs_per_query": 38.215, + "max_relevant_docs_per_query": 1499, + "unique_relevant_docs": 40724, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.json b/mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.json new file mode 100644 index 0000000000..bd2425dbb4 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 30600110, + "num_samples": 90470, + "num_queries": 400, + "num_documents": 90070, + "min_document_length": 6, + "average_document_length": 0.15137115576773622, + 
"max_document_length": 88, + "unique_documents": 90070, + "min_query_length": 8, + "average_query_length": 76466.19, + "max_query_length": 5857, + "unique_queries": 400, + "min_relevant_docs_per_query": 21, + "average_relevant_docs_per_query": 38.215, + "max_relevant_docs_per_query": 1499, + "unique_relevant_docs": 40724, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/DanFeverRetrieval.json b/mteb/descriptive_stats/Retrieval/DanFeverRetrieval.json new file mode 100644 index 0000000000..ee7b5d859f --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/DanFeverRetrieval.json @@ -0,0 +1,28 @@ +{ + "train": { + "number_of_characters": 1108138, + "num_samples": 8897, + "num_queries": 6373, + "num_documents": 2524, + "min_document_length": 11, + "average_document_length": 126.92868462757528, + "max_document_length": 188, + "unique_documents": 2524, + "min_query_length": 28, + "average_query_length": 123.61054448454416, + "max_query_length": 1748, + "unique_queries": 6373, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 0.48721167425074535, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 2524, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/DuRetrieval.json b/mteb/descriptive_stats/Retrieval/DuRetrieval.json new file mode 100644 index 0000000000..dcf728482f --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/DuRetrieval.json @@ 
-0,0 +1,28 @@ +{ + "dev": { + "number_of_characters": 33151109, + "num_samples": 102001, + "num_queries": 2000, + "num_documents": 100001, + "min_document_length": 3, + "average_document_length": 0.1857781422185778, + "max_document_length": 55, + "unique_documents": 100001, + "min_query_length": 1, + "average_query_length": 16566.2655, + "max_query_length": 60975, + "unique_queries": 2000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 4.9195, + "max_relevant_docs_per_query": 31, + "unique_relevant_docs": 9792, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/EcomRetrieval.json b/mteb/descriptive_stats/Retrieval/EcomRetrieval.json new file mode 100644 index 0000000000..7267b3e790 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/EcomRetrieval.json @@ -0,0 +1,28 @@ +{ + "dev": { + "number_of_characters": 3334588, + "num_samples": 101902, + "num_queries": 1000, + "num_documents": 100902, + "min_document_length": 3, + "average_document_length": 0.06737230183742642, + "max_document_length": 34, + "unique_documents": 100902, + "min_query_length": 2, + "average_query_length": 3327.79, + "max_query_length": 121, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/EstQA.json 
b/mteb/descriptive_stats/Retrieval/EstQA.json new file mode 100644 index 0000000000..5b9ed73cf2 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/EstQA.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 128536, + "num_samples": 724, + "num_queries": 603, + "num_documents": 121, + "min_document_length": 19, + "average_document_length": 275.68595041322317, + "max_document_length": 115, + "unique_documents": 121, + "min_query_length": 510, + "average_query_length": 157.8407960199005, + "max_query_length": 2725, + "unique_queries": 603, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 121, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/FEVERHardNegatives.json b/mteb/descriptive_stats/Retrieval/FEVERHardNegatives.json new file mode 100644 index 0000000000..1f5a27ff36 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/FEVERHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 114054968, + "num_samples": 164698, + "num_queries": 1000, + "num_documents": 163698, + "min_document_length": 15, + "average_document_length": 0.30311915845031706, + "max_document_length": 172, + "unique_documents": 163698, + "min_query_length": 2, + "average_query_length": 114005.348, + "max_query_length": 29033, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.171, + "max_relevant_docs_per_query": 15, + "unique_relevant_docs": 677, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/FQuADRetrieval.json b/mteb/descriptive_stats/Retrieval/FQuADRetrieval.json new file mode 100644 index 0000000000..fe47d8b370 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/FQuADRetrieval.json @@ -0,0 +1,54 @@ +{ + "test": { + "number_of_characters": 257749, + "num_samples": 669, + "num_queries": 400, + "num_documents": 269, + "min_document_length": 18, + "average_document_length": 87.0185873605948, + "max_document_length": 169, + "unique_documents": 269, + "min_query_length": 501, + "average_query_length": 585.8525, + "max_query_length": 1854, + "unique_queries": 400, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 269, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "validation": { + "number_of_characters": 89794, + "num_samples": 197, + "num_queries": 100, + "num_documents": 97, + "min_document_length": 18, + "average_document_length": 55.8041237113402, + "max_document_length": 107, + "unique_documents": 97, + "min_query_length": 501, + "average_query_length": 843.81, + "max_query_length": 1815, + "unique_queries": 100, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 97, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No 
newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/FaithDial.json b/mteb/descriptive_stats/Retrieval/FaithDial.json new file mode 100644 index 0000000000..5ba596ba3a --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/FaithDial.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 507681, + "num_samples": 5581, + "num_queries": 2042, + "num_documents": 3539, + "min_document_length": 1, + "average_document_length": 2.842610907035886, + "max_document_length": 9, + "unique_documents": 3539, + "min_query_length": 24, + "average_query_length": 243.69294809010773, + "max_query_length": 471, + "unique_queries": 2042, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2042, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/FeedbackQARetrieval.json b/mteb/descriptive_stats/Retrieval/FeedbackQARetrieval.json new file mode 100644 index 0000000000..59a04e8600 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/FeedbackQARetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 2923673, + "num_samples": 4356, + "num_queries": 1992, + "num_documents": 2364, + "min_document_length": 17, + "average_document_length": 60.949661590524535, + "max_document_length": 206, + "unique_documents": 2364, + "min_query_length": 118, + "average_query_length": 1395.375502008032, + "max_query_length": 15869, + "unique_queries": 1992, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1230, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/FiQA-PL.json b/mteb/descriptive_stats/Retrieval/FiQA-PL.json new file mode 100644 index 0000000000..bfe7debd7c --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/FiQA-PL.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 45938883, + "num_samples": 58286, + "num_queries": 648, + "num_documents": 57638, + "min_document_length": 14, + "average_document_length": 0.7870675595960998, + "max_document_length": 185, + "unique_documents": 57638, + "min_query_length": 1, + "average_query_length": 70823.33024691358, + "max_query_length": 16991, + "unique_queries": 648, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.632716049382716, + "max_relevant_docs_per_query": 15, + "unique_relevant_docs": 1706, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/FiQA2018.json b/mteb/descriptive_stats/Retrieval/FiQA2018.json new file mode 100644 index 0000000000..e7f77ea43a --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/FiQA2018.json @@ -0,0 +1,80 @@ +{ + "train": { + "number_of_characters": 44616372, + "num_samples": 63138, + "num_queries": 5500, + "num_documents": 57638, + "min_document_length": 14, + "average_document_length": 5.868298691835248, + "max_document_length": 158, + "unique_documents": 57638, + "min_query_length": 1, + "average_query_length": 8050.57, + "max_query_length": 16991, + "unique_queries": 5500, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 2.5756363636363635, + "max_relevant_docs_per_query": 23, + "unique_relevant_docs": 14166, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "dev": { + "number_of_characters": 44309513, + "num_samples": 58138, + "num_queries": 500, + "num_documents": 57638, + "min_document_length": 15, + "average_document_length": 0.5443977931225927, + "max_document_length": 166, + "unique_documents": 57638, + "min_query_length": 1, + "average_query_length": 88556.27, + "max_query_length": 16991, + "unique_queries": 500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.476, + "max_relevant_docs_per_query": 20, + "unique_relevant_docs": 1238, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test": { + "number_of_characters": 44318767, + "num_samples": 58286, + "num_queries": 648, + "num_documents": 57638, + "min_document_length": 16, + "average_document_length": 0.7049515944342274, + "max_document_length": 147, + "unique_documents": 57638, + "min_query_length": 1, + "average_query_length": 68330.45524691358, + "max_query_length": 16991, + "unique_queries": 648, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.632716049382716, + "max_relevant_docs_per_query": 15, + "unique_relevant_docs": 1706, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/GeorgianFAQRetrieval.json b/mteb/descriptive_stats/Retrieval/GeorgianFAQRetrieval.json new file mode 100644 index 0000000000..b2cd429220 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/GeorgianFAQRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 1470108, + "num_samples": 5131, + "num_queries": 2565, + "num_documents": 2566, + "min_document_length": 16, + "average_document_length": 61.67147310989868, + "max_document_length": 1089, + "unique_documents": 2566, + "min_query_length": 45, + "average_query_length": 511.44600389863547, + "max_query_length": 4847, + "unique_queries": 2565, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0003898635477584, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2566, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/GerDaLIR.json b/mteb/descriptive_stats/Retrieval/GerDaLIR.json new file mode 100644 index 0000000000..db7989c665 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/GerDaLIR.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 2047828528, + "num_samples": 143743, + "num_queries": 12298, + "num_documents": 131445, + "min_document_length": 3, + "average_document_length": 96.11887101068888, + "max_document_length": 23560, + "unique_documents": 131445, + "min_query_length": 8, + "average_query_length": 165489.85062611807, + "max_query_length": 1170783, + "unique_queries": 12298, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1704342169458448, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 
10025, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/GerDaLIRSmall.json b/mteb/descriptive_stats/Retrieval/GerDaLIRSmall.json new file mode 100644 index 0000000000..9423b5ce57 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/GerDaLIRSmall.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 209081381, + "num_samples": 22203, + "num_queries": 12234, + "num_documents": 9969, + "min_document_length": 150, + "average_document_length": 1265.3312268030895, + "max_document_length": 23560, + "unique_documents": 9969, + "min_query_length": 151, + "average_query_length": 16059.121628249142, + "max_query_length": 427235, + "unique_queries": 12234, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1705084191597188, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 9969, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/GermanDPR.json b/mteb/descriptive_stats/Retrieval/GermanDPR.json new file mode 100644 index 0000000000..e26b779a64 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/GermanDPR.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 3084969, + "num_samples": 3901, + "num_queries": 1025, + "num_documents": 2876, + "min_document_length": 16, + "average_document_length": 22.946453407510432, + "max_document_length": 133, + "unique_documents": 2876, + "min_query_length": 456, + "average_query_length": 
2945.3414634146343, + "max_query_length": 2446, + "unique_queries": 1025, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 192, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/GermanGovServiceRetrieval.json b/mteb/descriptive_stats/Retrieval/GermanGovServiceRetrieval.json new file mode 100644 index 0000000000..47ffa53fab --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/GermanGovServiceRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 151500, + "num_samples": 461, + "num_queries": 356, + "num_documents": 105, + "min_document_length": 17, + "average_document_length": 231.16190476190476, + "max_document_length": 172, + "unique_documents": 105, + "min_query_length": 589, + "average_query_length": 357.3820224719101, + "max_query_length": 1493, + "unique_queries": 356, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 105, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/GermanQuAD-Retrieval.json b/mteb/descriptive_stats/Retrieval/GermanQuAD-Retrieval.json new file mode 100644 index 0000000000..cf2db0ed7c --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/GermanQuAD-Retrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 1045149, + 
"num_samples": 2678, + "num_queries": 2204, + "num_documents": 474, + "min_document_length": 15, + "average_document_length": 263.86497890295357, + "max_document_length": 130, + "unique_documents": 474, + "min_query_length": 507, + "average_query_length": 417.4578039927405, + "max_query_length": 11647, + "unique_queries": 2204, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 474, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/GreekCivicsQA.json b/mteb/descriptive_stats/Retrieval/GreekCivicsQA.json new file mode 100644 index 0000000000..fb179201f5 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/GreekCivicsQA.json @@ -0,0 +1,28 @@ +{ + "default": { + "number_of_characters": 468846, + "num_samples": 814, + "num_queries": 407, + "num_documents": 407, + "min_document_length": 18, + "average_document_length": 77.06142506142506, + "max_document_length": 313, + "unique_documents": 407, + "min_query_length": 110, + "average_query_length": 1074.894348894349, + "max_query_length": 5057, + "unique_queries": 407, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 407, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/HagridRetrieval.json 
b/mteb/descriptive_stats/Retrieval/HagridRetrieval.json new file mode 100644 index 0000000000..8ed81e0f75 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/HagridRetrieval.json @@ -0,0 +1,28 @@ +{ + "dev": { + "number_of_characters": 133142, + "num_samples": 992, + "num_queries": 496, + "num_documents": 496, + "min_document_length": 16, + "average_document_length": 40.064516129032256, + "max_document_length": 122, + "unique_documents": 496, + "min_query_length": 27, + "average_query_length": 228.36693548387098, + "max_query_length": 1205, + "unique_queries": 496, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 496, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/HellaSwag.json b/mteb/descriptive_stats/Retrieval/HellaSwag.json new file mode 100644 index 0000000000..eb7ac0ecdb --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/HellaSwag.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 29811884, + "num_samples": 209204, + "num_queries": 10042, + "num_documents": 199162, + "min_document_length": 30, + "average_document_length": 11.32141673612436, + "max_document_length": 499, + "unique_documents": 199162, + "min_query_length": 2, + "average_query_length": 2744.1832304321847, + "max_query_length": 506, + "unique_queries": 10042, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 10012, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/HotpotQA-PL.json b/mteb/descriptive_stats/Retrieval/HotpotQA-PL.json new file mode 100644 index 0000000000..1727a39137 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/HotpotQA-PL.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 1535470621, + "num_samples": 5240734, + "num_queries": 7405, + "num_documents": 5233329, + "min_document_length": 24, + "average_document_length": 0.13391361406859764, + "max_document_length": 293, + "unique_documents": 5233329, + "min_query_length": 7, + "average_query_length": 207261.28386225522, + "max_query_length": 9292, + "unique_queries": 7405, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 13783, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/HotpotQA-PLHardNegatives.json b/mteb/descriptive_stats/Retrieval/HotpotQA-PLHardNegatives.json new file mode 100644 index 0000000000..5d501ccdfe --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/HotpotQA-PLHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 93585678, + "num_samples": 213774, + "num_queries": 1000, + "num_documents": 212774, + "min_document_length": 32, + "average_document_length": 0.4472397943357741, + "max_document_length": 268, + "unique_documents": 212774, + "min_query_length": 11, + "average_query_length": 93490.517, + "max_query_length": 6315, + "unique_queries": 1000, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, 
+ "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1975, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.json b/mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.json new file mode 100644 index 0000000000..20a1e4dd0d --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 84600920, + "num_samples": 226621, + "num_queries": 1000, + "num_documents": 225621, + "min_document_length": 34, + "average_document_length": 0.41035187327420763, + "max_document_length": 288, + "unique_documents": 225621, + "min_query_length": 9, + "average_query_length": 84508.336, + "max_query_length": 3463, + "unique_queries": 1000, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1975, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/HunSum2AbstractiveRetrieval.json b/mteb/descriptive_stats/Retrieval/HunSum2AbstractiveRetrieval.json new file mode 100644 index 0000000000..72b908be1b --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/HunSum2AbstractiveRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 5321531, + "num_samples": 3996, + "num_queries": 1998, + "num_documents": 1998, + "min_document_length": 30, + "average_document_length": 201.2112112112112, + 
"max_document_length": 748, + "unique_documents": 1998, + "min_query_length": 309, + "average_query_length": 2462.2177177177177, + "max_query_length": 14850, + "unique_queries": 1998, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1998, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/JaGovFaqsRetrieval.json b/mteb/descriptive_stats/Retrieval/JaGovFaqsRetrieval.json new file mode 100644 index 0000000000..7c008ace3b --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/JaGovFaqsRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 4909152, + "num_samples": 24842, + "num_queries": 2048, + "num_documents": 22794, + "min_document_length": 5, + "average_document_length": 5.344345003070984, + "max_document_length": 597, + "unique_documents": 22794, + "min_query_length": 2, + "average_query_length": 2337.56494140625, + "max_query_length": 4922, + "unique_queries": 2048, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2048, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/JaQuADRetrieval.json b/mteb/descriptive_stats/Retrieval/JaQuADRetrieval.json new file mode 100644 index 0000000000..9a427835ae --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/JaQuADRetrieval.json 
@@ -0,0 +1,28 @@ +{ + "validation": { + "number_of_characters": 508977, + "num_samples": 5062, + "num_queries": 2048, + "num_documents": 3014, + "min_document_length": 8, + "average_document_length": 20.946250829462507, + "max_document_length": 146, + "unique_documents": 3014, + "min_query_length": 1, + "average_query_length": 217.69775390625, + "max_query_length": 1284, + "unique_queries": 2048, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 3014, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Ko-StrategyQA.json b/mteb/descriptive_stats/Retrieval/Ko-StrategyQA.json new file mode 100644 index 0000000000..bfb2c3a8b8 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/Ko-StrategyQA.json @@ -0,0 +1,28 @@ +{ + "dev": { + "number_of_characters": 2976191, + "num_samples": 9843, + "num_queries": 592, + "num_documents": 9251, + "min_document_length": 10, + "average_document_length": 1.4560588044535725, + "max_document_length": 60, + "unique_documents": 9251, + "min_query_length": 29, + "average_query_length": 5004.596283783784, + "max_query_length": 5016, + "unique_queries": 592, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9341216216216217, + "max_relevant_docs_per_query": 7, + "unique_relevant_docs": 1077, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Retrieval/LEMBNarrativeQARetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBNarrativeQARetrieval.json new file mode 100644 index 0000000000..6b41aa6a11 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LEMBNarrativeQARetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 116497954, + "num_samples": 10804, + "num_queries": 10449, + "num_documents": 355, + "min_document_length": 10, + "average_document_length": 1409.718309859155, + "max_document_length": 1220, + "unique_documents": 355, + "min_query_length": 21216, + "average_query_length": 11101.301942769644, + "max_query_length": 1874086, + "unique_queries": 10449, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 355, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/LEMBNeedleRetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBNeedleRetrieval.json new file mode 100644 index 0000000000..c71b8e094d --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LEMBNeedleRetrieval.json @@ -0,0 +1,210 @@ +{ + "test_256": { + "number_of_characters": 104346, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 25, + "average_document_length": 30.24, + "max_document_length": 83, + "unique_documents": 100, + "min_query_length": 954, + "average_query_length": 2026.44, + "max_query_length": 1092, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": 
null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_512": { + "number_of_characters": 203861, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 25, + "average_document_length": 28.65, + "max_document_length": 82, + "unique_documents": 100, + "min_query_length": 1951, + "average_query_length": 4019.92, + "max_query_length": 2089, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_1024": { + "number_of_characters": 409904, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 26, + "average_document_length": 29.14, + "max_document_length": 82, + "unique_documents": 100, + "min_query_length": 4011, + "average_query_length": 8139.8, + "max_query_length": 4149, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_2048": { + "number_of_characters": 848378, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 25, + "average_document_length": 29.96, + "max_document_length": 83, + "unique_documents": 100, + 
"min_query_length": 8395, + "average_query_length": 16907.64, + "max_query_length": 8533, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_4096": { + "number_of_characters": 1742373, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 25, + "average_document_length": 27.93, + "max_document_length": 83, + "unique_documents": 100, + "min_query_length": 17337, + "average_query_length": 34791.6, + "max_query_length": 17475, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_8192": { + "number_of_characters": 3523362, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 27, + "average_document_length": 29.8, + "max_document_length": 82, + "unique_documents": 100, + "min_query_length": 35145, + "average_query_length": 70407.64, + "max_query_length": 35283, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_16384": { + "number_of_characters": 7208436, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 26, + "average_document_length": 29.56, + "max_document_length": 81, + "unique_documents": 100, + "min_query_length": 71996, + "average_query_length": 144109.6, + "max_query_length": 72134, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_32768": { + "number_of_characters": 14179897, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 26, + "average_document_length": 29.17, + "max_document_length": 82, + "unique_documents": 100, + "min_query_length": 141711, + "average_query_length": 283539.6, + "max_query_length": 141849, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/LEMBPasskeyRetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBPasskeyRetrieval.json new file mode 100644 index 0000000000..b91392a9de --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LEMBPasskeyRetrieval.json @@ -0,0 +1,210 @@ +{ + 
"test_256": { + "number_of_characters": 89529, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 35, + "average_document_length": 19.05, + "max_document_length": 45, + "unique_documents": 100, + "min_query_length": 867, + "average_query_length": 1752.48, + "max_query_length": 891, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_512": { + "number_of_characters": 180408, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 34, + "average_document_length": 18.88, + "max_document_length": 42, + "unique_documents": 100, + "min_query_length": 1776, + "average_query_length": 3570.4, + "max_query_length": 1800, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_1024": { + "number_of_characters": 362602, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 33, + "average_document_length": 18.84, + "max_document_length": 42, + "unique_documents": 100, + "min_query_length": 3598, + "average_query_length": 7214.36, + "max_query_length": 3622, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_2048": { + "number_of_characters": 726110, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 35, + "average_document_length": 18.9, + "max_document_length": 42, + "unique_documents": 100, + "min_query_length": 7233, + "average_query_length": 14484.4, + "max_query_length": 7257, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_4096": { + "number_of_characters": 1453698, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 34, + "average_document_length": 18.82, + "max_document_length": 42, + "unique_documents": 100, + "min_query_length": 14509, + "average_query_length": 29036.32, + "max_query_length": 14533, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_8192": { + "number_of_characters": 2908993, + "num_samples": 150, + "num_queries": 
50, + "num_documents": 100, + "min_document_length": 33, + "average_document_length": 18.77, + "max_document_length": 41, + "unique_documents": 100, + "min_query_length": 29062, + "average_query_length": 58142.32, + "max_query_length": 29086, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_16384": { + "number_of_characters": 5819422, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 34, + "average_document_length": 19.06, + "max_document_length": 45, + "unique_documents": 100, + "min_query_length": 58166, + "average_query_length": 116350.32, + "max_query_length": 58190, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test_32768": { + "number_of_characters": 11639903, + "num_samples": 150, + "num_queries": 50, + "num_documents": 100, + "min_document_length": 33, + "average_document_length": 18.87, + "max_document_length": 45, + "unique_documents": 100, + "min_query_length": 116371, + "average_query_length": 232760.32, + "max_query_length": 116395, + "unique_queries": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/LEMBQMSumRetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBQMSumRetrieval.json new file mode 100644 index 0000000000..5ebf7b80c9 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LEMBQMSumRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 11169115, + "num_samples": 1724, + "num_queries": 1527, + "num_documents": 197, + "min_document_length": 84, + "average_document_length": 3360.1979695431473, + "max_document_length": 1574, + "unique_documents": 197, + "min_query_length": 6428, + "average_query_length": 6880.914210870988, + "max_query_length": 147260, + "unique_queries": 1527, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 197, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/LEMBSummScreenFDRetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBSummScreenFDRetrieval.json new file mode 100644 index 0000000000..2b1e0b5102 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LEMBSummScreenFDRetrieval.json @@ -0,0 +1,28 @@ +{ + "validation": { + "number_of_characters": 10565795, + "num_samples": 672, + "num_queries": 336, + "num_documents": 336, + "min_document_length": 151, + "average_document_length": 591.4910714285714, + "max_document_length": 2495, + "unique_documents": 336, + 
"min_query_length": 8768, + "average_query_length": 30854.32738095238, + "max_query_length": 91515, + "unique_queries": 336, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 336, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/LEMBWikimQARetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBWikimQARetrieval.json new file mode 100644 index 0000000000..52ef998c12 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LEMBWikimQARetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 11253952, + "num_samples": 600, + "num_queries": 300, + "num_documents": 300, + "min_document_length": 33, + "average_document_length": 67.57, + "max_document_length": 129, + "unique_documents": 300, + "min_query_length": 5796, + "average_query_length": 37445.60333333333, + "max_query_length": 75837, + "unique_queries": 300, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 300, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/LeCaRDv2.json b/mteb/descriptive_stats/Retrieval/LeCaRDv2.json new file mode 100644 index 0000000000..4a440f1f0a --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LeCaRDv2.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 28129613, + "num_samples": 
3954, + "num_queries": 159, + "num_documents": 3795, + "min_document_length": 556, + "average_document_length": 178.45876152832673, + "max_document_length": 34790, + "unique_documents": 3795, + "min_query_length": 967, + "average_query_length": 172656.36477987422, + "max_query_length": 168523, + "unique_queries": 159, + "min_relevant_docs_per_query": 4, + "average_relevant_docs_per_query": 24.50314465408805, + "max_relevant_docs_per_query": 30, + "unique_relevant_docs": 3795, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/LegalBenchConsumerContractsQA.json b/mteb/descriptive_stats/Retrieval/LegalBenchConsumerContractsQA.json new file mode 100644 index 0000000000..f02b1ea887 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LegalBenchConsumerContractsQA.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 459605, + "num_samples": 550, + "num_queries": 396, + "num_documents": 154, + "min_document_length": 24, + "average_document_length": 237.62337662337663, + "max_document_length": 258, + "unique_documents": 154, + "min_query_length": 613, + "average_query_length": 1068.209595959596, + "max_query_length": 8095, + "unique_queries": 396, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 154, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Retrieval/LegalBenchCorporateLobbying.json b/mteb/descriptive_stats/Retrieval/LegalBenchCorporateLobbying.json new file mode 100644 index 0000000000..03d2936b45 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LegalBenchCorporateLobbying.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 429952, + "num_samples": 659, + "num_queries": 340, + "num_documents": 319, + "min_document_length": 41, + "average_document_length": 189.58934169278996, + "max_document_length": 733, + "unique_documents": 319, + "min_query_length": 137, + "average_query_length": 1086.6852941176471, + "max_query_length": 11451, + "unique_queries": 340, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 319, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/LegalQuAD.json b/mteb/descriptive_stats/Retrieval/LegalQuAD.json new file mode 100644 index 0000000000..a31b9a3adf --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LegalQuAD.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 3910984, + "num_samples": 400, + "num_queries": 200, + "num_documents": 200, + "min_document_length": 22, + "average_document_length": 71.965, + "max_document_length": 119, + "unique_documents": 200, + "min_query_length": 769, + "average_query_length": 19482.955, + "max_query_length": 94998, + "unique_queries": 200, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/LegalSummarization.json b/mteb/descriptive_stats/Retrieval/LegalSummarization.json new file mode 100644 index 0000000000..573c1e4f61 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LegalSummarization.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 295244, + "num_samples": 722, + "num_queries": 284, + "num_documents": 438, + "min_document_length": 17, + "average_document_length": 66.90867579908675, + "max_document_length": 466, + "unique_documents": 438, + "min_query_length": 45, + "average_query_length": 936.4014084507043, + "max_query_length": 6497, + "unique_queries": 284, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.545774647887324, + "max_relevant_docs_per_query": 11, + "unique_relevant_docs": 438, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/LitSearchRetrieval.json b/mteb/descriptive_stats/Retrieval/LitSearchRetrieval.json new file mode 100644 index 0000000000..cfa8267140 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/LitSearchRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 54079974, + "num_samples": 64780, + "num_queries": 597, + "num_documents": 64183, + "min_document_length": 37, + "average_document_length": 1.3134007447454934, + "max_document_length": 327, + "unique_documents": 64183, + "min_query_length": 0, + "average_query_length": 90445.01842546064, + "max_query_length": 18356, + "unique_queries": 597, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.07035175879397, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 574, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MIRACLRetrievalHardNegatives.json b/mteb/descriptive_stats/Retrieval/MIRACLRetrievalHardNegatives.json new file mode 100644 index 0000000000..cc6c3af951 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MIRACLRetrievalHardNegatives.json @@ -0,0 +1,498 @@ +{ + "dev": { + "number_of_characters": 983901912, + "num_samples": 2460458, + "num_queries": 11076, + "num_documents": 2449382, + "min_document_length": 5, + "average_document_length": 0.1694358005407078, + "max_document_length": 176, + "unique_documents": 2449382, + "min_query_length": 1, + "average_query_length": 88794.41124954858, + "max_query_length": 48538, + "unique_queries": 11076, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.3643011917659806, + "max_relevant_docs_per_query": 20, + "unique_relevant_docs": 98836, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "ar": { + "number_of_characters": 81272741, + "num_samples": 193103, + "num_queries": 1000, + "num_documents": 192103, + "min_document_length": 12, + "average_document_length": 0.1540007183646273, + "max_document_length": 83, + "unique_documents": 192103, + "min_query_length": 1, + "average_query_length": 81243.157, + "max_query_length": 48538, + 
"unique_queries": 1000, + "min_relevant_docs_per_query": 7, + "average_relevant_docs_per_query": 1.982, + "max_relevant_docs_per_query": 17, + "unique_relevant_docs": 9560, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "bn": { + "number_of_characters": 109132820, + "num_samples": 297676, + "num_queries": 411, + "num_documents": 297265, + "min_document_length": 16, + "average_document_length": 0.06495551107597598, + "max_document_length": 112, + "unique_documents": 297265, + "min_query_length": 1, + "average_query_length": 265482.99513381993, + "max_query_length": 17102, + "unique_queries": 411, + "min_relevant_docs_per_query": 7, + "average_relevant_docs_per_query": 2.099756690997567, + "max_relevant_docs_per_query": 13, + "unique_relevant_docs": 3729, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "de": { + "number_of_characters": 35275409, + "num_samples": 71582, + "num_queries": 305, + "num_documents": 71277, + "min_document_length": 15, + "average_document_length": 0.19683768957728298, + "max_document_length": 87, + "unique_documents": 71277, + "min_query_length": 1, + "average_query_length": 115611.07868852459, + "max_query_length": 7635, + "unique_queries": 305, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.6590163934426227, + "max_relevant_docs_per_query": 20, + "unique_relevant_docs": 3103, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "en": { + "number_of_characters": 90963438, + "num_samples": 179567, + "num_queries": 799, + "num_documents": 178768, + "min_document_length": 16, + "average_document_length": 0.17988678063188043, + "max_document_length": 122, + "unique_documents": 178768, + "min_query_length": 1, + "average_query_length": 113806.3579474343, + "max_query_length": 8675, + "unique_queries": 799, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.911138923654568, + "max_relevant_docs_per_query": 16, + "unique_relevant_docs": 7921, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 75804446, + "num_samples": 147398, + "num_queries": 648, + "num_documents": 146750, + "min_document_length": 19, + "average_document_length": 0.20918568994889267, + "max_document_length": 88, + "unique_documents": 146750, + "min_query_length": 1, + "average_query_length": 116934.79629629629, + "max_query_length": 21535, + "unique_queries": 648, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 4.609567901234568, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 6410, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fa": { + "number_of_characters": 53033692, + "num_samples": 134228, + "num_queries": 632, + "num_documents": 133596, + "min_document_length": 18, + "average_document_length": 0.19466900206593013, + "max_document_length": 82, + 
"unique_documents": 133596, + "min_query_length": 1, + "average_query_length": 83872.91930379746, + "max_query_length": 13646, + "unique_queries": 632, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.079113924050633, + "max_relevant_docs_per_query": 20, + "unique_relevant_docs": 6405, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fi": { + "number_of_characters": 86890075, + "num_samples": 195415, + "num_queries": 1000, + "num_documents": 194415, + "min_document_length": 14, + "average_document_length": 0.19878095825939357, + "max_document_length": 130, + "unique_documents": 194415, + "min_query_length": 1, + "average_query_length": 86851.429, + "max_query_length": 10055, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.918, + "max_relevant_docs_per_query": 16, + "unique_relevant_docs": 9038, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 33185924, + "num_samples": 75700, + "num_queries": 343, + "num_documents": 75357, + "min_document_length": 16, + "average_document_length": 0.1997425587536659, + "max_document_length": 83, + "unique_documents": 75357, + "min_query_length": 1, + "average_query_length": 96708.08163265306, + "max_query_length": 9670, + "unique_queries": 343, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.131195335276968, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 3407, + "num_instructions": null, + "min_instruction_length": 
null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hi": { + "number_of_characters": 30538763, + "num_samples": 63416, + "num_queries": 350, + "num_documents": 63066, + "min_document_length": 24, + "average_document_length": 0.2960232137760441, + "max_document_length": 120, + "unique_documents": 63066, + "min_query_length": 1, + "average_query_length": 87200.26857142858, + "max_query_length": 29655, + "unique_queries": 350, + "min_relevant_docs_per_query": 6, + "average_relevant_docs_per_query": 2.1485714285714286, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 3342, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "id": { + "number_of_characters": 80132242, + "num_samples": 169133, + "num_queries": 960, + "num_documents": 168173, + "min_document_length": 13, + "average_document_length": 0.2166816314152688, + "max_document_length": 93, + "unique_documents": 168173, + "min_query_length": 1, + "average_query_length": 83433.12708333334, + "max_query_length": 13952, + "unique_queries": 960, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 3.216666666666667, + "max_relevant_docs_per_query": 17, + "unique_relevant_docs": 8286, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ja": { + "number_of_characters": 36602824, + "num_samples": 186179, + "num_queries": 860, + "num_documents": 185319, + 
"min_document_length": 7, + "average_document_length": 0.08220419924562511, + "max_document_length": 48, + "unique_documents": 185319, + "min_query_length": 1, + "average_query_length": 42543.70930232558, + "max_query_length": 13222, + "unique_queries": 860, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.0813953488372094, + "max_relevant_docs_per_query": 16, + "unique_relevant_docs": 8066, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ko": { + "number_of_characters": 10865749, + "num_samples": 43506, + "num_queries": 213, + "num_documents": 43293, + "min_document_length": 5, + "average_document_length": 0.10639133347192387, + "max_document_length": 92, + "unique_documents": 43293, + "min_query_length": 1, + "average_query_length": 50991.281690140844, + "max_query_length": 7849, + "unique_queries": 213, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.568075117370892, + "max_relevant_docs_per_query": 20, + "unique_relevant_docs": 2835, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ru": { + "number_of_characters": 99786144, + "num_samples": 220114, + "num_queries": 1000, + "num_documents": 219114, + "min_document_length": 16, + "average_document_length": 0.20105972233631808, + "max_document_length": 108, + "unique_documents": 219114, + "min_query_length": 1, + "average_query_length": 99742.089, + "max_query_length": 12411, + "unique_queries": 1000, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.833, + 
"max_relevant_docs_per_query": 18, + "unique_relevant_docs": 10167, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "sw": { + "number_of_characters": 28413887, + "num_samples": 132406, + "num_queries": 482, + "num_documents": 131924, + "min_document_length": 13, + "average_document_length": 0.14238500955095357, + "max_document_length": 80, + "unique_documents": 131924, + "min_query_length": 1, + "average_query_length": 58911.0020746888, + "max_query_length": 11185, + "unique_queries": 482, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.887966804979253, + "max_relevant_docs_per_query": 17, + "unique_relevant_docs": 3514, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "te": { + "number_of_characters": 59846144, + "num_samples": 102789, + "num_queries": 828, + "num_documents": 101961, + "min_document_length": 14, + "average_document_length": 0.309500691440845, + "max_document_length": 111, + "unique_documents": 101961, + "min_query_length": 1, + "average_query_length": 72239.83937198068, + "max_query_length": 17811, + "unique_queries": 828, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0314009661835748, + "max_relevant_docs_per_query": 11, + "unique_relevant_docs": 1457, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + 
"th": { + "number_of_characters": 53618254, + "num_samples": 117382, + "num_queries": 733, + "num_documents": 116649, + "min_document_length": 14, + "average_document_length": 0.269423655582131, + "max_document_length": 176, + "unique_documents": 116649, + "min_query_length": 1, + "average_query_length": 73106.17462482947, + "max_query_length": 14607, + "unique_queries": 733, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.8321964529331514, + "max_relevant_docs_per_query": 15, + "unique_relevant_docs": 6868, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "yo": { + "number_of_characters": 7047836, + "num_samples": 49162, + "num_queries": 119, + "num_documents": 49043, + "min_document_length": 25, + "average_document_length": 0.09145035988826132, + "max_document_length": 56, + "unique_documents": 49043, + "min_query_length": 1, + "average_query_length": 59187.82352941176, + "max_query_length": 10457, + "unique_queries": 119, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.2100840336134453, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 942, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zh": { + "number_of_characters": 11491524, + "num_samples": 81702, + "num_queries": 393, + "num_documents": 81309, + "min_document_length": 7, + "average_document_length": 0.052528010429349764, + "max_document_length": 22, + "unique_documents": 81309, + "min_query_length": 1, + "average_query_length": 29229.651399491093, + "max_query_length": 8284, + 
"unique_queries": 393, + "min_relevant_docs_per_query": 8, + "average_relevant_docs_per_query": 2.5292620865139948, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 3786, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MLQARetrieval.json b/mteb/descriptive_stats/Retrieval/MLQARetrieval.json new file mode 100644 index 0000000000..b19c2b832e --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MLQARetrieval.json @@ -0,0 +1,2606 @@ +{ + "validation": { + "number_of_characters": 10812098, + "num_samples": 29490, + "num_queries": 15747, + "num_documents": 13743, + "min_document_length": 5, + "average_document_length": 51.51284290184094, + "max_document_length": 182, + "unique_documents": 13743, + "min_query_length": 51, + "average_query_length": 641.6559979678669, + "max_query_length": 12791, + "unique_queries": 15747, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 13743, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "ara-ara": { + "number_of_characters": 326497, + "num_samples": 956, + "num_queries": 517, + "num_documents": 439, + "min_document_length": 12, + "average_document_length": 49.840546697038725, + "max_document_length": 119, + "unique_documents": 439, + "min_query_length": 56, + "average_query_length": 589.2011605415861, + "max_query_length": 3967, + "unique_queries": 517, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 439, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-deu": { + "number_of_characters": 140511, + "num_samples": 377, + "num_queries": 207, + "num_documents": 170, + "min_document_length": 18, + "average_document_length": 67.1470588235294, + "max_document_length": 172, + "unique_documents": 170, + "min_query_length": 56, + "average_query_length": 623.6521739130435, + "max_query_length": 3967, + "unique_queries": 207, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 170, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-eng": { + "number_of_characters": 330482, + "num_samples": 956, + "num_queries": 517, + "num_documents": 439, + "min_document_length": 17, + "average_document_length": 58.91799544419134, + "max_document_length": 139, + "unique_documents": 439, + "min_query_length": 56, + "average_query_length": 589.2011605415861, + "max_query_length": 3967, + "unique_queries": 517, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 439, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null + }, + "ara-spa": { + "number_of_characters": 100247, + "num_samples": 301, + "num_queries": 161, + "num_documents": 140, + "min_document_length": 19, + "average_document_length": 61.74285714285714, + "max_document_length": 136, + "unique_documents": 140, + "min_query_length": 56, + "average_query_length": 568.9627329192547, + "max_query_length": 3338, + "unique_queries": 161, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 140, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-hin": { + "number_of_characters": 106786, + "num_samples": 341, + "num_queries": 186, + "num_documents": 155, + "min_document_length": 18, + "average_document_length": 62.348387096774196, + "max_document_length": 123, + "unique_documents": 155, + "min_query_length": 73, + "average_query_length": 522.1612903225806, + "max_query_length": 2860, + "unique_queries": 186, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 155, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-vie": { + "number_of_characters": 127164, + "num_samples": 311, + "num_queries": 163, + "num_documents": 148, + "min_document_length": 15, + "average_document_length": 54.5945945945946, + "max_document_length": 133, + "unique_documents": 148, + "min_query_length": 78, + "average_query_length": 730.5766871165645, + "max_query_length": 3787, + 
"unique_queries": 163, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 148, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-zho": { + "number_of_characters": 124970, + "num_samples": 343, + "num_queries": 188, + "num_documents": 155, + "min_document_length": 7, + "average_document_length": 18.941935483870967, + "max_document_length": 36, + "unique_documents": 155, + "min_query_length": 60, + "average_query_length": 649.1170212765958, + "max_query_length": 3967, + "unique_queries": 188, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 155, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-ara": { + "number_of_characters": 134602, + "num_samples": 386, + "num_queries": 207, + "num_documents": 179, + "min_document_length": 15, + "average_document_length": 49.798882681564244, + "max_document_length": 115, + "unique_documents": 179, + "min_query_length": 62, + "average_query_length": 607.1884057971015, + "max_query_length": 3009, + "unique_queries": 207, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 179, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-deu": { + "number_of_characters": 355878, + "num_samples": 968, + "num_queries": 512, + "num_documents": 456, + "min_document_length": 16, + "average_document_length": 59.02850877192982, + "max_document_length": 172, + "unique_documents": 456, + "min_query_length": 55, + "average_query_length": 642.501953125, + "max_query_length": 5536, + "unique_queries": 512, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 456, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-eng": { + "number_of_characters": 353711, + "num_samples": 968, + "num_queries": 512, + "num_documents": 456, + "min_document_length": 15, + "average_document_length": 54.276315789473685, + "max_document_length": 162, + "unique_documents": 456, + "min_query_length": 55, + "average_query_length": 642.501953125, + "max_query_length": 5536, + "unique_queries": 512, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 456, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-spa": { + "number_of_characters": 132505, + "num_samples": 377, + "num_queries": 196, + "num_documents": 181, + "min_document_length": 14, + "average_document_length": 54.79558011049724, + "max_document_length": 182, + "unique_documents": 181, + "min_query_length": 55, + "average_query_length": 625.4438775510204, + 
"max_query_length": 5536, + "unique_queries": 196, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 181, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-hin": { + "number_of_characters": 107808, + "num_samples": 309, + "num_queries": 163, + "num_documents": 146, + "min_document_length": 17, + "average_document_length": 52.49315068493151, + "max_document_length": 117, + "unique_documents": 146, + "min_query_length": 58, + "average_query_length": 614.3803680981595, + "max_query_length": 4604, + "unique_queries": 163, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 146, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-vie": { + "number_of_characters": 158745, + "num_samples": 345, + "num_queries": 182, + "num_documents": 163, + "min_document_length": 17, + "average_document_length": 52.2760736196319, + "max_document_length": 171, + "unique_documents": 163, + "min_query_length": 60, + "average_query_length": 825.4065934065934, + "max_query_length": 5536, + "unique_queries": 182, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 163, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-zho": { + "number_of_characters": 125856, + "num_samples": 357, + "num_queries": 190, + "num_documents": 167, + "min_document_length": 5, + "average_document_length": 16.994011976047904, + "max_document_length": 38, + "unique_documents": 167, + "min_query_length": 55, + "average_query_length": 647.4631578947368, + "max_query_length": 4781, + "unique_queries": 190, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 167, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-ara": { + "number_of_characters": 450833, + "num_samples": 955, + "num_queries": 517, + "num_documents": 438, + "min_document_length": 12, + "average_document_length": 49.954337899543376, + "max_document_length": 119, + "unique_documents": 438, + "min_query_length": 63, + "average_query_length": 829.6963249516441, + "max_query_length": 4923, + "unique_queries": 517, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 438, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-deu": { + "number_of_characters": 450365, + "num_samples": 959, + "num_queries": 512, + "num_documents": 447, + "min_document_length": 16, + "average_document_length": 60.21700223713646, + "max_document_length": 172, + "unique_documents": 447, + "min_query_length": 73, + "average_query_length": 827.046875, + 
"max_query_length": 4993, + "unique_queries": 512, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 447, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-eng": { + "number_of_characters": 975867, + "num_samples": 2126, + "num_queries": 1148, + "num_documents": 978, + "min_document_length": 11, + "average_document_length": 57.534764826175866, + "max_document_length": 162, + "unique_documents": 978, + "min_query_length": 52, + "average_query_length": 801.0435540069686, + "max_query_length": 4993, + "unique_queries": 1148, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 978, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-spa": { + "number_of_characters": 423068, + "num_samples": 939, + "num_queries": 500, + "num_documents": 439, + "min_document_length": 14, + "average_document_length": 59.391799544419136, + "max_document_length": 182, + "unique_documents": 439, + "min_query_length": 73, + "average_query_length": 793.99, + "max_query_length": 4956, + "unique_queries": 500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 439, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-hin": { + "number_of_characters": 417146, + "num_samples": 930, + "num_queries": 507, + "num_documents": 423, + "min_document_length": 12, + "average_document_length": 59.198581560283685, + "max_document_length": 148, + "unique_documents": 423, + "min_query_length": 98, + "average_query_length": 773.3826429980276, + "max_query_length": 4993, + "unique_queries": 507, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 423, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-vie": { + "number_of_characters": 481917, + "num_samples": 963, + "num_queries": 511, + "num_documents": 452, + "min_document_length": 8, + "average_document_length": 54.35840707964602, + "max_document_length": 171, + "unique_documents": 452, + "min_query_length": 52, + "average_query_length": 895.0039138943249, + "max_query_length": 4993, + "unique_queries": 511, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 452, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-zho": { + "number_of_characters": 438406, + "num_samples": 934, + "num_queries": 504, + "num_documents": 430, + "min_document_length": 5, + "average_document_length": 18.044186046511626, + "max_document_length": 51, + "unique_documents": 430, + "min_query_length": 75, + "average_query_length": 
854.4583333333334, + "max_query_length": 4993, + "unique_queries": 504, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 430, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-ara": { + "number_of_characters": 104441, + "num_samples": 306, + "num_queries": 161, + "num_documents": 145, + "min_document_length": 12, + "average_document_length": 45.92413793103448, + "max_document_length": 119, + "unique_documents": 145, + "min_query_length": 71, + "average_query_length": 607.3416149068323, + "max_query_length": 7789, + "unique_queries": 161, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 145, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-deu": { + "number_of_characters": 110271, + "num_samples": 380, + "num_queries": 196, + "num_documents": 184, + "min_document_length": 18, + "average_document_length": 55.25, + "max_document_length": 150, + "unique_documents": 184, + "min_query_length": 52, + "average_query_length": 510.73979591836735, + "max_query_length": 2044, + "unique_queries": 196, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 184, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-eng": { + "number_of_characters": 315965, + "num_samples": 954, + "num_queries": 500, + "num_documents": 454, + "min_document_length": 14, + "average_document_length": 54.136563876651984, + "max_document_length": 162, + "unique_documents": 454, + "min_query_length": 52, + "average_query_length": 582.774, + "max_query_length": 12791, + "unique_queries": 500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 454, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-spa": { + "number_of_characters": 317460, + "num_samples": 954, + "num_queries": 500, + "num_documents": 454, + "min_document_length": 14, + "average_document_length": 57.429515418502206, + "max_document_length": 182, + "unique_documents": 454, + "min_query_length": 52, + "average_query_length": 582.774, + "max_query_length": 12791, + "unique_queries": 500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 454, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-hin": { + "number_of_characters": 125039, + "num_samples": 352, + "num_queries": 187, + "num_documents": 165, + "min_document_length": 17, + "average_document_length": 54.49090909090909, + "max_document_length": 129, + "unique_documents": 165, + "min_query_length": 52, + 
"average_query_length": 620.5775401069519, + "max_query_length": 12791, + "unique_queries": 187, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 165, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-vie": { + "number_of_characters": 139091, + "num_samples": 365, + "num_queries": 189, + "num_documents": 176, + "min_document_length": 11, + "average_document_length": 52.43181818181818, + "max_document_length": 171, + "unique_documents": 176, + "min_query_length": 65, + "average_query_length": 687.1058201058202, + "max_query_length": 12791, + "unique_queries": 189, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 176, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-zho": { + "number_of_characters": 93338, + "num_samples": 311, + "num_queries": 161, + "num_documents": 150, + "min_document_length": 5, + "average_document_length": 16.733333333333334, + "max_document_length": 51, + "unique_documents": 150, + "min_query_length": 90, + "average_query_length": 564.1490683229814, + "max_query_length": 2037, + "unique_queries": 161, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 150, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-ara": { + "number_of_characters": 109960, + "num_samples": 338, + "num_queries": 186, + "num_documents": 152, + "min_document_length": 12, + "average_document_length": 53.38157894736842, + "max_document_length": 115, + "unique_documents": 152, + "min_query_length": 65, + "average_query_length": 547.5591397849462, + "max_query_length": 3913, + "unique_queries": 186, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 152, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-deu": { + "number_of_characters": 93151, + "num_samples": 305, + "num_queries": 163, + "num_documents": 142, + "min_document_length": 17, + "average_document_length": 59.021126760563384, + "max_document_length": 150, + "unique_documents": 142, + "min_query_length": 65, + "average_query_length": 520.0613496932515, + "max_query_length": 2375, + "unique_queries": 163, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 142, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-eng": { + "number_of_characters": 319136, + "num_samples": 932, + "num_queries": 507, + "num_documents": 425, + "min_document_length": 11, + "average_document_length": 59.36, + "max_document_length": 139, + "unique_documents": 425, + 
"min_query_length": 65, + "average_query_length": 579.7001972386588, + "max_query_length": 3916, + "unique_queries": 507, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 425, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-spa": { + "number_of_characters": 122669, + "num_samples": 344, + "num_queries": 187, + "num_documents": 157, + "min_document_length": 14, + "average_document_length": 62.84076433121019, + "max_document_length": 136, + "unique_documents": 157, + "min_query_length": 70, + "average_query_length": 603.2245989304813, + "max_query_length": 3916, + "unique_queries": 187, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 157, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-hin": { + "number_of_characters": 318949, + "num_samples": 932, + "num_queries": 507, + "num_documents": 425, + "min_document_length": 12, + "average_document_length": 58.92, + "max_document_length": 148, + "unique_documents": 425, + "min_query_length": 65, + "average_query_length": 579.7001972386588, + "max_query_length": 3916, + "unique_queries": 507, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 425, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-vie": { + "number_of_characters": 130780, + "num_samples": 334, + "num_queries": 177, + "num_documents": 157, + "min_document_length": 8, + "average_document_length": 54.50955414012739, + "max_document_length": 152, + "unique_documents": 157, + "min_query_length": 75, + "average_query_length": 690.5197740112994, + "max_query_length": 3916, + "unique_queries": 177, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 157, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-zho": { + "number_of_characters": 114001, + "num_samples": 351, + "num_queries": 189, + "num_documents": 162, + "min_document_length": 5, + "average_document_length": 18.641975308641975, + "max_document_length": 51, + "unique_documents": 162, + "min_query_length": 70, + "average_query_length": 587.2010582010582, + "max_query_length": 3308, + "unique_queries": 189, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 162, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-ara": { + "number_of_characters": 141482, + "num_samples": 315, + "num_queries": 163, + "num_documents": 152, + "min_document_length": 12, + "average_document_length": 44.19736842105263, + "max_document_length": 119, + "unique_documents": 152, + 
"min_query_length": 59, + "average_query_length": 826.7730061349694, + "max_query_length": 5540, + "unique_queries": 163, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 152, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-deu": { + "number_of_characters": 167346, + "num_samples": 343, + "num_queries": 182, + "num_documents": 161, + "min_document_length": 18, + "average_document_length": 57.962732919254655, + "max_document_length": 148, + "unique_documents": 161, + "min_query_length": 71, + "average_query_length": 868.2087912087912, + "max_query_length": 4601, + "unique_queries": 182, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 161, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-eng": { + "number_of_characters": 427193, + "num_samples": 962, + "num_queries": 511, + "num_documents": 451, + "min_document_length": 11, + "average_document_length": 54.48780487804878, + "max_document_length": 162, + "unique_documents": 451, + "min_query_length": 51, + "average_query_length": 787.9041095890411, + "max_query_length": 8272, + "unique_queries": 511, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 451, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": 
null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-spa": { + "number_of_characters": 165288, + "num_samples": 355, + "num_queries": 189, + "num_documents": 166, + "min_document_length": 14, + "average_document_length": 59.036144578313255, + "max_document_length": 182, + "unique_documents": 166, + "min_query_length": 65, + "average_query_length": 822.6878306878307, + "max_query_length": 8272, + "unique_queries": 189, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 166, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-hin": { + "number_of_characters": 144662, + "num_samples": 334, + "num_queries": 177, + "num_documents": 157, + "min_document_length": 12, + "average_document_length": 52.36305732484077, + "max_document_length": 148, + "unique_documents": 157, + "min_query_length": 65, + "average_query_length": 770.8531073446328, + "max_query_length": 8272, + "unique_queries": 177, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 157, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-vie": { + "number_of_characters": 427189, + "num_samples": 962, + "num_queries": 511, + "num_documents": 451, + "min_document_length": 8, + "average_document_length": 54.47893569844789, + "max_document_length": 171, + 
"unique_documents": 451, + "min_query_length": 51, + "average_query_length": 787.9041095890411, + "max_query_length": 8272, + "unique_queries": 511, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 451, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-zho": { + "number_of_characters": 162251, + "num_samples": 350, + "num_queries": 184, + "num_documents": 166, + "min_document_length": 5, + "average_document_length": 16.680722891566266, + "max_document_length": 32, + "unique_documents": 166, + "min_query_length": 83, + "average_query_length": 866.75, + "max_query_length": 5540, + "unique_queries": 184, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 166, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-ara": { + "number_of_characters": 46776, + "num_samples": 349, + "num_queries": 188, + "num_documents": 161, + "min_document_length": 12, + "average_document_length": 51.78260869565217, + "max_document_length": 103, + "unique_documents": 161, + "min_query_length": 52, + "average_query_length": 204.4627659574468, + "max_query_length": 1039, + "unique_queries": 188, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 161, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-deu": { + "number_of_characters": 52397, + "num_samples": 354, + "num_queries": 190, + "num_documents": 164, + "min_document_length": 16, + "average_document_length": 62.38414634146341, + "max_document_length": 172, + "unique_documents": 164, + "min_query_length": 52, + "average_query_length": 221.92631578947368, + "max_query_length": 1399, + "unique_queries": 190, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 164, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-eng": { + "number_of_characters": 134543, + "num_samples": 947, + "num_queries": 504, + "num_documents": 443, + "min_document_length": 11, + "average_document_length": 57.05643340857788, + "max_document_length": 125, + "unique_documents": 443, + "min_query_length": 51, + "average_query_length": 216.79960317460316, + "max_query_length": 1399, + "unique_queries": 504, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 443, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-spa": { + "number_of_characters": 45452, + "num_samples": 309, + "num_queries": 161, + "num_documents": 148, + "min_document_length": 14, + "average_document_length": 57.5, + "max_document_length": 136, + 
"unique_documents": 148, + "min_query_length": 53, + "average_query_length": 229.45341614906832, + "max_query_length": 1052, + "unique_queries": 161, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 148, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-hin": { + "number_of_characters": 48722, + "num_samples": 352, + "num_queries": 189, + "num_documents": 163, + "min_document_length": 12, + "average_document_length": 60.355828220858896, + "max_document_length": 148, + "unique_documents": 163, + "min_query_length": 51, + "average_query_length": 205.73544973544975, + "max_query_length": 873, + "unique_queries": 189, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 163, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-vie": { + "number_of_characters": 54156, + "num_samples": 352, + "num_queries": 184, + "num_documents": 168, + "min_document_length": 8, + "average_document_length": 54.035714285714285, + "max_document_length": 152, + "unique_documents": 168, + "min_query_length": 53, + "average_query_length": 244.9891304347826, + "max_query_length": 1399, + "unique_queries": 184, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 168, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-zho": { + "number_of_characters": 117026, + "num_samples": 947, + "num_queries": 504, + "num_documents": 443, + "min_document_length": 5, + "average_document_length": 17.51467268623025, + "max_document_length": 51, + "unique_documents": 443, + "min_query_length": 51, + "average_query_length": 216.79960317460316, + "max_query_length": 1399, + "unique_queries": 504, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 443, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "test": { + "number_of_characters": 111293089, + "num_samples": 296665, + "num_queries": 158029, + "num_documents": 138636, + "min_document_length": 5, + "average_document_length": 49.93927262760033, + "max_document_length": 190, + "unique_documents": 138636, + "min_query_length": 51, + "average_query_length": 660.4465509495093, + "max_query_length": 10727, + "unique_queries": 158029, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0003417094330787, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 138636, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "ara-ara": { + "number_of_characters": 3465612, + "num_samples": 9979, + "num_queries": 5333, + "num_documents": 4646, + "min_document_length": 
8, + "average_document_length": 47.36310804993543, + "max_document_length": 148, + "unique_documents": 4646, + "min_query_length": 51, + "average_query_length": 608.5810988186762, + "max_query_length": 6491, + "unique_queries": 5333, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000375023438965, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4646, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-deu": { + "number_of_characters": 975142, + "num_samples": 3151, + "num_queries": 1648, + "num_documents": 1503, + "min_document_length": 12, + "average_document_length": 56.224218230206255, + "max_document_length": 153, + "unique_documents": 1503, + "min_query_length": 51, + "average_query_length": 540.435072815534, + "max_query_length": 4845, + "unique_queries": 1648, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0006067961165048, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1503, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-eng": { + "number_of_characters": 3504466, + "num_samples": 9978, + "num_queries": 5332, + "num_documents": 4646, + "min_document_length": 13, + "average_document_length": 55.72600086095566, + "max_document_length": 167, + "unique_documents": 4646, + "min_query_length": 51, + "average_query_length": 608.6952363090772, + "max_query_length": 6491, + "unique_queries": 5332, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000562640660165, + 
"max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4646, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-spa": { + "number_of_characters": 1363834, + "num_samples": 3747, + "num_queries": 1978, + "num_documents": 1769, + "min_document_length": 12, + "average_document_length": 57.47993216506501, + "max_document_length": 181, + "unique_documents": 1769, + "min_query_length": 52, + "average_query_length": 638.0950455005055, + "max_query_length": 6491, + "unique_queries": 1978, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1769, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-hin": { + "number_of_characters": 1150837, + "num_samples": 3343, + "num_queries": 1831, + "num_documents": 1512, + "min_document_length": 14, + "average_document_length": 58.99669312169312, + "max_document_length": 147, + "unique_documents": 1512, + "min_query_length": 51, + "average_query_length": 579.811032222829, + "max_query_length": 4682, + "unique_queries": 1831, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1512, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-vie": { + 
"number_of_characters": 1466341, + "num_samples": 3880, + "num_queries": 2047, + "num_documents": 1833, + "min_document_length": 12, + "average_document_length": 54.51500272776868, + "max_document_length": 152, + "unique_documents": 1833, + "min_query_length": 54, + "average_query_length": 667.5207620908647, + "max_query_length": 4615, + "unique_queries": 2047, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1833, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-zho": { + "number_of_characters": 1343684, + "num_samples": 3610, + "num_queries": 1912, + "num_documents": 1698, + "min_document_length": 5, + "average_document_length": 16.87396937573616, + "max_document_length": 69, + "unique_documents": 1698, + "min_query_length": 53, + "average_query_length": 687.7782426778243, + "max_query_length": 5425, + "unique_queries": 1912, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1698, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-ara": { + "number_of_characters": 1142572, + "num_samples": 3146, + "num_queries": 1649, + "num_documents": 1497, + "min_document_length": 8, + "average_document_length": 43.56112224448898, + "max_document_length": 139, + "unique_documents": 1497, + "min_query_length": 53, + "average_query_length": 653.3420254699818, + "max_query_length": 6774, + "unique_queries": 1649, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1497, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-deu": { + "number_of_characters": 3173494, + "num_samples": 8566, + "num_queries": 4513, + "num_documents": 4053, + "min_document_length": 10, + "average_document_length": 57.46829509005675, + "max_document_length": 190, + "unique_documents": 4053, + "min_query_length": 51, + "average_query_length": 651.5787724351873, + "max_query_length": 6774, + "unique_queries": 4513, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0008863283846665, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 4053, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-eng": { + "number_of_characters": 3153037, + "num_samples": 8566, + "num_queries": 4513, + "num_documents": 4053, + "min_document_length": 9, + "average_document_length": 52.42092277325438, + "max_document_length": 158, + "unique_documents": 4053, + "min_query_length": 51, + "average_query_length": 651.5787724351873, + "max_query_length": 6774, + "unique_queries": 4513, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0008863283846665, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4053, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-spa": { + "number_of_characters": 1269348, + "num_samples": 3369, + "num_queries": 1775, + "num_documents": 1594, + "min_document_length": 13, + "average_document_length": 55.78732747804266, + "max_document_length": 155, + "unique_documents": 1594, + "min_query_length": 52, + "average_query_length": 665.0270422535211, + "max_query_length": 5662, + "unique_queries": 1775, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0005633802816902, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1594, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-hin": { + "number_of_characters": 932475, + "num_samples": 2717, + "num_queries": 1430, + "num_documents": 1287, + "min_document_length": 13, + "average_document_length": 50.162393162393165, + "max_document_length": 130, + "unique_documents": 1287, + "min_query_length": 51, + "average_query_length": 606.9342657342658, + "max_query_length": 4818, + "unique_queries": 1430, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1287, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-vie": { + "number_of_characters": 1231990, + "num_samples": 3193, + "num_queries": 1675, + "num_documents": 1518, + "min_document_length": 8, + "average_document_length": 51.46772068511199, + "max_document_length": 176, + "unique_documents": 1518, + "min_query_length": 51, + 
"average_query_length": 688.8728358208955, + "max_query_length": 6558, + "unique_queries": 1675, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1518, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-zho": { + "number_of_characters": 1146502, + "num_samples": 3075, + "num_queries": 1620, + "num_documents": 1455, + "min_document_length": 5, + "average_document_length": 16.637113402061857, + "max_document_length": 69, + "unique_documents": 1455, + "min_query_length": 58, + "average_query_length": 692.7746913580247, + "max_query_length": 6774, + "unique_queries": 1620, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0006172839506173, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1455, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-ara": { + "number_of_characters": 4864548, + "num_samples": 9939, + "num_queries": 5333, + "num_documents": 4606, + "min_document_length": 8, + "average_document_length": 47.774424663482414, + "max_document_length": 148, + "unique_documents": 4606, + "min_query_length": 52, + "average_query_length": 870.8979936246016, + "max_query_length": 9610, + "unique_queries": 5333, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000375023438965, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4606, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-deu": { + "number_of_characters": 3903340, + "num_samples": 8545, + "num_queries": 4513, + "num_documents": 4032, + "min_document_length": 10, + "average_document_length": 57.76760912698413, + "max_document_length": 190, + "unique_documents": 4032, + "min_query_length": 51, + "average_query_length": 813.2995789940173, + "max_query_length": 7552, + "unique_queries": 4513, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0008863283846665, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 4032, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-eng": { + "number_of_characters": 10303894, + "num_samples": 21498, + "num_queries": 11582, + "num_documents": 9916, + "min_document_length": 9, + "average_document_length": 56.01865671641791, + "max_document_length": 167, + "unique_documents": 9916, + "min_query_length": 51, + "average_query_length": 841.6864962873424, + "max_query_length": 10727, + "unique_queries": 11582, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000690726990157, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 9916, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-spa": { + "number_of_characters": 4738041, + "num_samples": 9872, + "num_queries": 5251, + "num_documents": 4621, + "min_document_length": 12, + 
"average_document_length": 57.86582990694655, + "max_document_length": 181, + "unique_documents": 4621, + "min_query_length": 51, + "average_query_length": 851.3888783088936, + "max_query_length": 10727, + "unique_queries": 5251, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000380879832413, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4621, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-hin": { + "number_of_characters": 4235029, + "num_samples": 8975, + "num_queries": 4916, + "num_documents": 4059, + "min_document_length": 8, + "average_document_length": 57.3210150283321, + "max_document_length": 147, + "unique_documents": 4059, + "min_query_length": 51, + "average_query_length": 814.1503254678601, + "max_query_length": 10727, + "unique_queries": 4916, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000406834825061, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4059, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-vie": { + "number_of_characters": 5254594, + "num_samples": 10254, + "num_queries": 5495, + "num_documents": 4759, + "min_document_length": 8, + "average_document_length": 55.532044547173776, + "max_document_length": 176, + "unique_documents": 4759, + "min_query_length": 51, + "average_query_length": 908.1559599636033, + "max_query_length": 9610, + "unique_queries": 5495, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 4759, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-zho": { + "number_of_characters": 4830436, + "num_samples": 9604, + "num_queries": 5136, + "num_documents": 4468, + "min_document_length": 5, + "average_document_length": 17.264547896150404, + "max_document_length": 69, + "unique_documents": 4468, + "min_query_length": 51, + "average_query_length": 925.4863707165109, + "max_query_length": 10727, + "unique_queries": 5136, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0001947040498442, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4468, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-ara": { + "number_of_characters": 1249059, + "num_samples": 3788, + "num_queries": 1978, + "num_documents": 1810, + "min_document_length": 9, + "average_document_length": 44.56961325966851, + "max_document_length": 140, + "unique_documents": 1810, + "min_query_length": 58, + "average_query_length": 590.6916076845298, + "max_query_length": 10458, + "unique_queries": 1978, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1810, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-deu": { + "number_of_characters": 1045840, + 
"num_samples": 3400, + "num_queries": 1774, + "num_documents": 1626, + "min_document_length": 10, + "average_document_length": 56.59225092250922, + "max_document_length": 165, + "unique_documents": 1626, + "min_query_length": 51, + "average_query_length": 537.6668545659527, + "max_query_length": 6250, + "unique_queries": 1774, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011273957158964, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 1626, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-eng": { + "number_of_characters": 3253877, + "num_samples": 10014, + "num_queries": 5253, + "num_documents": 4761, + "min_document_length": 9, + "average_document_length": 52.77042638101239, + "max_document_length": 167, + "unique_documents": 4761, + "min_query_length": 51, + "average_query_length": 571.604226156482, + "max_query_length": 10458, + "unique_queries": 5253, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 4761, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-spa": { + "number_of_characters": 3270035, + "num_samples": 10012, + "num_queries": 5251, + "num_documents": 4761, + "min_document_length": 12, + "average_document_length": 56.16425120772947, + "max_document_length": 181, + "unique_documents": 4761, + "min_query_length": 51, + "average_query_length": 571.821938678347, + "max_query_length": 10458, + "unique_queries": 5251, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.000380879832413, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4761, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-hin": { + "number_of_characters": 1010952, + "num_samples": 3241, + "num_queries": 1723, + "num_documents": 1518, + "min_document_length": 12, + "average_document_length": 52.62845849802372, + "max_document_length": 147, + "unique_documents": 1518, + "min_query_length": 52, + "average_query_length": 540.3726059199072, + "max_query_length": 8825, + "unique_queries": 1723, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1518, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-vie": { + "number_of_characters": 1333310, + "num_samples": 3892, + "num_queries": 2018, + "num_documents": 1874, + "min_document_length": 8, + "average_document_length": 51.86019210245464, + "max_document_length": 152, + "unique_documents": 1874, + "min_query_length": 51, + "average_query_length": 612.5490584737364, + "max_query_length": 10458, + "unique_queries": 2018, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1874, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null + }, + "spa-zho": { + "number_of_characters": 1225671, + "num_samples": 3736, + "num_queries": 1947, + "num_documents": 1789, + "min_document_length": 5, + "average_document_length": 16.450531022917833, + "max_document_length": 46, + "unique_documents": 1789, + "min_query_length": 53, + "average_query_length": 614.402157164869, + "max_query_length": 6250, + "unique_queries": 1947, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1789, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-ara": { + "number_of_characters": 1226070, + "num_samples": 3333, + "num_queries": 1831, + "num_documents": 1502, + "min_document_length": 11, + "average_document_length": 51.25632490013316, + "max_document_length": 148, + "unique_documents": 1502, + "min_query_length": 56, + "average_query_length": 627.5712725286728, + "max_query_length": 5572, + "unique_queries": 1831, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1502, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-deu": { + "number_of_characters": 990471, + "num_samples": 2704, + "num_queries": 1429, + "num_documents": 1275, + "min_document_length": 11, + "average_document_length": 57.16313725490196, + "max_document_length": 174, + "unique_documents": 1275, + "min_query_length": 51, + "average_query_length": 642.1189643107068, + "max_query_length": 
5103, + "unique_queries": 1429, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000699790062981, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1275, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-eng": { + "number_of_characters": 3354858, + "num_samples": 9015, + "num_queries": 4916, + "num_documents": 4099, + "min_document_length": 9, + "average_document_length": 57.46206391802879, + "max_document_length": 144, + "unique_documents": 4099, + "min_query_length": 51, + "average_query_length": 634.5242066720912, + "max_query_length": 9013, + "unique_queries": 4916, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000406834825061, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4099, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-spa": { + "number_of_characters": 1186262, + "num_samples": 3181, + "num_queries": 1722, + "num_documents": 1459, + "min_document_length": 13, + "average_document_length": 59.564084989718985, + "max_document_length": 154, + "unique_documents": 1459, + "min_query_length": 51, + "average_query_length": 638.4192799070847, + "max_query_length": 6119, + "unique_queries": 1722, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0005807200929153, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1459, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": 
null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-hin": { + "number_of_characters": 3351987, + "num_samples": 9015, + "num_queries": 4916, + "num_documents": 4099, + "min_document_length": 8, + "average_document_length": 56.761649182727496, + "max_document_length": 147, + "unique_documents": 4099, + "min_query_length": 51, + "average_query_length": 634.5242066720912, + "max_query_length": 9013, + "unique_queries": 4916, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000406834825061, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4099, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-vie": { + "number_of_characters": 1385396, + "num_samples": 3582, + "num_queries": 1947, + "num_documents": 1635, + "min_document_length": 12, + "average_document_length": 57.411620795107034, + "max_document_length": 138, + "unique_documents": 1635, + "min_query_length": 51, + "average_query_length": 663.3425783256291, + "max_query_length": 9013, + "unique_queries": 1947, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1635, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-zho": { + "number_of_characters": 1275490, + "num_samples": 3264, + "num_queries": 1767, + "num_documents": 1497, + "min_document_length": 5, + "average_document_length": 17.8249832999332, + "max_document_length": 56, + "unique_documents": 
1497, + "min_query_length": 68, + "average_query_length": 706.737973967176, + "max_query_length": 9013, + "unique_queries": 1767, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1497, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-ara": { + "number_of_characters": 1893427, + "num_samples": 3869, + "num_queries": 2047, + "num_documents": 1822, + "min_document_length": 8, + "average_document_length": 46.98957189901208, + "max_document_length": 140, + "unique_documents": 1822, + "min_query_length": 53, + "average_query_length": 883.151929653151, + "max_query_length": 10556, + "unique_queries": 2047, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1822, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-deu": { + "number_of_characters": 1384837, + "num_samples": 3182, + "num_queries": 1674, + "num_documents": 1508, + "min_document_length": 10, + "average_document_length": 57.26591511936339, + "max_document_length": 157, + "unique_documents": 1508, + "min_query_length": 51, + "average_query_length": 775.6750298685782, + "max_query_length": 5614, + "unique_queries": 1674, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0005973715651135, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1508, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": 
null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-eng": { + "number_of_characters": 4646248, + "num_samples": 10288, + "num_queries": 5493, + "num_documents": 4795, + "min_document_length": 9, + "average_document_length": 55.114285714285714, + "max_document_length": 167, + "unique_documents": 4795, + "min_query_length": 51, + "average_query_length": 797.7380302202804, + "max_query_length": 10556, + "unique_queries": 5493, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0003640997633352, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4795, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-spa": { + "number_of_characters": 1822456, + "num_samples": 3846, + "num_queries": 2017, + "num_documents": 1829, + "min_document_length": 12, + "average_document_length": 56.389830508474574, + "max_document_length": 181, + "unique_documents": 1829, + "min_query_length": 51, + "average_query_length": 852.4139811601389, + "max_query_length": 10556, + "unique_queries": 2017, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0004957858205255, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1829, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-hin": { + "number_of_characters": 1467053, + "num_samples": 3587, + "num_queries": 1947, + "num_documents": 1640, + "min_document_length": 15, + 
"average_document_length": 56.37317073170732, + "max_document_length": 130, + "unique_documents": 1640, + "min_query_length": 54, + "average_query_length": 706.009758602979, + "max_query_length": 5381, + "unique_queries": 1947, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1640, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-vie": { + "number_of_characters": 4646252, + "num_samples": 10290, + "num_queries": 5495, + "num_documents": 4795, + "min_document_length": 8, + "average_document_length": 55.11511991657977, + "max_document_length": 176, + "unique_documents": 4795, + "min_query_length": 51, + "average_query_length": 797.4476797088262, + "max_query_length": 10556, + "unique_queries": 5495, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 4795, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vie-zho": { + "number_of_characters": 1747625, + "num_samples": 3663, + "num_queries": 1943, + "num_documents": 1720, + "min_document_length": 5, + "average_document_length": 16.996511627906976, + "max_document_length": 55, + "unique_documents": 1720, + "min_query_length": 55, + "average_query_length": 884.4009264024704, + "max_query_length": 8544, + "unique_queries": 1943, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1720, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-ara": { + "number_of_characters": 516234, + "num_samples": 3629, + "num_queries": 1911, + "num_documents": 1718, + "min_document_length": 8, + "average_document_length": 46.77240977881257, + "max_document_length": 148, + "unique_documents": 1718, + "min_query_length": 51, + "average_query_length": 228.0894819466248, + "max_query_length": 2435, + "unique_queries": 1911, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000523286237572, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1718, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-deu": { + "number_of_characters": 438762, + "num_samples": 3085, + "num_queries": 1621, + "num_documents": 1464, + "min_document_length": 12, + "average_document_length": 57.85450819672131, + "max_document_length": 190, + "unique_documents": 1464, + "min_query_length": 51, + "average_query_length": 218.42257865515114, + "max_query_length": 2658, + "unique_queries": 1621, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1464, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-eng": { + "number_of_characters": 1375165, + "num_samples": 9681, + "num_queries": 
5135, + "num_documents": 4546, + "min_document_length": 12, + "average_document_length": 54.94390673119226, + "max_document_length": 158, + "unique_documents": 4546, + "min_query_length": 51, + "average_query_length": 219.16066212268743, + "max_query_length": 2658, + "unique_queries": 5135, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0003894839337877, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4546, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-spa": { + "number_of_characters": 547101, + "num_samples": 3700, + "num_queries": 1947, + "num_documents": 1753, + "min_document_length": 12, + "average_document_length": 57.64860239589275, + "max_document_length": 162, + "unique_documents": 1753, + "min_query_length": 51, + "average_query_length": 229.0924499229584, + "max_query_length": 2658, + "unique_queries": 1947, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1753, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-hin": { + "number_of_characters": 436800, + "num_samples": 3291, + "num_queries": 1766, + "num_documents": 1525, + "min_document_length": 12, + "average_document_length": 56.82032786885246, + "max_document_length": 130, + "unique_documents": 1525, + "min_query_length": 51, + "average_query_length": 198.27236693091731, + "max_query_length": 2570, + "unique_queries": 1766, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 
1.0005662514156286, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1525, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-vie": { + "number_of_characters": 560107, + "num_samples": 3688, + "num_queries": 1943, + "num_documents": 1745, + "min_document_length": 10, + "average_document_length": 54.86418338108883, + "max_document_length": 176, + "unique_documents": 1745, + "min_query_length": 51, + "average_query_length": 238.99588265568707, + "max_query_length": 2435, + "unique_queries": 1943, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1745, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho-zho": { + "number_of_characters": 1202528, + "num_samples": 9682, + "num_queries": 5136, + "num_documents": 4546, + "min_document_length": 5, + "average_document_length": 16.968323801143864, + "max_document_length": 69, + "unique_documents": 4546, + "min_query_length": 51, + "average_query_length": 219.1179906542056, + "max_query_length": 2658, + "unique_queries": 5136, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0001947040498442, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4546, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + 
} + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MLQuestions.json b/mteb/descriptive_stats/Retrieval/MLQuestions.json new file mode 100644 index 0000000000..1c34a0fc82 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MLQuestions.json @@ -0,0 +1,54 @@ +{ + "dev": { + "number_of_characters": 2915233, + "num_samples": 12500, + "num_queries": 1500, + "num_documents": 11000, + "min_document_length": 14, + "average_document_length": 6.143909090909091, + "max_document_length": 160, + "unique_documents": 11000, + "min_query_length": 3, + "average_query_length": 1898.4333333333334, + "max_query_length": 395, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test": { + "number_of_characters": 2916280, + "num_samples": 12500, + "num_queries": 1500, + "num_documents": 11000, + "min_document_length": 12, + "average_document_length": 6.239090909090909, + "max_document_length": 165, + "unique_documents": 11000, + "min_query_length": 3, + "average_query_length": 1898.4333333333334, + "max_query_length": 395, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1499, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Retrieval/MMarcoRetrieval.json b/mteb/descriptive_stats/Retrieval/MMarcoRetrieval.json new file mode 100644 index 0000000000..c43b47c217 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MMarcoRetrieval.json @@ -0,0 +1,28 @@ +{ + "dev": { + "number_of_characters": 12294685, + "num_samples": 113793, + "num_queries": 6980, + "num_documents": 106813, + "min_document_length": 2, + "average_document_length": 0.6868920449757988, + "max_document_length": 61, + "unique_documents": 106813, + "min_query_length": 13, + "average_query_length": 1750.904871060172, + "max_query_length": 1709, + "unique_queries": 6980, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0654727793696275, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 7433, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MSMARCO-PL.json b/mteb/descriptive_stats/Retrieval/MSMARCO-PL.json new file mode 100644 index 0000000000..f82cbce0ae --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MSMARCO-PL.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 3097800368, + "num_samples": 8841866, + "num_queries": 43, + "num_documents": 8841823, + "min_document_length": 16, + "average_document_length": 0.00016060036487950506, + "max_document_length": 55, + "unique_documents": 8841823, + "min_query_length": 2, + "average_query_length": 72041836.0, + "max_query_length": 1727, + "unique_queries": 43, + "min_relevant_docs_per_query": 132, + "average_relevant_docs_per_query": 95.3953488372093, + "max_relevant_docs_per_query": 582, + "unique_relevant_docs": 9139, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MSMARCO-PLHardNegatives.json b/mteb/descriptive_stats/Retrieval/MSMARCO-PLHardNegatives.json new file mode 100644 index 0000000000..f4f404fe90 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MSMARCO-PLHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 3635939, + "num_samples": 9524, + "num_queries": 43, + "num_documents": 9481, + "min_document_length": 16, + "average_document_length": 0.14977323067187007, + "max_document_length": 55, + "unique_documents": 9481, + "min_query_length": 10, + "average_query_length": 84523.69767441861, + "max_query_length": 1619, + "unique_queries": 43, + "min_relevant_docs_per_query": 132, + "average_relevant_docs_per_query": 95.3953488372093, + "max_relevant_docs_per_query": 582, + "unique_relevant_docs": 9139, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MSMARCOHardNegatives.json b/mteb/descriptive_stats/Retrieval/MSMARCOHardNegatives.json new file mode 100644 index 0000000000..1d6d7b2bd5 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MSMARCOHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 3141044, + "num_samples": 8855, + "num_queries": 43, + "num_documents": 8812, + "min_document_length": 16, + "average_document_length": 0.15978211529732184, + "max_document_length": 55, + "unique_documents": 8812, + "min_query_length": 65, + "average_query_length": 73014.79069767441, + 
"max_query_length": 1111, + "unique_queries": 43, + "min_relevant_docs_per_query": 132, + "average_relevant_docs_per_query": 95.3953488372093, + "max_relevant_docs_per_query": 582, + "unique_relevant_docs": 9139, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MedicalQARetrieval.json b/mteb/descriptive_stats/Retrieval/MedicalQARetrieval.json new file mode 100644 index 0000000000..b513e5de41 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MedicalQARetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 2471858, + "num_samples": 4096, + "num_queries": 2048, + "num_documents": 2048, + "min_document_length": 16, + "average_document_length": 52.4794921875, + "max_document_length": 191, + "unique_documents": 2048, + "min_query_length": 7, + "average_query_length": 1154.482421875, + "max_query_length": 14442, + "unique_queries": 2048, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2048, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MedicalRetrieval.json b/mteb/descriptive_stats/Retrieval/MedicalRetrieval.json new file mode 100644 index 0000000000..909d60c736 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MedicalRetrieval.json @@ -0,0 +1,28 @@ +{ + "dev": { + "number_of_characters": 12344090, + "num_samples": 101999, + "num_queries": 1000, + 
"num_documents": 100999, + "min_document_length": 2, + "average_document_length": 0.17760571886850365, + "max_document_length": 110, + "unique_documents": 100999, + "min_query_length": 7, + "average_query_length": 12326.152, + "max_query_length": 512, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MintakaRetrieval.json b/mteb/descriptive_stats/Retrieval/MintakaRetrieval.json new file mode 100644 index 0000000000..3a3f9ea336 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MintakaRetrieval.json @@ -0,0 +1,238 @@ +{ + "test": { + "number_of_characters": 1209570, + "num_samples": 30048, + "num_queries": 17841, + "num_documents": 12207, + "min_document_length": 8, + "average_document_length": 85.71966904235275, + "max_document_length": 222, + "unique_documents": 12207, + "min_query_length": 1, + "average_query_length": 9.146908805560226, + "max_query_length": 86, + "unique_queries": 17841, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.000112101339611, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 12207, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "ar": { + "number_of_characters": 140762, + "num_samples": 3694, + "num_queries": 2203, + "num_documents": 1491, + "min_document_length": 
15, + "average_document_length": 81.67136150234742, + "max_document_length": 182, + "unique_documents": 1491, + "min_query_length": 2, + "average_query_length": 8.620063549704948, + "max_query_length": 71, + "unique_queries": 2203, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1491, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "de": { + "number_of_characters": 179124, + "num_samples": 4029, + "num_queries": 2374, + "num_documents": 1655, + "min_document_length": 17, + "average_document_length": 93.83141993957705, + "max_document_length": 200, + "unique_documents": 1655, + "min_query_length": 1, + "average_query_length": 10.039174389216512, + "max_query_length": 71, + "unique_queries": 2374, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1655, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 181473, + "num_samples": 4117, + "num_queries": 2424, + "num_documents": 1693, + "min_document_length": 20, + "average_document_length": 92.8984051978736, + "max_document_length": 181, + "unique_documents": 1693, + "min_query_length": 1, + "average_query_length": 9.981848184818482, + "max_query_length": 71, + "unique_queries": 2424, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1693, + "num_instructions": 
null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 192910, + "num_samples": 4156, + "num_queries": 2442, + "num_documents": 1714, + "min_document_length": 18, + "average_document_length": 98.14235705950992, + "max_document_length": 222, + "unique_documents": 1714, + "min_query_length": 1, + "average_query_length": 10.112203112203112, + "max_query_length": 71, + "unique_queries": 2442, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1714, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hi": { + "number_of_characters": 87874, + "num_samples": 2107, + "num_queries": 1337, + "num_documents": 770, + "min_document_length": 20, + "average_document_length": 101.41168831168831, + "max_document_length": 187, + "unique_documents": 770, + "min_query_length": 2, + "average_query_length": 7.320119670905012, + "max_query_length": 71, + "unique_queries": 1337, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 770, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "it": { + "number_of_characters": 178136, + "num_samples": 4059, + "num_queries": 2395, + "num_documents": 1664, + "min_document_length": 20, 
+ "average_document_length": 92.68689903846153, + "max_document_length": 185, + "unique_documents": 1664, + "min_query_length": 1, + "average_query_length": 9.981210855949895, + "max_query_length": 86, + "unique_queries": 2395, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0004175365344468, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1664, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ja": { + "number_of_characters": 83867, + "num_samples": 3904, + "num_queries": 2312, + "num_documents": 1592, + "min_document_length": 8, + "average_document_length": 43.51256281407035, + "max_document_length": 68, + "unique_documents": 1592, + "min_query_length": 1, + "average_query_length": 6.312716262975779, + "max_query_length": 71, + "unique_queries": 2312, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1592, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "pt": { + "number_of_characters": 165424, + "num_samples": 3982, + "num_queries": 2354, + "num_documents": 1628, + "min_document_length": 18, + "average_document_length": 87.36732186732186, + "max_document_length": 165, + "unique_documents": 1628, + "min_query_length": 1, + "average_query_length": 9.851316907391674, + "max_query_length": 80, + "unique_queries": 2354, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0004248088360237, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1628, 
+ "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NFCorpus-PL.json b/mteb/descriptive_stats/Retrieval/NFCorpus-PL.json new file mode 100644 index 0000000000..e983c08303 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NFCorpus-PL.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 6013927, + "num_samples": 3956, + "num_queries": 323, + "num_documents": 3633, + "min_document_length": 3, + "average_document_length": 2.1684558216350123, + "max_document_length": 96, + "unique_documents": 3633, + "min_query_length": 110, + "average_query_length": 18594.57894736842, + "max_query_length": 10705, + "unique_queries": 323, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 38.18575851393189, + "max_relevant_docs_per_query": 475, + "unique_relevant_docs": 3128, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NLPJournalAbsIntroRetrieval.json b/mteb/descriptive_stats/Retrieval/NLPJournalAbsIntroRetrieval.json new file mode 100644 index 0000000000..08c175272e --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NLPJournalAbsIntroRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 1214218, + "num_samples": 908, + "num_queries": 404, + "num_documents": 504, + "min_document_length": 120, + "average_document_length": 356.3015873015873, + "max_document_length": 1290, + "unique_documents": 504, + "min_query_length": 
304, + "average_query_length": 2560.9950495049507, + "max_query_length": 9565, + "unique_queries": 404, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 404, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NLPJournalTitleAbsRetrieval.json b/mteb/descriptive_stats/Retrieval/NLPJournalTitleAbsRetrieval.json new file mode 100644 index 0000000000..4ac76aa5be --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NLPJournalTitleAbsRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 233742, + "num_samples": 908, + "num_queries": 404, + "num_documents": 504, + "min_document_length": 5, + "average_document_length": 22.099206349206348, + "max_document_length": 71, + "unique_documents": 504, + "min_query_length": 120, + "average_query_length": 551.0, + "max_query_length": 1290, + "unique_queries": 404, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 404, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NLPJournalTitleIntroRetrieval.json b/mteb/descriptive_stats/Retrieval/NLPJournalTitleIntroRetrieval.json new file mode 100644 index 0000000000..1c49e377eb --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NLPJournalTitleIntroRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { 
+ "number_of_characters": 1045780, + "num_samples": 908, + "num_queries": 404, + "num_documents": 504, + "min_document_length": 5, + "average_document_length": 22.099206349206348, + "max_document_length": 71, + "unique_documents": 504, + "min_query_length": 304, + "average_query_length": 2560.9950495049507, + "max_query_length": 9565, + "unique_queries": 404, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 404, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NQ-PL.json b/mteb/descriptive_stats/Retrieval/NQ-PL.json new file mode 100644 index 0000000000..d44203e5ae --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NQ-PL.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 1349328700, + "num_samples": 2684920, + "num_queries": 3452, + "num_documents": 2681468, + "min_document_length": 18, + "average_document_length": 0.062200630400959474, + "max_document_length": 111, + "unique_documents": 2681468, + "min_query_length": 5, + "average_query_length": 390834.8525492468, + "max_query_length": 17008, + "unique_queries": 3452, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.2169756662804172, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 4201, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NQ-PLHardNegatives.json 
b/mteb/descriptive_stats/Retrieval/NQ-PLHardNegatives.json new file mode 100644 index 0000000000..6c6a35f437 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NQ-PLHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 113077430, + "num_samples": 185765, + "num_queries": 1000, + "num_documents": 184765, + "min_document_length": 18, + "average_document_length": 0.2618515411468622, + "max_document_length": 106, + "unique_documents": 184765, + "min_query_length": 5, + "average_query_length": 113029.049, + "max_query_length": 14247, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.213, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 1213, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NQ.json b/mteb/descriptive_stats/Retrieval/NQ.json new file mode 100644 index 0000000000..5df9862d66 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NQ.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 1322743518, + "num_samples": 2684920, + "num_queries": 3452, + "num_documents": 2681468, + "min_document_length": 25, + "average_document_length": 0.06202348862637928, + "max_document_length": 100, + "unique_documents": 2681468, + "min_query_length": 5, + "average_query_length": 383133.6048667439, + "max_query_length": 17008, + "unique_queries": 3452, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.2169756662804172, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 4201, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NQHardNegatives.json b/mteb/descriptive_stats/Retrieval/NQHardNegatives.json new file mode 100644 index 0000000000..5d68b54792 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NQHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 120068721, + "num_samples": 199779, + "num_queries": 1000, + "num_documents": 198779, + "min_document_length": 29, + "average_document_length": 0.24086045306596773, + "max_document_length": 94, + "unique_documents": 198779, + "min_query_length": 5, + "average_query_length": 120020.843, + "max_query_length": 17008, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.213, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 1213, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NarrativeQARetrieval.json b/mteb/descriptive_stats/Retrieval/NarrativeQARetrieval.json new file mode 100644 index 0000000000..e13d67496c --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NarrativeQARetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 116501399, + "num_samples": 10912, + "num_queries": 10557, + "num_documents": 355, + "min_document_length": 10, + "average_document_length": 1419.4225352112676, + "max_document_length": 1220, + "unique_documents": 355, + "min_query_length": 21216, + "average_query_length": 10987.73363644975, + "max_query_length": 1874086, + "unique_queries": 10557, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, 
+ "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 355, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NeuCLIR2022RetrievalHardNegatives.json b/mteb/descriptive_stats/Retrieval/NeuCLIR2022RetrievalHardNegatives.json new file mode 100644 index 0000000000..58ab161af5 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NeuCLIR2022RetrievalHardNegatives.json @@ -0,0 +1,108 @@ +{ + "test": { + "number_of_characters": 56422814, + "num_samples": 28067, + "num_queries": 136, + "num_documents": 27931, + "min_document_length": 12, + "average_document_length": 0.3093337152268089, + "max_document_length": 153, + "unique_documents": 27931, + "min_query_length": 0, + "average_query_length": 414810.10294117645, + "max_query_length": 23822, + "unique_queries": 136, + "min_relevant_docs_per_query": 348, + "average_relevant_docs_per_query": 40.39705882352941, + "max_relevant_docs_per_query": 1288, + "unique_relevant_docs": 94693, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "fas": { + "number_of_characters": 24554222, + "num_samples": 8927, + "num_queries": 45, + "num_documents": 8882, + "min_document_length": 40, + "average_document_length": 0.4218644449448322, + "max_document_length": 128, + "unique_documents": 8882, + "min_query_length": 0, + "average_query_length": 545566.1111111111, + "max_query_length": 23692, + "unique_queries": 45, + "min_relevant_docs_per_query": 348, + 
"average_relevant_docs_per_query": 32.71111111111111, + "max_relevant_docs_per_query": 1288, + "unique_relevant_docs": 31175, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "rus": { + "number_of_characters": 20783375, + "num_samples": 8768, + "num_queries": 44, + "num_documents": 8724, + "min_document_length": 29, + "average_document_length": 0.4315680880330124, + "max_document_length": 153, + "unique_documents": 8724, + "min_query_length": 0, + "average_query_length": 472263.86363636365, + "max_query_length": 23626, + "unique_queries": 44, + "min_relevant_docs_per_query": 364, + "average_relevant_docs_per_query": 42.93181818181818, + "max_relevant_docs_per_query": 1080, + "unique_relevant_docs": 30938, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho": { + "number_of_characters": 11085217, + "num_samples": 10372, + "num_queries": 47, + "num_documents": 10325, + "min_document_length": 12, + "average_document_length": 0.10924939467312349, + "max_document_length": 43, + "unique_documents": 10325, + "min_query_length": 0, + "average_query_length": 235831.68085106384, + "max_query_length": 23822, + "unique_queries": 47, + "min_relevant_docs_per_query": 470, + "average_relevant_docs_per_query": 45.38297872340426, + "max_relevant_docs_per_query": 1114, + "unique_relevant_docs": 32580, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NeuCLIR2023RetrievalHardNegatives.json b/mteb/descriptive_stats/Retrieval/NeuCLIR2023RetrievalHardNegatives.json new file mode 100644 index 0000000000..faa1046d0e --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NeuCLIR2023RetrievalHardNegatives.json @@ -0,0 +1,108 @@ +{ + "test": { + "number_of_characters": 108197361, + "num_samples": 49657, + "num_queries": 224, + "num_documents": 49433, + "min_document_length": 10, + "average_document_length": 0.24516011571217608, + "max_document_length": 135, + "unique_documents": 49433, + "min_query_length": 0, + "average_query_length": 482969.83035714284, + "max_query_length": 23987, + "unique_queries": 224, + "min_relevant_docs_per_query": 94, + "average_relevant_docs_per_query": 61.816964285714285, + "max_relevant_docs_per_query": 801, + "unique_relevant_docs": 75273, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "fas": { + "number_of_characters": 45268849, + "num_samples": 15995, + "num_queries": 74, + "num_documents": 15921, + "min_document_length": 25, + "average_document_length": 0.30626216946171725, + "max_document_length": 126, + "unique_documents": 15921, + "min_query_length": 0, + "average_query_length": 611675.3108108108, + "max_query_length": 23987, + "unique_queries": 74, + "min_relevant_docs_per_query": 94, + "average_relevant_docs_per_query": 68.08108108108108, + "max_relevant_docs_per_query": 801, + "unique_relevant_docs": 24476, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "rus": { + "number_of_characters": 43207455, + "num_samples": 16322, + "num_queries": 75, + "num_documents": 16247, + "min_document_length": 26, + "average_document_length": 0.34350957099772267, + "max_document_length": 135, + "unique_documents": 16247, + "min_query_length": 0, + "average_query_length": 576024.9866666667, + "max_query_length": 23814, + "unique_queries": 75, + "min_relevant_docs_per_query": 119, + "average_relevant_docs_per_query": 63.053333333333335, + "max_relevant_docs_per_query": 730, + "unique_relevant_docs": 24619, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho": { + "number_of_characters": 19721057, + "num_samples": 17340, + "num_queries": 75, + "num_documents": 17265, + "min_document_length": 10, + "average_document_length": 0.09626411815812337, + "max_document_length": 44, + "unique_documents": 17265, + "min_query_length": 0, + "average_query_length": 262925.26666666666, + "max_query_length": 21556, + "unique_queries": 75, + "min_relevant_docs_per_query": 136, + "average_relevant_docs_per_query": 54.4, + "max_relevant_docs_per_query": 777, + "unique_relevant_docs": 26178, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NorQuadRetrieval.json b/mteb/descriptive_stats/Retrieval/NorQuadRetrieval.json new file mode 100644 index 0000000000..236c823e12 --- 
/dev/null +++ b/mteb/descriptive_stats/Retrieval/NorQuadRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 273854, + "num_samples": 2072, + "num_queries": 1024, + "num_documents": 1048, + "min_document_length": 11, + "average_document_length": 46.79961832061068, + "max_document_length": 100, + "unique_documents": 1048, + "min_query_length": 1, + "average_query_length": 219.5390625, + "max_query_length": 2606, + "unique_queries": 1024, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1328, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/PIQA.json b/mteb/descriptive_stats/Retrieval/PIQA.json new file mode 100644 index 0000000000..4af121c529 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/PIQA.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 3652153, + "num_samples": 37380, + "num_queries": 1838, + "num_documents": 35542, + "min_document_length": 3, + "average_document_length": 1.8658488548759213, + "max_document_length": 116, + "unique_documents": 35542, + "min_query_length": 4, + "average_query_length": 1950.9450489662677, + "max_query_length": 2187, + "unique_queries": 1838, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1838, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff 
--git a/mteb/descriptive_stats/Retrieval/PublicHealthQA.json b/mteb/descriptive_stats/Retrieval/PublicHealthQA.json new file mode 100644 index 0000000000..80d408468b --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/PublicHealthQA.json @@ -0,0 +1,247 @@ +{ + "test": { + "number_of_characters": 692595, + "num_samples": 1776, + "num_queries": 888, + "num_documents": 888, + "min_document_length": 7, + "average_document_length": 67.0608108108108, + "max_document_length": 310, + "unique_documents": 888, + "min_query_length": 24, + "average_query_length": 712.8885135135135, + "max_query_length": 4976, + "unique_queries": 888, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 888, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "arabic": { + "number_of_characters": 79676, + "num_samples": 174, + "num_queries": 87, + "num_documents": 87, + "min_document_length": 19, + "average_document_length": 78.93103448275862, + "max_document_length": 310, + "unique_documents": 87, + "min_query_length": 181, + "average_query_length": 836.8850574712644, + "max_query_length": 3749, + "unique_queries": 87, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 87, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "chinese": { + "number_of_characters": 43099, + "num_samples": 326, + 
"num_queries": 163, + "num_documents": 163, + "min_document_length": 7, + "average_document_length": 24.828220858895705, + "max_document_length": 60, + "unique_documents": 163, + "min_query_length": 24, + "average_query_length": 239.58282208588957, + "max_query_length": 1208, + "unique_queries": 163, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 163, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "english": { + "number_of_characters": 149834, + "num_samples": 344, + "num_queries": 172, + "num_documents": 172, + "min_document_length": 17, + "average_document_length": 71.78488372093024, + "max_document_length": 201, + "unique_documents": 172, + "min_query_length": 69, + "average_query_length": 799.3430232558139, + "max_query_length": 3896, + "unique_queries": 172, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 172, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "french": { + "number_of_characters": 95503, + "num_samples": 170, + "num_queries": 85, + "num_documents": 85, + "min_document_length": 27, + "average_document_length": 101.88235294117646, + "max_document_length": 255, + "unique_documents": 85, + "min_query_length": 210, + "average_query_length": 1021.6823529411764, + "max_query_length": 4320, + "unique_queries": 85, + "none_queries": 0, + "min_relevant_docs_per_query": 1, 
+ "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 85, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "korean": { + "number_of_characters": 28945, + "num_samples": 154, + "num_queries": 77, + "num_documents": 77, + "min_document_length": 11, + "average_document_length": 36.90909090909091, + "max_document_length": 90, + "unique_documents": 77, + "min_query_length": 38, + "average_query_length": 339.0, + "max_query_length": 1289, + "unique_queries": 77, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 77, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "russian": { + "number_of_characters": 69570, + "num_samples": 130, + "num_queries": 65, + "num_documents": 65, + "min_document_length": 15, + "average_document_length": 85.2, + "max_document_length": 275, + "unique_documents": 65, + "min_query_length": 175, + "average_query_length": 985.1076923076923, + "max_query_length": 4559, + "unique_queries": 65, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 65, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + 
"spanish": { + "number_of_characters": 166187, + "num_samples": 324, + "num_queries": 162, + "num_documents": 162, + "min_document_length": 20, + "average_document_length": 84.67901234567901, + "max_document_length": 285, + "unique_documents": 162, + "min_query_length": 86, + "average_query_length": 941.1666666666666, + "max_query_length": 4976, + "unique_queries": 162, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 162, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vietnamese": { + "number_of_characters": 59781, + "num_samples": 154, + "num_queries": 77, + "num_documents": 77, + "min_document_length": 26, + "average_document_length": 71.83116883116882, + "max_document_length": 180, + "unique_documents": 77, + "min_query_length": 69, + "average_query_length": 704.5454545454545, + "max_query_length": 2604, + "unique_queries": 77, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 77, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Quail.json b/mteb/descriptive_stats/Retrieval/Quail.json new file mode 100644 index 0000000000..41a06a2007 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/Quail.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 6258716, + "num_samples": 35507, + 
"num_queries": 2720, + "num_documents": 32787, + "min_document_length": 1520, + "average_document_length": 162.38228566200019, + "max_document_length": 2587, + "unique_documents": 32787, + "min_query_length": 2, + "average_query_length": 343.63529411764705, + "max_query_length": 161, + "unique_queries": 2720, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2274, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Quora-PL.json b/mteb/descriptive_stats/Retrieval/Quora-PL.json new file mode 100644 index 0000000000..6243b649e4 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/Quora-PL.json @@ -0,0 +1,54 @@ +{ + "validation": { + "number_of_characters": 35217726, + "num_samples": 527931, + "num_queries": 5000, + "num_documents": 522931, + "min_document_length": 11, + "average_document_length": 0.5220631402613347, + "max_document_length": 317, + "unique_documents": 522931, + "min_query_length": 2, + "average_query_length": 6988.9446, + "max_query_length": 1266, + "unique_queries": 5000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.5252, + "max_relevant_docs_per_query": 84, + "unique_relevant_docs": 7626, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test": { + "number_of_characters": 35490077, + "num_samples": 532931, + "num_queries": 10000, + "num_documents": 522931, + "min_document_length": 2, + 
"average_document_length": 1.0428794621087676, + "max_document_length": 270, + "unique_documents": 522931, + "min_query_length": 2, + "average_query_length": 3494.4723, + "max_query_length": 1266, + "unique_queries": 10000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.5675, + "max_relevant_docs_per_query": 75, + "unique_relevant_docs": 15675, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Quora-PLHardNegatives.json b/mteb/descriptive_stats/Retrieval/Quora-PLHardNegatives.json new file mode 100644 index 0000000000..f797184aaf --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/Quora-PLHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 11885329, + "num_samples": 173031, + "num_queries": 1000, + "num_documents": 172031, + "min_document_length": 2, + "average_document_length": 0.3130017264330266, + "max_document_length": 187, + "unique_documents": 172031, + "min_query_length": 2, + "average_query_length": 11831.483, + "max_query_length": 1266, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.641, + "max_relevant_docs_per_query": 34, + "unique_relevant_docs": 1641, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/QuoraRetrieval.json b/mteb/descriptive_stats/Retrieval/QuoraRetrieval.json new file mode 100644 index 0000000000..05053f3d4c --- /dev/null +++ 
b/mteb/descriptive_stats/Retrieval/QuoraRetrieval.json @@ -0,0 +1,54 @@ +{ + "dev": { + "number_of_characters": 33285028, + "num_samples": 527931, + "num_queries": 5000, + "num_documents": 522931, + "min_document_length": 12, + "average_document_length": 0.49274378455283774, + "max_document_length": 268, + "unique_documents": 522931, + "min_query_length": 2, + "average_query_length": 6605.4714, + "max_query_length": 1170, + "unique_queries": 5000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.5252, + "max_relevant_docs_per_query": 84, + "unique_relevant_docs": 7626, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test": { + "number_of_characters": 33542753, + "num_samples": 532931, + "num_queries": 10000, + "num_documents": 522931, + "min_document_length": 2, + "average_document_length": 0.9855908332074403, + "max_document_length": 258, + "unique_documents": 522931, + "min_query_length": 2, + "average_query_length": 3302.7357, + "max_query_length": 1170, + "unique_queries": 10000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.5675, + "max_relevant_docs_per_query": 75, + "unique_relevant_docs": 15675, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.json b/mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.json new file mode 100644 index 0000000000..aa1cb8b4fd --- /dev/null +++ 
b/mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 10675629, + "num_samples": 178163, + "num_queries": 1000, + "num_documents": 177163, + "min_document_length": 2, + "average_document_length": 0.28915744258112586, + "max_document_length": 180, + "unique_documents": 177163, + "min_query_length": 2, + "average_query_length": 10624.401, + "max_query_length": 582, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.641, + "max_relevant_docs_per_query": 34, + "unique_relevant_docs": 1641, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/RARbCode.json b/mteb/descriptive_stats/Retrieval/RARbCode.json new file mode 100644 index 0000000000..8b657358b6 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/RARbCode.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 240139724, + "num_samples": 302966, + "num_queries": 1484, + "num_documents": 301482, + "min_document_length": 37, + "average_document_length": 1.849576425789931, + "max_document_length": 1512, + "unique_documents": 301482, + "min_query_length": 17, + "average_query_length": 161443.4703504043, + "max_query_length": 11365, + "unique_queries": 1484, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1484, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline 
at end of file diff --git a/mteb/descriptive_stats/Retrieval/RARbMath.json b/mteb/descriptive_stats/Retrieval/RARbMath.json new file mode 100644 index 0000000000..88a19cb11d --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/RARbMath.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 197971515, + "num_samples": 395695, + "num_queries": 6319, + "num_documents": 389376, + "min_document_length": 25, + "average_document_length": 3.412978714661407, + "max_document_length": 2837, + "unique_documents": 389376, + "min_query_length": 16, + "average_query_length": 31119.256686184523, + "max_query_length": 5368, + "unique_queries": 6319, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 6319, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/RiaNewsRetrieval.json b/mteb/descriptive_stats/Retrieval/RiaNewsRetrieval.json new file mode 100644 index 0000000000..bfbeab6b4e --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/RiaNewsRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 822341995, + "num_samples": 714344, + "num_queries": 10000, + "num_documents": 704344, + "min_document_length": 4, + "average_document_length": 0.8859719114523585, + "max_document_length": 100, + "unique_documents": 704344, + "min_query_length": 1, + "average_query_length": 82171.7966, + "max_query_length": 2001, + "unique_queries": 10000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 10000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": 
null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.json b/mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.json new file mode 100644 index 0000000000..3955ec2cd5 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 234657607, + "num_samples": 192237, + "num_queries": 1000, + "num_documents": 191237, + "min_document_length": 4, + "average_document_length": 0.32597248440416865, + "max_document_length": 85, + "unique_documents": 191237, + "min_query_length": 1, + "average_query_length": 234595.269, + "max_query_length": 2001, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/RuBQRetrieval.json b/mteb/descriptive_stats/Retrieval/RuBQRetrieval.json new file mode 100644 index 0000000000..70a7bd0d69 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/RuBQRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 25645306, + "num_samples": 58518, + "num_queries": 1692, + "num_documents": 56826, + "min_document_length": 13, + "average_document_length": 1.3486960194277267, + "max_document_length": 152, + "unique_documents": 56826, + "min_query_length": 2, + "average_query_length": 15111.504137115839, + "max_query_length": 11011, + "unique_queries": 
1692, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.6814420803782506, + "max_relevant_docs_per_query": 7, + "unique_relevant_docs": 2710, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SCIDOCS-PL.json b/mteb/descriptive_stats/Retrieval/SCIDOCS-PL.json new file mode 100644 index 0000000000..0d1787959c --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SCIDOCS-PL.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 32692750, + "num_samples": 26657, + "num_queries": 1000, + "num_documents": 25657, + "min_document_length": 14, + "average_document_length": 3.144210157072144, + "max_document_length": 235, + "unique_documents": 25657, + "min_query_length": 12, + "average_query_length": 32612.079, + "max_query_length": 11840, + "unique_queries": 1000, + "min_relevant_docs_per_query": 27, + "average_relevant_docs_per_query": 4.928, + "max_relevant_docs_per_query": 30, + "unique_relevant_docs": 25657, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SCIDOCS.json b/mteb/descriptive_stats/Retrieval/SCIDOCS.json new file mode 100644 index 0000000000..f1b0b36c97 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SCIDOCS.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 30972050, + "num_samples": 26657, + "num_queries": 1000, + "num_documents": 25657, + "min_document_length": 16, + "average_document_length": 
2.791908640916709, + "max_document_length": 206, + "unique_documents": 25657, + "min_query_length": 11, + "average_query_length": 30900.418, + "max_query_length": 10169, + "unique_queries": 1000, + "min_relevant_docs_per_query": 27, + "average_relevant_docs_per_query": 4.928, + "max_relevant_docs_per_query": 30, + "unique_relevant_docs": 25657, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SIQA.json b/mteb/descriptive_stats/Retrieval/SIQA.json new file mode 100644 index 0000000000..c63d2a4ead --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SIQA.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 1957909, + "num_samples": 73230, + "num_queries": 1954, + "num_documents": 71276, + "min_document_length": 70, + "average_document_length": 3.502314944721926, + "max_document_length": 276, + "unique_documents": 71276, + "min_query_length": 3, + "average_query_length": 874.2466734902764, + "max_query_length": 170, + "unique_queries": 1954, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1769, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SKQuadRetrieval.json b/mteb/descriptive_stats/Retrieval/SKQuadRetrieval.json new file mode 100644 index 0000000000..99f58b5129 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SKQuadRetrieval.json @@ -0,0 +1,28 @@ +{ + 
"test": { + "number_of_characters": 7706966, + "num_samples": 7611, + "num_queries": 1134, + "num_documents": 6477, + "min_document_length": 11, + "average_document_length": 9.390304153157325, + "max_document_length": 173, + "unique_documents": 6477, + "min_query_length": 8, + "average_query_length": 6742.632275132275, + "max_query_length": 76886, + "unique_queries": 1134, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.853615520282187, + "max_relevant_docs_per_query": 11, + "unique_relevant_docs": 6461, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SNLRetrieval.json b/mteb/descriptive_stats/Retrieval/SNLRetrieval.json new file mode 100644 index 0000000000..50c79436e7 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SNLRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 2602407, + "num_samples": 2600, + "num_queries": 1300, + "num_documents": 1300, + "min_document_length": 2, + "average_document_length": 14.906153846153845, + "max_document_length": 64, + "unique_documents": 1300, + "min_query_length": 400, + "average_query_length": 1986.9453846153847, + "max_query_length": 68710, + "unique_queries": 1300, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1300, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Retrieval/SadeemQuestionRetrieval.json b/mteb/descriptive_stats/Retrieval/SadeemQuestionRetrieval.json new file mode 100644 index 0000000000..4c4d7c5f27 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SadeemQuestionRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 10203228, + "num_samples": 25068, + "num_queries": 2089, + "num_documents": 22979, + "min_document_length": 12, + "average_document_length": 6.116454153792593, + "max_document_length": 163, + "unique_documents": 22979, + "min_query_length": 101, + "average_query_length": 4816.983245572044, + "max_query_length": 4944, + "unique_queries": 2089, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2089, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SciFact-PL.json b/mteb/descriptive_stats/Retrieval/SciFact-PL.json new file mode 100644 index 0000000000..d8cde257d0 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SciFact-PL.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 8085698, + "num_samples": 5483, + "num_queries": 300, + "num_documents": 5183, + "min_document_length": 27, + "average_document_length": 5.524213775805518, + "max_document_length": 227, + "unique_documents": 5183, + "min_query_length": 233, + "average_query_length": 26856.886666666665, + "max_query_length": 10870, + "unique_queries": 300, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.13, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 283, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SciFact.json b/mteb/descriptive_stats/Retrieval/SciFact.json new file mode 100644 index 0000000000..9a38e491ec --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SciFact.json @@ -0,0 +1,54 @@ +{ + "train": { + "number_of_characters": 7843137, + "num_samples": 5992, + "num_queries": 809, + "num_documents": 5183, + "min_document_length": 26, + "average_document_length": 13.827513023345553, + "max_document_length": 249, + "unique_documents": 5183, + "min_query_length": 221, + "average_query_length": 9606.265760197775, + "max_query_length": 10127, + "unique_queries": 809, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1359703337453646, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 565, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test": { + "number_of_characters": 7798573, + "num_samples": 5483, + "num_queries": 300, + "num_documents": 5183, + "min_document_length": 28, + "average_document_length": 5.229403820181362, + "max_document_length": 204, + "unique_documents": 5183, + "min_query_length": 221, + "average_query_length": 25904.896666666667, + "max_query_length": 10127, + "unique_queries": 300, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.13, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 283, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, 
+ "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SlovakSumRetrieval.json b/mteb/descriptive_stats/Retrieval/SlovakSumRetrieval.json new file mode 100644 index 0000000000..a4b9ce59c7 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SlovakSumRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 1339304, + "num_samples": 1200, + "num_queries": 600, + "num_documents": 600, + "min_document_length": 25, + "average_document_length": 143.59833333333333, + "max_document_length": 606, + "unique_documents": 600, + "min_query_length": 177, + "average_query_length": 2088.575, + "max_query_length": 22445, + "unique_queries": 600, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 600, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2P.json b/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2P.json new file mode 100644 index 0000000000..7d6b12791f --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2P.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 26460964, + "num_samples": 10204, + "num_queries": 167, + "num_documents": 10037, + "min_document_length": 24, + "average_document_length": 1.1240410481219487, + "max_document_length": 116, + "unique_documents": 10037, + "min_query_length": 22, + "average_query_length": 158381.32934131735, + "max_query_length": 22035, + "unique_queries": 167, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 6.053892215568863, + 
"max_relevant_docs_per_query": 19, + "unique_relevant_docs": 124, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2S.json b/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2S.json new file mode 100644 index 0000000000..f7204e72de --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2S.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 126449, + "num_samples": 432, + "num_queries": 167, + "num_documents": 265, + "min_document_length": 24, + "average_document_length": 42.573584905660375, + "max_document_length": 116, + "unique_documents": 265, + "min_query_length": 51, + "average_query_length": 689.622754491018, + "max_query_length": 2084, + "unique_queries": 167, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 7.718562874251497, + "max_relevant_docs_per_query": 24, + "unique_relevant_docs": 265, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SpartQA.json b/mteb/descriptive_stats/Retrieval/SpartQA.json new file mode 100644 index 0000000000..97df0dac4c --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SpartQA.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 2440343, + "num_samples": 5186, + "num_queries": 3594, + "num_documents": 1592, + "min_document_length": 380, + "average_document_length": 1481.4704773869346, + "max_document_length": 1442, + 
"unique_documents": 1592, + "min_query_length": 8, + "average_query_length": 22.771841958820257, + "max_query_length": 91, + "unique_queries": 3594, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.8786867000556482, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 515, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/StatcanDialogueDatasetRetrieval.json b/mteb/descriptive_stats/Retrieval/StatcanDialogueDatasetRetrieval.json new file mode 100644 index 0000000000..9e91322e47 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/StatcanDialogueDatasetRetrieval.json @@ -0,0 +1,162 @@ +{ + "dev": { + "number_of_characters": 80426667, + "num_samples": 12479, + "num_queries": 665, + "num_documents": 11814, + "min_document_length": 2, + "average_document_length": 0.7731504994074826, + "max_document_length": 2, + "unique_documents": 11814, + "min_query_length": 257, + "average_query_length": 120928.62105263158, + "max_query_length": 398046, + "unique_queries": 665, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.5037593984962405, + "max_relevant_docs_per_query": 11, + "unique_relevant_docs": 437, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "english": { + "number_of_characters": 38614817, + "num_samples": 6450, + "num_queries": 543, + "num_documents": 5907, + "min_document_length": 2, + "average_document_length": 1.2629084137464026, + 
"max_document_length": 2, + "unique_documents": 5907, + "min_query_length": 257, + "average_query_length": 71100.10497237569, + "max_query_length": 391133, + "unique_queries": 543, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.4714548802946592, + "max_relevant_docs_per_query": 11, + "unique_relevant_docs": 349, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "french": { + "number_of_characters": 41811850, + "num_samples": 6029, + "num_queries": 122, + "num_documents": 5907, + "min_document_length": 2, + "average_document_length": 0.28339258506856274, + "max_document_length": 2, + "unique_documents": 5907, + "min_query_length": 268, + "average_query_length": 342706.3606557377, + "max_query_length": 398046, + "unique_queries": 122, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.6475409836065573, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 88, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "test": { + "number_of_characters": 80427271, + "num_samples": 12475, + "num_queries": 661, + "num_documents": 11814, + "min_document_length": 2, + "average_document_length": 0.824276282376841, + "max_document_length": 2, + "unique_documents": 11814, + "min_query_length": 257, + "average_query_length": 121660.41301059001, + "max_query_length": 398046, + "unique_queries": 661, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.529500756429652, + "max_relevant_docs_per_query": 11, + "unique_relevant_docs": 472, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "english": { + "number_of_characters": 38615819, + "num_samples": 6460, + "num_queries": 553, + "num_documents": 5907, + "min_document_length": 2, + "average_document_length": 1.4325376671745387, + "max_document_length": 2, + "unique_documents": 5907, + "min_query_length": 257, + "average_query_length": 69814.38878842676, + "max_query_length": 391133, + "unique_queries": 553, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.573236889692586, + "max_relevant_docs_per_query": 11, + "unique_relevant_docs": 388, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "french": { + "number_of_characters": 41811452, + "num_samples": 6015, + "num_queries": 108, + "num_documents": 5907, + "min_document_length": 2, + "average_document_length": 0.21601489757914338, + "max_document_length": 2, + "unique_documents": 5907, + "min_query_length": 268, + "average_query_length": 387131.25925925927, + "max_query_length": 398046, + "unique_queries": 108, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.3055555555555556, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 84, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff 
--git a/mteb/descriptive_stats/Retrieval/SweFaqRetrieval.json b/mteb/descriptive_stats/Retrieval/SweFaqRetrieval.json new file mode 100644 index 0000000000..94c3d40385 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SweFaqRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 199616, + "num_samples": 1024, + "num_queries": 513, + "num_documents": 511, + "min_document_length": 11, + "average_document_length": 70.79060665362036, + "max_document_length": 229, + "unique_documents": 511, + "min_query_length": 31, + "average_query_length": 318.6003898635478, + "max_query_length": 1904, + "unique_queries": 513, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 511, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SwednRetrieval.json b/mteb/descriptive_stats/Retrieval/SwednRetrieval.json new file mode 100644 index 0000000000..9995eb778f --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SwednRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 5973257, + "num_samples": 3070, + "num_queries": 1024, + "num_documents": 2046, + "min_document_length": 6, + "average_document_length": 22.960899315738025, + "max_document_length": 122, + "unique_documents": 2046, + "min_query_length": 63, + "average_query_length": 5787.3818359375, + "max_query_length": 33779, + "unique_queries": 1024, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2046, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SyntecRetrieval.json b/mteb/descriptive_stats/Retrieval/SyntecRetrieval.json new file mode 100644 index 0000000000..79d46dd193 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/SyntecRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 113404, + "num_samples": 190, + "num_queries": 100, + "num_documents": 90, + "min_document_length": 18, + "average_document_length": 80.91111111111111, + "max_document_length": 175, + "unique_documents": 90, + "min_query_length": 51, + "average_query_length": 1061.22, + "max_query_length": 6874, + "unique_queries": 100, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 53, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/T2Retrieval.json b/mteb/descriptive_stats/Retrieval/T2Retrieval.json new file mode 100644 index 0000000000..77ad018985 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/T2Retrieval.json @@ -0,0 +1,28 @@ +{ + "dev": { + "number_of_characters": 103924352, + "num_samples": 141417, + "num_queries": 22812, + "num_documents": 118605, + "min_document_length": 4, + "average_document_length": 2.1039332237258126, + "max_document_length": 31, + "unique_documents": 118605, + "min_query_length": 1, + "average_query_length": 4544.7490355953005, + "max_query_length": 42956, + "unique_queries": 22812, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 5.213571804313519, + "max_relevant_docs_per_query": 62, + "unique_relevant_docs": 118605, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TRECCOVID-PL.json b/mteb/descriptive_stats/Retrieval/TRECCOVID-PL.json new file mode 100644 index 0000000000..a0fc70975b --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TRECCOVID-PL.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 198886004, + "num_samples": 171382, + "num_queries": 50, + "num_documents": 171332, + "min_document_length": 30, + "average_document_length": 0.020258912520720006, + "max_document_length": 199, + "unique_documents": 171332, + "min_query_length": 1, + "average_query_length": 3977650.66, + "max_query_length": 122472, + "unique_queries": 50, + "min_relevant_docs_per_query": 631, + "average_relevant_docs_per_query": 493.5, + "max_relevant_docs_per_query": 1941, + "unique_relevant_docs": 35480, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TRECCOVID.json b/mteb/descriptive_stats/Retrieval/TRECCOVID.json new file mode 100644 index 0000000000..b9d80db016 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TRECCOVID.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 191508678, + "num_samples": 171382, + "num_queries": 50, + "num_documents": 171332, + "min_document_length": 30, + "average_document_length": 0.020206382929050033, + 
"max_document_length": 165, + "unique_documents": 171332, + "min_query_length": 1, + "average_query_length": 3830104.32, + "max_query_length": 122459, + "unique_queries": 50, + "min_relevant_docs_per_query": 631, + "average_relevant_docs_per_query": 493.5, + "max_relevant_docs_per_query": 1941, + "unique_relevant_docs": 35480, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TV2Nordretrieval.json b/mteb/descriptive_stats/Retrieval/TV2Nordretrieval.json new file mode 100644 index 0000000000..e1a96bb010 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TV2Nordretrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 3209663, + "num_samples": 4096, + "num_queries": 2048, + "num_documents": 2048, + "min_document_length": 25, + "average_document_length": 126.552734375, + "max_document_length": 400, + "unique_documents": 2048, + "min_query_length": 28, + "average_query_length": 1440.66552734375, + "max_query_length": 15619, + "unique_queries": 2048, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2048, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL1.json b/mteb/descriptive_stats/Retrieval/TempReasonL1.json new file mode 100644 index 0000000000..e5be9a73bd --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TempReasonL1.json @@ -0,0 +1,28 @@ +{ 
+ "test": { + "number_of_characters": 325808, + "num_samples": 16504, + "num_queries": 4000, + "num_documents": 12504, + "min_document_length": 39, + "average_document_length": 16.066458733205373, + "max_document_length": 54, + "unique_documents": 12504, + "min_query_length": 9, + "average_query_length": 31.22825, + "max_query_length": 10, + "unique_queries": 4000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3397, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL2Context.json b/mteb/descriptive_stats/Retrieval/TempReasonL2Context.json new file mode 100644 index 0000000000..70f2d53e95 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TempReasonL2Context.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 64656976, + "num_samples": 21184, + "num_queries": 5397, + "num_documents": 15787, + "min_document_length": 409, + "average_document_length": 4074.759929055552, + "max_document_length": 115754, + "unique_documents": 15787, + "min_query_length": 3, + "average_query_length": 60.911802853437095, + "max_query_length": 141, + "unique_queries": 5397, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3781, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Retrieval/TempReasonL2Fact.json b/mteb/descriptive_stats/Retrieval/TempReasonL2Fact.json new file mode 100644 index 0000000000..72dc39f22c --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TempReasonL2Fact.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 4812174, + "num_samples": 21184, + "num_queries": 5397, + "num_documents": 15787, + "min_document_length": 230, + "average_document_length": 283.9952492557167, + "max_document_length": 4822, + "unique_documents": 15787, + "min_query_length": 3, + "average_query_length": 60.911802853437095, + "max_query_length": 141, + "unique_queries": 5397, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3781, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL2Pure.json b/mteb/descriptive_stats/Retrieval/TempReasonL2Pure.json new file mode 100644 index 0000000000..e83bde9b37 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TempReasonL2Pure.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 630654, + "num_samples": 21184, + "num_queries": 5397, + "num_documents": 15787, + "min_document_length": 36, + "average_document_length": 19.12415278393615, + "max_document_length": 90, + "unique_documents": 15787, + "min_query_length": 3, + "average_query_length": 60.911802853437095, + "max_query_length": 141, + "unique_queries": 5397, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3781, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL3Context.json b/mteb/descriptive_stats/Retrieval/TempReasonL3Context.json new file mode 100644 index 0000000000..93206cedc7 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TempReasonL3Context.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 59743321, + "num_samples": 20090, + "num_queries": 4426, + "num_documents": 15664, + "min_document_length": 412, + "average_document_length": 3793.2473186925436, + "max_document_length": 115787, + "unique_documents": 15664, + "min_query_length": 3, + "average_query_length": 73.63194758246723, + "max_query_length": 141, + "unique_queries": 4426, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2735, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL3Fact.json b/mteb/descriptive_stats/Retrieval/TempReasonL3Fact.json new file mode 100644 index 0000000000..48b4e4cc37 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TempReasonL3Fact.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 4291925, + "num_samples": 20090, + "num_queries": 4426, + "num_documents": 15664, + "min_document_length": 232, + "average_document_length": 253.19394790602655, + "max_document_length": 4791, + "unique_documents": 15664, + "min_query_length": 3, + "average_query_length": 73.63194758246723, + "max_query_length": 141, + "unique_queries": 4426, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2735, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL3Pure.json b/mteb/descriptive_stats/Retrieval/TempReasonL3Pure.json new file mode 100644 index 0000000000..57b125d8a3 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TempReasonL3Pure.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 655367, + "num_samples": 20090, + "num_queries": 4426, + "num_documents": 15664, + "min_document_length": 39, + "average_document_length": 21.03370786516854, + "max_document_length": 142, + "unique_documents": 15664, + "min_query_length": 3, + "average_query_length": 73.63194758246723, + "max_query_length": 141, + "unique_queries": 4426, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2735, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TopiOCQAHardNegatives.json b/mteb/descriptive_stats/Retrieval/TopiOCQAHardNegatives.json new file mode 100644 index 0000000000..767d034174 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TopiOCQAHardNegatives.json @@ -0,0 +1,28 @@ +{ + "validation": { + "number_of_characters": 48554965, + "num_samples": 90933, + "num_queries": 1000, + "num_documents": 89933, + "min_document_length": 1, 
+ "average_document_length": 0.1428841470872761, + "max_document_length": 31, + "unique_documents": 89933, + "min_query_length": 14, + "average_query_length": 48542.115, + "max_query_length": 6136, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 888, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TurHistQuadRetrieval.json b/mteb/descriptive_stats/Retrieval/TurHistQuadRetrieval.json new file mode 100644 index 0000000000..37c1ca2ad2 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TurHistQuadRetrieval.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 272814, + "num_samples": 2237, + "num_queries": 1024, + "num_documents": 1213, + "min_document_length": 13, + "average_document_length": 52.78730420445177, + "max_document_length": 199, + "unique_documents": 1213, + "min_query_length": 1, + "average_query_length": 203.8896484375, + "max_query_length": 10521, + "unique_queries": 1024, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 1213, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TwitterHjerneRetrieval.json b/mteb/descriptive_stats/Retrieval/TwitterHjerneRetrieval.json new file mode 100644 index 0000000000..f60a7414ba --- /dev/null +++ 
b/mteb/descriptive_stats/Retrieval/TwitterHjerneRetrieval.json @@ -0,0 +1,28 @@ +{ + "train": { + "number_of_characters": 46737, + "num_samples": 340, + "num_queries": 78, + "num_documents": 262, + "min_document_length": 48, + "average_document_length": 49.534351145038165, + "max_document_length": 383, + "unique_documents": 262, + "min_query_length": 16, + "average_query_length": 432.8076923076923, + "max_query_length": 595, + "unique_queries": 78, + "min_relevant_docs_per_query": 0, + "average_relevant_docs_per_query": 3.358974358974359, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 262, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/VideoRetrieval.json b/mteb/descriptive_stats/Retrieval/VideoRetrieval.json new file mode 100644 index 0000000000..2498aaa578 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/VideoRetrieval.json @@ -0,0 +1,28 @@ +{ + "dev": { + "number_of_characters": 3141126, + "num_samples": 101930, + "num_queries": 1000, + "num_documents": 100930, + "min_document_length": 2, + "average_document_length": 0.07297136629347072, + "max_document_length": 19, + "unique_documents": 100930, + "min_query_length": 1, + "average_query_length": 3133.761, + "max_query_length": 5869, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No 
newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/VieQuADRetrieval.json b/mteb/descriptive_stats/Retrieval/VieQuADRetrieval.json new file mode 100644 index 0000000000..043e2d1f72 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/VieQuADRetrieval.json @@ -0,0 +1,28 @@ +{ + "validation": { + "number_of_characters": 657355, + "num_samples": 4538, + "num_queries": 2048, + "num_documents": 2490, + "min_document_length": 10, + "average_document_length": 53.8855421686747, + "max_document_length": 245, + "unique_documents": 2490, + "min_query_length": 1, + "average_query_length": 255.458984375, + "max_query_length": 2852, + "unique_queries": 2048, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2490, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/WikipediaRetrievalMultilingual.json b/mteb/descriptive_stats/Retrieval/WikipediaRetrievalMultilingual.json new file mode 100644 index 0000000000..495ee557ea --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/WikipediaRetrievalMultilingual.json @@ -0,0 +1,446 @@ +{ + "test": { + "number_of_characters": 83866932, + "num_samples": 240000, + "num_queries": 24000, + "num_documents": 216000, + "min_document_length": 7, + "average_document_length": 6.565689814814815, + "max_document_length": 180, + "unique_documents": 216000, + "min_query_length": 100, + "average_query_length": 3435.3642916666668, + "max_query_length": 9461, + "unique_queries": 24000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 24000, + "num_instructions": 
null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "bg": { + "number_of_characters": 5145316, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 18, + "average_document_length": 6.758518518518518, + "max_document_length": 166, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3369.384, + "max_query_length": 4869, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "bn": { + "number_of_characters": 5390581, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 7, + "average_document_length": 5.2518518518518515, + "max_document_length": 123, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3546.454, + "max_query_length": 5104, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "cs": { + "number_of_characters": 5079180, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 
13500, + "min_document_length": 17, + "average_document_length": 6.2524444444444445, + "max_document_length": 137, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3329.848, + "max_query_length": 3487, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "da": { + "number_of_characters": 4746132, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 17, + "average_document_length": 6.30562962962963, + "max_document_length": 137, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3107.3373333333334, + "max_query_length": 2563, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "de": { + "number_of_characters": 5483592, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 20, + "average_document_length": 7.778222222222222, + "max_document_length": 180, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3585.724, + "max_query_length": 3083, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "en": { + "number_of_characters": 6217884, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 18, + "average_document_length": 7.596888888888889, + "max_document_length": 162, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 4076.884, + "max_query_length": 3662, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fa": { + "number_of_characters": 4732619, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 12, + "average_document_length": 5.407481481481481, + "max_document_length": 119, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3106.412, + "max_query_length": 4707, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fi": { + "number_of_characters": 5209132, + "num_samples": 15000, + "num_queries": 1500, 
+ "num_documents": 13500, + "min_document_length": 14, + "average_document_length": 6.149259259259259, + "max_document_length": 132, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3417.4113333333335, + "max_query_length": 2574, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hi": { + "number_of_characters": 5620959, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 13, + "average_document_length": 5.641925925925926, + "max_document_length": 125, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3696.5286666666666, + "max_query_length": 5912, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "it": { + "number_of_characters": 5420496, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 23, + "average_document_length": 7.783851851851852, + "max_document_length": 156, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3543.6093333333333, + "max_query_length": 9461, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "nl": { + "number_of_characters": 5169556, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 18, + "average_document_length": 7.260518518518518, + "max_document_length": 136, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3381.026, + "max_query_length": 3641, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "pt": { + "number_of_characters": 5474356, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 18, + "average_document_length": 7.235481481481481, + "max_document_length": 176, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3584.4513333333334, + "max_query_length": 3057, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ro": { + "number_of_characters": 4796113, + 
"num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 14, + "average_document_length": 6.885925925925926, + "max_document_length": 169, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3135.4353333333333, + "max_query_length": 4213, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "sr": { + "number_of_characters": 5271732, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 15, + "average_document_length": 6.185481481481482, + "max_document_length": 146, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3458.8186666666666, + "max_query_length": 3668, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "no": { + "number_of_characters": 5036586, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 14, + "average_document_length": 6.143111111111111, + "max_document_length": 129, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3302.436, + "max_query_length": 2841, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "sv": { + "number_of_characters": 5072698, + "num_samples": 15000, + "num_queries": 1500, + "num_documents": 13500, + "min_document_length": 17, + "average_document_length": 6.414444444444444, + "max_document_length": 133, + "unique_documents": 13500, + "min_query_length": 100, + "average_query_length": 3324.0686666666666, + "max_query_length": 3680, + "unique_queries": 1500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/WinoGrande.json b/mteb/descriptive_stats/Retrieval/WinoGrande.json new file mode 100644 index 0000000000..39d09c1855 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/WinoGrande.json @@ -0,0 +1,28 @@ +{ + "test": { + "number_of_characters": 185865, + "num_samples": 6362, + "num_queries": 1267, + "num_documents": 5095, + "min_document_length": 79, + "average_document_length": 27.797448478900883, + "max_document_length": 185, + "unique_documents": 5095, + "min_query_length": 3, + "average_query_length": 34.9147592738753, + "max_query_length": 32, + "unique_queries": 1267, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 478, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/XMarket.json b/mteb/descriptive_stats/Retrieval/XMarket.json new file mode 100644 index 0000000000..477dbdf9d5 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/XMarket.json @@ -0,0 +1,112 @@ +{ + "test": { + "number_of_characters": 98558149, + "num_samples": 345689, + "num_queries": 16711, + "num_documents": 328978, + "min_document_length": 1, + "average_document_length": 0.8491570864921059, + "max_document_length": 88, + "unique_documents": 328978, + "min_query_length": 0, + "average_query_length": 5881.084016516067, + "max_query_length": 151924, + "unique_queries": 16711, + "none_queries": 1, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 67.35377894799832, + "max_relevant_docs_per_query": 81770, + "unique_relevant_docs": 313563, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "de": { + "number_of_characters": 9005388, + "num_samples": 74563, + "num_queries": 4037, + "num_documents": 70526, + "min_document_length": 2, + "average_document_length": 0.8996965658055185, + "max_document_length": 51, + "unique_documents": 70526, + "min_query_length": 0, + "average_query_length": 2214.9952935348033, + "max_query_length": 151924, + "unique_queries": 4037, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 54.3522417636859, + "max_relevant_docs_per_query": 
41933, + "unique_relevant_docs": 67368, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "en": { + "number_of_characters": 81219963, + "num_samples": 227876, + "num_queries": 9099, + "num_documents": 218777, + "min_document_length": 1, + "average_document_length": 0.6605219012967543, + "max_document_length": 66, + "unique_documents": 218777, + "min_query_length": 0, + "average_query_length": 8910.369930761623, + "max_query_length": 35724, + "unique_queries": 9099, + "none_queries": 1, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 85.43719090009891, + "max_relevant_docs_per_query": 81770, + "unique_relevant_docs": 207995, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 8332798, + "num_samples": 43250, + "num_queries": 3575, + "num_documents": 39675, + "min_document_length": 2, + "average_document_length": 1.7994959042218022, + "max_document_length": 88, + "unique_documents": 39675, + "min_query_length": 0, + "average_query_length": 2310.881958041958, + "max_query_length": 29074, + "unique_queries": 3575, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 36.01006993006993, + "max_relevant_docs_per_query": 17788, + "unique_relevant_docs": 38200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/XPQARetrieval.json b/mteb/descriptive_stats/Retrieval/XPQARetrieval.json new file mode 100644 index 0000000000..b00f01f22b --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/XPQARetrieval.json @@ -0,0 +1,966 @@ +{ + "test": { + "number_of_characters": 5308501, + "num_samples": 81710, + "num_queries": 27856, + "num_documents": 53854, + "min_document_length": 3, + "average_document_length": 20.861588739926468, + "max_document_length": 298, + "unique_documents": 53854, + "min_query_length": 3, + "average_query_length": 150.2376866743251, + "max_query_length": 4229, + "unique_queries": 27856, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9896611143021252, + "max_relevant_docs_per_query": 17, + "unique_relevant_docs": 53854, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "ara-ara": { + "number_of_characters": 114782, + "num_samples": 2245, + "num_queries": 750, + "num_documents": 1495, + "min_document_length": 8, + "average_document_length": 14.893645484949833, + "max_document_length": 111, + "unique_documents": 1495, + "min_query_length": 10, + "average_query_length": 123.35466666666666, + "max_query_length": 1200, + "unique_queries": 750, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.004, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1495, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + 
}, + "eng-ara": { + "number_of_characters": 214304, + "num_samples": 2283, + "num_queries": 750, + "num_documents": 1533, + "min_document_length": 8, + "average_document_length": 14.524461839530332, + "max_document_length": 111, + "unique_documents": 1533, + "min_query_length": 9, + "average_query_length": 256.05066666666664, + "max_query_length": 4229, + "unique_queries": 750, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.058666666666667, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1533, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ara-eng": { + "number_of_characters": 121839, + "num_samples": 2237, + "num_queries": 742, + "num_documents": 1495, + "min_document_length": 11, + "average_document_length": 19.614046822742473, + "max_document_length": 162, + "unique_documents": 1495, + "min_query_length": 10, + "average_query_length": 124.68463611859838, + "max_query_length": 1200, + "unique_queries": 742, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.024258760107817, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1495, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-deu": { + "number_of_characters": 129323, + "num_samples": 2014, + "num_queries": 766, + "num_documents": 1248, + "min_document_length": 17, + "average_document_length": 34.076121794871796, + "max_document_length": 144, + "unique_documents": 1248, + "min_query_length": 13, + "average_query_length": 113.31070496083551, + "max_query_length": 383, + 
"unique_queries": 766, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.6318537859007833, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1248, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-deu": { + "number_of_characters": 216068, + "num_samples": 2265, + "num_queries": 766, + "num_documents": 1499, + "min_document_length": 17, + "average_document_length": 28.370246831220815, + "max_document_length": 144, + "unique_documents": 1499, + "min_query_length": 3, + "average_query_length": 226.55483028720627, + "max_query_length": 1130, + "unique_queries": 766, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9634464751958225, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1499, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "deu-eng": { + "number_of_characters": 126543, + "num_samples": 2014, + "num_queries": 766, + "num_documents": 1248, + "min_document_length": 15, + "average_document_length": 31.848557692307693, + "max_document_length": 144, + "unique_documents": 1248, + "min_query_length": 13, + "average_query_length": 113.31070496083551, + "max_query_length": 383, + "unique_queries": 766, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.6318537859007833, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1248, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-spa": { + "number_of_characters": 169564, + "num_samples": 2734, + "num_queries": 793, + "num_documents": 1941, + "min_document_length": 12, + "average_document_length": 19.08397733127254, + "max_document_length": 140, + "unique_documents": 1941, + "min_query_length": 11, + "average_query_length": 167.11475409836066, + "max_query_length": 266, + "unique_queries": 793, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.4489281210592684, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1941, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-spa": { + "number_of_characters": 276016, + "num_samples": 2729, + "num_queries": 793, + "num_documents": 1936, + "min_document_length": 12, + "average_document_length": 19.13326446280992, + "max_document_length": 140, + "unique_documents": 1936, + "min_query_length": 13, + "average_query_length": 301.3543505674653, + "max_query_length": 1401, + "unique_queries": 793, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.472887767969735, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1936, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "spa-eng": { + "number_of_characters": 169960, + "num_samples": 2734, + "num_queries": 793, + "num_documents": 1941, + "min_document_length": 12, + "average_document_length": 19.287995878413188, + "max_document_length": 133, + 
"unique_documents": 1941, + "min_query_length": 11, + "average_query_length": 167.11475409836066, + "max_query_length": 266, + "unique_queries": 793, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.4489281210592684, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1941, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fra-fra": { + "number_of_characters": 161169, + "num_samples": 2297, + "num_queries": 749, + "num_documents": 1548, + "min_document_length": 12, + "average_document_length": 27.120801033591732, + "max_document_length": 110, + "unique_documents": 1548, + "min_query_length": 16, + "average_query_length": 159.1268357810414, + "max_query_length": 359, + "unique_queries": 749, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.069425901201602, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1548, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-fra": { + "number_of_characters": 271844, + "num_samples": 2423, + "num_queries": 749, + "num_documents": 1674, + "min_document_length": 12, + "average_document_length": 25.079450418160096, + "max_document_length": 110, + "unique_documents": 1674, + "min_query_length": 7, + "average_query_length": 306.890520694259, + "max_query_length": 1798, + "unique_queries": 749, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.248331108144192, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1674, + "num_instructions": null, + 
"min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fra-eng": { + "number_of_characters": 156327, + "num_samples": 2297, + "num_queries": 749, + "num_documents": 1548, + "min_document_length": 11, + "average_document_length": 23.992894056847547, + "max_document_length": 110, + "unique_documents": 1548, + "min_query_length": 16, + "average_query_length": 159.1268357810414, + "max_query_length": 359, + "unique_queries": 749, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.069425901201602, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1548, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-hin": { + "number_of_characters": 90024, + "num_samples": 2176, + "num_queries": 925, + "num_documents": 1251, + "min_document_length": 8, + "average_document_length": 24.753796962430055, + "max_document_length": 97, + "unique_documents": 1251, + "min_query_length": 11, + "average_query_length": 63.84540540540541, + "max_query_length": 246, + "unique_queries": 925, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.3902702702702703, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1251, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-hin": { + "number_of_characters": 191622, + "num_samples": 2431, + "num_queries": 925, + "num_documents": 
1506, + "min_document_length": 8, + "average_document_length": 20.562416998671978, + "max_document_length": 97, + "unique_documents": 1506, + "min_query_length": 9, + "average_query_length": 173.6810810810811, + "max_query_length": 2000, + "unique_queries": 925, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.8054054054054054, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1506, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hin-eng": { + "number_of_characters": 90964, + "num_samples": 2163, + "num_queries": 912, + "num_documents": 1251, + "min_document_length": 8, + "average_document_length": 25.50519584332534, + "max_document_length": 118, + "unique_documents": 1251, + "min_query_length": 11, + "average_query_length": 64.75548245614036, + "max_query_length": 246, + "unique_queries": 912, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.4100877192982457, + "max_relevant_docs_per_query": 8, + "unique_relevant_docs": 1251, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ita-ita": { + "number_of_characters": 108624, + "num_samples": 1935, + "num_queries": 663, + "num_documents": 1272, + "min_document_length": 13, + "average_document_length": 25.617924528301888, + "max_document_length": 134, + "unique_documents": 1272, + "min_query_length": 17, + "average_query_length": 114.68778280542986, + "max_query_length": 293, + "unique_queries": 663, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9245852187028658, + 
"max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1272, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-ita": { + "number_of_characters": 192704, + "num_samples": 1964, + "num_queries": 663, + "num_documents": 1301, + "min_document_length": 13, + "average_document_length": 25.046887009992314, + "max_document_length": 134, + "unique_documents": 1301, + "min_query_length": 6, + "average_query_length": 241.5052790346908, + "max_query_length": 1561, + "unique_queries": 663, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9849170437405732, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1301, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ita-eng": { + "number_of_characters": 108552, + "num_samples": 1935, + "num_queries": 663, + "num_documents": 1272, + "min_document_length": 11, + "average_document_length": 25.56132075471698, + "max_document_length": 131, + "unique_documents": 1272, + "min_query_length": 17, + "average_query_length": 114.68778280542986, + "max_query_length": 293, + "unique_queries": 663, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9245852187028658, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1272, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "jpn-jpn": 
{ + "number_of_characters": 84910, + "num_samples": 2426, + "num_queries": 825, + "num_documents": 1601, + "min_document_length": 5, + "average_document_length": 12.004996876951905, + "max_document_length": 49, + "unique_documents": 1601, + "min_query_length": 8, + "average_query_length": 79.62424242424242, + "max_query_length": 368, + "unique_queries": 825, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9406060606060607, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1601, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-jpn": { + "number_of_characters": 239552, + "num_samples": 2570, + "num_queries": 825, + "num_documents": 1745, + "min_document_length": 5, + "average_document_length": 11.01432664756447, + "max_document_length": 49, + "unique_documents": 1745, + "min_query_length": 3, + "average_query_length": 267.0690909090909, + "max_query_length": 1116, + "unique_queries": 825, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.1187878787878787, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1745, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "jpn-eng": { + "number_of_characters": 107954, + "num_samples": 2423, + "num_queries": 822, + "num_documents": 1601, + "min_document_length": 13, + "average_document_length": 26.398500936914427, + "max_document_length": 154, + "unique_documents": 1601, + "min_query_length": 8, + "average_query_length": 79.91484184914842, + "max_query_length": 368, + "unique_queries": 822, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9476885644768855, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 1601, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "kor-kor": { + "number_of_characters": 42030, + "num_samples": 1543, + "num_queries": 654, + "num_documents": 889, + "min_document_length": 4, + "average_document_length": 16.050618672665916, + "max_document_length": 149, + "unique_documents": 889, + "min_query_length": 4, + "average_query_length": 42.448012232415905, + "max_query_length": 231, + "unique_queries": 654, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.5642201834862386, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 889, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-kor": { + "number_of_characters": 145679, + "num_samples": 1823, + "num_queries": 654, + "num_documents": 1169, + "min_document_length": 4, + "average_document_length": 12.206159110350727, + "max_document_length": 149, + "unique_documents": 1169, + "min_query_length": 5, + "average_query_length": 200.93272171253824, + "max_query_length": 1948, + "unique_queries": 654, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.952599388379205, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1169, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "kor-eng": { + "number_of_characters": 54748, + "num_samples": 1503, + "num_queries": 614, + "num_documents": 889, + "min_document_length": 5, + "average_document_length": 30.35658042744657, + "max_document_length": 298, + "unique_documents": 889, + "min_query_length": 4, + "average_query_length": 45.21335504885994, + "max_query_length": 231, + "unique_queries": 614, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.6661237785016287, + "max_relevant_docs_per_query": 9, + "unique_relevant_docs": 889, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "pol-pol": { + "number_of_characters": 122176, + "num_samples": 2364, + "num_queries": 785, + "num_documents": 1579, + "min_document_length": 8, + "average_document_length": 26.707409753008232, + "max_document_length": 150, + "unique_documents": 1579, + "min_query_length": 18, + "average_query_length": 101.9171974522293, + "max_query_length": 219, + "unique_queries": 785, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.080254777070064, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1579, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-pol": { + "number_of_characters": 240206, + "num_samples": 2538, + "num_queries": 785, + "num_documents": 1753, + "min_document_length": 8, + "average_document_length": 24.056474614945806, + "max_document_length": 150, + "unique_documents": 1753, + "min_query_length": 5, + 
"average_query_length": 252.27388535031847, + "max_query_length": 1459, + "unique_queries": 785, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.385987261146497, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1753, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "pol-eng": { + "number_of_characters": 122118, + "num_samples": 2356, + "num_queries": 777, + "num_documents": 1579, + "min_document_length": 5, + "average_document_length": 26.67067764407853, + "max_document_length": 180, + "unique_documents": 1579, + "min_query_length": 18, + "average_query_length": 102.96653796653797, + "max_query_length": 219, + "unique_queries": 777, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.101673101673102, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 1579, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "por-por": { + "number_of_characters": 157318, + "num_samples": 2422, + "num_queries": 800, + "num_documents": 1622, + "min_document_length": 9, + "average_document_length": 21.005548705302097, + "max_document_length": 126, + "unique_documents": 1622, + "min_query_length": 7, + "average_query_length": 154.05875, + "max_query_length": 500, + "unique_queries": 800, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.14, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1622, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": 
null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-por": { + "number_of_characters": 216697, + "num_samples": 2439, + "num_queries": 800, + "num_documents": 1639, + "min_document_length": 9, + "average_document_length": 20.787675411836485, + "max_document_length": 126, + "unique_documents": 1639, + "min_query_length": 9, + "average_query_length": 228.2825, + "max_query_length": 1206, + "unique_queries": 800, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.21875, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1639, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "por-eng": { + "number_of_characters": 160371, + "num_samples": 2419, + "num_queries": 797, + "num_documents": 1622, + "min_document_length": 9, + "average_document_length": 22.887792848335387, + "max_document_length": 136, + "unique_documents": 1622, + "min_query_length": 7, + "average_query_length": 154.63864491844416, + "max_query_length": 500, + "unique_queries": 797, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.148055207026349, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 1622, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "tam-tam": { + "number_of_characters": 108750, + "num_samples": 2057, + "num_queries": 782, + "num_documents": 1275, + "min_document_length": 3, + "average_document_length": 20.40392156862745, + "max_document_length": 146, + 
"unique_documents": 1275, + "min_query_length": 4, + "average_query_length": 105.79923273657289, + "max_query_length": 441, + "unique_queries": 782, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.6994884910485935, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1275, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-tam": { + "number_of_characters": 169909, + "num_samples": 2266, + "num_queries": 782, + "num_documents": 1484, + "min_document_length": 3, + "average_document_length": 17.53032345013477, + "max_document_length": 146, + "unique_documents": 1484, + "min_query_length": 5, + "average_query_length": 184.0076726342711, + "max_query_length": 1240, + "unique_queries": 782, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.0255754475703327, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1484, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "tam-eng": { + "number_of_characters": 109479, + "num_samples": 2044, + "num_queries": 769, + "num_documents": 1275, + "min_document_length": 6, + "average_document_length": 20.975686274509805, + "max_document_length": 162, + "unique_documents": 1275, + "min_query_length": 4, + "average_query_length": 107.58777633289986, + "max_query_length": 441, + "unique_queries": 769, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.728218465539662, + "max_relevant_docs_per_query": 17, + "unique_relevant_docs": 1275, + "num_instructions": null, + 
"min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "cmn-cmn": { + "number_of_characters": 45797, + "num_samples": 2529, + "num_queries": 824, + "num_documents": 1705, + "min_document_length": 5, + "average_document_length": 5.901466275659824, + "max_document_length": 29, + "unique_documents": 1705, + "min_query_length": 5, + "average_query_length": 43.36771844660194, + "max_query_length": 236, + "unique_queries": 824, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.0716019417475726, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1705, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "eng-cmn": { + "number_of_characters": 201023, + "num_samples": 2587, + "num_queries": 824, + "num_documents": 1763, + "min_document_length": 5, + "average_document_length": 5.7073170731707314, + "max_document_length": 29, + "unique_documents": 1763, + "min_query_length": 15, + "average_query_length": 231.748786407767, + "max_query_length": 965, + "unique_queries": 824, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.2633495145631066, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 1763, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "cmn-eng": { + "number_of_characters": 69555, + "num_samples": 2525, + "num_queries": 820, + "num_documents": 
1705, + "min_document_length": 10, + "average_document_length": 19.835777126099707, + "max_document_length": 130, + "unique_documents": 1705, + "min_query_length": 5, + "average_query_length": 43.579268292682926, + "max_query_length": 236, + "unique_queries": 820, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.0817073170731706, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 1705, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/XQuADRetrieval.json b/mteb/descriptive_stats/Retrieval/XQuADRetrieval.json new file mode 100644 index 0000000000..cad459ef82 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/XQuADRetrieval.json @@ -0,0 +1,342 @@ +{ + "validation": { + "number_of_characters": 3049962, + "num_samples": 17079, + "num_queries": 14199, + "num_documents": 2880, + "min_document_length": 6, + "average_document_length": 291.05694444444447, + "max_document_length": 307, + "unique_documents": 2880, + "min_query_length": 56, + "average_query_length": 155.76575815198254, + "max_query_length": 3884, + "unique_queries": 14199, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2880, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "ar": { + "number_of_characters": 227279, + "num_samples": 1426, + "num_queries": 1186, + "num_documents": 240, + "min_document_length": 14, 
+ "average_document_length": 263.52916666666664, + "max_document_length": 221, + "unique_documents": 240, + "min_query_length": 146, + "average_query_length": 138.30691399662732, + "max_query_length": 2772, + "unique_queries": 1186, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "de": { + "number_of_characters": 296116, + "num_samples": 1421, + "num_queries": 1181, + "num_documents": 240, + "min_document_length": 15, + "average_document_length": 339.75, + "max_document_length": 197, + "unique_documents": 240, + "min_query_length": 186, + "average_query_length": 181.69009314140558, + "max_query_length": 3884, + "unique_queries": 1181, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "el": { + "number_of_characters": 295889, + "num_samples": 1424, + "num_queries": 1184, + "num_documents": 240, + "min_document_length": 18, + "average_document_length": 338.4916666666667, + "max_document_length": 198, + "unique_documents": 240, + "min_query_length": 176, + "average_query_length": 181.29307432432432, + "max_query_length": 3745, + "unique_queries": 1184, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + 
"min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "en": { + "number_of_characters": 260942, + "num_samples": 1425, + "num_queries": 1185, + "num_documents": 240, + "min_document_length": 15, + "average_document_length": 302.425, + "max_document_length": 197, + "unique_documents": 240, + "min_query_length": 158, + "average_query_length": 158.95358649789029, + "max_query_length": 3326, + "unique_queries": 1185, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 292907, + "num_samples": 1424, + "num_queries": 1184, + "num_documents": 240, + "min_document_length": 15, + "average_document_length": 336.64166666666665, + "max_document_length": 226, + "unique_documents": 240, + "min_query_length": 173, + "average_query_length": 179.14949324324326, + "max_query_length": 3734, + "unique_queries": 1184, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hi": { + "number_of_characters": 254193, + "num_samples": 1423, + "num_queries": 1183, + "num_documents": 240, + "min_document_length": 14, + 
"average_document_length": 294.1958333333333, + "max_document_length": 307, + "unique_documents": 240, + "min_query_length": 132, + "average_query_length": 155.1868131868132, + "max_query_length": 3044, + "unique_queries": 1183, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ro": { + "number_of_characters": 290359, + "num_samples": 1424, + "num_queries": 1184, + "num_documents": 240, + "min_document_length": 14, + "average_document_length": 331.3833333333333, + "max_document_length": 211, + "unique_documents": 240, + "min_query_length": 184, + "average_query_length": 178.06334459459458, + "max_query_length": 3732, + "unique_queries": 1184, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ru": { + "number_of_characters": 281002, + "num_samples": 1425, + "num_queries": 1185, + "num_documents": 240, + "min_document_length": 16, + "average_document_length": 320.65416666666664, + "max_document_length": 210, + "unique_documents": 240, + "min_query_length": 182, + "average_query_length": 172.18987341772151, + "max_query_length": 3691, + "unique_queries": 1185, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": 
null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "th": { + "number_of_characters": 241844, + "num_samples": 1420, + "num_queries": 1180, + "num_documents": 240, + "min_document_length": 11, + "average_document_length": 270.925, + "max_document_length": 161, + "unique_documents": 240, + "min_query_length": 154, + "average_query_length": 149.84915254237288, + "max_query_length": 2891, + "unique_queries": 1180, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "tr": { + "number_of_characters": 261270, + "num_samples": 1424, + "num_queries": 1184, + "num_documents": 240, + "min_document_length": 13, + "average_document_length": 300.325, + "max_document_length": 164, + "unique_documents": 240, + "min_query_length": 140, + "average_query_length": 159.79054054054055, + "max_query_length": 3266, + "unique_queries": 1184, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "vi": { + "number_of_characters": 265783, + "num_samples": 1422, + "num_queries": 1182, + "num_documents": 240, + "min_document_length": 12, + 
"average_document_length": 303.5208333333333, + "max_document_length": 177, + "unique_documents": 240, + "min_query_length": 184, + "average_query_length": 163.2301184433164, + "max_query_length": 3412, + "unique_queries": 1182, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zh": { + "number_of_characters": 82378, + "num_samples": 1421, + "num_queries": 1181, + "num_documents": 240, + "min_document_length": 6, + "average_document_length": 90.84166666666667, + "max_document_length": 52, + "unique_documents": 240, + "min_query_length": 56, + "average_query_length": 51.29212531752752, + "max_query_length": 974, + "unique_queries": 1181, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 240, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/mFollowIR.json b/mteb/descriptive_stats/Retrieval/mFollowIR.json new file mode 100644 index 0000000000..e1f65148e8 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/mFollowIR.json @@ -0,0 +1,108 @@ +{ + "test": { + "number_of_characters": 277808433, + "num_samples": 121881, + "num_queries": 246, + "num_documents": 121635, + "min_document_length": 10, + "average_document_length": 0.11550951617544292, + "max_document_length": 136, + "unique_documents": 121635, 
+ "min_query_length": 0, + "average_query_length": 1129245.4593495934, + "max_query_length": 24117, + "unique_queries": 246, + "min_relevant_docs_per_query": 123, + "average_relevant_docs_per_query": 7.865853658536586, + "max_relevant_docs_per_query": 450, + "unique_relevant_docs": 36075, + "num_instructions": 246, + "min_instruction_length": 37, + "average_instruction_length": 74785, + "max_instruction_length": 1083, + "unique_instructions": 246, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000, + "hf_subset_descriptive_stats": { + "fas": { + "number_of_characters": 127465295, + "num_samples": 41269, + "num_queries": 80, + "num_documents": 41189, + "min_document_length": 34, + "average_document_length": 0.14110563499963583, + "max_document_length": 124, + "unique_documents": 41189, + "min_query_length": 0, + "average_query_length": 1593243.5375, + "max_query_length": 24117, + "unique_queries": 80, + "min_relevant_docs_per_query": 151, + "average_relevant_docs_per_query": 8.075, + "max_relevant_docs_per_query": 450, + "unique_relevant_docs": 11859, + "num_instructions": 80, + "min_instruction_length": 121, + "average_instruction_length": 30970, + "max_instruction_length": 842, + "unique_instructions": 80, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000 + }, + "rus": { + "number_of_characters": 106937404, + "num_samples": 39406, + "num_queries": 80, + "num_documents": 39326, + "min_document_length": 26, + "average_document_length": 0.15765651223109392, + "max_document_length": 136, + "unique_documents": 39326, + "min_query_length": 0, + "average_query_length": 1336640.05, + "max_query_length": 24033, + "unique_queries": 80, + "min_relevant_docs_per_query": 168, + "average_relevant_docs_per_query": 7.35, + "max_relevant_docs_per_query": 443, + "unique_relevant_docs": 11934, + "num_instructions": 80, + "min_instruction_length": 78, + 
"average_instruction_length": 33800, + "max_instruction_length": 1083, + "unique_instructions": 80, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000 + }, + "zho": { + "number_of_characters": 43405734, + "num_samples": 41206, + "num_queries": 86, + "num_documents": 41120, + "min_document_length": 10, + "average_document_length": 0.04956225680933852, + "max_document_length": 44, + "unique_documents": 41120, + "min_query_length": 0, + "average_query_length": 504694.1395348837, + "max_query_length": 23822, + "unique_queries": 86, + "min_relevant_docs_per_query": 123, + "average_relevant_docs_per_query": 8.151162790697674, + "max_relevant_docs_per_query": 429, + "unique_relevant_docs": 12282, + "num_instructions": 86, + "min_instruction_length": 37, + "average_instruction_length": 10015, + "max_instruction_length": 229, + "unique_instructions": 86, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingual.json b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingual.json new file mode 100644 index 0000000000..482a144707 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingual.json @@ -0,0 +1,108 @@ +{ + "test": { + "number_of_characters": 277814525, + "num_samples": 121881, + "num_queries": 246, + "num_documents": 121635, + "min_document_length": 32, + "average_document_length": 0.1655937846836848, + "max_document_length": 173, + "unique_documents": 121635, + "min_query_length": 0, + "average_query_length": 1129245.4593495934, + "max_query_length": 24117, + "unique_queries": 246, + "min_relevant_docs_per_query": 123, + "average_relevant_docs_per_query": 7.865853658536586, + "max_relevant_docs_per_query": 450, + "unique_relevant_docs": 36075, + "num_instructions": 246, + "min_instruction_length": 93, + 
"average_instruction_length": 103382, + "max_instruction_length": 974, + "unique_instructions": 246, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000, + "hf_subset_descriptive_stats": { + "eng-fas": { + "number_of_characters": 127465889, + "num_samples": 41269, + "num_queries": 80, + "num_documents": 41189, + "min_document_length": 34, + "average_document_length": 0.15552696108184225, + "max_document_length": 124, + "unique_documents": 41189, + "min_query_length": 0, + "average_query_length": 1593243.5375, + "max_query_length": 24117, + "unique_queries": 80, + "min_relevant_docs_per_query": 151, + "average_relevant_docs_per_query": 8.075, + "max_relevant_docs_per_query": 450, + "unique_relevant_docs": 11859, + "num_instructions": 80, + "min_instruction_length": 150, + "average_instruction_length": 34402, + "max_instruction_length": 974, + "unique_instructions": 80, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000 + }, + "eng-rus": { + "number_of_characters": 106937754, + "num_samples": 39406, + "num_queries": 80, + "num_documents": 39326, + "min_document_length": 32, + "average_document_length": 0.16655647663123632, + "max_document_length": 173, + "unique_documents": 39326, + "min_query_length": 0, + "average_query_length": 1336640.05, + "max_query_length": 24033, + "unique_queries": 80, + "min_relevant_docs_per_query": 168, + "average_relevant_docs_per_query": 7.35, + "max_relevant_docs_per_query": 443, + "unique_relevant_docs": 11934, + "num_instructions": 80, + "min_instruction_length": 93, + "average_instruction_length": 32117, + "max_instruction_length": 957, + "unique_instructions": 80, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000 + }, + "eng-zho": { + "number_of_characters": 43410882, + "num_samples": 41206, + "num_queries": 86, + "num_documents": 41120, + 
"min_document_length": 32, + "average_document_length": 0.1747568093385214, + "max_document_length": 159, + "unique_documents": 41120, + "min_query_length": 0, + "average_query_length": 504694.1395348837, + "max_query_length": 23822, + "unique_queries": 86, + "min_relevant_docs_per_query": 123, + "average_relevant_docs_per_query": 8.151162790697674, + "max_relevant_docs_per_query": 429, + "unique_relevant_docs": 12282, + "num_instructions": 86, + "min_instruction_length": 157, + "average_instruction_length": 36863, + "max_instruction_length": 822, + "unique_instructions": 86, + "min_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_top_ranked_per_query": 1000 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/AFQMC.json b/mteb/descriptive_stats/STS/AFQMC.json new file mode 100644 index 0000000000..32ae573ce5 --- /dev/null +++ b/mteb/descriptive_stats/STS/AFQMC.json @@ -0,0 +1,18 @@ +{ + "validation": { + "num_samples": 4316, + "number_of_characters": 115065, + "unique_pairs": 4316, + "min_sentence1_length": 5, + "average_sentence1_len": 13.385773864689527, + "max_sentence1_length": 76, + "unique_sentence1": 4313, + "min_sentence2_length": 5, + "average_sentence2_len": 13.274328081556996, + "max_sentence2_length": 82, + "unique_sentence2": 4313, + "min_score": 0, + "avg_score": 0.31000926784059313, + "max_score": 1 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/ATEC.json b/mteb/descriptive_stats/STS/ATEC.json new file mode 100644 index 0000000000..7f653cccca --- /dev/null +++ b/mteb/descriptive_stats/STS/ATEC.json @@ -0,0 +1,34 @@ +{ + "validation": { + "num_samples": 20000, + "number_of_characters": 536573, + "unique_pairs": 20000, + "min_sentence1_length": 5, + "average_sentence1_len": 13.4172, + "max_sentence1_length": 84, + "unique_sentence1": 19909, + "min_sentence2_length": 5, + "average_sentence2_len": 13.41145, + "max_sentence2_length": 82, + "unique_sentence2": 19882, + 
"min_score": 0, + "avg_score": 0.1844, + "max_score": 1 + }, + "test": { + "num_samples": 20000, + "number_of_characters": 536531, + "unique_pairs": 20000, + "min_sentence1_length": 5, + "average_sentence1_len": 13.40835, + "max_sentence1_length": 97, + "unique_sentence1": 19911, + "min_sentence2_length": 5, + "average_sentence2_len": 13.4182, + "max_sentence2_length": 88, + "unique_sentence2": 19907, + "min_score": 0, + "avg_score": 0.1805, + "max_score": 1 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/Assin2STS.json b/mteb/descriptive_stats/STS/Assin2STS.json new file mode 100644 index 0000000000..80d59884ac --- /dev/null +++ b/mteb/descriptive_stats/STS/Assin2STS.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 2448, + "number_of_characters": 262185, + "unique_pairs": 2436, + "min_sentence1_length": 19, + "average_sentence1_len": 55.15318627450981, + "max_sentence1_length": 159, + "unique_sentence1": 2064, + "min_sentence2_length": 18, + "average_sentence2_len": 51.9485294117647, + "max_sentence2_length": 158, + "unique_sentence2": 2075, + "min_score": 1.0, + "avg_score": 3.565230803113747, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/BIOSSES.json b/mteb/descriptive_stats/STS/BIOSSES.json new file mode 100644 index 0000000000..13ed7f3ff3 --- /dev/null +++ b/mteb/descriptive_stats/STS/BIOSSES.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 100, + "number_of_characters": 31313, + "unique_pairs": 100, + "min_sentence1_length": 46, + "average_sentence1_len": 154.67, + "max_sentence1_length": 337, + "unique_sentence1": 92, + "min_sentence2_length": 40, + "average_sentence2_len": 158.46, + "max_sentence2_length": 335, + "unique_sentence2": 97, + "min_score": 0.0, + "avg_score": 2.196, + "max_score": 4.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/BQ.json b/mteb/descriptive_stats/STS/BQ.json new file mode 100644 index 0000000000..25f1b77823 --- 
/dev/null +++ b/mteb/descriptive_stats/STS/BQ.json @@ -0,0 +1,34 @@ +{ + "validation": { + "num_samples": 10000, + "number_of_characters": 237203, + "unique_pairs": 10000, + "min_sentence1_length": 1, + "average_sentence1_len": 11.7486, + "max_sentence1_length": 130, + "unique_sentence1": 4584, + "min_sentence2_length": 1, + "average_sentence2_len": 11.9717, + "max_sentence2_length": 112, + "unique_sentence2": 5172, + "min_score": 0, + "avg_score": 0.5, + "max_score": 1 + }, + "test": { + "num_samples": 10000, + "number_of_characters": 238712, + "unique_pairs": 10000, + "min_sentence1_length": 2, + "average_sentence1_len": 11.722, + "max_sentence1_length": 70, + "unique_sentence1": 4301, + "min_sentence2_length": 1, + "average_sentence2_len": 12.1492, + "max_sentence2_length": 100, + "unique_sentence2": 4809, + "min_score": 0, + "avg_score": 0.5, + "max_score": 1 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/CDSC-R.json b/mteb/descriptive_stats/STS/CDSC-R.json new file mode 100644 index 0000000000..453b854691 --- /dev/null +++ b/mteb/descriptive_stats/STS/CDSC-R.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 1000, + "number_of_characters": 149941, + "unique_pairs": 998, + "min_sentence1_length": 14, + "average_sentence1_len": 75.237, + "max_sentence1_length": 190, + "unique_sentence1": 510, + "min_sentence2_length": 17, + "average_sentence2_len": 74.704, + "max_sentence2_length": 190, + "unique_sentence2": 509, + "min_score": 0.0, + "avg_score": 2.504430000000003, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/FaroeseSTS.json b/mteb/descriptive_stats/STS/FaroeseSTS.json new file mode 100644 index 0000000000..80a5f296f0 --- /dev/null +++ b/mteb/descriptive_stats/STS/FaroeseSTS.json @@ -0,0 +1,18 @@ +{ + "train": { + "num_samples": 729, + "number_of_characters": 63582, + "unique_pairs": 727, + "min_sentence1_length": 11, + "average_sentence1_len": 43.63374485596708, + 
"max_sentence1_length": 114, + "unique_sentence1": 672, + "min_sentence2_length": 8, + "average_sentence2_len": 43.584362139917694, + "max_sentence2_length": 115, + "unique_sentence2": 701, + "min_score": 0.0, + "avg_score": 2.342330589849108, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/FinParaSTS.json b/mteb/descriptive_stats/STS/FinParaSTS.json new file mode 100644 index 0000000000..89effa5731 --- /dev/null +++ b/mteb/descriptive_stats/STS/FinParaSTS.json @@ -0,0 +1,34 @@ +{ + "validation": { + "num_samples": 1000, + "number_of_characters": 117633, + "unique_pairs": 1000, + "min_sentence1_length": 5, + "average_sentence1_len": 59.597, + "max_sentence1_length": 329, + "unique_sentence1": 991, + "min_sentence2_length": 8, + "average_sentence2_len": 58.036, + "max_sentence2_length": 295, + "unique_sentence2": 992, + "min_score": 2, + "avg_score": 3.746, + "max_score": 4 + }, + "test": { + "num_samples": 1000, + "number_of_characters": 118123, + "unique_pairs": 1000, + "min_sentence1_length": 6, + "average_sentence1_len": 59.892, + "max_sentence1_length": 322, + "unique_sentence1": 996, + "min_sentence2_length": 3, + "average_sentence2_len": 58.231, + "max_sentence2_length": 358, + "unique_sentence2": 995, + "min_score": 2, + "avg_score": 3.754, + "max_score": 4 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/GermanSTSBenchmark.json b/mteb/descriptive_stats/STS/GermanSTSBenchmark.json new file mode 100644 index 0000000000..68a0427512 --- /dev/null +++ b/mteb/descriptive_stats/STS/GermanSTSBenchmark.json @@ -0,0 +1,34 @@ +{ + "validation": { + "num_samples": 1500, + "number_of_characters": 218610, + "unique_pairs": 1497, + "min_sentence1_length": 14, + "average_sentence1_len": 73.23733333333334, + "max_sentence1_length": 241, + "unique_sentence1": 1468, + "min_sentence2_length": 14, + "average_sentence2_len": 72.50266666666667, + "max_sentence2_length": 245, + "unique_sentence2": 1458, + 
"min_score": 0.0, + "avg_score": 2.363907555555555, + "max_score": 5.0 + }, + "test": { + "num_samples": 1379, + "number_of_characters": 168618, + "unique_pairs": 1376, + "min_sentence1_length": 14, + "average_sentence1_len": 61.184916606236406, + "max_sentence1_length": 232, + "unique_sentence1": 1245, + "min_sentence2_length": 13, + "average_sentence2_len": 61.090645395213926, + "max_sentence2_length": 238, + "unique_sentence2": 1327, + "min_score": 0.0, + "avg_score": 2.607916606236405, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/IndicCrosslingualSTS.json b/mteb/descriptive_stats/STS/IndicCrosslingualSTS.json new file mode 100644 index 0000000000..46ae41ce14 --- /dev/null +++ b/mteb/descriptive_stats/STS/IndicCrosslingualSTS.json @@ -0,0 +1,212 @@ +{ + "test": { + "num_samples": 3072, + "number_of_characters": 468907, + "unique_pairs": 3072, + "min_sentence1_length": 2, + "average_sentence1_len": 74.6455078125, + "max_sentence1_length": 1042, + "unique_sentence1": 3059, + "min_sentence2_length": 5, + "average_sentence2_len": 77.99348958333333, + "max_sentence2_length": 958, + "unique_sentence2": 3071, + "min_score": 0.0, + "avg_score": 3.816057942708332, + "max_score": 5.0, + "hf_subset_descriptive_stats": { + "en-as": { + "num_samples": 256, + "number_of_characters": 30635, + "unique_pairs": 256, + "min_sentence1_length": 5, + "average_sentence1_len": 58.41015625, + "max_sentence1_length": 423, + "unique_sentence1": 255, + "min_sentence2_length": 11, + "average_sentence2_len": 61.2578125, + "max_sentence2_length": 345, + "unique_sentence2": 256, + "min_score": 0.0, + "avg_score": 3.074609375, + "max_score": 5.0 + }, + "en-bn": { + "num_samples": 256, + "number_of_characters": 34576, + "unique_pairs": 256, + "min_sentence1_length": 7, + "average_sentence1_len": 68.76171875, + "max_sentence1_length": 287, + "unique_sentence1": 256, + "min_sentence2_length": 5, + "average_sentence2_len": 66.30078125, + 
"max_sentence2_length": 435, + "unique_sentence2": 255, + "min_score": 0.0, + "avg_score": 4.255156250000001, + "max_score": 5.0 + }, + "en-gu": { + "num_samples": 256, + "number_of_characters": 33860, + "unique_pairs": 256, + "min_sentence1_length": 4, + "average_sentence1_len": 67.1171875, + "max_sentence1_length": 528, + "unique_sentence1": 252, + "min_sentence2_length": 10, + "average_sentence2_len": 65.1484375, + "max_sentence2_length": 324, + "unique_sentence2": 256, + "min_score": 0.0, + "avg_score": 3.612500000000001, + "max_score": 5.0 + }, + "en-hi": { + "num_samples": 256, + "number_of_characters": 50177, + "unique_pairs": 256, + "min_sentence1_length": 5, + "average_sentence1_len": 100.1171875, + "max_sentence1_length": 505, + "unique_sentence1": 256, + "min_sentence2_length": 8, + "average_sentence2_len": 95.88671875, + "max_sentence2_length": 852, + "unique_sentence2": 256, + "min_score": 0.0, + "avg_score": 4.0542187499999995, + "max_score": 5.0 + }, + "en-kn": { + "num_samples": 256, + "number_of_characters": 40104, + "unique_pairs": 256, + "min_sentence1_length": 10, + "average_sentence1_len": 78.82421875, + "max_sentence1_length": 525, + "unique_sentence1": 256, + "min_sentence2_length": 10, + "average_sentence2_len": 77.83203125, + "max_sentence2_length": 339, + "unique_sentence2": 256, + "min_score": 0.0, + "avg_score": 3.757187500000002, + "max_score": 5.0 + }, + "en-ml": { + "num_samples": 256, + "number_of_characters": 39933, + "unique_pairs": 256, + "min_sentence1_length": 6, + "average_sentence1_len": 73.79296875, + "max_sentence1_length": 362, + "unique_sentence1": 256, + "min_sentence2_length": 10, + "average_sentence2_len": 82.1953125, + "max_sentence2_length": 367, + "unique_sentence2": 256, + "min_score": 0.0, + "avg_score": 3.5407421875, + "max_score": 5.0 + }, + "en-mr": { + "num_samples": 256, + "number_of_characters": 39277, + "unique_pairs": 256, + "min_sentence1_length": 11, + "average_sentence1_len": 76.4453125, + 
"max_sentence1_length": 1042, + "unique_sentence1": 256, + "min_sentence2_length": 10, + "average_sentence2_len": 76.98046875, + "max_sentence2_length": 958, + "unique_sentence2": 256, + "min_score": 0.33, + "avg_score": 3.562265625000002, + "max_score": 5.0 + }, + "en-or": { + "num_samples": 256, + "number_of_characters": 28708, + "unique_pairs": 256, + "min_sentence1_length": 10, + "average_sentence1_len": 55.22265625, + "max_sentence1_length": 281, + "unique_sentence1": 255, + "min_sentence2_length": 10, + "average_sentence2_len": 56.91796875, + "max_sentence2_length": 258, + "unique_sentence2": 256, + "min_score": 0.0, + "avg_score": 4.3892187499999995, + "max_score": 5.0 + }, + "en-pa": { + "num_samples": 256, + "number_of_characters": 40501, + "unique_pairs": 256, + "min_sentence1_length": 7, + "average_sentence1_len": 80.9453125, + "max_sentence1_length": 470, + "unique_sentence1": 256, + "min_sentence2_length": 10, + "average_sentence2_len": 77.26171875, + "max_sentence2_length": 362, + "unique_sentence2": 256, + "min_score": 0.0, + "avg_score": 3.5989843749999997, + "max_score": 5.0 + }, + "en-ta": { + "num_samples": 256, + "number_of_characters": 43245, + "unique_pairs": 256, + "min_sentence1_length": 7, + "average_sentence1_len": 75.0390625, + "max_sentence1_length": 388, + "unique_sentence1": 256, + "min_sentence2_length": 11, + "average_sentence2_len": 93.88671875, + "max_sentence2_length": 472, + "unique_sentence2": 256, + "min_score": 0.0, + "avg_score": 3.7492968750000006, + "max_score": 5.0 + }, + "en-te": { + "num_samples": 256, + "number_of_characters": 40578, + "unique_pairs": 256, + "min_sentence1_length": 2, + "average_sentence1_len": 78.83203125, + "max_sentence1_length": 341, + "unique_sentence1": 256, + "min_sentence2_length": 7, + "average_sentence2_len": 79.67578125, + "max_sentence2_length": 579, + "unique_sentence2": 256, + "min_score": 0.0, + "avg_score": 4.226210937499999, + "max_score": 5.0 + }, + "en-ur": { + "num_samples": 256, + 
"number_of_characters": 47313, + "unique_pairs": 256, + "min_sentence1_length": 7, + "average_sentence1_len": 82.23828125, + "max_sentence1_length": 362, + "unique_sentence1": 256, + "min_sentence2_length": 10, + "average_sentence2_len": 102.578125, + "max_sentence2_length": 851, + "unique_sentence2": 256, + "min_score": 0.0, + "avg_score": 3.972304687499998, + "max_score": 5.0 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/JSICK.json b/mteb/descriptive_stats/STS/JSICK.json new file mode 100644 index 0000000000..86f1dcf121 --- /dev/null +++ b/mteb/descriptive_stats/STS/JSICK.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 1986, + "number_of_characters": 84239, + "unique_pairs": 1977, + "min_sentence1_length": 7, + "average_sentence1_len": 21.476334340382678, + "max_sentence1_length": 69, + "unique_sentence1": 1660, + "min_sentence2_length": 7, + "average_sentence2_len": 20.94008056394763, + "max_sentence2_length": 61, + "unique_sentence2": 1644, + "min_score": 1.0, + "avg_score": 3.131570999956083, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/JSTS.json b/mteb/descriptive_stats/STS/JSTS.json new file mode 100644 index 0000000000..97ccdff5a8 --- /dev/null +++ b/mteb/descriptive_stats/STS/JSTS.json @@ -0,0 +1,18 @@ +{ + "validation": { + "num_samples": 1457, + "number_of_characters": 67518, + "unique_pairs": 1456, + "min_sentence1_length": 12, + "average_sentence1_len": 23.3452299245024, + "max_sentence1_length": 79, + "unique_sentence1": 1403, + "min_sentence2_length": 8, + "average_sentence2_len": 22.99519560741249, + "max_sentence2_length": 77, + "unique_sentence2": 1434, + "min_score": 0.0, + "avg_score": 2.2719286174379807, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/KLUE-STS.json b/mteb/descriptive_stats/STS/KLUE-STS.json new file mode 100644 index 0000000000..7f69948ab1 --- /dev/null +++ 
b/mteb/descriptive_stats/STS/KLUE-STS.json @@ -0,0 +1,18 @@ +{ + "validation": { + "num_samples": 519, + "number_of_characters": 34439, + "unique_pairs": 519, + "min_sentence1_length": 7, + "average_sentence1_len": 33.113680154142585, + "max_sentence1_length": 99, + "unique_sentence1": 519, + "min_sentence2_length": 10, + "average_sentence2_len": 33.24277456647399, + "max_sentence2_length": 113, + "unique_sentence2": 519, + "min_score": 0.0, + "avg_score": 2.484585741811175, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/KorSTS.json b/mteb/descriptive_stats/STS/KorSTS.json new file mode 100644 index 0000000000..2e70b49b29 --- /dev/null +++ b/mteb/descriptive_stats/STS/KorSTS.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 1376, + "number_of_characters": 80577, + "unique_pairs": 1374, + "min_sentence1_length": 6, + "average_sentence1_len": 29.38953488372093, + "max_sentence1_length": 98, + "unique_sentence1": 1243, + "min_sentence2_length": 8, + "average_sentence2_len": 29.169331395348838, + "max_sentence2_length": 100, + "unique_sentence2": 1324, + "min_score": 0.0, + "avg_score": 2.6053539244186066, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/LCQMC.json b/mteb/descriptive_stats/STS/LCQMC.json new file mode 100644 index 0000000000..44ddf42b27 --- /dev/null +++ b/mteb/descriptive_stats/STS/LCQMC.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 12500, + "number_of_characters": 242932, + "unique_pairs": 12500, + "min_sentence1_length": 4, + "average_sentence1_len": 9.61608, + "max_sentence1_length": 26, + "unique_sentence1": 12088, + "min_sentence2_length": 4, + "average_sentence2_len": 9.81848, + "max_sentence2_length": 27, + "unique_sentence2": 12064, + "min_score": 0, + "avg_score": 0.5, + "max_score": 1 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/PAWSX.json b/mteb/descriptive_stats/STS/PAWSX.json new file mode 100644 index 
0000000000..e8dea2e340 --- /dev/null +++ b/mteb/descriptive_stats/STS/PAWSX.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 2000, + "number_of_characters": 173050, + "unique_pairs": 1994, + "min_sentence1_length": 2, + "average_sentence1_len": 43.243, + "max_sentence1_length": 120, + "unique_sentence1": 1909, + "min_sentence2_length": 2, + "average_sentence2_len": 43.282, + "max_sentence2_length": 113, + "unique_sentence2": 1909, + "min_score": 0, + "avg_score": 0.447, + "max_score": 1 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/QBQTC.json b/mteb/descriptive_stats/STS/QBQTC.json new file mode 100644 index 0000000000..8d172f2555 --- /dev/null +++ b/mteb/descriptive_stats/STS/QBQTC.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 5000, + "number_of_characters": 174698, + "unique_pairs": 4991, + "min_sentence1_length": 1, + "average_sentence1_len": 9.6516, + "max_sentence1_length": 202, + "unique_sentence1": 4924, + "min_sentence2_length": 2, + "average_sentence2_len": 25.288, + "max_sentence2_length": 127, + "unique_sentence2": 4889, + "min_score": 0, + "avg_score": 0.8846, + "max_score": 2 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/RUParaPhraserSTS.json b/mteb/descriptive_stats/STS/RUParaPhraserSTS.json new file mode 100644 index 0000000000..425ae97571 --- /dev/null +++ b/mteb/descriptive_stats/STS/RUParaPhraserSTS.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 1924, + "number_of_characters": 230196, + "unique_pairs": 1899, + "min_sentence1_length": 22, + "average_sentence1_len": 61.24064449064449, + "max_sentence1_length": 123, + "unique_sentence1": 1514, + "min_sentence2_length": 25, + "average_sentence2_len": 58.40384615384615, + "max_sentence2_length": 104, + "unique_sentence2": 1383, + "min_score": -1.0, + "avg_score": -0.20686070686070687, + "max_score": 1.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/RonSTS.json 
b/mteb/descriptive_stats/STS/RonSTS.json new file mode 100644 index 0000000000..8b3a78a0ef --- /dev/null +++ b/mteb/descriptive_stats/STS/RonSTS.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 1379, + "number_of_characters": 166895, + "unique_pairs": 1377, + "min_sentence1_length": 12, + "average_sentence1_len": 60.58230601885424, + "max_sentence1_length": 257, + "unique_sentence1": 1296, + "min_sentence2_length": 12, + "average_sentence2_len": 60.44379985496737, + "max_sentence2_length": 264, + "unique_sentence2": 1337, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/RuSTSBenchmarkSTS.json b/mteb/descriptive_stats/STS/RuSTSBenchmarkSTS.json new file mode 100644 index 0000000000..a827385c28 --- /dev/null +++ b/mteb/descriptive_stats/STS/RuSTSBenchmarkSTS.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 1264, + "number_of_characters": 137036, + "unique_pairs": 1264, + "min_sentence1_length": 10, + "average_sentence1_len": 54.374208860759495, + "max_sentence1_length": 263, + "unique_sentence1": 1135, + "min_sentence2_length": 10, + "average_sentence2_len": 54.04034810126582, + "max_sentence2_length": 269, + "unique_sentence2": 1217, + "min_score": 0.0, + "avg_score": 2.5609881337849965, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/SICK-BR-STS.json b/mteb/descriptive_stats/STS/SICK-BR-STS.json new file mode 100644 index 0000000000..fe7def0802 --- /dev/null +++ b/mteb/descriptive_stats/STS/SICK-BR-STS.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 1000, + "number_of_characters": 108859, + "unique_pairs": 1000, + "min_sentence1_length": 20, + "average_sentence1_len": 54.615, + "max_sentence1_length": 158, + "unique_sentence1": 907, + "min_sentence2_length": 21, + "average_sentence2_len": 54.244, + "max_sentence2_length": 136, + "unique_sentence2": 917, + "min_score": 1.0, + "avg_score": 3.53862, + 
"max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/SICK-R-PL.json b/mteb/descriptive_stats/STS/SICK-R-PL.json new file mode 100644 index 0000000000..01678ccdc8 --- /dev/null +++ b/mteb/descriptive_stats/STS/SICK-R-PL.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 4906, + "number_of_characters": 419806, + "unique_pairs": 4892, + "min_sentence1_length": 8, + "average_sentence1_len": 43.16102731349368, + "max_sentence1_length": 169, + "unique_sentence1": 3332, + "min_sentence2_length": 8, + "average_sentence2_len": 42.40888707704851, + "max_sentence2_length": 144, + "unique_sentence2": 3278, + "min_score": 1.0, + "avg_score": 3.527907667576356, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/SICK-R.json b/mteb/descriptive_stats/STS/SICK-R.json new file mode 100644 index 0000000000..a918ce4770 --- /dev/null +++ b/mteb/descriptive_stats/STS/SICK-R.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 9927, + "number_of_characters": 915617, + "unique_pairs": 9842, + "min_sentence1_length": 15, + "average_sentence1_len": 46.602196031026494, + "max_sentence1_length": 151, + "unique_sentence1": 5014, + "min_sentence2_length": 14, + "average_sentence2_len": 45.63281958295558, + "max_sentence2_length": 151, + "unique_sentence2": 4946, + "min_score": 1.0, + "avg_score": 3.5291492898156607, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/SICKFr.json b/mteb/descriptive_stats/STS/SICKFr.json new file mode 100644 index 0000000000..c91dc416e5 --- /dev/null +++ b/mteb/descriptive_stats/STS/SICKFr.json @@ -0,0 +1,34 @@ +{ + "validation": { + "num_samples": 495, + "number_of_characters": 53143, + "unique_pairs": 495, + "min_sentence1_length": 14, + "average_sentence1_len": 54.67272727272727, + "max_sentence1_length": 156, + "unique_sentence1": 471, + "min_sentence2_length": 14, + "average_sentence2_len": 52.686868686868685, + 
"max_sentence2_length": 156, + "unique_sentence2": 468, + "min_score": 1.0, + "avg_score": 3.5913333401535494, + "max_score": 5.0 + }, + "test": { + "num_samples": 4906, + "number_of_characters": 515052, + "unique_pairs": 4850, + "min_sentence1_length": 12, + "average_sentence1_len": 52.897472482674274, + "max_sentence1_length": 158, + "unique_sentence1": 3281, + "min_sentence2_length": 12, + "average_sentence2_len": 52.08662861801875, + "max_sentence2_length": 169, + "unique_sentence2": 3245, + "min_score": 1.0, + "avg_score": 3.5279076675760273, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/STS13.json b/mteb/descriptive_stats/STS/STS13.json new file mode 100644 index 0000000000..4e7d497897 --- /dev/null +++ b/mteb/descriptive_stats/STS/STS13.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 1500, + "number_of_characters": 161949, + "unique_pairs": 1500, + "min_sentence1_length": 19, + "average_sentence1_len": 62.06733333333333, + "max_sentence1_length": 415, + "unique_sentence1": 1314, + "min_sentence2_length": 19, + "average_sentence2_len": 45.898666666666664, + "max_sentence2_length": 185, + "unique_sentence2": 1355, + "min_score": 0.0, + "avg_score": 2.3361888888888864, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/STS14.json b/mteb/descriptive_stats/STS/STS14.json new file mode 100644 index 0000000000..7c6c2b4282 --- /dev/null +++ b/mteb/descriptive_stats/STS/STS14.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 3750, + "number_of_characters": 407185, + "unique_pairs": 3682, + "min_sentence1_length": 20, + "average_sentence1_len": 56.6216, + "max_sentence1_length": 372, + "unique_sentence1": 3408, + "min_sentence2_length": 18, + "average_sentence2_len": 51.96106666666667, + "max_sentence2_length": 314, + "unique_sentence2": 3164, + "min_score": 0.0, + "avg_score": 2.8114334391534355, + "max_score": 5.0 + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/STS/STS15.json b/mteb/descriptive_stats/STS/STS15.json new file mode 100644 index 0000000000..5b0f97ba45 --- /dev/null +++ b/mteb/descriptive_stats/STS/STS15.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 3000, + "number_of_characters": 346442, + "unique_pairs": 3000, + "min_sentence1_length": 15, + "average_sentence1_len": 57.822, + "max_sentence1_length": 302, + "unique_sentence1": 2942, + "min_sentence2_length": 16, + "average_sentence2_len": 57.65866666666667, + "max_sentence2_length": 275, + "unique_sentence2": 2310, + "min_score": 0.0, + "avg_score": 2.40591333333333, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/STS16.json b/mteb/descriptive_stats/STS/STS16.json new file mode 100644 index 0000000000..84b3ca6a9b --- /dev/null +++ b/mteb/descriptive_stats/STS/STS16.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 1186, + "number_of_characters": 154802, + "unique_pairs": 1186, + "min_sentence1_length": 12, + "average_sentence1_len": 65.5177065767285, + "max_sentence1_length": 283, + "unique_sentence1": 928, + "min_sentence2_length": 10, + "average_sentence2_len": 65.00674536256324, + "max_sentence2_length": 290, + "unique_sentence2": 1055, + "min_score": 0.0, + "avg_score": 2.4131534569983137, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/STS22.v2.json b/mteb/descriptive_stats/STS/STS22.v2.json new file mode 100644 index 0000000000..d544a2a4e7 --- /dev/null +++ b/mteb/descriptive_stats/STS/STS22.v2.json @@ -0,0 +1,308 @@ +{ + "test": { + "num_samples": 3958, + "number_of_characters": 15936443, + "unique_pairs": 3946, + "min_sentence1_length": 16, + "average_sentence1_len": 2167.554573016675, + "max_sentence1_length": 47013, + "unique_sentence1": 3920, + "min_sentence2_length": 51, + "average_sentence2_len": 1858.833249115715, + "max_sentence2_length": 99998, + "unique_sentence2": 3867, + "min_score": 1.0, + "avg_score": 
2.494357419572234, + "max_score": 4.0, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 197, + "number_of_characters": 1107336, + "unique_pairs": 197, + "min_sentence1_length": 93, + "average_sentence1_len": 3152.5888324873094, + "max_sentence1_length": 27173, + "unique_sentence1": 197, + "min_sentence2_length": 69, + "average_sentence2_len": 2468.4060913705584, + "max_sentence2_length": 28987, + "unique_sentence2": 195, + "min_score": 1.0, + "avg_score": 2.6615905245350255, + "max_score": 4.0 + }, + "de": { + "num_samples": 514, + "number_of_characters": 2173456, + "unique_pairs": 511, + "min_sentence1_length": 74, + "average_sentence1_len": 2303.1770428015566, + "max_sentence1_length": 21787, + "unique_sentence1": 504, + "min_sentence2_length": 102, + "average_sentence2_len": 1925.3365758754865, + "max_sentence2_length": 19246, + "unique_sentence2": 504, + "min_score": 1.0, + "avg_score": 2.268806744487743, + "max_score": 4.0 + }, + "es": { + "num_samples": 200, + "number_of_characters": 819286, + "unique_pairs": 200, + "min_sentence1_length": 126, + "average_sentence1_len": 2010.545, + "max_sentence1_length": 7148, + "unique_sentence1": 200, + "min_sentence2_length": 84, + "average_sentence2_len": 2085.885, + "max_sentence2_length": 56388, + "unique_sentence2": 199, + "min_score": 1.0, + "avg_score": 2.97, + "max_score": 4.0 + }, + "pl": { + "num_samples": 182, + "number_of_characters": 754440, + "unique_pairs": 175, + "min_sentence1_length": 170, + "average_sentence1_len": 2103.7912087912086, + "max_sentence1_length": 10525, + "unique_sentence1": 172, + "min_sentence2_length": 170, + "average_sentence2_len": 2041.4835164835165, + "max_sentence2_length": 14962, + "unique_sentence2": 174, + "min_score": 1.0, + "avg_score": 3.087912087912088, + "max_score": 4.0 + }, + "tr": { + "num_samples": 208, + "number_of_characters": 823671, + "unique_pairs": 207, + "min_sentence1_length": 56, + "average_sentence1_len": 2159.1490384615386, + 
"max_sentence1_length": 12489, + "unique_sentence1": 207, + "min_sentence2_length": 114, + "average_sentence2_len": 1800.8076923076924, + "max_sentence2_length": 12266, + "unique_sentence2": 207, + "min_score": 1.0, + "avg_score": 2.5528846153846154, + "max_score": 4.0 + }, + "ar": { + "num_samples": 193, + "number_of_characters": 618141, + "unique_pairs": 192, + "min_sentence1_length": 79, + "average_sentence1_len": 1760.8549222797928, + "max_sentence1_length": 23606, + "unique_sentence1": 186, + "min_sentence2_length": 79, + "average_sentence2_len": 1441.9481865284974, + "max_sentence2_length": 8677, + "unique_sentence2": 176, + "min_score": 1.0, + "avg_score": 2.7927461139896375, + "max_score": 4.0 + }, + "ru": { + "num_samples": 265, + "number_of_characters": 867284, + "unique_pairs": 265, + "min_sentence1_length": 56, + "average_sentence1_len": 1759.8415094339623, + "max_sentence1_length": 11008, + "unique_sentence1": 264, + "min_sentence2_length": 209, + "average_sentence2_len": 1512.9283018867925, + "max_sentence2_length": 13772, + "unique_sentence2": 265, + "min_score": 1.0, + "avg_score": 2.1849056603773587, + "max_score": 4.0 + }, + "zh": { + "num_samples": 637, + "number_of_characters": 1062054, + "unique_pairs": 637, + "min_sentence1_length": 72, + "average_sentence1_len": 846.5620094191523, + "max_sentence1_length": 16589, + "unique_sentence1": 634, + "min_sentence2_length": 100, + "average_sentence2_len": 820.712715855573, + "max_sentence2_length": 17899, + "unique_sentence2": 630, + "min_score": 1.0, + "avg_score": 2.706436420722135, + "max_score": 4.0 + }, + "fr": { + "num_samples": 101, + "number_of_characters": 531615, + "unique_pairs": 101, + "min_sentence1_length": 94, + "average_sentence1_len": 2819.5445544554455, + "max_sentence1_length": 18550, + "unique_sentence1": 101, + "min_sentence2_length": 274, + "average_sentence2_len": 2443.970297029703, + "max_sentence2_length": 12790, + "unique_sentence2": 100, + "min_score": 1.0, + "avg_score": 
2.772277227722772, + "max_score": 4.0 + }, + "de-en": { + "num_samples": 158, + "number_of_characters": 723294, + "unique_pairs": 158, + "min_sentence1_length": 123, + "average_sentence1_len": 2818.7088607594937, + "max_sentence1_length": 25436, + "unique_sentence1": 158, + "min_sentence2_length": 72, + "average_sentence2_len": 1759.1012658227849, + "max_sentence2_length": 9728, + "unique_sentence2": 151, + "min_score": 1.0, + "avg_score": 1.5643459915613924, + "max_score": 4.0 + }, + "es-en": { + "num_samples": 365, + "number_of_characters": 2075804, + "unique_pairs": 365, + "min_sentence1_length": 95, + "average_sentence1_len": 3109.8438356164384, + "max_sentence1_length": 47013, + "unique_sentence1": 365, + "min_sentence2_length": 51, + "average_sentence2_len": 2577.290410958904, + "max_sentence2_length": 99998, + "unique_sentence2": 348, + "min_score": 1.0, + "avg_score": 2.1803652968038354, + "max_score": 4.0 + }, + "it": { + "num_samples": 407, + "number_of_characters": 1571127, + "unique_pairs": 407, + "min_sentence1_length": 125, + "average_sentence1_len": 2094.840294840295, + "max_sentence1_length": 12681, + "unique_sentence1": 407, + "min_sentence2_length": 117, + "average_sentence2_len": 1765.4226044226045, + "max_sentence2_length": 10539, + "unique_sentence2": 406, + "min_score": 1.0, + "avg_score": 2.3574938574938575, + "max_score": 4.0 + }, + "pl-en": { + "num_samples": 48, + "number_of_characters": 268907, + "unique_pairs": 48, + "min_sentence1_length": 288, + "average_sentence1_len": 3447.0416666666665, + "max_sentence1_length": 12896, + "unique_sentence1": 48, + "min_sentence2_length": 76, + "average_sentence2_len": 2155.1875, + "max_sentence2_length": 7858, + "unique_sentence2": 43, + "min_score": 1.0, + "avg_score": 2.5729166666666665, + "max_score": 4.0 + }, + "zh-en": { + "num_samples": 161, + "number_of_characters": 656286, + "unique_pairs": 161, + "min_sentence1_length": 16, + "average_sentence1_len": 1412.3354037267081, + 
"max_sentence1_length": 28881, + "unique_sentence1": 161, + "min_sentence2_length": 53, + "average_sentence2_len": 2663.9751552795033, + "max_sentence2_length": 20810, + "unique_sentence2": 157, + "min_score": 1.0, + "avg_score": 1.9761904761906832, + "max_score": 4.0 + }, + "es-it": { + "num_samples": 212, + "number_of_characters": 1059099, + "unique_pairs": 212, + "min_sentence1_length": 99, + "average_sentence1_len": 2847.1367924528304, + "max_sentence1_length": 28292, + "unique_sentence1": 212, + "min_sentence2_length": 127, + "average_sentence2_len": 2148.6132075471696, + "max_sentence2_length": 10192, + "unique_sentence2": 206, + "min_score": 1.0, + "avg_score": 2.75, + "max_score": 4.0 + }, + "de-fr": { + "num_samples": 74, + "number_of_characters": 568919, + "unique_pairs": 74, + "min_sentence1_length": 146, + "average_sentence1_len": 4468.945945945946, + "max_sentence1_length": 26757, + "unique_sentence1": 74, + "min_sentence2_length": 183, + "average_sentence2_len": 3219.1486486486488, + "max_sentence2_length": 27900, + "unique_sentence2": 74, + "min_score": 1.0, + "avg_score": 3.0405405405405403, + "max_score": 4.0 + }, + "de-pl": { + "num_samples": 28, + "number_of_characters": 209634, + "unique_pairs": 28, + "min_sentence1_length": 162, + "average_sentence1_len": 4455.0, + "max_sentence1_length": 12415, + "unique_sentence1": 28, + "min_sentence2_length": 417, + "average_sentence2_len": 3031.9285714285716, + "max_sentence2_length": 9181, + "unique_sentence2": 28, + "min_score": 1.0, + "avg_score": 3.392857142857143, + "max_score": 4.0 + }, + "fr-pl": { + "num_samples": 8, + "number_of_characters": 46090, + "unique_pairs": 8, + "min_sentence1_length": 1124, + "average_sentence1_len": 3127.625, + "max_sentence1_length": 5366, + "unique_sentence1": 8, + "min_sentence2_length": 1603, + "average_sentence2_len": 2633.625, + "max_sentence2_length": 6000, + "unique_sentence2": 8, + "min_score": 2.0, + "avg_score": 3.25, + "max_score": 4.0 + } + } + } +} \ No 
newline at end of file diff --git a/mteb/descriptive_stats/STS/STSB.json b/mteb/descriptive_stats/STS/STSB.json new file mode 100644 index 0000000000..26995b1e85 --- /dev/null +++ b/mteb/descriptive_stats/STS/STSB.json @@ -0,0 +1,34 @@ +{ + "validation": { + "num_samples": 1458, + "number_of_characters": 64295, + "unique_pairs": 1455, + "min_sentence1_length": 4, + "average_sentence1_len": 21.980109739369, + "max_sentence1_length": 99, + "unique_sentence1": 1427, + "min_sentence2_length": 5, + "average_sentence2_len": 22.117969821673526, + "max_sentence2_length": 106, + "unique_sentence2": 1415, + "min_score": 0, + "avg_score": 2.00480109739369, + "max_score": 5 + }, + "test": { + "num_samples": 1361, + "number_of_characters": 51020, + "unique_pairs": 1354, + "min_sentence1_length": 4, + "average_sentence1_len": 18.857457751653197, + "max_sentence1_length": 113, + "unique_sentence1": 1225, + "min_sentence2_length": 3, + "average_sentence2_len": 18.62968405584129, + "max_sentence2_length": 127, + "unique_sentence2": 1298, + "min_score": 0, + "avg_score": 2.3115356355620866, + "max_score": 5 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/STSBenchmark.json b/mteb/descriptive_stats/STS/STSBenchmark.json new file mode 100644 index 0000000000..78cafeb492 --- /dev/null +++ b/mteb/descriptive_stats/STS/STSBenchmark.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 1379, + "number_of_characters": 147886, + "unique_pairs": 1378, + "min_sentence1_length": 16, + "average_sentence1_len": 53.73966642494561, + "max_sentence1_length": 215, + "unique_sentence1": 1256, + "min_sentence2_length": 13, + "average_sentence2_len": 53.50181290790428, + "max_sentence2_length": 199, + "unique_sentence2": 1337, + "min_score": 0.0, + "avg_score": 2.607916606236405, + "max_score": 5.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/STSBenchmarkMultilingualSTS.json b/mteb/descriptive_stats/STS/STSBenchmarkMultilingualSTS.json new file 
mode 100644 index 0000000000..0056deda9b --- /dev/null +++ b/mteb/descriptive_stats/STS/STSBenchmarkMultilingualSTS.json @@ -0,0 +1,358 @@ +{ + "dev": { + "num_samples": 15000, + "number_of_characters": 1996110, + "unique_pairs": 14974, + "min_sentence1_length": 3, + "average_sentence1_len": 66.6904, + "max_sentence1_length": 274, + "unique_sentence1": 14676, + "min_sentence2_length": 3, + "average_sentence2_len": 66.3836, + "max_sentence2_length": 281, + "unique_sentence2": 14605, + "min_score": 0.0, + "avg_score": 2.3639075540602206, + "max_score": 5.0, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 1500, + "number_of_characters": 191955, + "unique_pairs": 1498, + "min_sentence1_length": 12, + "average_sentence1_len": 64.258, + "max_sentence1_length": 200, + "unique_sentence1": 1474, + "min_sentence2_length": 17, + "average_sentence2_len": 63.712, + "max_sentence2_length": 186, + "unique_sentence2": 1467, + "min_score": 0.0, + "avg_score": 2.3639075540602206, + "max_score": 5.0 + }, + "de": { + "num_samples": 1500, + "number_of_characters": 225853, + "unique_pairs": 1497, + "min_sentence1_length": 14, + "average_sentence1_len": 75.482, + "max_sentence1_length": 246, + "unique_sentence1": 1467, + "min_sentence2_length": 14, + "average_sentence2_len": 75.08666666666667, + "max_sentence2_length": 267, + "unique_sentence2": 1457, + "min_score": 0.0, + "avg_score": 2.3639075540602206, + "max_score": 5.0 + }, + "es": { + "num_samples": 1500, + "number_of_characters": 222932, + "unique_pairs": 1498, + "min_sentence1_length": 18, + "average_sentence1_len": 74.578, + "max_sentence1_length": 240, + "unique_sentence1": 1470, + "min_sentence2_length": 21, + "average_sentence2_len": 74.04333333333334, + "max_sentence2_length": 238, + "unique_sentence2": 1462, + "min_score": 0.0, + "avg_score": 2.3639075540602206, + "max_score": 5.0 + }, + "fr": { + "num_samples": 1500, + "number_of_characters": 230006, + "unique_pairs": 1497, + "min_sentence1_length": 15, + 
"average_sentence1_len": 76.81, + "max_sentence1_length": 260, + "unique_sentence1": 1465, + "min_sentence2_length": 12, + "average_sentence2_len": 76.52733333333333, + "max_sentence2_length": 244, + "unique_sentence2": 1459, + "min_score": 0.0, + "avg_score": 2.3639075540602206, + "max_score": 5.0 + }, + "it": { + "num_samples": 1500, + "number_of_characters": 223918, + "unique_pairs": 1498, + "min_sentence1_length": 16, + "average_sentence1_len": 74.784, + "max_sentence1_length": 257, + "unique_sentence1": 1469, + "min_sentence2_length": 19, + "average_sentence2_len": 74.49466666666666, + "max_sentence2_length": 238, + "unique_sentence2": 1463, + "min_score": 0.0, + "avg_score": 2.3639075540602206, + "max_score": 5.0 + }, + "nl": { + "num_samples": 1500, + "number_of_characters": 216574, + "unique_pairs": 1498, + "min_sentence1_length": 11, + "average_sentence1_len": 72.27533333333334, + "max_sentence1_length": 274, + "unique_sentence1": 1471, + "min_sentence2_length": 14, + "average_sentence2_len": 72.10733333333333, + "max_sentence2_length": 248, + "unique_sentence2": 1461, + "min_score": 0.0, + "avg_score": 2.3639075540602206, + "max_score": 5.0 + }, + "pl": { + "num_samples": 1500, + "number_of_characters": 202402, + "unique_pairs": 1498, + "min_sentence1_length": 12, + "average_sentence1_len": 67.58666666666667, + "max_sentence1_length": 251, + "unique_sentence1": 1466, + "min_sentence2_length": 12, + "average_sentence2_len": 67.348, + "max_sentence2_length": 238, + "unique_sentence2": 1460, + "min_score": 0.0, + "avg_score": 2.3639075540602206, + "max_score": 5.0 + }, + "pt": { + "num_samples": 1500, + "number_of_characters": 216388, + "unique_pairs": 1498, + "min_sentence1_length": 16, + "average_sentence1_len": 72.25933333333333, + "max_sentence1_length": 254, + "unique_sentence1": 1470, + "min_sentence2_length": 16, + "average_sentence2_len": 71.99933333333334, + "max_sentence2_length": 222, + "unique_sentence2": 1464, + "min_score": 0.0, + "avg_score": 
2.3639075540602206, + "max_score": 5.0 + }, + "ru": { + "num_samples": 1500, + "number_of_characters": 203028, + "unique_pairs": 1495, + "min_sentence1_length": 13, + "average_sentence1_len": 67.802, + "max_sentence1_length": 261, + "unique_sentence1": 1464, + "min_sentence2_length": 10, + "average_sentence2_len": 67.55, + "max_sentence2_length": 281, + "unique_sentence2": 1454, + "min_score": 0.0, + "avg_score": 2.3639075540602206, + "max_score": 5.0 + }, + "zh": { + "num_samples": 1500, + "number_of_characters": 63054, + "unique_pairs": 1497, + "min_sentence1_length": 3, + "average_sentence1_len": 21.068666666666665, + "max_sentence1_length": 95, + "unique_sentence1": 1466, + "min_sentence2_length": 3, + "average_sentence2_len": 20.967333333333332, + "max_sentence2_length": 83, + "unique_sentence2": 1459, + "min_score": 0.0, + "avg_score": 2.3639075540602206, + "max_score": 5.0 + } + } + }, + "test": { + "num_samples": 13790, + "number_of_characters": 1545886, + "unique_pairs": 13756, + "min_sentence1_length": 3, + "average_sentence1_len": 56.14786076867295, + "max_sentence1_length": 297, + "unique_sentence1": 12462, + "min_sentence2_length": 3, + "average_sentence2_len": 55.95409717186367, + "max_sentence2_length": 315, + "unique_sentence2": 13267, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 1379, + "number_of_characters": 147873, + "unique_pairs": 1378, + "min_sentence1_length": 16, + "average_sentence1_len": 53.734590282813635, + "max_sentence1_length": 215, + "unique_sentence1": 1256, + "min_sentence2_length": 13, + "average_sentence2_len": 53.49746192893401, + "max_sentence2_length": 199, + "unique_sentence2": 1337, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0 + }, + "de": { + "num_samples": 1379, + "number_of_characters": 174195, + "unique_pairs": 1376, + "min_sentence1_length": 14, + "average_sentence1_len": 63.28426395939086, + 
"max_sentence1_length": 275, + "unique_sentence1": 1248, + "min_sentence2_length": 13, + "average_sentence2_len": 63.035532994923855, + "max_sentence2_length": 268, + "unique_sentence2": 1327, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0 + }, + "es": { + "num_samples": 1379, + "number_of_characters": 174677, + "unique_pairs": 1376, + "min_sentence1_length": 14, + "average_sentence1_len": 63.44379985496737, + "max_sentence1_length": 240, + "unique_sentence1": 1248, + "min_sentence2_length": 13, + "average_sentence2_len": 63.22552574329224, + "max_sentence2_length": 271, + "unique_sentence2": 1330, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0 + }, + "fr": { + "num_samples": 1379, + "number_of_characters": 179252, + "unique_pairs": 1374, + "min_sentence1_length": 14, + "average_sentence1_len": 64.99202320522117, + "max_sentence1_length": 265, + "unique_sentence1": 1244, + "min_sentence2_length": 12, + "average_sentence2_len": 64.99492385786802, + "max_sentence2_length": 258, + "unique_sentence2": 1322, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0 + }, + "it": { + "num_samples": 1379, + "number_of_characters": 174276, + "unique_pairs": 1375, + "min_sentence1_length": 11, + "average_sentence1_len": 63.370558375634516, + "max_sentence1_length": 297, + "unique_sentence1": 1249, + "min_sentence2_length": 11, + "average_sentence2_len": 63.00797679477883, + "max_sentence2_length": 315, + "unique_sentence2": 1330, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0 + }, + "nl": { + "num_samples": 1379, + "number_of_characters": 166173, + "unique_pairs": 1377, + "min_sentence1_length": 13, + "average_sentence1_len": 60.37490935460479, + "max_sentence1_length": 284, + "unique_sentence1": 1247, + "min_sentence2_length": 14, + "average_sentence2_len": 60.1276287164612, + "max_sentence2_length": 255, + "unique_sentence2": 1327, + "min_score": 0.0, + "avg_score": 
2.6079166059890806, + "max_score": 5.0 + }, + "pl": { + "num_samples": 1379, + "number_of_characters": 156173, + "unique_pairs": 1375, + "min_sentence1_length": 11, + "average_sentence1_len": 56.66352429296592, + "max_sentence1_length": 245, + "unique_sentence1": 1243, + "min_sentence2_length": 9, + "average_sentence2_len": 56.58738216098622, + "max_sentence2_length": 224, + "unique_sentence2": 1325, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0 + }, + "pt": { + "num_samples": 1379, + "number_of_characters": 167773, + "unique_pairs": 1377, + "min_sentence1_length": 8, + "average_sentence1_len": 60.849166062364034, + "max_sentence1_length": 257, + "unique_sentence1": 1249, + "min_sentence2_length": 8, + "average_sentence2_len": 60.81363306744017, + "max_sentence2_length": 248, + "unique_sentence2": 1332, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0 + }, + "ru": { + "num_samples": 1379, + "number_of_characters": 156178, + "unique_pairs": 1376, + "min_sentence1_length": 10, + "average_sentence1_len": 56.80928208846991, + "max_sentence1_length": 263, + "unique_sentence1": 1240, + "min_sentence2_length": 10, + "average_sentence2_len": 56.44525018129079, + "max_sentence2_length": 269, + "unique_sentence2": 1321, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0 + }, + "zh": { + "num_samples": 1379, + "number_of_characters": 49316, + "unique_pairs": 1372, + "min_sentence1_length": 3, + "average_sentence1_len": 17.956490210297318, + "max_sentence1_length": 96, + "unique_sentence1": 1242, + "min_sentence2_length": 3, + "average_sentence2_len": 17.80565627266135, + "max_sentence2_length": 131, + "unique_sentence2": 1320, + "min_score": 0.0, + "avg_score": 2.6079166059890806, + "max_score": 5.0 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/STSES.json b/mteb/descriptive_stats/STS/STSES.json new file mode 100644 index 0000000000..7903adb893 --- /dev/null +++ 
b/mteb/descriptive_stats/STS/STSES.json @@ -0,0 +1,18 @@ +{ + "test": { + "num_samples": 155, + "number_of_characters": 58718, + "unique_pairs": 155, + "min_sentence1_length": 75, + "average_sentence1_len": 191.13548387096773, + "max_sentence1_length": 374, + "unique_sentence1": 151, + "min_sentence2_length": 64, + "average_sentence2_len": 187.69032258064516, + "max_sentence2_length": 508, + "unique_sentence2": 154, + "min_score": 0.0, + "avg_score": 2.1612903246956487, + "max_score": 4.0 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/SemRel24STS.json b/mteb/descriptive_stats/STS/SemRel24STS.json new file mode 100644 index 0000000000..cec993e9e5 --- /dev/null +++ b/mteb/descriptive_stats/STS/SemRel24STS.json @@ -0,0 +1,212 @@ +{ + "test": { + "num_samples": 7498, + "number_of_characters": 1094064, + "unique_pairs": 7498, + "min_sentence1_length": 4, + "average_sentence1_len": 72.16057615364097, + "max_sentence1_length": 295, + "unique_sentence1": 7371, + "min_sentence2_length": 4, + "average_sentence2_len": 73.75353427580689, + "max_sentence2_length": 282, + "unique_sentence2": 7390, + "min_score": 0.0, + "avg_score": 0.5065510802880743, + "max_score": 1.0, + "hf_subset_descriptive_stats": { + "afr": { + "num_samples": 375, + "number_of_characters": 59106, + "unique_pairs": 375, + "min_sentence1_length": 25, + "average_sentence1_len": 78.16266666666667, + "max_sentence1_length": 133, + "unique_sentence1": 369, + "min_sentence2_length": 25, + "average_sentence2_len": 79.45333333333333, + "max_sentence2_length": 137, + "unique_sentence2": 374, + "min_score": 0.03, + "avg_score": 0.49927999999999995, + "max_score": 0.97 + }, + "amh": { + "num_samples": 171, + "number_of_characters": 13562, + "unique_pairs": 171, + "min_sentence1_length": 14, + "average_sentence1_len": 39.76608187134503, + "max_sentence1_length": 82, + "unique_sentence1": 171, + "min_sentence2_length": 14, + "average_sentence2_len": 39.54385964912281, + 
"max_sentence2_length": 77, + "unique_sentence2": 171, + "min_score": 0.08, + "avg_score": 0.5015204678362573, + "max_score": 0.94 + }, + "arb": { + "num_samples": 595, + "number_of_characters": 55662, + "unique_pairs": 595, + "min_sentence1_length": 9, + "average_sentence1_len": 45.36302521008403, + "max_sentence1_length": 216, + "unique_sentence1": 593, + "min_sentence2_length": 9, + "average_sentence2_len": 48.18655462184874, + "max_sentence2_length": 234, + "unique_sentence2": 593, + "min_score": 0.0, + "avg_score": 0.5027563025210086, + "max_score": 1.0 + }, + "arq": { + "num_samples": 583, + "number_of_characters": 38355, + "unique_pairs": 583, + "min_sentence1_length": 4, + "average_sentence1_len": 32.35849056603774, + "max_sentence1_length": 217, + "unique_sentence1": 576, + "min_sentence2_length": 4, + "average_sentence2_len": 33.43053173241852, + "max_sentence2_length": 207, + "unique_sentence2": 575, + "min_score": 0.12, + "avg_score": 0.5100686106346484, + "max_score": 0.91 + }, + "ary": { + "num_samples": 426, + "number_of_characters": 94286, + "unique_pairs": 426, + "min_sentence1_length": 29, + "average_sentence1_len": 111.05399061032864, + "max_sentence1_length": 295, + "unique_sentence1": 401, + "min_sentence2_length": 35, + "average_sentence2_len": 110.27464788732394, + "max_sentence2_length": 282, + "unique_sentence2": 403, + "min_score": 0.0, + "avg_score": 0.5103521126760563, + "max_score": 1.0 + }, + "eng": { + "num_samples": 2600, + "number_of_characters": 333047, + "unique_pairs": 2600, + "min_sentence1_length": 16, + "average_sentence1_len": 63.94384615384615, + "max_sentence1_length": 197, + "unique_sentence1": 2597, + "min_sentence2_length": 16, + "average_sentence2_len": 64.15115384615385, + "max_sentence2_length": 171, + "unique_sentence2": 2595, + "min_score": 0.12, + "avg_score": 0.5166153846153849, + "max_score": 0.97 + }, + "hau": { + "num_samples": 603, + "number_of_characters": 129799, + "unique_pairs": 603, + 
"min_sentence1_length": 42, + "average_sentence1_len": 105.33665008291874, + "max_sentence1_length": 169, + "unique_sentence1": 543, + "min_sentence2_length": 27, + "average_sentence2_len": 109.91873963515755, + "max_sentence2_length": 164, + "unique_sentence2": 546, + "min_score": 0.0, + "avg_score": 0.4962189054726365, + "max_score": 1.0 + }, + "hin": { + "num_samples": 968, + "number_of_characters": 141956, + "unique_pairs": 968, + "min_sentence1_length": 22, + "average_sentence1_len": 71.52685950413223, + "max_sentence1_length": 168, + "unique_sentence1": 962, + "min_sentence2_length": 17, + "average_sentence2_len": 75.12190082644628, + "max_sentence2_length": 176, + "unique_sentence2": 966, + "min_score": 0.02, + "avg_score": 0.496157024793389, + "max_score": 1.0 + }, + "ind": { + "num_samples": 360, + "number_of_characters": 62270, + "unique_pairs": 360, + "min_sentence1_length": 32, + "average_sentence1_len": 86.36111111111111, + "max_sentence1_length": 132, + "unique_sentence1": 344, + "min_sentence2_length": 32, + "average_sentence2_len": 86.61111111111111, + "max_sentence2_length": 125, + "unique_sentence2": 352, + "min_score": 0.0, + "avg_score": 0.4996666666666664, + "max_score": 1.0 + }, + "kin": { + "num_samples": 222, + "number_of_characters": 62833, + "unique_pairs": 222, + "min_sentence1_length": 53, + "average_sentence1_len": 141.56756756756758, + "max_sentence1_length": 234, + "unique_sentence1": 220, + "min_sentence2_length": 53, + "average_sentence2_len": 141.46396396396398, + "max_sentence2_length": 210, + "unique_sentence2": 220, + "min_score": 0.03, + "avg_score": 0.5000900900900903, + "max_score": 0.97 + }, + "mar": { + "num_samples": 298, + "number_of_characters": 50697, + "unique_pairs": 298, + "min_sentence1_length": 22, + "average_sentence1_len": 81.78523489932886, + "max_sentence1_length": 196, + "unique_sentence1": 298, + "min_sentence2_length": 22, + "average_sentence2_len": 88.33892617449665, + "max_sentence2_length": 163, + 
"unique_sentence2": 298, + "min_score": 0.0, + "avg_score": 0.5016107382550332, + "max_score": 1.0 + }, + "tel": { + "num_samples": 297, + "number_of_characters": 52491, + "unique_pairs": 297, + "min_sentence1_length": 27, + "average_sentence1_len": 87.15151515151516, + "max_sentence1_length": 174, + "unique_sentence1": 297, + "min_sentence2_length": 28, + "average_sentence2_len": 89.58585858585859, + "max_sentence2_length": 170, + "unique_sentence2": 297, + "min_score": 0.02, + "avg_score": 0.49875420875420884, + "max_score": 0.98 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Summarization/SummEvalFrSummarization.v2.json b/mteb/descriptive_stats/Summarization/SummEvalFrSummarization.v2.json new file mode 100644 index 0000000000..28da7f1c24 --- /dev/null +++ b/mteb/descriptive_stats/Summarization/SummEvalFrSummarization.v2.json @@ -0,0 +1,55 @@ +{ + "test": { + "num_samples": 100, + "number_of_characters": 242873, + "min_text_length": 668, + "avg_text_length": 2401.73, + "max_text_length": 3699, + "unique_texts": 100, + "min_human_summaries_length": 11, + "avg_human_summaries_length": 11.0, + "max_human_summaries_length": 11, + "unique_human_summaries": 1100, + "min_machine_summaries_length": 16, + "avg_machine_summaries_length": 16.0, + "max_machine_summaries_length": 16, + "unique_machine_summaries": 1540, + "min_relevance": [ + 1.0, + 1.333333333333333, + 3.666666666666666, + 2.333333333333333, + 3.666666666666666, + 3.0, + 4.333333333333333, + 4.0, + 2.666666666666666, + 4.0, + 2.0, + 4.666666666666667, + 4.333333333333333, + 1.0, + 2.0, + 1.0 + ], + "avg_relevance": 3.7770833333333336, + "max_relevance": [ + 5.0, + 4.666666666666667, + 4.333333333333333, + 2.666666666666666, + 4.666666666666667, + 4.666666666666667, + 4.666666666666667, + 4.333333333333333, + 4.0, + 4.333333333333333, + 4.666666666666667, + 4.666666666666667, + 4.333333333333333, + 2.333333333333333, + 4.666666666666667, + 4.666666666666667 + ] + } +} \ No 
newline at end of file diff --git a/mteb/descriptive_stats/Summarization/SummEvalSummarization.v2.json b/mteb/descriptive_stats/Summarization/SummEvalSummarization.v2.json new file mode 100644 index 0000000000..4c2f133abb --- /dev/null +++ b/mteb/descriptive_stats/Summarization/SummEvalSummarization.v2.json @@ -0,0 +1,55 @@ +{ + "test": { + "num_samples": 100, + "number_of_characters": 212735, + "min_text_length": 626, + "avg_text_length": 2100.35, + "max_text_length": 3153, + "unique_texts": 100, + "min_human_summaries_length": 11, + "avg_human_summaries_length": 11.0, + "max_human_summaries_length": 11, + "unique_human_summaries": 1100, + "min_machine_summaries_length": 16, + "avg_machine_summaries_length": 16.0, + "max_machine_summaries_length": 16, + "unique_machine_summaries": 1548, + "min_relevance": [ + 1.0, + 1.3333333333333333, + 3.6666666666666665, + 2.3333333333333335, + 3.6666666666666665, + 3.0, + 4.333333333333333, + 4.0, + 2.6666666666666665, + 4.0, + 2.0, + 4.666666666666667, + 4.333333333333333, + 1.0, + 2.0, + 1.0 + ], + "avg_relevance": 3.7770833333333336, + "max_relevance": [ + 5.0, + 4.666666666666667, + 4.333333333333333, + 2.6666666666666665, + 4.666666666666667, + 4.666666666666667, + 4.666666666666667, + 4.333333333333333, + 4.0, + 4.333333333333333, + 4.666666666666667, + 4.666666666666667, + 4.333333333333333, + 2.3333333333333335, + 4.666666666666667, + 4.666666666666667 + ] + } +} \ No newline at end of file diff --git a/mteb/tasks/Classification/kor/KorFin.py b/mteb/tasks/Classification/kor/KorFin.py index a22b7d5cfe..3622e4aa54 100644 --- a/mteb/tasks/Classification/kor/KorFin.py +++ b/mteb/tasks/Classification/kor/KorFin.py @@ -18,7 +18,7 @@ class KorFin(AbsTaskClassification): type="Classification", category="s2s", modalities=["text"], - eval_splits=["test"], + eval_splits=["train"], eval_langs=["kor-Hang"], main_score="accuracy", date=( @@ -46,5 +46,5 @@ def dataset_transform(self): {"SRC": "text", "SENTIMENT": "label"} 
).remove_columns(["SID", "TYPE", "ASPECT"]) self.dataset = self.stratified_subsampling( - self.dataset, seed=self.seed, splits=["test"] + self.dataset, seed=self.seed, splits=self.metadata.eval_splits ) diff --git a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py index 56e599364b..e6c9cc4ee0 100644 --- a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py +++ b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py @@ -83,4 +83,4 @@ def dataset_transform(self, lang): new_dict["labels"] = [labels] new_dict["sentence1"] = [sent1] new_dict["sentence2"] = [sent2] - self.dataset[lang][split] = datasets.Dataset.from_dict(new_dict) + self.dataset[lang][split] = new_dict diff --git a/mteb/tasks/PairClassification/multilingual/XStance.py b/mteb/tasks/PairClassification/multilingual/XStance.py index 515e598940..e2c5037259 100644 --- a/mteb/tasks/PairClassification/multilingual/XStance.py +++ b/mteb/tasks/PairClassification/multilingual/XStance.py @@ -58,7 +58,11 @@ def load_data(self, **kwargs): self.dataset = {} path = self.metadata_dict["dataset"]["path"] revision = self.metadata_dict["dataset"]["revision"] - raw_dataset = load_dataset(path, revision=revision, trust_remote_code=True) + raw_dataset = load_dataset( + path, + revision=revision, + trust_remote_code=self.metadata.dataset["trust_remote_code"], + ) def convert_example(example): return { @@ -83,26 +87,13 @@ def convert_example(example): ) # convert examples - self.dataset[lang][split] = self.dataset[lang][split].map( - convert_example, - remove_columns=self.dataset[lang][split].column_names, + self.dataset[lang][split] = ( + self.dataset[lang][split] + .map( + convert_example, + remove_columns=self.dataset[lang][split].column_names, + ) + .to_dict() ) - self.dataset_transform() self.data_loaded = True - - def dataset_transform(self): - """Transform dataset into sentence-pair format""" - _dataset = {} - - for lang in 
self.metadata.eval_langs: - _dataset[lang] = {} - for split in self.metadata.eval_splits: - _dataset[lang][split] = [ - { - "sent1": self.dataset[lang][split]["sent1"], - "sent2": self.dataset[lang][split]["sent2"], - "labels": self.dataset[lang][split]["labels"], - } - ] - self.dataset = _dataset diff --git a/mteb/tasks/Retrieval/eng/HagridRetrieval.py b/mteb/tasks/Retrieval/eng/HagridRetrieval.py index f9953caade..546bf99126 100644 --- a/mteb/tasks/Retrieval/eng/HagridRetrieval.py +++ b/mteb/tasks/Retrieval/eng/HagridRetrieval.py @@ -53,6 +53,7 @@ def load_data(self, **kwargs): "miracl/hagrid", split=self.metadata.eval_splits[0], revision=self.metadata_dict["dataset"].get("revision", None), + trust_remote_code=self.metadata.dataset["trust_remote_code"], ) proc_data = self.preprocess_data(data) diff --git a/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py b/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py index 23c916f393..3640881df0 100644 --- a/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py @@ -74,6 +74,7 @@ def _load_data_for_split(self, dataset_path, split): dataset_path, split=split, revision=revision, + trust_remote_code=self.metadata.dataset["trust_remote_code"], ) queries, corpus, qrels = {}, {}, {} for sample in ds: diff --git a/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py b/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py index 8ef0681dcd..e136bd82f2 100644 --- a/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py +++ b/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py @@ -62,19 +62,16 @@ def load_data(self, **kwargs): query_rows = datasets.load_dataset( name="queries", split="test", - trust_remote_code=True, **self.metadata_dict["dataset"], ) corpus_rows = datasets.load_dataset( name="corpus.documents", split="test", - trust_remote_code=True, **self.metadata_dict["dataset"], ) qrels_rows = datasets.load_dataset( name="qrels.s2p", split="test", - trust_remote_code=True, 
**self.metadata_dict["dataset"], ) diff --git a/mteb/tasks/STS/por/SickBrSTS.py b/mteb/tasks/STS/por/SickBrSTS.py index 7f42fadd80..cde9442cf9 100644 --- a/mteb/tasks/STS/por/SickBrSTS.py +++ b/mteb/tasks/STS/por/SickBrSTS.py @@ -64,7 +64,7 @@ def dataset_transform(self): self.dataset.update( { split: self.dataset[split].train_test_split( - test_size=N_SAMPLES, seed=self.seed, label="entailment_label" + test_size=N_SAMPLES, seed=self.seed )["test"] } ) diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index c9fdf22865..e4cf91e0d8 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -1138,6 +1138,7 @@ class MockRerankingTask(AbsTaskReranking): "average_query_length": 27.0, "max_query_length": 27, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1206,6 +1207,7 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 4, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1232,6 +1234,7 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1258,6 +1261,7 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1338,6 +1342,7 @@ class MockRetrievalTask(AbsTaskRetrieval): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 2, + "none_queries": 0, 
"min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1400,6 +1405,7 @@ class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 4, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1426,6 +1432,7 @@ class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1452,6 +1459,7 @@ class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1715,6 +1723,7 @@ class MockInstructionRetrieval(AbsTaskRetrieval): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1782,6 +1791,7 @@ class MockInstructionReranking(AbsTaskReranking): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1854,6 +1864,7 @@ class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 4, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1880,6 +1891,7 @@ class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): "average_query_length": 30.0, 
"max_query_length": 33, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1906,6 +1918,7 @@ class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1992,6 +2005,7 @@ class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 4, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -2018,6 +2032,7 @@ class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -2044,6 +2059,7 @@ class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): "average_query_length": 30.0, "max_query_length": 33, "unique_queries": 2, + "none_queries": 0, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, diff --git a/tests/test_tasks/test_metadata.py b/tests/test_tasks/test_metadata.py index 9c11b4ccf3..3d206da5c8 100644 --- a/tests/test_tasks/test_metadata.py +++ b/tests/test_tasks/test_metadata.py @@ -11,8 +11,6 @@ def test_descriptive_stats(task): # remove descriptive task file task.metadata.descriptive_stat_path.unlink() task_stat = task.expected_stats - print(task.metadata.name) - print(result_stat) for key, value in result_stat.items(): assert key in task_stat assert value == task_stat[key] From 0a5bedb6599b7edeedbb87b0654dc034429c25fa Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: 
Tue, 19 Nov 2024 19:33:55 +0500 Subject: [PATCH 08/40] fix: Fix test for empty descriptive tasks (#1413) * fix test * skip mock * add message to assert * fix test * lint * fix tests * upd tests * update descriptive stats files * add stat to speed --- mteb/abstasks/AbsTaskSpeedTask.py | 2 +- .../Retrieval/IndicQARetrieval.json | 328 ++++++++++ .../descriptive_stats/Speed/CPUSpeedTask.json | 5 + .../descriptive_stats/Speed/GPUSpeedTask.json | 5 + tests/test_TaskMetadata.py | 571 +----------------- tests/test_benchmark/task_grid.py | 2 + ...est_benchmark_integration_with_datasets.py | 2 +- tests/test_tasks/test_metadata.py | 1 + 8 files changed, 363 insertions(+), 553 deletions(-) create mode 100644 mteb/descriptive_stats/Retrieval/IndicQARetrieval.json create mode 100644 mteb/descriptive_stats/Speed/CPUSpeedTask.json create mode 100644 mteb/descriptive_stats/Speed/GPUSpeedTask.json diff --git a/mteb/abstasks/AbsTaskSpeedTask.py b/mteb/abstasks/AbsTaskSpeedTask.py index 7a73da445b..31f6bdb943 100644 --- a/mteb/abstasks/AbsTaskSpeedTask.py +++ b/mteb/abstasks/AbsTaskSpeedTask.py @@ -112,4 +112,4 @@ def _add_main_score(self, scores) -> None: def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> dict[str, float]: - pass + return {"num_samples": 1} diff --git a/mteb/descriptive_stats/Retrieval/IndicQARetrieval.json b/mteb/descriptive_stats/Retrieval/IndicQARetrieval.json new file mode 100644 index 0000000000..e5a62aa559 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/IndicQARetrieval.json @@ -0,0 +1,328 @@ +{ + "test": { + "number_of_characters": 6031160, + "num_samples": 21319, + "num_queries": 18560, + "num_documents": 2759, + "min_document_length": 8, + "average_document_length": 395.7480971366437, + "max_document_length": 226, + "unique_documents": 2759, + "min_query_length": 146, + "average_query_length": 266.1255926724138, + "max_query_length": 14782, + "unique_queries": 18560, + "none_queries": 0, 
+ "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0007543103448275, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2759, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "as": { + "number_of_characters": 451360, + "num_samples": 2035, + "num_queries": 1785, + "num_documents": 250, + "min_document_length": 13, + "average_document_length": 404.16, + "max_document_length": 184, + "unique_documents": 250, + "min_query_length": 355, + "average_query_length": 196.2577030812325, + "max_query_length": 6654, + "unique_queries": 1785, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0016806722689076, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 250, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "bn": { + "number_of_characters": 649559, + "num_samples": 2012, + "num_queries": 1762, + "num_documents": 250, + "min_document_length": 15, + "average_document_length": 402.224, + "max_document_length": 202, + "unique_documents": 250, + "min_query_length": 684, + "average_query_length": 311.5794551645857, + "max_query_length": 6767, + "unique_queries": 1762, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0005675368898979, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 250, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "gu": { + "number_of_characters": 359851, + "num_samples": 2263, + "num_queries": 2015, + "num_documents": 248, + "min_document_length": 9, + "average_document_length": 490.51612903225805, + "max_document_length": 173, + "unique_documents": 248, + "min_query_length": 147, + "average_query_length": 118.21488833746898, + "max_query_length": 3253, + "unique_queries": 2015, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0009925558312656, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 248, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hi": { + "number_of_characters": 747350, + "num_samples": 1805, + "num_queries": 1544, + "num_documents": 261, + "min_document_length": 19, + "average_document_length": 312.63984674329504, + "max_document_length": 138, + "unique_documents": 261, + "min_query_length": 1156, + "average_query_length": 431.18588082901556, + "max_query_length": 8857, + "unique_queries": 1544, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0019430051813472, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 261, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "kn": { + "number_of_characters": 303604, + "num_samples": 1774, + "num_queries": 1517, + "num_documents": 257, + "min_document_length": 14, + "average_document_length": 
298.6031128404669, + "max_document_length": 133, + "unique_documents": 257, + "min_query_length": 146, + "average_query_length": 149.547132498352, + "max_query_length": 3130, + "unique_queries": 1517, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 257, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ml": { + "number_of_characters": 743604, + "num_samples": 1834, + "num_queries": 1587, + "num_documents": 247, + "min_document_length": 25, + "average_document_length": 487.8987854251012, + "max_document_length": 219, + "unique_documents": 247, + "min_query_length": 859, + "average_query_length": 392.6231884057971, + "max_query_length": 11919, + "unique_queries": 1587, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 247, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "mr": { + "number_of_characters": 521991, + "num_samples": 1850, + "num_queries": 1600, + "num_documents": 250, + "min_document_length": 13, + "average_document_length": 376.224, + "max_document_length": 215, + "unique_documents": 250, + "min_query_length": 746, + "average_query_length": 267.459375, + "max_query_length": 6702, + "unique_queries": 1600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 250, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "or": { + "number_of_characters": 294386, + "num_samples": 1928, + "num_queries": 1676, + "num_documents": 252, + "min_document_length": 8, + "average_document_length": 366.27777777777777, + "max_document_length": 195, + "unique_documents": 252, + "min_query_length": 260, + "average_query_length": 120.57517899761336, + "max_query_length": 2277, + "unique_queries": 1676, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011933174224343, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 252, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "pa": { + "number_of_characters": 432818, + "num_samples": 1778, + "num_queries": 1537, + "num_documents": 241, + "min_document_length": 13, + "average_document_length": 372.4190871369295, + "max_document_length": 226, + "unique_documents": 241, + "min_query_length": 422, + "average_query_length": 223.20429407937542, + "max_query_length": 6082, + "unique_queries": 1537, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0013012361743656, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 241, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ta": { + "number_of_characters": 676404, + 
"num_samples": 2056, + "num_queries": 1803, + "num_documents": 253, + "min_document_length": 19, + "average_document_length": 385.27272727272725, + "max_document_length": 196, + "unique_documents": 253, + "min_query_length": 769, + "average_query_length": 321.0926234054354, + "max_query_length": 6940, + "unique_queries": 1803, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0005546311702718, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 253, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "te": { + "number_of_characters": 850233, + "num_samples": 1984, + "num_queries": 1734, + "num_documents": 250, + "min_document_length": 13, + "average_document_length": 464.756, + "max_document_length": 147, + "unique_documents": 250, + "min_query_length": 1072, + "average_query_length": 423.32410611303345, + "max_query_length": 14782, + "unique_queries": 1734, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 250, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Speed/CPUSpeedTask.json b/mteb/descriptive_stats/Speed/CPUSpeedTask.json new file mode 100644 index 0000000000..05a46f4513 --- /dev/null +++ b/mteb/descriptive_stats/Speed/CPUSpeedTask.json @@ -0,0 +1,5 @@ +{ + "test": { + "num_samples": 1 + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Speed/GPUSpeedTask.json b/mteb/descriptive_stats/Speed/GPUSpeedTask.json new file mode 100644 index 0000000000..05a46f4513 --- /dev/null +++ b/mteb/descriptive_stats/Speed/GPUSpeedTask.json @@ -0,0 +1,5 @@ +{ + "test": { + "num_samples": 1 + } +} \ No newline at end of file diff --git a/tests/test_TaskMetadata.py b/tests/test_TaskMetadata.py index 91ef4aabea..b72539c3e0 100644 --- a/tests/test_TaskMetadata.py +++ b/tests/test_TaskMetadata.py @@ -4,6 +4,7 @@ import pytest +from mteb import AbsTask from mteb.abstasks.TaskMetadata import TaskMetadata from mteb.overview import get_tasks @@ -380,10 +381,10 @@ def test_disallow_trust_remote_code_in_new_datasets(): "DiaBlaBitextMining", "FloresBitextMining", "IN22ConvBitextMining", + "NTREXBitextMining", "IN22GenBitextMining", "IndicGenBenchFloresBitextMining", "IWSLT2017BitextMining", - "NTREXBitextMining", "SRNCorpusBitextMining", "VieMedEVBitextMining", "HotelReviewSentimentClassification", @@ -525,573 +526,41 @@ def test_disallow_trust_remote_code_in_new_datasets(): ), f"Dataset {task.metadata.name} should not trust remote code" -def test_empy_descriptive_stat_in_new_datasets(): +@pytest.mark.parametrize("task", get_tasks()) +def test_empty_descriptive_stat_in_new_datasets(task: AbsTask): # DON'T ADD NEW DATASETS TO THIS LIST # THIS IS ONLY INTENDED FOR HISTORIC DATASETS exceptions = [ - "TbilisiCityHallBitextMining", - "BibleNLPBitextMining", - "BUCC.v2", - "DiaBlaBitextMining", - "FloresBitextMining", - "IN22GenBitextMining", - "IndicGenBenchFloresBitextMining", - "IWSLT2017BitextMining", - "LinceMTBitextMining", - "NollySentiBitextMining", - "NorwegianCourtsBitextMining", - "NTREXBitextMining", - "NusaXBitextMining", - "PhincBitextMining", - "RomaTalesBitextMining", - "Tatoeba", - "SRNCorpusBitextMining", - "VieMedEVBitextMining", - "AJGT", - "HotelReviewSentimentClassification", - "OnlineStoreReviewSentimentClassification", - "RestaurantReviewSentimentClassification", - 
"TweetEmotionClassification", - "TweetSarcasmClassification", - "BengaliDocumentClassification", - "BengaliHateSpeechClassification", - "BengaliSentimentAnalysis", - "BulgarianStoreReviewSentimentClassfication", - "CSFDCZMovieReviewSentimentClassification", - "CzechProductReviewSentimentClassification", - "CzechSoMeSentimentClassification", - "CzechSubjectivityClassification", - "AngryTweetsClassification", - "DanishPoliticalCommentsClassification", - "DKHateClassification", - "LccSentimentClassification", - "GermanPoliticiansTwitterSentimentClassification", - "TenKGnadClassification", - "GreekLegalCodeClassification", - "AmazonPolarityClassification", - "ArxivClassification", - "Banking77Classification", - "DBpediaClassification", - "EmotionClassification", - "FinancialPhrasebankClassification", - "FrenkEnClassification", - "ImdbClassification", - "CanadaTaxCourtOutcomesLegalBenchClassification", - "ContractNLIConfidentialityOfAgreementLegalBenchClassification", - "ContractNLIExplicitIdentificationLegalBenchClassification", - "ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification", - "ContractNLILimitedUseLegalBenchClassification", - "ContractNLINoLicensingLegalBenchClassification", - "ContractNLINoticeOnCompelledDisclosureLegalBenchClassification", - "ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification", - "ContractNLIPermissibleCopyLegalBenchClassification", - "ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification", - "ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification", - "ContractNLIReturnOfConfidentialInformationLegalBenchClassification", - "ContractNLISharingWithEmployeesLegalBenchClassification", - "ContractNLISharingWithThirdPartiesLegalBenchClassification", - "ContractNLISurvivalOfObligationsLegalBenchClassification", - "CorporateLobbyingLegalBenchClassification", - "CUADAffiliateLicenseLicenseeLegalBenchClassification", - 
"CUADAffiliateLicenseLicensorLegalBenchClassification", - "CUADAntiAssignmentLegalBenchClassification", - "CUADAuditRightsLegalBenchClassification", - "CUADCapOnLiabilityLegalBenchClassification", - "CUADChangeOfControlLegalBenchClassification", - "CUADCompetitiveRestrictionExceptionLegalBenchClassification", - "CUADCovenantNotToSueLegalBenchClassification", - "CUADEffectiveDateLegalBenchClassification", - "CUADExclusivityLegalBenchClassification", - "CUADExpirationDateLegalBenchClassification", - "CUADGoverningLawLegalBenchClassification", - "CUADInsuranceLegalBenchClassification", - "CUADIPOwnershipAssignmentLegalBenchClassification", - "CUADIrrevocableOrPerpetualLicenseLegalBenchClassification", - "CUADJointIPOwnershipLegalBenchClassification", - "CUADLicenseGrantLegalBenchClassification", - "CUADLiquidatedDamagesLegalBenchClassification", - "CUADMinimumCommitmentLegalBenchClassification", - "CUADMostFavoredNationLegalBenchClassification", - "CUADNoSolicitOfCustomersLegalBenchClassification", - "CUADNoSolicitOfEmployeesLegalBenchClassification", - "CUADNonCompeteLegalBenchClassification", - "CUADNonDisparagementLegalBenchClassification", - "CUADNonTransferableLicenseLegalBenchClassification", - "CUADNoticePeriodToTerminateRenewalLegalBenchClassification", - "CUADPostTerminationServicesLegalBenchClassification", - "CUADPriceRestrictionsLegalBenchClassification", - "CUADRenewalTermLegalBenchClassification", - "CUADRevenueProfitSharingLegalBenchClassification", - "CUADRofrRofoRofnLegalBenchClassification", - "CUADSourceCodeEscrowLegalBenchClassification", - "CUADTerminationForConvenienceLegalBenchClassification", - "CUADThirdPartyBeneficiaryLegalBenchClassification", - "CUADUncappedLiabilityLegalBenchClassification", - "CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification", - "CUADVolumeRestrictionLegalBenchClassification", - "CUADWarrantyDurationLegalBenchClassification", - "DefinitionClassificationLegalBenchClassification", - 
"Diversity1LegalBenchClassification", - "Diversity2LegalBenchClassification", - "Diversity3LegalBenchClassification", - "Diversity4LegalBenchClassification", - "Diversity5LegalBenchClassification", - "Diversity6LegalBenchClassification", - "FunctionOfDecisionSectionLegalBenchClassification", - "InsurancePolicyInterpretationLegalBenchClassification", - "InternationalCitizenshipQuestionsLegalBenchClassification", - "JCrewBlockerLegalBenchClassification", - "LearnedHandsBenefitsLegalBenchClassification", - "LearnedHandsBusinessLegalBenchClassification", - "LearnedHandsConsumerLegalBenchClassification", - "LearnedHandsCourtsLegalBenchClassification", - "LearnedHandsCrimeLegalBenchClassification", - "LearnedHandsDivorceLegalBenchClassification", - "LearnedHandsDomesticViolenceLegalBenchClassification", - "LearnedHandsEducationLegalBenchClassification", - "LearnedHandsEmploymentLegalBenchClassification", - "LearnedHandsEstatesLegalBenchClassification", - "LearnedHandsFamilyLegalBenchClassification", - "LearnedHandsHealthLegalBenchClassification", - "LearnedHandsHousingLegalBenchClassification", - "LearnedHandsImmigrationLegalBenchClassification", - "LearnedHandsTortsLegalBenchClassification", - "LearnedHandsTrafficLegalBenchClassification", - "LegalReasoningCausalityLegalBenchClassification", - "MAUDLegalBenchClassification", - "NYSJudicialEthicsLegalBenchClassification", - "OPP115DataRetentionLegalBenchClassification", - "OPP115DataSecurityLegalBenchClassification", - "OPP115DoNotTrackLegalBenchClassification", - "OPP115FirstPartyCollectionUseLegalBenchClassification", - "OPP115InternationalAndSpecificAudiencesLegalBenchClassification", - "OPP115PolicyChangeLegalBenchClassification", - "OPP115ThirdPartySharingCollectionLegalBenchClassification", - "OPP115UserAccessEditAndDeletionLegalBenchClassification", - "OPP115UserChoiceControlLegalBenchClassification", - "OralArgumentQuestionPurposeLegalBenchClassification", - "OverrulingLegalBenchClassification", - 
"PersonalJurisdictionLegalBenchClassification", - "PROALegalBenchClassification", - "SCDBPAccountabilityLegalBenchClassification", - "SCDBPAuditsLegalBenchClassification", - "SCDBPCertificationLegalBenchClassification", - "SCDBPTrainingLegalBenchClassification", - "SCDBPVerificationLegalBenchClassification", - "SCDDAccountabilityLegalBenchClassification", - "SCDDAuditsLegalBenchClassification", - "SCDDCertificationLegalBenchClassification", - "SCDDTrainingLegalBenchClassification", - "SCDDVerificationLegalBenchClassification", - "TelemarketingSalesRuleLegalBenchClassification", - "TextualismToolDictionariesLegalBenchClassification", - "TextualismToolPlainLegalBenchClassification", - "UCCVCommonLawLegalBenchClassification", - "UnfairTOSLegalBenchClassification", - "NewsClassification", - "PatentClassification", - "PoemSentimentClassification", - "ToxicChatClassification", - "ToxicConversationsClassification", - "TweetSentimentExtractionClassification", - "TweetTopicSingleClassification", - "YahooAnswersTopicsClassification", - "YelpReviewFullClassification", - "EstonianValenceClassification", - "PersianFoodSentimentClassification", - "FilipinoHateSpeechClassification", - "FilipinoShopeeReviewsClassification", - "FinToxicityClassification", - "FrenchBookReviews", - "MovieReviewSentimentClassification", - "GujaratiNewsClassification", - "HebrewSentimentAnalysis", - "HindiDiscourseClassification", - "SentimentAnalysisHindi", - "FrenkHrClassification", - "IndonesianIdClickbaitClassification", - "IndonesianMongabayConservationClassification", - "ItaCaseholdClassification", - "Itacola", - "JavaneseIMDBClassification", - "WRIMEClassification", - "KannadaNewsClassification", - "KLUE-TC", - "KorFin", - "KorHateClassification", - "KorSarcasmClassification", - "KurdishSentimentClassification", - "MalayalamNewsClassification", - "MarathiNewsClassification", - "MacedonianTweetSentimentClassification", - "AfriSentiClassification", - "AfriSentiLangClassification", - 
"AmazonCounterfactualClassification", - "AmazonReviewsClassification", - "CataloniaTweetClassification", - "CyrillicTurkicLangClassification", - "HinDialectClassification", - "IndicLangClassification", - "IndicNLPNewsClassification", - "IndicSentimentClassification", - "MasakhaNEWSClassification", - "MassiveIntentClassification", - "MassiveScenarioClassification", - "MTOPDomainClassification", - "MTOPIntentClassification", - "MultiHateClassification", - "MultilingualSentimentClassification", - "NaijaSenti", - "NordicLangClassification", - "NusaParagraphEmotionClassification", - "NusaParagraphTopicClassification", - "NusaX-senti", - "ScalaClassification", - "SIB200Classification", - "SouthAfricanLangClassification", - "SwissJudgementClassification", - "TurkicClassification", - "TweetSentimentClassification", - "MyanmarNews", - "NepaliNewsClassification", - "DutchBookReviewSentimentClassification", - "NoRecClassification", - "NorwegianParliamentClassification", - "OdiaNewsClassification", - "PunjabiNewsClassification", - "CBD", - "PolEmo2.0-IN", - "PolEmo2.0-OUT", - "AllegroReviews", - "PAC", - "HateSpeechPortugueseClassification", - "Moroco", - "RomanianReviewsSentiment", - "RomanianSentimentClassification", - "GeoreviewClassification", - "HeadlineClassification", - "InappropriatenessClassification", - "KinopoiskClassification", - "RuReviewsClassification", - "RuSciBenchGRNTIClassification", - "RuSciBenchOECDClassification", - "SanskritShlokasClassification", - "SinhalaNewsClassification", - "SinhalaNewsSourceClassification", - "CSFDSKMovieReviewSentimentClassification", - "FrenkSlClassification", - "SpanishNewsClassification", - "SpanishSentimentClassification", - "SiswatiNewsClassification", - "SlovakMovieReviewSentimentClassification", - "SwahiliNewsClassification", - "DalajClassification", - "SwedishSentimentClassification", - "SweRecClassification", - "TamilNewsClassification", - "TeluguAndhraJyotiNewsClassification", - "WisesightSentimentClassification", - 
"TswanaNewsClassification", - "TurkishMovieSentimentClassification", - "TurkishProductSentimentClassification", - "UkrFormalityClassification", - "UrduRomanSentimentClassification", - "VieStudentFeedbackClassification", - "TNews", - "IFlyTek", - "MultilingualSentiment", - "JDReview", - "OnlineShopping", - "Waimai", - "YueOpenriceReviewClassification", - "IsiZuluNewsClassification", - "WikiCitiesClustering", - "IndicReviewsClusteringP2P", - "MasakhaNEWSClusteringP2P", - "MasakhaNEWSClusteringS2S", - "RomaniBibleClustering", - "SpanishNewsClusteringP2P", - "BlurbsClusteringP2P.v2", - "BlurbsClusteringS2S.v2", - "TenKGnadClusteringP2P.v2", - "TenKGnadClusteringS2S.v2", - "ArXivHierarchicalClusteringS2S", - "BigPatentClustering.v2", - "BiorxivClusteringP2P.v2", - "BiorxivClusteringS2S.v2", - "MedrxivClusteringP2P.v2", - "MedrxivClusteringS2S.v2", - "RedditClustering.v2", - "RedditClusteringP2P.v2", - "StackExchangeClustering.v2", - "StackExchangeClusteringP2P.v2", - "TwentyNewsgroupsClustering.v2", - "AlloProfClusteringP2P.v2", - "AlloProfClusteringS2S.v2", - "HALClusteringS2S.v2", - "LivedoorNewsClustering.v2", - "MewsC16JaClustering", - "MLSUMClusteringP2P.v2", - "MLSUMClusteringS2S.v2", - "SIB200ClusteringS2S", - "WikiClusteringP2P.v2", - "SNLHierarchicalClusteringP2P", - "SNLHierarchicalClusteringS2S", - "VGHierarchicalClusteringP2P", - "VGHierarchicalClusteringS2S", - "EightTagsClustering.v2", - "PlscClusteringS2S.v2", - "PlscClusteringP2P.v2", - "GeoreviewClusteringP2P", - "RuSciBenchOECDClusteringP2P", - "SwednClusteringP2P", - "SwednClusteringS2S", - "CLSClusteringS2S.v2", - "CLSClusteringP2P.v2", - "ThuNewsClusteringS2S.v2", - "ThuNewsClusteringP2P.v2", - "SadeemQuestionRetrieval", - "DanFeverRetrieval", - "TV2Nordretrieval", - "TwitterHjerneRetrieval", - "GerDaLIR", - "GerDaLIRSmall", - "GermanDPR", - "GermanGovServiceRetrieval", - "GermanQuAD-Retrieval", - "LegalQuAD", - "GreekCivicsQA", - "AILACasedocs", - "AILAStatutes", - "AlphaNLI", - "ARCChallenge", - 
"ArguAna", - "BrightRetrieval", - "ClimateFEVER", - "ClimateFEVERHardNegatives", - "CQADupstackAndroidRetrieval", - "CQADupstackEnglishRetrieval", - "CQADupstackGamingRetrieval", - "CQADupstackGisRetrieval", - "CQADupstackMathematicaRetrieval", - "CQADupstackPhysicsRetrieval", - "CQADupstackProgrammersRetrieval", - "CQADupstackStatsRetrieval", - "CQADupstackTexRetrieval", - "CQADupstackUnixRetrieval", - "CQADupstackWebmastersRetrieval", - "CQADupstackWordpressRetrieval", - "DBPedia", - "DBPediaHardNegatives", - "FaithDial", - "FeedbackQARetrieval", "FEVER", - "FEVERHardNegatives", - "FiQA2018", - "HagridRetrieval", - "HellaSwag", "HotpotQA", - "HotpotQAHardNegatives", - "LegalBenchConsumerContractsQA", - "LegalBenchCorporateLobbying", - "LegalSummarization", - "LEMBNarrativeQARetrieval", - "LEMBNeedleRetrieval", - "LEMBPasskeyRetrieval", - "LEMBQMSumRetrieval", - "LEMBSummScreenFDRetrieval", - "LEMBWikimQARetrieval", - "LitSearchRetrieval", - "MedicalQARetrieval", - "MLQuestions", "MSMARCO", - "MSMARCOHardNegatives", "MSMARCOv2", - "NarrativeQARetrieval", - "NFCorpus", - "NQ", - "NQHardNegatives", - "PIQA", - "Quail", - "QuoraRetrieval", - "QuoraRetrievalHardNegatives", - "RARbCode", - "RARbMath", - "SCIDOCS", - "SciFact", - "SIQA", - "SpartQA", - "TempReasonL1", - "TempReasonL2Context", - "TempReasonL2Fact", - "TempReasonL2Pure", - "TempReasonL3Context", - "TempReasonL3Fact", - "TempReasonL3Pure", "TopiOCQA", - "TopiOCQAHardNegatives", - "TRECCOVID", - "WinoGrande", - "EstQA", - "AlloprofRetrieval", - "BSARDRetrieval", - "FQuADRetrieval", - "SyntecRetrieval", - "HunSum2AbstractiveRetrieval", - "JaGovFaqsRetrieval", - "JaQuADRetrieval", - "NLPJournalAbsIntroRetrieval", - "NLPJournalTitleAbsRetrieval", - "NLPJournalTitleIntroRetrieval", - "GeorgianFAQRetrieval", - "Ko-StrategyQA", - "CrossLingualSemanticDiscriminationWMT19", - "CrossLingualSemanticDiscriminationWMT21", - "IndicQARetrieval", - "MintakaRetrieval", "MIRACLRetrieval", - "MIRACLRetrievalHardNegatives", - 
"MLQARetrieval", "MrTidyRetrieval", + "BrightRetrieval", "MultiLongDocRetrieval", "NeuCLIR2022Retrieval", - "NeuCLIR2022RetrievalHardNegatives", "NeuCLIR2023Retrieval", - "NeuCLIR2023RetrievalHardNegatives", - "PublicHealthQA", - "StatcanDialogueDatasetRetrieval", - "WikipediaRetrievalMultilingual", - "XMarket", - "XPQARetrieval", - "XQuADRetrieval", - "NorQuadRetrieval", - "SNLRetrieval", - "ArguAna-PL", - "DBPedia-PL", - "DBPedia-PLHardNegatives", - "FiQA-PL", - "HotpotQA-PL", - "HotpotQA-PLHardNegatives", - "MSMARCO-PL", - "MSMARCO-PLHardNegatives", - "NFCorpus-PL", - "NQ-PL", - "NQ-PLHardNegatives", - "Quora-PL", - "Quora-PLHardNegatives", - "SCIDOCS-PL", - "SciFact-PL", - "TRECCOVID-PL", - "RiaNewsRetrieval", - "RiaNewsRetrievalHardNegatives", - "RuBQRetrieval", - "SKQuadRetrieval", - "SlovakSumRetrieval", - "SpanishPassageRetrievalS2P", - "SpanishPassageRetrievalS2S", - "SwednRetrieval", - "SweFaqRetrieval", - "TurHistQuadRetrieval", - "VieQuADRetrieval", - "T2Retrieval", - "MMarcoRetrieval", - "DuRetrieval", - "CovidRetrieval", - "CmedqaRetrieval", - "EcomRetrieval", - "MedicalRetrieval", - "VideoRetrieval", - "LeCaRDv2", - "News21InstructionRetrieval", - "Robust04InstructionRetrieval", - "KorHateSpeechMLClassification", - "MalteseNewsClassification", - "BrazilianToxicTweetsClassification", - "SensitiveTopicsClassification", - "ArEntail", - "CTKFactsNLI", - "FalseFriendsGermanEnglish", - "LegalBenchPC", - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "FarsTail", - "ArmenianParaphrasePC", - "indonli", - "KLUE-NLI", - "OpusparcusPC", - "RTE3", - "XNLIV2", - "XStance", - "SICK-E-PL", - "PpcPC", - "CDSC-E", - "PSC", - "Assin2RTE", - "SICK-BR-PC", - "TERRa", - "Ocnli", - "Cmnli", + "BibleNLPBitextMining", + "FloresBitextMining", + "FilipinoHateSpeechClassification", + "SwissJudgementClassification", + "MultiEURLEXMultilabelClassification", "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", "WebLINXCandidatesReranking", - 
"AlloprofReranking", - "SyntecReranking", "VoyageMMarcoReranking", "MIRACLReranking", - "RuBQReranking", - "T2Reranking", - "MMarcoReranking", - "CMedQAv1-reranking", - "CMedQAv2-reranking", - "CPUSpeedTask", - "GPUSpeedTask", - "GermanSTSBenchmark", - "BIOSSES", - "SICK-R", - "STS13", - "STS14", - "STS15", - "STS16", - "STSBenchmark", - "FaroeseSTS", - "FinParaSTS", - "SICKFr", - "JSICK", - "JSTS", - "KLUE-STS", - "KorSTS", - "IndicCrosslingualSTS", - "SemRel24STS", - "STS22.v2", - "STSBenchmarkMultilingualSTS", - "SICK-R-PL", - "CDSC-R", - "Assin2STS", - "SICK-BR-STS", - "RonSTS", - "RUParaPhraserSTS", - "RuSTSBenchmarkSTS", - "STSES", - "ATEC", - "BQ", - "LCQMC", - "PAWSX", - "STSB", - "AFQMC", - "QBQTC", - "SummEvalSummarization.v2", - "SummEvalFrSummarization.v2", ] - assert ( - 553 == len(exceptions) - ), "The number of exceptions has changed. Please do not add new datasets to this list." - - exceptions = [] + if task.metadata.name.startswith("Mock"): + return - for task in get_tasks(): - if task.metadata.descriptive_stats is None: - assert ( - task.metadata.name not in exceptions - ), f"Dataset {task.metadata.name} should have descriptive stats" + if task.metadata.name in exceptions: + assert ( + task.metadata.descriptive_stats is None + ), f"Dataset {task.metadata.name} should not have descriptive stats" + else: + assert ( + task.metadata.descriptive_stats is not None + ), f"Dataset {task.metadata.name} should have descriptive stats. 
You can add metadata to your task by running `YorTask().calculate_metadata_metrics()`" diff --git a/tests/test_benchmark/task_grid.py b/tests/test_benchmark/task_grid.py index 4c73d825e5..8ae310555f 100644 --- a/tests/test_benchmark/task_grid.py +++ b/tests/test_benchmark/task_grid.py @@ -61,6 +61,8 @@ "BrazilianToxicTweetsClassification", # multilabel classification "FaroeseSTS", # STS "SummEval", # summarization + "Core17InstructionRetrieval", # instruction reranking + "InstructIR", # instruction retrieval ] TASK_TEST_GRID_AS_STRING = [ diff --git a/tests/test_benchmark/test_benchmark_integration_with_datasets.py b/tests/test_benchmark/test_benchmark_integration_with_datasets.py index 252b9ff3c6..81d4c6b676 100644 --- a/tests/test_benchmark/test_benchmark_integration_with_datasets.py +++ b/tests/test_benchmark/test_benchmark_integration_with_datasets.py @@ -18,7 +18,7 @@ @pytest.mark.parametrize("task", TASK_TEST_GRID) @pytest.mark.parametrize("model", [MockNumpyEncoder()]) -def test_benchmark_sentence_transformer(task: str | AbsTask, model: mteb.Encoder): +def test_benchmark_datasets(task: str | AbsTask, model: mteb.Encoder): """Test that a task can be fetched and run""" eval = MTEB(tasks=[task]) eval.run(model, output_folder="tests/results", overwrite_results=True) diff --git a/tests/test_tasks/test_metadata.py b/tests/test_tasks/test_metadata.py index 3d206da5c8..f4e20b9dc3 100644 --- a/tests/test_tasks/test_metadata.py +++ b/tests/test_tasks/test_metadata.py @@ -11,6 +11,7 @@ def test_descriptive_stats(task): # remove descriptive task file task.metadata.descriptive_stat_path.unlink() task_stat = task.expected_stats + for key, value in result_stat.items(): assert key in task_stat assert value == task_stat[key] From 6da2a1af7e757ef2df673e4d0024a09ccac956b1 Mon Sep 17 00:00:00 2001 From: Napuh <55241721+Napuh@users.noreply.github.com> Date: Tue, 19 Nov 2024 15:34:29 +0100 Subject: [PATCH 09/40] fix: pin datasets version <3.0.0 (#1471) --- pyproject.toml | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ebcf63dba6..c5bd396536 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ ] requires-python = ">=3.9" dependencies = [ - "datasets>=2.19.0", + "datasets>=2.19.0,<3.0.0", "numpy>=1.0.0,<3.0.0", "requests>=2.26.0", "scikit_learn>=1.0.2", From a27de3351aa7b880d7e46264bf30bb70b1410754 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Tue, 19 Nov 2024 21:22:52 +0500 Subject: [PATCH 10/40] feat: Multilingual retrieval loader (#1473) * multilingual loader * lint --- mteb/abstasks/AbsTaskRetrieval.py | 118 ++++++++++++++++++++++-------- mteb/abstasks/dataloaders.py | 48 +++++++----- 2 files changed, 116 insertions(+), 50 deletions(-) diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index 41b0702509..5345a50a5d 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -132,42 +132,98 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = {}, {}, {} self.instructions, self.top_ranked = None, None - dataset_path = self.metadata_dict["dataset"]["path"] + dataset_path = self.metadata.dataset["path"] hf_repo_qrels = ( dataset_path + "-qrels" if "clarin-knext" in dataset_path else None ) - for split in kwargs.get("eval_splits", self.metadata_dict["eval_splits"]): - corpus, queries, qrels, instructions, top_ranked = HFDataLoader( - hf_repo=dataset_path, - hf_repo_qrels=hf_repo_qrels, - streaming=False, - keep_in_memory=False, - trust_remote_code=self.metadata_dict["dataset"].get( - "trust_remote_code", False - ), - ).load(split=split) - # Conversion from DataSet - queries = {query["id"]: query["text"] for query in queries} - corpus = { - doc["id"]: doc.get("title", "") + " " + doc["text"] for doc in corpus - } - self.corpus[split], self.queries[split], self.relevant_docs[split] = ( - corpus, - queries, - qrels, - ) + if not self.is_multilingual: + for 
split in kwargs.get("eval_splits", self.metadata.eval_splits): + corpus, queries, qrels, instructions, top_ranked = HFDataLoader( + hf_repo=dataset_path, + hf_repo_qrels=hf_repo_qrels, + streaming=False, + keep_in_memory=False, + trust_remote_code=self.metadata.dataset.get( + "trust_remote_code", False + ), + ).load(split=split) + # Conversion from DataSet + queries = {query["id"]: query["text"] for query in queries} + corpus = { + doc["id"]: doc.get("title", "") + " " + doc["text"] + for doc in corpus + } + self.corpus[split], self.queries[split], self.relevant_docs[split] = ( + corpus, + queries, + qrels, + ) - # optional args - if instructions: - self.instructions = { - split: { - inst["query-id"]: inst["instruction"] for inst in instructions + # optional args + if instructions: + self.instructions = { + split: { + inst["query-id"]: inst["instruction"] + for inst in instructions + } } - } - if top_ranked: - self.top_ranked = { - split: {tr["query-id"]: tr["corpus-ids"] for tr in top_ranked} - } + if top_ranked: + self.top_ranked = { + split: {tr["query-id"]: tr["corpus-ids"] for tr in top_ranked} + } + else: + if not isinstance(self.metadata.eval_langs, dict): + raise ValueError("eval_langs must be a dict for multilingual tasks") + for lang in self.metadata.eval_langs: + self.corpus[lang], self.queries[lang], self.relevant_docs[lang] = ( + {}, + {}, + {}, + ) + for split in kwargs.get("eval_splits", self.metadata.eval_splits): + corpus, queries, qrels, instructions, top_ranked = HFDataLoader( + hf_repo=dataset_path, + hf_repo_qrels=hf_repo_qrels, + streaming=False, + keep_in_memory=False, + trust_remote_code=self.metadata.dataset.get( + "trust_remote_code", False + ), + ).load(split=split, config=lang) + # Conversion from DataSet + queries = {query["id"]: query["text"] for query in queries} + corpus = { + doc["id"]: doc.get("title", "") + " " + doc["text"] + for doc in corpus + } + ( + self.corpus[lang][split], + self.queries[lang][split], + 
self.relevant_docs[lang][split], + ) = ( + corpus, + queries, + qrels, + ) + + # optional args + if instructions: + if self.instructions is None: + self.instructions = {} + self.instructions[lang] = { + split: { + inst["query-id"]: inst["instruction"] + for inst in instructions + } + } + if top_ranked: + if self.top_ranked is None: + self.top_ranked = {} + self.top_ranked = { + split: { + tr["query-id"]: tr["corpus-ids"] for tr in top_ranked + } + } self.data_loaded = True diff --git a/mteb/abstasks/dataloaders.py b/mteb/abstasks/dataloaders.py index 25a6150a5e..a8c165007e 100644 --- a/mteb/abstasks/dataloaders.py +++ b/mteb/abstasks/dataloaders.py @@ -93,7 +93,7 @@ def check(fIn: str, ext: str): raise ValueError(f"File {fIn} must be present with extension {ext}") def load( - self, split: str = "test" + self, split: str = "test", config: str | None = None ) -> tuple[ dict[str, dict[str, str]], # corpus dict[str, str | list[str]], # queries @@ -118,33 +118,37 @@ def load( if not len(self.corpus): logger.info("Loading Corpus...") - self._load_corpus() + self._load_corpus(config) logger.info("Loaded %d %s Documents.", len(self.corpus), split.upper()) logger.info("Doc Example: %s", self.corpus[0]) if not len(self.queries): logger.info("Loading Queries...") - self._load_queries() + self._load_queries(config) - if "top_ranked" in configs or (not self.hf_repo and self.top_ranked_file): + if any(c.endswith("top_ranked") for c in configs) in configs or ( + not self.hf_repo and self.top_ranked_file + ): logger.info("Loading Top Ranked") - self._load_top_ranked() + self._load_top_ranked(config) logger.info( f"Top ranked loaded: {len(self.top_ranked) if self.top_ranked else 0}" ) else: self.top_ranked = None - if "instruction" in configs or (not self.hf_repo and self.instructions_file): + if any(c.endswith("instruction") for c in configs) or ( + not self.hf_repo and self.instructions_file + ): logger.info("Loading Instructions") - self._load_instructions() + 
self._load_instructions(config) logger.info( f"Instructions loaded: {len(self.instructions) if self.instructions else 0}" ) else: self.instructions = None - self._load_qrels(split) + self._load_qrels(split, config) # filter queries with no qrels qrels_dict = defaultdict(dict) @@ -159,23 +163,24 @@ def qrels_dict_init(row): return self.corpus, self.queries, self.qrels, self.instructions, self.top_ranked - def load_corpus(self) -> dict[str, dict[str, str]]: + def load_corpus(self, config: str | None = None) -> dict[str, dict[str, str]]: if not self.hf_repo: self.check(fIn=self.corpus_file, ext="jsonl") if not len(self.corpus): logger.info("Loading Corpus...") - self._load_corpus() + self._load_corpus(config) logger.info("Loaded %d %s Documents.", len(self.corpus)) logger.info("Doc Example: %s", self.corpus[0]) return self.corpus - def _load_corpus(self): + def _load_corpus(self, config: str | None = None): + config = f"{config}-corpus" if config is not None else "corpus" if self.hf_repo: corpus_ds = load_dataset( self.hf_repo, - "corpus", + config, keep_in_memory=self.keep_in_memory, streaming=self.streaming, trust_remote_code=self.trust_remote_code, @@ -200,11 +205,12 @@ def _load_corpus(self): ) self.corpus = corpus_ds - def _load_queries(self): + def _load_queries(self, config: str | None = None): + config = f"{config}-queries" if config is not None else "queries" if self.hf_repo: queries_ds = load_dataset( self.hf_repo, - "queries", + config, keep_in_memory=self.keep_in_memory, streaming=self.streaming, trust_remote_code=self.trust_remote_code, @@ -224,10 +230,12 @@ def _load_queries(self): ) self.queries = queries_ds - def _load_qrels(self, split): + def _load_qrels(self, split: str, config: str | None = None): + config = f"{config}-qrels" if config is not None else None if self.hf_repo: qrels_ds = load_dataset( self.hf_repo_qrels, + name=config, keep_in_memory=self.keep_in_memory, streaming=self.streaming, trust_remote_code=self.trust_remote_code, @@ -249,11 
+257,12 @@ def _load_qrels(self, split): qrels_ds = qrels_ds.cast(features) self.qrels = qrels_ds - def _load_top_ranked(self): + def _load_top_ranked(self, config: str | None = None): + config = f"top_ranked-{config}" if config is not None else "top_ranked" if self.hf_repo: top_ranked_ds = load_dataset( self.hf_repo, - "top_ranked", + config, keep_in_memory=self.keep_in_memory, streaming=self.streaming, trust_remote_code=self.trust_remote_code, @@ -293,11 +302,12 @@ def _load_top_ranked(self): ) self.top_ranked = top_ranked_ds - def _load_instructions(self): + def _load_instructions(self, config: str | None = None): + config = f"instruction-{config}" if config is not None else "instruction" if self.hf_repo: instructions_ds = load_dataset( self.hf_repo, - "instruction", + config, keep_in_memory=self.keep_in_memory, streaming=self.streaming, trust_remote_code=self.trust_remote_code, From 0df02103bd86f9cd410d8c569dd87d85f48386aa Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Thu, 21 Nov 2024 15:12:45 +0500 Subject: [PATCH 11/40] fix: add citations to ModelMeta (#1477) * add citations * fix typo --- mteb/model_meta.py | 2 + mteb/models/arctic_models.py | 9 ++ mteb/models/bge_models.py | 11 ++ mteb/models/e5_instruct.py | 21 ++++ mteb/models/e5_models.py | 25 ++++ mteb/models/gritlm_models.py | 14 +++ mteb/models/gte_models.py | 10 ++ mteb/models/jina_models.py | 11 ++ mteb/models/llm2vec_models.py | 20 ++++ mteb/models/mxbai_models.py | 15 +++ mteb/models/nomic_models.py | 13 ++ mteb/models/promptriever_models.py | 17 +++ mteb/models/repllama_models.py | 11 ++ mteb/models/rerankers_custom.py | 18 +++ mteb/models/rerankers_monot5_based.py | 126 ++++++++++++++++++++ mteb/models/ru_sentence_models.py | 37 ++++++ mteb/models/salesforce_models.py | 7 ++ mteb/models/sentence_transformers_models.py | 31 +++++ mteb/models/uae_models.py | 8 ++ 19 files changed, 406 insertions(+) diff --git a/mteb/model_meta.py b/mteb/model_meta.py index 9595dd79a9..df6ac598fb 100644 --- 
a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -74,6 +74,7 @@ class ModelMeta(BaseModel): input such as "query: {document}" or "passage: {document}". zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models are evaluated non-zero-shot unless specified otherwise. + citation: The citation for the model. This is a bibtex string. """ model_config = ConfigDict(extra="forbid") @@ -96,6 +97,7 @@ class ModelMeta(BaseModel): similarity_fn_name: DISTANCE_METRICS | None = None use_instructions: bool | None = None zero_shot_benchmarks: list[str] | None = None + citation: str | None = None def to_dict(self): dict_repr = self.model_dump() diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py index 5f3d41a97e..b0be125891 100644 --- a/mteb/models/arctic_models.py +++ b/mteb/models/arctic_models.py @@ -27,4 +27,13 @@ reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5", similarity_fn_name="cosine_similarity", use_instructions=False, + citation="""@misc{merrick2024embeddingclusteringdataimprove, + title={Embedding And Clustering Your Data Can Improve Contrastive Pretraining}, + author={Luke Merrick}, + year={2024}, + eprint={2407.18887}, + archivePrefix={arXiv}, + primaryClass={cs.LG}, + url={https://arxiv.org/abs/2407.18887}, + }""", ) diff --git a/mteb/models/bge_models.py b/mteb/models/bge_models.py index 5ab4294795..0fb3c0242e 100644 --- a/mteb/models/bge_models.py +++ b/mteb/models/bge_models.py @@ -5,6 +5,14 @@ from mteb.model_meta import ModelMeta, sentence_transformers_loader model_prompts = {"query": "Represent this sentence for searching relevant passages: "} +BGE_15_CITATION = """@misc{bge_embedding, + title={C-Pack: Packaged Resources To Advance General Chinese Embedding}, + author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff}, + year={2023}, + eprint={2309.07597}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +}""" 
bge_small_en_v1_5 = ModelMeta( loader=partial( @@ -27,6 +35,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + citation=BGE_15_CITATION, ) bge_base_en_v1_5 = ModelMeta( @@ -50,6 +59,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + citation=BGE_15_CITATION, ) bge_large_en_v1_5 = ModelMeta( @@ -73,4 +83,5 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + citation=BGE_15_CITATION, ) diff --git a/mteb/models/e5_instruct.py b/mteb/models/e5_instruct.py index 5d5b1f3ad6..8441ba5978 100644 --- a/mteb/models/e5_instruct.py +++ b/mteb/models/e5_instruct.py @@ -41,6 +41,12 @@ def e5_instruction(instruction: str) -> str: embed_dim=1024, license="mit", max_tokens=514, + citation="""@article{wang2024multilingual, + title={Multilingual E5 Text Embeddings: A Technical Report}, + author={Wang, Liang and Yang, Nan and Huang, Xiaolong and Yang, Linjun and Majumder, Rangan and Wei, Furu}, + journal={arXiv preprint arXiv:2402.05672}, + year={2024} + }""", ) e5_mistral = ModelMeta( @@ -70,4 +76,19 @@ def e5_instruction(instruction: str) -> str: embed_dim=4096, license="mit", max_tokens=32768, + citation=""" + @article{wang2023improving, + title={Improving Text Embeddings with Large Language Models}, + author={Wang, Liang and Yang, Nan and Huang, Xiaolong and Yang, Linjun and Majumder, Rangan and Wei, Furu}, + journal={arXiv preprint arXiv:2401.00368}, + year={2023} + } + + @article{wang2022text, + title={Text Embeddings by Weakly-Supervised Contrastive Pre-training}, + author={Wang, Liang and Yang, Nan and Huang, Xiaolong and Jiao, Binxing and Yang, Linjun and Jiang, Daxin and Majumder, Rangan and Wei, Furu}, + journal={arXiv preprint arXiv:2212.03533}, + year={2022} + } + """, ) diff --git a/mteb/models/e5_models.py b/mteb/models/e5_models.py index 612130ed65..1a8a3db41c 100644 --- a/mteb/models/e5_models.py +++ 
b/mteb/models/e5_models.py @@ -108,6 +108,24 @@ "zho_Hans", ] +MULTILINGUAL_E5_CITATION = """ +@article{wang2024multilingual, + title={Multilingual E5 Text Embeddings: A Technical Report}, + author={Wang, Liang and Yang, Nan and Huang, Xiaolong and Yang, Linjun and Majumder, Rangan and Wei, Furu}, + journal={arXiv preprint arXiv:2402.05672}, + year={2024} +} +""" + +E5_CITATION = """ +@article{wang2022text, + title={Text Embeddings by Weakly-Supervised Contrastive Pre-training}, + author={Wang, Liang and Yang, Nan and Huang, Xiaolong and Jiao, Binxing and Yang, Linjun and Jiang, Daxin and Majumder, Rangan and Wei, Furu}, + journal={arXiv preprint arXiv:2212.03533}, + year={2022} +} +""" + model_prompts = { PromptType.query.value: "query: ", PromptType.passage.value: "passage: ", @@ -134,6 +152,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + citation=MULTILINGUAL_E5_CITATION, ) e5_mult_base = ModelMeta( @@ -156,6 +175,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + citation=MULTILINGUAL_E5_CITATION, ) e5_mult_large = ModelMeta( @@ -179,6 +199,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + citation=MULTILINGUAL_E5_CITATION, ) e5_eng_small_v2 = ModelMeta( @@ -201,6 +222,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + citation=E5_CITATION, ) e5_eng_small = ModelMeta( @@ -224,6 +246,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + citation=E5_CITATION, ) e5_eng_base_v2 = ModelMeta( @@ -247,6 +270,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + citation=E5_CITATION, ) e5_eng_large_v2 = ModelMeta( @@ -270,4 +294,5 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + 
citation=E5_CITATION, ) diff --git a/mteb/models/gritlm_models.py b/mteb/models/gritlm_models.py index b1c4882bc2..596169b9b4 100644 --- a/mteb/models/gritlm_models.py +++ b/mteb/models/gritlm_models.py @@ -16,6 +16,18 @@ def gritlm_instruction(instruction: str = "") -> str: ) +GRITLM_CITATION = """ +@misc{muennighoff2024generative, + title={Generative Representational Instruction Tuning}, + author={Niklas Muennighoff and Hongjin Su and Liang Wang and Nan Yang and Furu Wei and Tao Yu and Amanpreet Singh and Douwe Kiela}, + year={2024}, + eprint={2402.09906}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +""" + + gritlm7b = ModelMeta( loader=partial( instruct_wrapper, @@ -38,6 +50,7 @@ def gritlm_instruction(instruction: str = "") -> str: similarity_fn_name="cosine", framework=["GritLM", "PyTorch"], use_instructions=True, + citation=GRITLM_CITATION, ) gritlm8x7b = ModelMeta( loader=partial( @@ -61,4 +74,5 @@ def gritlm_instruction(instruction: str = "") -> str: similarity_fn_name="cosine", framework=["GritLM", "PyTorch"], use_instructions=True, + citation=GRITLM_CITATION, ) diff --git a/mteb/models/gte_models.py b/mteb/models/gte_models.py index 2358ef6d5f..5c7043dd8d 100644 --- a/mteb/models/gte_models.py +++ b/mteb/models/gte_models.py @@ -6,6 +6,15 @@ from .instruct_wrapper import instruct_wrapper +GTE_CITATION = """ +@article{li2023towards, + title={Towards general text embeddings with multi-stage contrastive learning}, + author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan}, + journal={arXiv preprint arXiv:2308.03281}, + year={2023} +} +""" + gte_Qwen2_7B_instruct = ModelMeta( loader=partial( instruct_wrapper, @@ -32,4 +41,5 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + citation=GTE_CITATION, ) diff --git a/mteb/models/jina_models.py b/mteb/models/jina_models.py index 08eb6cb63d..a9bc680585 100644 --- a/mteb/models/jina_models.py +++ 
b/mteb/models/jina_models.py @@ -222,4 +222,15 @@ def encode( framework=["Sentence Transformers", "PyTorch"], use_instructions=False, reference="https://huggingface.co/jinaai/jina-embeddings-v3", + citation=""" + @misc{sturua2024jinaembeddingsv3multilingualembeddingstask, + title={jina-embeddings-v3: Multilingual Embeddings With Task LoRA}, + author={Saba Sturua and Isabelle Mohr and Mohammad Kalim Akram and Michael Günther and Bo Wang and Markus Krimmel and Feng Wang and Georgios Mastrapas and Andreas Koukounas and Andreas Koukounas and Nan Wang and Han Xiao}, + year={2024}, + eprint={2409.10173}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2409.10173}, + } + """, ) diff --git a/mteb/models/llm2vec_models.py b/mteb/models/llm2vec_models.py index e962289aac..ecee6795bc 100644 --- a/mteb/models/llm2vec_models.py +++ b/mteb/models/llm2vec_models.py @@ -78,6 +78,18 @@ def loader_inner(**kwargs: Any) -> Encoder: return loader_inner +LLM2VEC_CITATION = """ +@misc{behnamghader2024llm2veclargelanguagemodels, + title={LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders}, + author={Parishad BehnamGhader and Vaibhav Adlakha and Marius Mosbach and Dzmitry Bahdanau and Nicolas Chapados and Siva Reddy}, + year={2024}, + eprint={2404.05961}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2404.05961}, +}""" + + llm2vec_llama3_8b_supervised = ModelMeta( loader=_loader( LLM2VecWrapper, @@ -100,6 +112,7 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["LLM2Vec", "PyTorch"], use_instructions=True, + citation=LLM2VEC_CITATION, ) llm2vec_llama3_8b_unsupervised = ModelMeta( @@ -124,6 +137,7 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["LLM2Vec", "PyTorch"], use_instructions=True, + citation=LLM2VEC_CITATION, ) @@ -149,6 +163,7 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["LLM2Vec", 
"PyTorch"], use_instructions=True, + citation=LLM2VEC_CITATION, ) llm2vec_mistral7b_unsupervised = ModelMeta( @@ -173,6 +188,7 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["LLM2Vec", "PyTorch"], use_instructions=True, + citation=LLM2VEC_CITATION, ) llm2vec_llama2_7b_supervised = ModelMeta( @@ -197,6 +213,7 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["LLM2Vec", "PyTorch"], use_instructions=True, + citation=LLM2VEC_CITATION, ) llm2vec_llama2_7b_unsupervised = ModelMeta( @@ -221,6 +238,7 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["LLM2Vec", "PyTorch"], use_instructions=True, + citation=LLM2VEC_CITATION, ) llm2vec_sheared_llama_supervised = ModelMeta( @@ -245,6 +263,7 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["LLM2Vec", "PyTorch"], use_instructions=True, + citation=LLM2VEC_CITATION, ) llm2vec_sheared_llama_unsupervised = ModelMeta( @@ -269,4 +288,5 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["LLM2Vec", "PyTorch"], use_instructions=True, + citation=LLM2VEC_CITATION, ) diff --git a/mteb/models/mxbai_models.py b/mteb/models/mxbai_models.py index ce7d1808bd..7df0247e7f 100644 --- a/mteb/models/mxbai_models.py +++ b/mteb/models/mxbai_models.py @@ -27,4 +27,19 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + citation=""" + @online{emb2024mxbai, + title={Open Source Strikes Bread - New Fluffy Embeddings Model}, + author={Sean Lee and Aamir Shakir and Darius Koenig and Julius Lipp}, + year={2024}, + url={https://www.mixedbread.ai/blog/mxbai-embed-large-v1}, + } + + @article{li2023angle, + title={AnglE-optimized Text Embeddings}, + author={Li, Xianming and Li, Jing}, + journal={arXiv preprint arXiv:2309.12871}, + year={2023} + } + """, ) diff --git a/mteb/models/nomic_models.py b/mteb/models/nomic_models.py 
index 0600f01be0..203e00f743 100644 --- a/mteb/models/nomic_models.py +++ b/mteb/models/nomic_models.py @@ -74,6 +74,17 @@ def encode( # type: ignore PromptType.passage.value: "search_document: ", } +NOMIC_CITATION = """ +@misc{nussbaum2024nomic, + title={Nomic Embed: Training a Reproducible Long Context Text Embedder}, + author={Zach Nussbaum and John X. Morris and Brandon Duderstadt and Andriy Mulyar}, + year={2024}, + eprint={2402.01613}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +""" + nomic_embed_v1_5 = ModelMeta( loader=partial( # type: ignore NomicWrapper, @@ -87,6 +98,7 @@ def encode( # type: ignore open_weights=True, revision="b0753ae76394dd36bcfb912a46018088bca48be0", release_date="2024-02-10", # first commit + citation=NOMIC_CITATION, ) nomic_embed_v1 = ModelMeta( @@ -111,4 +123,5 @@ def encode( # type: ignore similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + citation=NOMIC_CITATION, ) diff --git a/mteb/models/promptriever_models.py b/mteb/models/promptriever_models.py index 175f12d685..c4434b4b9c 100644 --- a/mteb/models/promptriever_models.py +++ b/mteb/models/promptriever_models.py @@ -42,6 +42,19 @@ def loader_inner(**kwargs: Any) -> Encoder: return loader_inner +PROMPTRIEVER_CITATION = """ +@article{weller2024promptriever, + title={Promptriever: Instruction-Trained Retrievers Can Be Prompted Like Language Models}, + author={Orion Weller and Benjamin Van Durme and Dawn Lawrie and Ashwin Paranjape and Yuhao Zhang and Jack Hessel}, + year={2024}, + eprint={2409.11136}, + archivePrefix={arXiv}, + primaryClass={cs.IR}, + url={https://arxiv.org/abs/2409.11136}, +} +""" + + promptriever_llama2 = ModelMeta( loader=_loader( RepLLaMAWrapper, @@ -64,6 +77,7 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["PyTorch", "Tevatron"], use_instructions=True, + citation=PROMPTRIEVER_CITATION, ) promptriever_llama3 = ModelMeta( @@ -88,6 +102,7 @@ def 
loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["PyTorch", "Tevatron"], use_instructions=True, + citation=PROMPTRIEVER_CITATION, ) @@ -113,6 +128,7 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["PyTorch", "Tevatron"], use_instructions=True, + citation=PROMPTRIEVER_CITATION, ) promptriever_mistral_v1 = ModelMeta( @@ -137,4 +153,5 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["PyTorch", "Tevatron"], use_instructions=True, + citation=PROMPTRIEVER_CITATION, ) diff --git a/mteb/models/repllama_models.py b/mteb/models/repllama_models.py index e435f87d7a..2f81b995cd 100644 --- a/mteb/models/repllama_models.py +++ b/mteb/models/repllama_models.py @@ -126,6 +126,15 @@ def loader_inner(**kwargs: Any) -> Encoder: PromptType.passage.value: "passage: ", } +REPLLAMA_CITATION = """ +@article{rankllama, + title={Fine-Tuning LLaMA for Multi-Stage Text Retrieval}, + author={Xueguang Ma and Liang Wang and Nan Yang and Furu Wei and Jimmy Lin}, + year={2023}, + journal={arXiv:2310.08319}, +} +""" + repllama_llama2_original = ModelMeta( loader=_loader( RepLLaMAWrapper, @@ -149,6 +158,7 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["PyTorch", "Tevatron"], use_instructions=True, + citation=REPLLAMA_CITATION, ) @@ -175,4 +185,5 @@ def loader_inner(**kwargs: Any) -> Encoder: similarity_fn_name="cosine", framework=["PyTorch", "Tevatron"], use_instructions=True, + citation=REPLLAMA_CITATION, ) diff --git a/mteb/models/rerankers_custom.py b/mteb/models/rerankers_custom.py index dc354a550c..4555888be0 100644 --- a/mteb/models/rerankers_custom.py +++ b/mteb/models/rerankers_custom.py @@ -248,4 +248,22 @@ def loader_inner(**kwargs: Any) -> Encoder: open_weights=True, revision="953dc6f6f85a1b2dbfca4c34a2796e7dde08d41e", release_date="2024-06-24", + citation=""" + @misc{li2023making, + title={Making Large Language Models A Better Foundation For 
Dense Retrieval}, + author={Chaofan Li and Zheng Liu and Shitao Xiao and Yingxia Shao}, + year={2023}, + eprint={2312.15503}, + archivePrefix={arXiv}, + primaryClass={cs.CL} + } + @misc{chen2024bge, + title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation}, + author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu}, + year={2024}, + eprint={2402.03216}, + archivePrefix={arXiv}, + primaryClass={cs.CL} + } + """, ) diff --git a/mteb/models/rerankers_monot5_based.py b/mteb/models/rerankers_monot5_based.py index aef4a19e7e..d40c3409ed 100644 --- a/mteb/models/rerankers_monot5_based.py +++ b/mteb/models/rerankers_monot5_based.py @@ -287,6 +287,15 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="77f8e3f7b1eb1afe353aa21a7c3a2fc8feca702e", release_date="2022-03-28", + citation="""@misc{rosa2022parameterleftbehinddistillation, + title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval}, + author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira}, + year={2022}, + eprint={2206.02873}, + archivePrefix={arXiv}, + primaryClass={cs.IR}, + url={https://arxiv.org/abs/2206.02873}, + }""", ) monot5_base = ModelMeta( @@ -301,6 +310,15 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="f15657ab3d2a5dd0b9a30c8c0b6a0a73c9cb5884", release_date="2022-03-28", + citation="""@misc{rosa2022parameterleftbehinddistillation, + title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval}, + author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira}, + year={2022}, + eprint={2206.02873}, + archivePrefix={arXiv}, + primaryClass={cs.IR}, + url={https://arxiv.org/abs/2206.02873}, + }""", 
) monot5_large = ModelMeta( @@ -315,6 +333,15 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="48cfad1d8dd587670393f27ee8ec41fde63e3d98", release_date="2022-03-28", + citation="""@misc{rosa2022parameterleftbehinddistillation, + title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval}, + author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira}, + year={2022}, + eprint={2206.02873}, + archivePrefix={arXiv}, + primaryClass={cs.IR}, + url={https://arxiv.org/abs/2206.02873}, + }""", ) monot5_3b = ModelMeta( @@ -329,6 +356,15 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="bc0c419a438c81f592f878ce32430a1823f5db6c", release_date="2022-03-28", + citation="""@misc{rosa2022parameterleftbehinddistillation, + title={No Parameter Left Behind: How Distillation and Model Size Affect Zero-Shot Retrieval}, + author={Guilherme Moraes Rosa and Luiz Bonifacio and Vitor Jeronymo and Hugo Abonizio and Marzieh Fadaee and Roberto Lotufo and Rodrigo Nogueira}, + year={2022}, + eprint={2206.02873}, + archivePrefix={arXiv}, + primaryClass={cs.IR}, + url={https://arxiv.org/abs/2206.02873}, + }""", ) flant5_base = ModelMeta( @@ -343,6 +379,17 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="7bcac572ce56db69c1ea7c8af255c5d7c9672fc2", release_date="2022-10-21", + citation="""@misc{10.48550/arxiv.2210.11416, + doi = {10.48550/ARXIV.2210.11416}, + url = {https://arxiv.org/abs/2210.11416}, + author = {Chung, Hyung Won and Hou, Le and Longpre, Shayne and Zoph, Barret and Tay, Yi and Fedus, William and Li, Eric and Wang, Xuezhi and Dehghani, Mostafa and Brahma, Siddhartha and Webson, Albert and Gu, Shixiang Shane and Dai, Zhuyun and Suzgun, Mirac and Chen, Xinyun and Chowdhery, Aakanksha and Narang, Sharan and Mishra, Gaurav and Yu, Adams and Zhao, Vincent and Huang, Yanping and 
Dai, Andrew and Yu, Hongkun and Petrov, Slav and Chi, Ed H. and Dean, Jeff and Devlin, Jacob and Roberts, Adam and Zhou, Denny and Le, Quoc V. and Wei, Jason}, + keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {Scaling Instruction-Finetuned Language Models}, + publisher = {arXiv}, + year = {2022}, + copyright = {Creative Commons Attribution 4.0 International} + } + """, ) flant5_large = ModelMeta( @@ -357,6 +404,17 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="0613663d0d48ea86ba8cb3d7a44f0f65dc596a2a", release_date="2022-10-21", + citation="""@misc{10.48550/arxiv.2210.11416, + doi = {10.48550/ARXIV.2210.11416}, + url = {https://arxiv.org/abs/2210.11416}, + author = {Chung, Hyung Won and Hou, Le and Longpre, Shayne and Zoph, Barret and Tay, Yi and Fedus, William and Li, Eric and Wang, Xuezhi and Dehghani, Mostafa and Brahma, Siddhartha and Webson, Albert and Gu, Shixiang Shane and Dai, Zhuyun and Suzgun, Mirac and Chen, Xinyun and Chowdhery, Aakanksha and Narang, Sharan and Mishra, Gaurav and Yu, Adams and Zhao, Vincent and Huang, Yanping and Dai, Andrew and Yu, Hongkun and Petrov, Slav and Chi, Ed H. and Dean, Jeff and Devlin, Jacob and Roberts, Adam and Zhou, Denny and Le, Quoc V. 
and Wei, Jason}, + keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {Scaling Instruction-Finetuned Language Models}, + publisher = {arXiv}, + year = {2022}, + copyright = {Creative Commons Attribution 4.0 International} + } + """, ) flant5_xl = ModelMeta( @@ -371,6 +429,17 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="7d6315df2c2fb742f0f5b556879d730926ca9001", release_date="2022-10-21", + citation="""@misc{10.48550/arxiv.2210.11416, + doi = {10.48550/ARXIV.2210.11416}, + url = {https://arxiv.org/abs/2210.11416}, + author = {Chung, Hyung Won and Hou, Le and Longpre, Shayne and Zoph, Barret and Tay, Yi and Fedus, William and Li, Eric and Wang, Xuezhi and Dehghani, Mostafa and Brahma, Siddhartha and Webson, Albert and Gu, Shixiang Shane and Dai, Zhuyun and Suzgun, Mirac and Chen, Xinyun and Chowdhery, Aakanksha and Narang, Sharan and Mishra, Gaurav and Yu, Adams and Zhao, Vincent and Huang, Yanping and Dai, Andrew and Yu, Hongkun and Petrov, Slav and Chi, Ed H. and Dean, Jeff and Devlin, Jacob and Roberts, Adam and Zhou, Denny and Le, Quoc V. 
and Wei, Jason}, + keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {Scaling Instruction-Finetuned Language Models}, + publisher = {arXiv}, + year = {2022}, + copyright = {Creative Commons Attribution 4.0 International} + } + """, ) flant5_xxl = ModelMeta( @@ -385,6 +454,17 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="ae7c9136adc7555eeccc78cdd960dfd60fb346ce", release_date="2022-10-21", + citation="""@misc{10.48550/arxiv.2210.11416, + doi = {10.48550/ARXIV.2210.11416}, + url = {https://arxiv.org/abs/2210.11416}, + author = {Chung, Hyung Won and Hou, Le and Longpre, Shayne and Zoph, Barret and Tay, Yi and Fedus, William and Li, Eric and Wang, Xuezhi and Dehghani, Mostafa and Brahma, Siddhartha and Webson, Albert and Gu, Shixiang Shane and Dai, Zhuyun and Suzgun, Mirac and Chen, Xinyun and Chowdhery, Aakanksha and Narang, Sharan and Mishra, Gaurav and Yu, Adams and Zhao, Vincent and Huang, Yanping and Dai, Andrew and Yu, Hongkun and Petrov, Slav and Chi, Ed H. and Dean, Jeff and Devlin, Jacob and Roberts, Adam and Zhou, Denny and Le, Quoc V. 
and Wei, Jason}, + keywords = {Machine Learning (cs.LG), Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {Scaling Instruction-Finetuned Language Models}, + publisher = {arXiv}, + year = {2022}, + copyright = {Creative Commons Attribution 4.0 International} + } + """, ) @@ -400,6 +480,15 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="01c7f73d771dfac7d292323805ebc428287df4f9", release_date="2023-07-18", + citation="""@misc{touvron2023llama2openfoundation, + title={Llama 2: Open Foundation and Fine-Tuned Chat Models}, + author={Hugo Touvron and Louis Martin and Kevin Stone and Peter Albert and Amjad Almahairi and Yasmine Babaei and Nikolay Bashlykov and Soumya Batra and Prajjwal Bhargava and Shruti Bhosale and Dan Bikel and Lukas Blecher and Cristian Canton Ferrer and Moya Chen and Guillem Cucurull and David Esiobu and Jude Fernandes and Jeremy Fu and Wenyin Fu and Brian Fuller and Cynthia Gao and Vedanuj Goswami and Naman Goyal and Anthony Hartshorn and Saghar Hosseini and Rui Hou and Hakan Inan and Marcin Kardas and Viktor Kerkez and Madian Khabsa and Isabel Kloumann and Artem Korenev and Punit Singh Koura and Marie-Anne Lachaux and Thibaut Lavril and Jenya Lee and Diana Liskovich and Yinghai Lu and Yuning Mao and Xavier Martinet and Todor Mihaylov and Pushkar Mishra and Igor Molybog and Yixin Nie and Andrew Poulton and Jeremy Reizenstein and Rashi Rungta and Kalyan Saladi and Alan Schelten and Ruan Silva and Eric Michael Smith and Ranjan Subramanian and Xiaoqing Ellen Tan and Binh Tang and Ross Taylor and Adina Williams and Jian Xiang Kuan and Puxin Xu and Zheng Yan and Iliyan Zarov and Yuchen Zhang and Angela Fan and Melanie Kambadur and Sharan Narang and Aurelien Rodriguez and Robert Stojnic and Sergey Edunov and Thomas Scialom}, + year={2023}, + eprint={2307.09288}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + 
url={https://arxiv.org/abs/2307.09288}, + }""", ) llama2_7b_chat = ModelMeta( @@ -414,6 +503,15 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="f5db02db724555f92da89c216ac04704f23d4590", release_date="2023-07-18", + citation="""@misc{touvron2023llama2openfoundation, + title={Llama 2: Open Foundation and Fine-Tuned Chat Models}, + author={Hugo Touvron and Louis Martin and Kevin Stone and Peter Albert and Amjad Almahairi and Yasmine Babaei and Nikolay Bashlykov and Soumya Batra and Prajjwal Bhargava and Shruti Bhosale and Dan Bikel and Lukas Blecher and Cristian Canton Ferrer and Moya Chen and Guillem Cucurull and David Esiobu and Jude Fernandes and Jeremy Fu and Wenyin Fu and Brian Fuller and Cynthia Gao and Vedanuj Goswami and Naman Goyal and Anthony Hartshorn and Saghar Hosseini and Rui Hou and Hakan Inan and Marcin Kardas and Viktor Kerkez and Madian Khabsa and Isabel Kloumann and Artem Korenev and Punit Singh Koura and Marie-Anne Lachaux and Thibaut Lavril and Jenya Lee and Diana Liskovich and Yinghai Lu and Yuning Mao and Xavier Martinet and Todor Mihaylov and Pushkar Mishra and Igor Molybog and Yixin Nie and Andrew Poulton and Jeremy Reizenstein and Rashi Rungta and Kalyan Saladi and Alan Schelten and Ruan Silva and Eric Michael Smith and Ranjan Subramanian and Xiaoqing Ellen Tan and Binh Tang and Ross Taylor and Adina Williams and Jian Xiang Kuan and Puxin Xu and Zheng Yan and Iliyan Zarov and Yuchen Zhang and Angela Fan and Melanie Kambadur and Sharan Narang and Aurelien Rodriguez and Robert Stojnic and Sergey Edunov and Thomas Scialom}, + year={2023}, + eprint={2307.09288}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2307.09288}, + }""", ) mistral_7b = ModelMeta( @@ -428,6 +526,15 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="3ad372fc79158a2148299e3318516c786aeded6c", release_date="2023-12-11", + citation="""@misc{jiang2023mistral7b, + title={Mistral 7B}, 
+ author={Albert Q. Jiang and Alexandre Sablayrolles and Arthur Mensch and Chris Bamford and Devendra Singh Chaplot and Diego de las Casas and Florian Bressand and Gianna Lengyel and Guillaume Lample and Lucile Saulnier and Lélio Renard Lavaud and Marie-Anne Lachaux and Pierre Stock and Teven Le Scao and Thibaut Lavril and Thomas Wang and Timothée Lacroix and William El Sayed}, + year={2023}, + eprint={2310.06825}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2310.06825}, + }""", ) followir_7b = ModelMeta( @@ -442,6 +549,16 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="4d25d437e38b510c01852070c0731e8f6e1875d1", release_date="2024-04-29", + citation=""" + @misc{weller2024followir, + title={FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions}, + author={Orion Weller and Benjamin Chang and Sean MacAvaney and Kyle Lo and Arman Cohan and Benjamin Van Durme and Dawn Lawrie and Luca Soldaini}, + year={2024}, + eprint={2403.15246}, + archivePrefix={arXiv}, + primaryClass={cs.IR} + } + """, ) @@ -561,6 +678,15 @@ def get_prediction_tokens(self, *args, **kwargs): open_weights=True, revision="cc0a949b9f21efcaba45c8cabb998ad02ce8d4e7", release_date="2022-01-05", + citation="""@misc{bonifacio2021mmarco, + title={mMARCO: A Multilingual Version of MS MARCO Passage Ranking Dataset}, + author={Luiz Henrique Bonifacio and Vitor Jeronymo and Hugo Queiroz Abonizio and Israel Campiotti and Marzieh Fadaee and and Roberto Lotufo and Rodrigo Nogueira}, + year={2021}, + eprint={2108.13897}, + archivePrefix={arXiv}, + primaryClass={cs.CL} + } + """, ) mt5_13b_mmarco_100k = ModelMeta( diff --git a/mteb/models/ru_sentence_models.py b/mteb/models/ru_sentence_models.py index cfe8965164..027b7c4840 100644 --- a/mteb/models/ru_sentence_models.py +++ b/mteb/models/ru_sentence_models.py @@ -95,6 +95,14 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], 
use_instructions=False, + citation="""@misc{deepvk2024user, + title={USER: Universal Sentence Encoder for Russian}, + author={Malashenko, Boris and Zemerov, Anton and Spirin, Egor}, + url={https://huggingface.co/datasets/deepvk/USER-base}, + publisher={Hugging Face} + year={2024}, + } + """, ) deberta_v1_ru = ModelMeta( @@ -129,6 +137,15 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + citation="""@misc{kuratov2019adaptationdeepbidirectionalmultilingual, + title={Adaptation of Deep Bidirectional Multilingual Transformers for Russian Language}, + author={Yuri Kuratov and Mikhail Arkhipov}, + year={2019}, + eprint={1905.07213}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/1905.07213}, + }""", ) distilrubert_small_cased_conversational = ModelMeta( @@ -146,6 +163,16 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + citation="""@misc{https://doi.org/10.48550/arxiv.2205.02340, + doi = {10.48550/ARXIV.2205.02340}, + url = {https://arxiv.org/abs/2205.02340}, + author = {Kolesnikova, Alina and Kuratov, Yuri and Konovalov, Vasily and Burtsev, Mikhail}, + keywords = {Computation and Language (cs.CL), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {Knowledge Distillation of Russian Language Models with Reduction of Vocabulary}, + publisher = {arXiv}, + year = {2022}, + copyright = {arXiv.org perpetual, non-exclusive license} + }""", ) rubert_base_cased_sentence = ModelMeta( @@ -234,4 +261,14 @@ open_weights=True, revision="89fb1651989adbb1cfcfdedafd7d102951ad0555", release_date="2024-07-29", + citation="""@misc{snegirev2024russianfocusedembeddersexplorationrumteb, + title={The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design}, + author={Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and 
Alexander Abramov}, + year={2024}, + eprint={2408.12503}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2408.12503}, + } + """, ) diff --git a/mteb/models/salesforce_models.py b/mteb/models/salesforce_models.py index eabc4352a0..3a2ab12670 100644 --- a/mteb/models/salesforce_models.py +++ b/mteb/models/salesforce_models.py @@ -40,4 +40,11 @@ def sfr_instruction(instruction: str) -> str: similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + citation="""@misc{SFR-embedding-2, + title={SFR-Embedding-2: Advanced Text Embedding with Multi-stage Training}, + author={Rui Meng*, Ye Liu*, Shafiq Rayhan Joty, Caiming Xiong, Yingbo Zhou, Semih Yavuz}, + year={2024}, + url={https://huggingface.co/Salesforce/SFR-Embedding-2_R} + } + """, ) diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index e546203019..9c0c7f4bcc 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -60,6 +60,17 @@ "zho_Hant", ] +SBERT_CITATION = """@inproceedings{reimers-2019-sentence-bert, + title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", + author = "Reimers, Nils and Gurevych, Iryna", + booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", + month = "11", + year = "2019", + publisher = "Association for Computational Linguistics", + url = "http://arxiv.org/abs/1908.10084", +} +""" + all_MiniLM_L6_v2 = ModelMeta( name="sentence-transformers/all-MiniLM-L6-v2", languages=["eng-Latn"], @@ -75,6 +86,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + citation=SBERT_CITATION, ) paraphrase_multilingual_MiniLM_L12_v2 = ModelMeta( @@ -92,6 +104,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + citation=SBERT_CITATION, ) 
paraphrase_multilingual_mpnet_base_v2 = ModelMeta( @@ -109,6 +122,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + citation=SBERT_CITATION, ) labse = ModelMeta( @@ -126,6 +140,15 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + citation="""@misc{feng2022languageagnosticbertsentenceembedding, + title={Language-agnostic BERT Sentence Embedding}, + author={Fangxiaoyu Feng and Yinfei Yang and Daniel Cer and Naveen Arivazhagan and Wei Wang}, + year={2022}, + eprint={2007.01852}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2007.01852}, + }""", ) contriever = ModelMeta( @@ -143,4 +166,12 @@ similarity_fn_name="dot", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + citation=""" + @misc{izacard2021contriever, + title={Unsupervised Dense Information Retrieval with Contrastive Learning}, + author={Gautier Izacard and Mathilde Caron and Lucas Hosseini and Sebastian Riedel and Piotr Bojanowski and Armand Joulin and Edouard Grave}, + year={2021}, + url = {https://arxiv.org/abs/2112.09118}, + doi = {10.48550/ARXIV.2112.09118}, + }""", ) diff --git a/mteb/models/uae_models.py b/mteb/models/uae_models.py index b18240b47c..33f2cb03ac 100644 --- a/mteb/models/uae_models.py +++ b/mteb/models/uae_models.py @@ -75,4 +75,12 @@ def encode( framework=["Sentence Transformers", "PyTorch"], reference="https://huggingface.co/WhereIsAI/UAE-Large-V1", use_instructions=False, + citation=""" + @article{li2023angle, + title={AnglE-optimized Text Embeddings}, + author={Li, Xianming and Li, Jing}, + journal={arXiv preprint arXiv:2309.12871}, + year={2023} + } + """, ) From 0abe1a0563bfe6db822e6f30f7a9117330354a2f Mon Sep 17 00:00:00 2001 From: Imene Kerboua <33312980+imenelydiaker@users.noreply.github.com> Date: Thu, 21 Nov 2024 16:24:33 +0100 Subject: [PATCH 12/40] Add descriptive stats to mising tasks and add number of qrels 
(#1476) * add code for comupting number of qrels * add stats fever hotpotqa msmarco topiocqa * miracl mrtidy * multilongdoc miracl reranking * add multi eurlex * fix tests for descriptive stats * fix tests --------- Co-authored-by: Roman Solomatin <36135455+Samoed@users.noreply.github.com> --- mteb/abstasks/AbsTaskRetrieval.py | 3 + .../MultiEURLEXMultilabelClassification.json | 3750 +++++++++++++++++ .../Reranking/MIRACLReranking.json | 536 +++ .../Retrieval/AlloprofRetrieval.json | 2 + mteb/descriptive_stats/Retrieval/FEVER.json | 30 + .../descriptive_stats/Retrieval/HotpotQA.json | 86 + .../Retrieval/MIRACLRetrieval.json | 536 +++ mteb/descriptive_stats/Retrieval/MSMARCO.json | 86 + .../Retrieval/MrTidyRetrieval.json | 340 ++ .../Retrieval/MultiLongDocRetrieval.json | 790 ++++ .../descriptive_stats/Retrieval/TopiOCQA.json | 30 + .../Retrieval/XPQARetrieval.json | 74 + tests/test_TaskMetadata.py | 9 - tests/test_benchmark/mock_tasks.py | 16 + 14 files changed, 6279 insertions(+), 9 deletions(-) create mode 100644 mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json create mode 100644 mteb/descriptive_stats/Reranking/MIRACLReranking.json create mode 100644 mteb/descriptive_stats/Retrieval/FEVER.json create mode 100644 mteb/descriptive_stats/Retrieval/HotpotQA.json create mode 100644 mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/MSMARCO.json create mode 100644 mteb/descriptive_stats/Retrieval/MrTidyRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/MultiLongDocRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/TopiOCQA.json diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index 5345a50a5d..bc86928a53 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -58,6 +58,7 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): num_samples: int num_queries: int 
num_documents: int + num_relevant_docs: int number_of_characters: int min_document_length: int @@ -419,6 +420,7 @@ def _calculate_metrics_from_split( query_len, doc_len = calculate_length(queries, corpus) num_documents = len(corpus) num_queries = len(queries) + num_relevant_docs = sum(len(relevant_docs[qid]) for qid in relevant_docs) none_queries = sum(q is None or len(q) == 0 for q in queries.values()) # create a list of number of relevant docs per query @@ -466,6 +468,7 @@ def _calculate_metrics_from_split( num_samples=num_documents + num_queries, num_queries=num_queries, num_documents=num_documents, + num_relevant_docs=num_relevant_docs, min_document_length=min(doc_len), average_document_length=sum(doc_len) / num_documents, max_document_length=max(doc_len), diff --git a/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json b/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json new file mode 100644 index 0000000000..9a5c08b6cc --- /dev/null +++ b/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json @@ -0,0 +1,3750 @@ +{ + "test": { + "num_samples": 115000, + "number_of_characters": 1381657027, + "number_texts_intersect_with_train": 0, + "min_text_length": 563, + "average_text_length": 12014.408930434782, + "max_text_length": 1458188, + "unique_texts": 115000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 50784 + }, + "15": { + "count": 30981 + }, + "5": { + "count": 24978 + }, + "6": { + "count": 45080 + }, + "3": { + "count": 63687 + }, + "17": { + "count": 37743 + }, + "1": { + "count": 15019 + }, + "20": { + "count": 14030 + }, + "0": { + "count": 17802 + }, + "2": { + "count": 22402 + }, + "19": { + "count": 10212 + }, + "9": { + "count": 3772 + }, + "4": { + "count": 9062 + }, + "10": { + "count": 7705 + }, + "11": { + "count": 12213 + }, + "7": { + 
"count": 14306 + }, + "12": { + "count": 11799 + }, + "8": { + "count": 13800 + }, + "13": { + "count": 2346 + }, + "14": { + "count": 4255 + }, + "16": { + "count": 1311 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 5000, + "number_of_characters": 58601463, + "number_texts_intersect_with_train": 0, + "min_text_length": 700, + "average_text_length": 11720.2926, + "max_text_length": 1269363, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "de": { + "num_samples": 5000, + "number_of_characters": 64327081, + "number_texts_intersect_with_train": 0, + "min_text_length": 688, + "average_text_length": 12865.4162, + "max_text_length": 1361562, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + 
"count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "fr": { + "num_samples": 5000, + "number_of_characters": 65405549, + "number_texts_intersect_with_train": 0, + "min_text_length": 676, + "average_text_length": 13081.1098, + "max_text_length": 1440461, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "it": { + "num_samples": 5000, + "number_of_characters": 63817393, + "number_texts_intersect_with_train": 0, + "min_text_length": 696, + "average_text_length": 12763.4786, + "max_text_length": 1404333, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + 
"7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "es": { + "num_samples": 5000, + "number_of_characters": 65401450, + "number_texts_intersect_with_train": 0, + "min_text_length": 683, + "average_text_length": 13080.29, + "max_text_length": 1458188, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "pl": { + "num_samples": 5000, + "number_of_characters": 61412963, + "number_texts_intersect_with_train": 0, + "min_text_length": 697, + "average_text_length": 12282.5926, + "max_text_length": 1381409, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 
+ }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "ro": { + "num_samples": 5000, + "number_of_characters": 64184661, + "number_texts_intersect_with_train": 0, + "min_text_length": 645, + "average_text_length": 12836.9322, + "max_text_length": 1450509, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "nl": { + "num_samples": 5000, + "number_of_characters": 64289871, + "number_texts_intersect_with_train": 0, + "min_text_length": 721, + "average_text_length": 12857.9742, + "max_text_length": 1442428, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + 
"count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "el": { + "num_samples": 5000, + "number_of_characters": 64990715, + "number_texts_intersect_with_train": 0, + "min_text_length": 695, + "average_text_length": 12998.143, + "max_text_length": 1436873, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "hu": { + "num_samples": 5000, + "number_of_characters": 62123205, + "number_texts_intersect_with_train": 0, + "min_text_length": 635, + "average_text_length": 12424.641, + "max_text_length": 1405731, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + 
"8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "pt": { + "num_samples": 5000, + "number_of_characters": 62412308, + "number_texts_intersect_with_train": 0, + "min_text_length": 662, + "average_text_length": 12482.4616, + "max_text_length": 1400357, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "cs": { + "num_samples": 5000, + "number_of_characters": 53917338, + "number_texts_intersect_with_train": 0, + "min_text_length": 563, + "average_text_length": 10783.4676, + "max_text_length": 1183634, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 
600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "sv": { + "num_samples": 5000, + "number_of_characters": 58062387, + "number_texts_intersect_with_train": 0, + "min_text_length": 660, + "average_text_length": 11612.4774, + "max_text_length": 1257482, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "bg": { + "num_samples": 5000, + "number_of_characters": 61177134, + "number_texts_intersect_with_train": 0, + "min_text_length": 661, + "average_text_length": 12235.4268, + "max_text_length": 1309869, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { 
+ "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "da": { + "num_samples": 5000, + "number_of_characters": 58869790, + "number_texts_intersect_with_train": 0, + "min_text_length": 680, + "average_text_length": 11773.958, + "max_text_length": 1297978, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "fi": { + "num_samples": 5000, + "number_of_characters": 60438431, + "number_texts_intersect_with_train": 0, + "min_text_length": 707, + "average_text_length": 12087.6862, + "max_text_length": 1330363, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, 
+ "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "sk": { + "num_samples": 5000, + "number_of_characters": 55654070, + "number_texts_intersect_with_train": 0, + "min_text_length": 595, + "average_text_length": 11130.814, + "max_text_length": 1229063, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "lt": { + "num_samples": 5000, + "number_of_characters": 56226783, + "number_texts_intersect_with_train": 0, + "min_text_length": 597, + "average_text_length": 11245.3566, + "max_text_length": 1274867, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 
185 + }, + "16": { + "count": 57 + } + } + }, + "hr": { + "num_samples": 5000, + "number_of_characters": 55110710, + "number_texts_intersect_with_train": 0, + "min_text_length": 610, + "average_text_length": 11022.142, + "max_text_length": 1252581, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "sl": { + "num_samples": 5000, + "number_of_characters": 53100297, + "number_texts_intersect_with_train": 0, + "min_text_length": 573, + "average_text_length": 10620.0594, + "max_text_length": 1208117, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + 
"count": 57 + } + } + }, + "et": { + "num_samples": 5000, + "number_of_characters": 54492156, + "number_texts_intersect_with_train": 0, + "min_text_length": 599, + "average_text_length": 10898.4312, + "max_text_length": 1370495, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + }, + "lv": { + "num_samples": 5000, + "number_of_characters": 54692551, + "number_texts_intersect_with_train": 0, + "min_text_length": 614, + "average_text_length": 10938.5102, + "max_text_length": 1230284, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } 
+ }, + "mt": { + "num_samples": 5000, + "number_of_characters": 62948721, + "number_texts_intersect_with_train": 0, + "min_text_length": 703, + "average_text_length": 12589.7442, + "max_text_length": 1403346, + "unique_texts": 5000, + "min_labels_per_text": 1, + "average_label_per_text": 3.5938, + "max_labels_per_text": 9, + "unique_labels": 21, + "labels": { + "18": { + "count": 2208 + }, + "15": { + "count": 1347 + }, + "5": { + "count": 1086 + }, + "6": { + "count": 1960 + }, + "3": { + "count": 2769 + }, + "17": { + "count": 1641 + }, + "1": { + "count": 653 + }, + "20": { + "count": 610 + }, + "0": { + "count": 774 + }, + "2": { + "count": 974 + }, + "19": { + "count": 444 + }, + "9": { + "count": 164 + }, + "4": { + "count": 394 + }, + "10": { + "count": 335 + }, + "11": { + "count": 531 + }, + "7": { + "count": 622 + }, + "12": { + "count": 513 + }, + "8": { + "count": 600 + }, + "13": { + "count": 102 + }, + "14": { + "count": 185 + }, + "16": { + "count": 57 + } + } + } + } + }, + "train": { + "num_samples": 817239, + "number_of_characters": 6311709460, + "number_texts_intersect_with_train": null, + "min_text_length": 450, + "average_text_length": 7723.211276015952, + "max_text_length": 939852, + "unique_texts": 817106, + "min_labels_per_text": 1, + "average_label_per_text": 3.279778620452524, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 85901 + }, + "20": { + "count": 55421 + }, + "7": { + "count": 71231 + }, + "3": { + "count": 445523 + }, + "0": { + "count": 105847 + }, + "2": { + "count": 131330 + }, + "17": { + "count": 392812 + }, + "19": { + "count": 96924 + }, + "6": { + "count": 293802 + }, + "12": { + "count": 63033 + }, + "18": { + "count": 316672 + }, + "4": { + "count": 74760 + }, + "5": { + "count": 128614 + }, + "10": { + "count": 34808 + }, + "8": { + "count": 55990 + }, + "15": { + "count": 216563 + }, + "14": { + "count": 17360 + }, + "9": { + "count": 31691 + }, + "11": { + "count": 39649 + }, + 
"13": { + "count": 9126 + }, + "16": { + "count": 13306 + } + }, + "hf_subset_descriptive_stats": { + "en": { + "num_samples": 55000, + "number_of_characters": 386261559, + "number_texts_intersect_with_train": null, + "min_text_length": 566, + "average_text_length": 7022.937436363636, + "max_text_length": 850450, + "unique_texts": 54986, + "min_labels_per_text": 1, + "average_label_per_text": 3.231618181818182, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 6150 + }, + "20": { + "count": 3096 + }, + "7": { + "count": 4175 + }, + "3": { + "count": 30222 + }, + "0": { + "count": 6056 + }, + "2": { + "count": 8803 + }, + "17": { + "count": 26931 + }, + "19": { + "count": 7065 + }, + "6": { + "count": 19431 + }, + "12": { + "count": 3640 + }, + "18": { + "count": 22975 + }, + "4": { + "count": 5065 + }, + "5": { + "count": 8444 + }, + "10": { + "count": 2151 + }, + "8": { + "count": 3317 + }, + "15": { + "count": 13519 + }, + "14": { + "count": 1122 + }, + "9": { + "count": 1926 + }, + "11": { + "count": 2233 + }, + "13": { + "count": 541 + }, + "16": { + "count": 877 + } + } + }, + "de": { + "num_samples": 55000, + "number_of_characters": 415962273, + "number_texts_intersect_with_train": null, + "min_text_length": 592, + "average_text_length": 7562.950418181818, + "max_text_length": 888009, + "unique_texts": 54992, + "min_labels_per_text": 1, + "average_label_per_text": 3.231618181818182, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 6150 + }, + "20": { + "count": 3096 + }, + "7": { + "count": 4175 + }, + "3": { + "count": 30222 + }, + "0": { + "count": 6056 + }, + "2": { + "count": 8803 + }, + "17": { + "count": 26931 + }, + "19": { + "count": 7065 + }, + "6": { + "count": 19431 + }, + "12": { + "count": 3640 + }, + "18": { + "count": 22975 + }, + "4": { + "count": 5065 + }, + "5": { + "count": 8444 + }, + "10": { + "count": 2151 + }, + "8": { + "count": 3317 + }, + "15": { + "count": 
13519 + }, + "14": { + "count": 1122 + }, + "9": { + "count": 1926 + }, + "11": { + "count": 2233 + }, + "13": { + "count": 541 + }, + "16": { + "count": 877 + } + } + }, + "fr": { + "num_samples": 55000, + "number_of_characters": 423976667, + "number_texts_intersect_with_train": null, + "min_text_length": 551, + "average_text_length": 7708.666672727273, + "max_text_length": 926327, + "unique_texts": 54991, + "min_labels_per_text": 1, + "average_label_per_text": 3.231618181818182, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 6150 + }, + "20": { + "count": 3096 + }, + "7": { + "count": 4175 + }, + "3": { + "count": 30222 + }, + "0": { + "count": 6056 + }, + "2": { + "count": 8803 + }, + "17": { + "count": 26931 + }, + "19": { + "count": 7065 + }, + "6": { + "count": 19431 + }, + "12": { + "count": 3640 + }, + "18": { + "count": 22975 + }, + "4": { + "count": 5065 + }, + "5": { + "count": 8444 + }, + "10": { + "count": 2151 + }, + "8": { + "count": 3317 + }, + "15": { + "count": 13519 + }, + "14": { + "count": 1122 + }, + "9": { + "count": 1926 + }, + "11": { + "count": 2233 + }, + "13": { + "count": 541 + }, + "16": { + "count": 877 + } + } + }, + "it": { + "num_samples": 55000, + "number_of_characters": 423891859, + "number_texts_intersect_with_train": null, + "min_text_length": 566, + "average_text_length": 7707.124709090909, + "max_text_length": 895850, + "unique_texts": 54992, + "min_labels_per_text": 1, + "average_label_per_text": 3.231618181818182, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 6150 + }, + "20": { + "count": 3096 + }, + "7": { + "count": 4175 + }, + "3": { + "count": 30222 + }, + "0": { + "count": 6056 + }, + "2": { + "count": 8803 + }, + "17": { + "count": 26931 + }, + "19": { + "count": 7065 + }, + "6": { + "count": 19431 + }, + "12": { + "count": 3640 + }, + "18": { + "count": 22975 + }, + "4": { + "count": 5065 + }, + "5": { + "count": 8444 + }, + "10": { + 
"count": 2151 + }, + "8": { + "count": 3317 + }, + "15": { + "count": 13519 + }, + "14": { + "count": 1122 + }, + "9": { + "count": 1926 + }, + "11": { + "count": 2233 + }, + "13": { + "count": 541 + }, + "16": { + "count": 877 + } + } + }, + "es": { + "num_samples": 52785, + "number_of_characters": 423682977, + "number_texts_intersect_with_train": null, + "min_text_length": 569, + "average_text_length": 8026.57908496732, + "max_text_length": 939852, + "unique_texts": 52775, + "min_labels_per_text": 1, + "average_label_per_text": 3.2420384578952355, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 5414 + }, + "20": { + "count": 3043 + }, + "7": { + "count": 4066 + }, + "3": { + "count": 28995 + }, + "0": { + "count": 5887 + }, + "2": { + "count": 8557 + }, + "17": { + "count": 26280 + }, + "19": { + "count": 6704 + }, + "6": { + "count": 18832 + }, + "12": { + "count": 3541 + }, + "18": { + "count": 21935 + }, + "4": { + "count": 4870 + }, + "5": { + "count": 8222 + }, + "10": { + "count": 2053 + }, + "8": { + "count": 3261 + }, + "15": { + "count": 13176 + }, + "14": { + "count": 1050 + }, + "9": { + "count": 1892 + }, + "11": { + "count": 2188 + }, + "13": { + "count": 530 + }, + "16": { + "count": 635 + } + } + }, + "pl": { + "num_samples": 23197, + "number_of_characters": 191501869, + "number_texts_intersect_with_train": null, + "min_text_length": 538, + "average_text_length": 8255.458421347588, + "max_text_length": 834133, + "unique_texts": 23196, + "min_labels_per_text": 1, + "average_label_per_text": 3.327456136569384, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 2228 + }, + "20": { + "count": 1999 + }, + "7": { + "count": 2407 + }, + "3": { + "count": 12498 + }, + "0": { + "count": 3717 + }, + "19": { + "count": 2289 + }, + "6": { + "count": 8410 + }, + "17": { + "count": 10886 + }, + "5": { + "count": 3669 + }, + "2": { + "count": 3816 + }, + "10": { + "count": 1107 + }, + "8": { 
+ "count": 1866 + }, + "18": { + "count": 7637 + }, + "15": { + "count": 6788 + }, + "4": { + "count": 2037 + }, + "14": { + "count": 517 + }, + "9": { + "count": 1020 + }, + "13": { + "count": 304 + }, + "12": { + "count": 2159 + }, + "11": { + "count": 1415 + }, + "16": { + "count": 418 + } + } + }, + "ro": { + "num_samples": 15921, + "number_of_characters": 157122999, + "number_texts_intersect_with_train": null, + "min_text_length": 650, + "average_text_length": 9868.915206331261, + "max_text_length": 882427, + "unique_texts": 15920, + "min_labels_per_text": 1, + "average_label_per_text": 3.434143583945732, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 1801 + }, + "20": { + "count": 1721 + }, + "7": { + "count": 2032 + }, + "3": { + "count": 8085 + }, + "0": { + "count": 3121 + }, + "19": { + "count": 1596 + }, + "6": { + "count": 6154 + }, + "2": { + "count": 2293 + }, + "5": { + "count": 2626 + }, + "10": { + "count": 910 + }, + "8": { + "count": 1516 + }, + "18": { + "count": 5269 + }, + "15": { + "count": 5020 + }, + "4": { + "count": 1525 + }, + "17": { + "count": 6103 + }, + "14": { + "count": 413 + }, + "9": { + "count": 765 + }, + "13": { + "count": 247 + }, + "12": { + "count": 1891 + }, + "11": { + "count": 1224 + }, + "16": { + "count": 363 + } + } + }, + "nl": { + "num_samples": 55000, + "number_of_characters": 426734054, + "number_texts_intersect_with_train": null, + "min_text_length": 590, + "average_text_length": 7758.800981818182, + "max_text_length": 921418, + "unique_texts": 54987, + "min_labels_per_text": 1, + "average_label_per_text": 3.231618181818182, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 6150 + }, + "20": { + "count": 3096 + }, + "7": { + "count": 4175 + }, + "3": { + "count": 30222 + }, + "0": { + "count": 6056 + }, + "2": { + "count": 8803 + }, + "17": { + "count": 26931 + }, + "19": { + "count": 7065 + }, + "6": { + "count": 19431 + }, + "12": { + 
"count": 3640 + }, + "18": { + "count": 22975 + }, + "4": { + "count": 5065 + }, + "5": { + "count": 8444 + }, + "10": { + "count": 2151 + }, + "8": { + "count": 3317 + }, + "15": { + "count": 13519 + }, + "14": { + "count": 1122 + }, + "9": { + "count": 1926 + }, + "11": { + "count": 2233 + }, + "13": { + "count": 541 + }, + "16": { + "count": 877 + } + } + }, + "el": { + "num_samples": 55000, + "number_of_characters": 428853513, + "number_texts_intersect_with_train": null, + "min_text_length": 598, + "average_text_length": 7797.3366, + "max_text_length": 930674, + "unique_texts": 54988, + "min_labels_per_text": 1, + "average_label_per_text": 3.231618181818182, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 6150 + }, + "20": { + "count": 3096 + }, + "7": { + "count": 4175 + }, + "3": { + "count": 30222 + }, + "0": { + "count": 6056 + }, + "2": { + "count": 8803 + }, + "17": { + "count": 26931 + }, + "19": { + "count": 7065 + }, + "6": { + "count": 19431 + }, + "12": { + "count": 3640 + }, + "18": { + "count": 22975 + }, + "4": { + "count": 5065 + }, + "5": { + "count": 8444 + }, + "10": { + "count": 2151 + }, + "8": { + "count": 3317 + }, + "15": { + "count": 13519 + }, + "14": { + "count": 1122 + }, + "9": { + "count": 1926 + }, + "11": { + "count": 2233 + }, + "13": { + "count": 541 + }, + "16": { + "count": 877 + } + } + }, + "hu": { + "num_samples": 22664, + "number_of_characters": 187808803, + "number_texts_intersect_with_train": null, + "min_text_length": 552, + "average_text_length": 8286.657386163079, + "max_text_length": 853678, + "unique_texts": 22663, + "min_labels_per_text": 1, + "average_label_per_text": 3.3263766325450055, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 2159 + }, + "20": { + "count": 1959 + }, + "7": { + "count": 2365 + }, + "3": { + "count": 12159 + }, + "0": { + "count": 3608 + }, + "19": { + "count": 2236 + }, + "6": { + "count": 8188 + }, + "17": { + 
"count": 10693 + }, + "5": { + "count": 3555 + }, + "18": { + "count": 7423 + }, + "10": { + "count": 1067 + }, + "14": { + "count": 510 + }, + "15": { + "count": 6643 + }, + "8": { + "count": 1838 + }, + "9": { + "count": 1014 + }, + "2": { + "count": 3783 + }, + "13": { + "count": 302 + }, + "4": { + "count": 1985 + }, + "12": { + "count": 2114 + }, + "11": { + "count": 1382 + }, + "16": { + "count": 406 + } + } + }, + "pt": { + "num_samples": 52370, + "number_of_characters": 403330428, + "number_texts_intersect_with_train": null, + "min_text_length": 546, + "average_text_length": 7701.554859652473, + "max_text_length": 900744, + "unique_texts": 52356, + "min_labels_per_text": 1, + "average_label_per_text": 3.249665839220928, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 5376 + }, + "20": { + "count": 3032 + }, + "7": { + "count": 4035 + }, + "3": { + "count": 28786 + }, + "0": { + "count": 5852 + }, + "2": { + "count": 8513 + }, + "17": { + "count": 26076 + }, + "19": { + "count": 6673 + }, + "6": { + "count": 18764 + }, + "12": { + "count": 3521 + }, + "18": { + "count": 21803 + }, + "4": { + "count": 4859 + }, + "5": { + "count": 8188 + }, + "10": { + "count": 2048 + }, + "8": { + "count": 3254 + }, + "15": { + "count": 13146 + }, + "14": { + "count": 1044 + }, + "9": { + "count": 1881 + }, + "11": { + "count": 2179 + }, + "13": { + "count": 519 + }, + "16": { + "count": 636 + } + } + }, + "cs": { + "num_samples": 23187, + "number_of_characters": 168437584, + "number_texts_intersect_with_train": null, + "min_text_length": 450, + "average_text_length": 7264.311208867038, + "max_text_length": 743409, + "unique_texts": 23186, + "min_labels_per_text": 1, + "average_label_per_text": 3.3279855091214903, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 2230 + }, + "20": { + "count": 1995 + }, + "7": { + "count": 2407 + }, + "3": { + "count": 12501 + }, + "0": { + "count": 3719 + }, + "19": { 
+ "count": 2282 + }, + "6": { + "count": 8402 + }, + "17": { + "count": 10880 + }, + "5": { + "count": 3664 + }, + "2": { + "count": 3818 + }, + "10": { + "count": 1106 + }, + "8": { + "count": 1868 + }, + "18": { + "count": 7630 + }, + "15": { + "count": 6783 + }, + "4": { + "count": 2041 + }, + "14": { + "count": 523 + }, + "9": { + "count": 1020 + }, + "13": { + "count": 305 + }, + "12": { + "count": 2159 + }, + "11": { + "count": 1416 + }, + "16": { + "count": 417 + } + } + }, + "sv": { + "num_samples": 42490, + "number_of_characters": 314595142, + "number_texts_intersect_with_train": null, + "min_text_length": 553, + "average_text_length": 7403.980748411391, + "max_text_length": 808204, + "unique_texts": 42482, + "min_labels_per_text": 1, + "average_label_per_text": 3.3235820192986587, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 3803 + }, + "20": { + "count": 2729 + }, + "7": { + "count": 3536 + }, + "3": { + "count": 23831 + }, + "0": { + "count": 5288 + }, + "2": { + "count": 7279 + }, + "17": { + "count": 22229 + }, + "19": { + "count": 5071 + }, + "6": { + "count": 15684 + }, + "12": { + "count": 3140 + }, + "5": { + "count": 7025 + }, + "18": { + "count": 16072 + }, + "10": { + "count": 1720 + }, + "8": { + "count": 2864 + }, + "15": { + "count": 11415 + }, + "4": { + "count": 3929 + }, + "14": { + "count": 871 + }, + "9": { + "count": 1716 + }, + "11": { + "count": 1954 + }, + "13": { + "count": 465 + }, + "16": { + "count": 598 + } + } + }, + "bg": { + "num_samples": 15986, + "number_of_characters": 152499367, + "number_texts_intersect_with_train": null, + "min_text_length": 604, + "average_text_length": 9539.557550356561, + "max_text_length": 798373, + "unique_texts": 15985, + "min_labels_per_text": 1, + "average_label_per_text": 3.4323783310396596, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 1810 + }, + "20": { + "count": 1730 + }, + "7": { + "count": 2059 + }, + "3": 
{ + "count": 8104 + }, + "0": { + "count": 3125 + }, + "19": { + "count": 1599 + }, + "6": { + "count": 6159 + }, + "2": { + "count": 2294 + }, + "5": { + "count": 2629 + }, + "10": { + "count": 915 + }, + "8": { + "count": 1573 + }, + "18": { + "count": 5273 + }, + "15": { + "count": 5052 + }, + "4": { + "count": 1526 + }, + "17": { + "count": 6106 + }, + "14": { + "count": 412 + }, + "9": { + "count": 768 + }, + "13": { + "count": 250 + }, + "12": { + "count": 1896 + }, + "11": { + "count": 1226 + }, + "16": { + "count": 364 + } + } + }, + "da": { + "num_samples": 55000, + "number_of_characters": 387088427, + "number_texts_intersect_with_train": null, + "min_text_length": 544, + "average_text_length": 7037.9714, + "max_text_length": 839799, + "unique_texts": 54995, + "min_labels_per_text": 1, + "average_label_per_text": 3.231618181818182, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 6150 + }, + "20": { + "count": 3096 + }, + "7": { + "count": 4175 + }, + "3": { + "count": 30222 + }, + "0": { + "count": 6056 + }, + "2": { + "count": 8803 + }, + "17": { + "count": 26931 + }, + "19": { + "count": 7065 + }, + "6": { + "count": 19431 + }, + "12": { + "count": 3640 + }, + "18": { + "count": 22975 + }, + "4": { + "count": 5065 + }, + "5": { + "count": 8444 + }, + "10": { + "count": 2151 + }, + "8": { + "count": 3317 + }, + "15": { + "count": 13519 + }, + "14": { + "count": 1122 + }, + "9": { + "count": 1926 + }, + "11": { + "count": 2233 + }, + "13": { + "count": 541 + }, + "16": { + "count": 877 + } + } + }, + "fi": { + "num_samples": 42497, + "number_of_characters": 320275075, + "number_texts_intersect_with_train": null, + "min_text_length": 555, + "average_text_length": 7536.416099959997, + "max_text_length": 818453, + "unique_texts": 42486, + "min_labels_per_text": 1, + "average_label_per_text": 3.323575781819893, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 3804 + }, + "20": { + 
"count": 2728 + }, + "7": { + "count": 3537 + }, + "3": { + "count": 23835 + }, + "0": { + "count": 5288 + }, + "2": { + "count": 7280 + }, + "17": { + "count": 22233 + }, + "19": { + "count": 5072 + }, + "6": { + "count": 15686 + }, + "12": { + "count": 3140 + }, + "5": { + "count": 7028 + }, + "18": { + "count": 16075 + }, + "10": { + "count": 1720 + }, + "8": { + "count": 2865 + }, + "15": { + "count": 11418 + }, + "4": { + "count": 3929 + }, + "14": { + "count": 871 + }, + "9": { + "count": 1716 + }, + "11": { + "count": 1954 + }, + "13": { + "count": 465 + }, + "16": { + "count": 598 + } + } + }, + "sk": { + "num_samples": 22971, + "number_of_characters": 171894895, + "number_texts_intersect_with_train": null, + "min_text_length": 461, + "average_text_length": 7483.126333202734, + "max_text_length": 764206, + "unique_texts": 22970, + "min_labels_per_text": 1, + "average_label_per_text": 3.3276304906186063, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 2200 + }, + "20": { + "count": 1974 + }, + "7": { + "count": 2381 + }, + "3": { + "count": 12392 + }, + "0": { + "count": 3666 + }, + "19": { + "count": 2261 + }, + "6": { + "count": 8355 + }, + "17": { + "count": 10833 + }, + "5": { + "count": 3609 + }, + "2": { + "count": 3790 + }, + "10": { + "count": 1086 + }, + "8": { + "count": 1848 + }, + "18": { + "count": 7565 + }, + "15": { + "count": 6693 + }, + "4": { + "count": 2013 + }, + "14": { + "count": 517 + }, + "9": { + "count": 1009 + }, + "13": { + "count": 300 + }, + "12": { + "count": 2130 + }, + "11": { + "count": 1403 + }, + "16": { + "count": 414 + } + } + }, + "lt": { + "num_samples": 23188, + "number_of_characters": 174821647, + "number_texts_intersect_with_train": null, + "min_text_length": 509, + "average_text_length": 7539.315464895636, + "max_text_length": 806603, + "unique_texts": 23186, + "min_labels_per_text": 1, + "average_label_per_text": 3.32805761600828, + "max_labels_per_text": 10, + 
"unique_labels": 21, + "labels": { + "1": { + "count": 2233 + }, + "20": { + "count": 1999 + }, + "7": { + "count": 2403 + }, + "3": { + "count": 12497 + }, + "0": { + "count": 3716 + }, + "19": { + "count": 2288 + }, + "6": { + "count": 8409 + }, + "17": { + "count": 10873 + }, + "5": { + "count": 3662 + }, + "2": { + "count": 3819 + }, + "10": { + "count": 1107 + }, + "8": { + "count": 1867 + }, + "18": { + "count": 7638 + }, + "15": { + "count": 6782 + }, + "4": { + "count": 2042 + }, + "14": { + "count": 519 + }, + "9": { + "count": 1021 + }, + "13": { + "count": 302 + }, + "12": { + "count": 2162 + }, + "11": { + "count": 1414 + }, + "16": { + "count": 418 + } + } + }, + "hr": { + "num_samples": 7944, + "number_of_characters": 78244345, + "number_texts_intersect_with_train": null, + "min_text_length": 724, + "average_text_length": 9849.489551863042, + "max_text_length": 756731, + "unique_texts": 7944, + "min_labels_per_text": 1, + "average_label_per_text": 3.50365055387714, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 1153 + }, + "20": { + "count": 937 + }, + "7": { + "count": 1312 + }, + "3": { + "count": 4036 + }, + "0": { + "count": 1755 + }, + "2": { + "count": 719 + }, + "5": { + "count": 1694 + }, + "10": { + "count": 629 + }, + "8": { + "count": 907 + }, + "18": { + "count": 2663 + }, + "15": { + "count": 2944 + }, + "17": { + "count": 1983 + }, + "6": { + "count": 2648 + }, + "14": { + "count": 251 + }, + "19": { + "count": 737 + }, + "9": { + "count": 361 + }, + "13": { + "count": 167 + }, + "12": { + "count": 1211 + }, + "4": { + "count": 730 + }, + "11": { + "count": 754 + }, + "16": { + "count": 242 + } + } + }, + "sl": { + "num_samples": 23184, + "number_of_characters": 165759223, + "number_texts_intersect_with_train": null, + "min_text_length": 486, + "average_text_length": 7149.724939613527, + "max_text_length": 727123, + "unique_texts": 23183, + "min_labels_per_text": 1, + "average_label_per_text": 
3.3279416839199447, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 2234 + }, + "20": { + "count": 2001 + }, + "7": { + "count": 2406 + }, + "3": { + "count": 12489 + }, + "0": { + "count": 3718 + }, + "19": { + "count": 2280 + }, + "6": { + "count": 8392 + }, + "17": { + "count": 10862 + }, + "5": { + "count": 3670 + }, + "2": { + "count": 3813 + }, + "10": { + "count": 1108 + }, + "8": { + "count": 1866 + }, + "18": { + "count": 7643 + }, + "15": { + "count": 6788 + }, + "4": { + "count": 2045 + }, + "14": { + "count": 523 + }, + "9": { + "count": 1020 + }, + "13": { + "count": 304 + }, + "12": { + "count": 2157 + }, + "11": { + "count": 1418 + }, + "16": { + "count": 418 + } + } + }, + "et": { + "num_samples": 23126, + "number_of_characters": 167111710, + "number_texts_intersect_with_train": null, + "min_text_length": 505, + "average_text_length": 7226.139842601401, + "max_text_length": 466834, + "unique_texts": 23125, + "min_labels_per_text": 1, + "average_label_per_text": 3.32703450661593, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 2211 + }, + "20": { + "count": 2002 + }, + "7": { + "count": 2400 + }, + "3": { + "count": 12457 + }, + "0": { + "count": 3710 + }, + "19": { + "count": 2281 + }, + "6": { + "count": 8405 + }, + "17": { + "count": 10865 + }, + "5": { + "count": 3641 + }, + "2": { + "count": 3816 + }, + "10": { + "count": 1101 + }, + "8": { + "count": 1862 + }, + "18": { + "count": 7586 + }, + "15": { + "count": 6749 + }, + "4": { + "count": 2023 + }, + "14": { + "count": 519 + }, + "9": { + "count": 1019 + }, + "13": { + "count": 306 + }, + "12": { + "count": 2154 + }, + "11": { + "count": 1416 + }, + "16": { + "count": 418 + } + } + }, + "lv": { + "num_samples": 23208, + "number_of_characters": 170528142, + "number_texts_intersect_with_train": null, + "min_text_length": 512, + "average_text_length": 7347.81721820062, + "max_text_length": 743348, + "unique_texts": 
23207, + "min_labels_per_text": 1, + "average_label_per_text": 3.327171664943123, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 2229 + }, + "20": { + "count": 2001 + }, + "7": { + "count": 2410 + }, + "3": { + "count": 12510 + }, + "0": { + "count": 3720 + }, + "19": { + "count": 2286 + }, + "6": { + "count": 8413 + }, + "17": { + "count": 10891 + }, + "5": { + "count": 3672 + }, + "2": { + "count": 3819 + }, + "10": { + "count": 1103 + }, + "8": { + "count": 1867 + }, + "18": { + "count": 7631 + }, + "15": { + "count": 6783 + }, + "4": { + "count": 2040 + }, + "14": { + "count": 522 + }, + "9": { + "count": 1022 + }, + "13": { + "count": 305 + }, + "12": { + "count": 2157 + }, + "11": { + "count": 1418 + }, + "16": { + "count": 418 + } + } + }, + "mt": { + "num_samples": 17521, + "number_of_characters": 171326902, + "number_texts_intersect_with_train": null, + "min_text_length": 596, + "average_text_length": 9778.374636150904, + "max_text_length": 913989, + "unique_texts": 17520, + "min_labels_per_text": 1, + "average_label_per_text": 3.4455795902060387, + "max_labels_per_text": 10, + "unique_labels": 21, + "labels": { + "1": { + "count": 1966 + }, + "20": { + "count": 1899 + }, + "7": { + "count": 2250 + }, + "3": { + "count": 8794 + }, + "0": { + "count": 3565 + }, + "19": { + "count": 1814 + }, + "6": { + "count": 6884 + }, + "2": { + "count": 2300 + }, + "5": { + "count": 2952 + }, + "10": { + "count": 971 + }, + "8": { + "count": 1649 + }, + "18": { + "count": 6004 + }, + "15": { + "count": 5750 + }, + "4": { + "count": 1711 + }, + "17": { + "count": 6502 + }, + "14": { + "count": 444 + }, + "9": { + "count": 965 + }, + "13": { + "count": 268 + }, + "12": { + "count": 2021 + }, + "11": { + "count": 1257 + }, + "16": { + "count": 404 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/MIRACLReranking.json b/mteb/descriptive_stats/Reranking/MIRACLReranking.json new file mode 
100644 index 0000000000..8f37b97947 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/MIRACLReranking.json @@ -0,0 +1,536 @@ +{ + "dev": { + "number_of_characters": 584993395, + "num_samples": 1260008, + "num_queries": 12524, + "num_documents": 1247484, + "num_relevant_docs": 1247483, + "min_document_length": 5, + "average_document_length": 0.3661874621237627, + "max_document_length": 176, + "unique_documents": 1247484, + "min_query_length": 7, + "average_query_length": 46673.31379750878, + "max_query_length": 48058, + "unique_queries": 12524, + "none_queries": 0, + "min_relevant_docs_per_query": 0, + "average_relevant_docs_per_query": 1.8850207601405302, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 1247483, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 1, + "average_top_ranked_per_query": 99.60747365059086, + "max_top_ranked_per_query": 100, + "hf_subset_descriptive_stats": { + "ar": { + "number_of_characters": 139203930, + "num_samples": 290077, + "num_queries": 2896, + "num_documents": 287181, + "num_relevant_docs": 287181, + "min_document_length": 12, + "average_document_length": 0.29728986249090295, + "max_document_length": 101, + "unique_documents": 287181, + "min_query_length": 9, + "average_query_length": 48038.17472375691, + "max_query_length": 48058, + "unique_queries": 2896, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.7178867403314917, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 287181, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 1, + "average_top_ranked_per_query": 99.16470994475138, + "max_top_ranked_per_query": 100 + }, + "bn": { + "number_of_characters": 22936306, 
+ "num_samples": 41466, + "num_queries": 411, + "num_documents": 41055, + "num_relevant_docs": 41055, + "min_document_length": 16, + "average_document_length": 0.470320302033857, + "max_document_length": 112, + "unique_documents": 41055, + "min_query_length": 12, + "average_query_length": 55759.11678832117, + "max_query_length": 16749, + "unique_queries": 411, + "none_queries": 0, + "min_relevant_docs_per_query": 55, + "average_relevant_docs_per_query": 1.9172749391727495, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 41055, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 55, + "average_top_ranked_per_query": 99.8905109489051, + "max_top_ranked_per_query": 100 + }, + "de": { + "number_of_characters": 16502961, + "num_samples": 30704, + "num_queries": 304, + "num_documents": 30400, + "num_relevant_docs": 30400, + "min_document_length": 15, + "average_document_length": 0.4606578947368421, + "max_document_length": 87, + "unique_documents": 30400, + "min_query_length": 13, + "average_query_length": 54239.99013157895, + "max_query_length": 5224, + "unique_queries": 304, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.542763157894737, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 30400, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + }, + "en": { + "number_of_characters": 51198664, + "num_samples": 79487, + "num_queries": 787, + "num_documents": 78700, + "num_relevant_docs": 78700, + "min_document_length": 16, + "average_document_length": 0.40310038119440916, + "max_document_length": 122, + "unique_documents": 78700, 
+ "min_query_length": 19, + "average_query_length": 65015.171537484115, + "max_query_length": 8110, + "unique_queries": 787, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 2.3824650571791612, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 78700, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + }, + "es": { + "number_of_characters": 34643777, + "num_samples": 62317, + "num_queries": 617, + "num_documents": 61700, + "num_relevant_docs": 61700, + "min_document_length": 19, + "average_document_length": 0.47573743922204215, + "max_document_length": 88, + "unique_documents": 61700, + "min_query_length": 21, + "average_query_length": 56101.1734197731, + "max_query_length": 21550, + "unique_queries": 617, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 3.053484602917342, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 61700, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + }, + "fa": { + "number_of_characters": 27767876, + "num_samples": 63832, + "num_queries": 632, + "num_documents": 63200, + "num_relevant_docs": 63200, + "min_document_length": 18, + "average_document_length": 0.411503164556962, + "max_document_length": 82, + "unique_documents": 63200, + "min_query_length": 14, + "average_query_length": 43895.362341772154, + "max_query_length": 8151, + "unique_queries": 632, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.4667721518987342, + 
"max_relevant_docs_per_query": 100, + "unique_relevant_docs": 63200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + }, + "fi": { + "number_of_characters": 52848523, + "num_samples": 117879, + "num_queries": 1183, + "num_documents": 116696, + "num_relevant_docs": 116696, + "min_document_length": 14, + "average_document_length": 0.3929526290532666, + "max_document_length": 130, + "unique_documents": 116696, + "min_query_length": 13, + "average_query_length": 44634.54522400676, + "max_query_length": 6755, + "unique_queries": 1183, + "none_queries": 0, + "min_relevant_docs_per_query": 3, + "average_relevant_docs_per_query": 1.7557058326289094, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 116696, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 3, + "average_top_ranked_per_query": 98.64412510566356, + "max_top_ranked_per_query": 100 + }, + "fr": { + "number_of_characters": 17084953, + "num_samples": 34643, + "num_queries": 343, + "num_documents": 34300, + "num_relevant_docs": 34300, + "min_document_length": 16, + "average_document_length": 0.4388338192419825, + "max_document_length": 83, + "unique_documents": 34300, + "min_query_length": 25, + "average_query_length": 49766.475218658896, + "max_query_length": 4404, + "unique_queries": 343, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.3877551020408163, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 34300, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": 
null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + }, + "hi": { + "number_of_characters": 21162593, + "num_samples": 35350, + "num_queries": 350, + "num_documents": 35000, + "num_relevant_docs": 35000, + "min_document_length": 24, + "average_document_length": 0.5334, + "max_document_length": 120, + "unique_documents": 35000, + "min_query_length": 13, + "average_query_length": 60411.21142857143, + "max_query_length": 29681, + "unique_queries": 350, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.9142857142857144, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 35000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + }, + "id": { + "number_of_characters": 51428701, + "num_samples": 94149, + "num_queries": 939, + "num_documents": 93210, + "num_relevant_docs": 93210, + "min_document_length": 13, + "average_document_length": 0.3831563137002468, + "max_document_length": 93, + "unique_documents": 93210, + "min_query_length": 9, + "average_query_length": 54731.615548455804, + "max_query_length": 13961, + "unique_queries": 939, + "none_queries": 0, + "min_relevant_docs_per_query": 3, + "average_relevant_docs_per_query": 2.774227902023429, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 93210, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 3, + "average_top_ranked_per_query": 99.26517571884985, + "max_top_ranked_per_query": 100 + }, + "ja": { + "number_of_characters": 17053080, + "num_samples": 80497, + "num_queries": 797, + "num_documents": 79700, + 
"num_relevant_docs": 79700, + "min_document_length": 7, + "average_document_length": 0.177465495608532, + "max_document_length": 48, + "unique_documents": 79700, + "min_query_length": 7, + "average_query_length": 21378.840652446674, + "max_query_length": 6592, + "unique_queries": 797, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.7465495608531996, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 79700, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + }, + "ko": { + "number_of_characters": 5439444, + "num_samples": 21414, + "num_queries": 213, + "num_documents": 21201, + "num_relevant_docs": 21200, + "min_document_length": 5, + "average_document_length": 0.21725390311777745, + "max_document_length": 92, + "unique_documents": 21201, + "min_query_length": 11, + "average_query_length": 25515.671361502347, + "max_query_length": 4838, + "unique_queries": 213, + "none_queries": 0, + "min_relevant_docs_per_query": 0, + "average_relevant_docs_per_query": 1.9812206572769953, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 21200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 1, + "average_top_ranked_per_query": 99.53521126760563, + "max_top_ranked_per_query": 100 + }, + "ru": { + "number_of_characters": 59556512, + "num_samples": 125947, + "num_queries": 1247, + "num_documents": 124700, + "num_relevant_docs": 124700, + "min_document_length": 15, + "average_document_length": 0.4415878107457899, + "max_document_length": 108, + "unique_documents": 124700, + "min_query_length": 8, + "average_query_length": 47715.67441860465, + 
"max_query_length": 12427, + "unique_queries": 1247, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.9534883720930232, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 124700, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + }, + "sw": { + "number_of_characters": 14840684, + "num_samples": 48581, + "num_queries": 481, + "num_documents": 48100, + "num_relevant_docs": 48100, + "min_document_length": 13, + "average_document_length": 0.38885654885654886, + "max_document_length": 75, + "unique_documents": 48100, + "min_query_length": 10, + "average_query_length": 30814.927234927236, + "max_query_length": 6048, + "unique_queries": 481, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.3846153846153846, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 48100, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + }, + "te": { + "number_of_characters": 3910478, + "num_samples": 8484, + "num_queries": 84, + "num_documents": 8400, + "num_relevant_docs": 8400, + "min_document_length": 24, + "average_document_length": 0.3846428571428571, + "max_document_length": 64, + "unique_documents": 8400, + "min_query_length": 19, + "average_query_length": 46514.84523809524, + "max_query_length": 8736, + "unique_queries": 84, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.119047619047619, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 8400, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + }, + "th": { + "number_of_characters": 38321622, + "num_samples": 73671, + "num_queries": 730, + "num_documents": 72941, + "num_relevant_docs": 72941, + "min_document_length": 14, + "average_document_length": 0.42866152095529264, + "max_document_length": 176, + "unique_documents": 72941, + "min_query_length": 15, + "average_query_length": 52452.54109589041, + "max_query_length": 12078, + "unique_queries": 730, + "none_queries": 0, + "min_relevant_docs_per_query": 41, + "average_relevant_docs_per_query": 1.632876712328767, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 72941, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 41, + "average_top_ranked_per_query": 99.91917808219178, + "max_top_ranked_per_query": 100 + }, + "yo": { + "number_of_characters": 4939804, + "num_samples": 12019, + "num_queries": 119, + "num_documents": 11900, + "num_relevant_docs": 11900, + "min_document_length": 25, + "average_document_length": 0.376890756302521, + "max_document_length": 56, + "unique_documents": 11900, + "min_query_length": 7, + "average_query_length": 41473.268907563026, + "max_query_length": 5793, + "unique_queries": 119, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 0.8823529411764706, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 11900, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, 
+ "max_top_ranked_per_query": 100 + }, + "zh": { + "number_of_characters": 6153487, + "num_samples": 39491, + "num_queries": 391, + "num_documents": 39100, + "num_relevant_docs": 39100, + "min_document_length": 7, + "average_document_length": 0.10859335038363171, + "max_document_length": 22, + "unique_documents": 39100, + "min_query_length": 7, + "average_query_length": 15726.959079283888, + "max_query_length": 2629, + "unique_queries": 391, + "none_queries": 0, + "min_relevant_docs_per_query": 100, + "average_relevant_docs_per_query": 1.4194373401534526, + "max_relevant_docs_per_query": 100, + "unique_relevant_docs": 39100, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 100, + "average_top_ranked_per_query": 100.0, + "max_top_ranked_per_query": 100 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json b/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json index 2899e4e4d7..6ddd5ce0b8 100644 --- a/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json @@ -4,6 +4,7 @@ "num_samples": 4872, "num_queries": 2316, "num_documents": 2556, + "num_relevant_docs": 2316, "min_document_length": 8, "average_document_length": 154.68348982785602, "max_document_length": 2863, @@ -12,6 +13,7 @@ "average_query_length": 3868.990932642487, "max_query_length": 47930, "unique_queries": 2316, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, diff --git a/mteb/descriptive_stats/Retrieval/FEVER.json b/mteb/descriptive_stats/Retrieval/FEVER.json new file mode 100644 index 0000000000..18a770b3e0 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/FEVER.json @@ -0,0 +1,30 @@ +{ + "test": { + "number_of_characters": 2921128337, + "num_samples": 
5423234, + "num_queries": 6666, + "num_documents": 5416568, + "num_relevant_docs": 7937, + "min_document_length": 14, + "average_document_length": 0.061047881241406, + "max_document_length": 189, + "unique_documents": 5416568, + "min_query_length": 2, + "average_query_length": 438163.46639663965, + "max_query_length": 374597, + "unique_queries": 6666, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1906690669066906, + "max_relevant_docs_per_query": 15, + "unique_relevant_docs": 1499, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/HotpotQA.json b/mteb/descriptive_stats/Retrieval/HotpotQA.json new file mode 100644 index 0000000000..ae51a60fc8 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/HotpotQA.json @@ -0,0 +1,86 @@ +{ + "train": { + "number_of_characters": 1520922083, + "num_samples": 5318329, + "num_queries": 85000, + "num_documents": 5233329, + "num_relevant_docs": 170000, + "min_document_length": 13, + "average_document_length": 1.7143430118763792, + "max_document_length": 654, + "unique_documents": 5233329, + "min_query_length": 9, + "average_query_length": 17787.651317647058, + "max_query_length": 8276, + "unique_queries": 85000, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 101307, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "dev": { + 
"number_of_characters": 1512524238, + "num_samples": 5238776, + "num_queries": 5447, + "num_documents": 5233329, + "num_relevant_docs": 10894, + "min_document_length": 18, + "average_document_length": 0.10965792519446035, + "max_document_length": 630, + "unique_documents": 5233329, + "min_query_length": 9, + "average_query_length": 277574.8782816229, + "max_query_length": 8276, + "unique_queries": 5447, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 10335, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test": { + "number_of_characters": 1512632888, + "num_samples": 5240734, + "num_queries": 7405, + "num_documents": 5233329, + "num_relevant_docs": 14810, + "min_document_length": 32, + "average_document_length": 0.13041908888204812, + "max_document_length": 288, + "unique_documents": 5233329, + "min_query_length": 9, + "average_query_length": 204179.65725860905, + "max_query_length": 8276, + "unique_queries": 7405, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 13783, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json b/mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json new file mode 100644 index 0000000000..a8c8e7075b --- /dev/null +++ 
b/mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json @@ -0,0 +1,536 @@ +{ + "dev": { + "number_of_characters": 35274535649, + "num_samples": 106345647, + "num_queries": 13495, + "num_documents": 106332152, + "num_relevant_docs": 130408, + "min_document_length": 5, + "average_document_length": 0.004631364932781573, + "max_document_length": 176, + "unique_documents": 106332152, + "min_query_length": 1, + "average_query_length": 2613860.1842163764, + "max_query_length": 84925, + "unique_queries": 13495, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.3059651722860317, + "max_relevant_docs_per_query": 20, + "unique_relevant_docs": 119924, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "ar": { + "number_of_characters": 624607465, + "num_samples": 2064310, + "num_queries": 2896, + "num_documents": 2061414, + "num_relevant_docs": 29197, + "min_document_length": 12, + "average_document_length": 0.041416231771007665, + "max_document_length": 101, + "unique_documents": 2061414, + "min_query_length": 1, + "average_query_length": 215649.89261049725, + "max_query_length": 48538, + "unique_queries": 2896, + "none_queries": 0, + "min_relevant_docs_per_query": 7, + "average_relevant_docs_per_query": 1.953729281767956, + "max_relevant_docs_per_query": 17, + "unique_relevant_docs": 25881, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "bn": { + "number_of_characters": 109132820, + "num_samples": 297676, + "num_queries": 411, + 
"num_documents": 297265, + "num_relevant_docs": 4206, + "min_document_length": 16, + "average_document_length": 0.06495551107597598, + "max_document_length": 112, + "unique_documents": 297265, + "min_query_length": 1, + "average_query_length": 265482.99513381993, + "max_query_length": 17102, + "unique_queries": 411, + "none_queries": 0, + "min_relevant_docs_per_query": 7, + "average_relevant_docs_per_query": 2.099756690997567, + "max_relevant_docs_per_query": 13, + "unique_relevant_docs": 3729, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "de": { + "number_of_characters": 6274005668, + "num_samples": 15866527, + "num_queries": 305, + "num_documents": 15866222, + "num_relevant_docs": 3144, + "min_document_length": 15, + "average_document_length": 0.0008842684792888944, + "max_document_length": 87, + "unique_documents": 15866222, + "min_query_length": 1, + "average_query_length": 20570464.386885244, + "max_query_length": 64939, + "unique_queries": 305, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.6590163934426227, + "max_relevant_docs_per_query": 20, + "unique_relevant_docs": 3103, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "en": { + "number_of_characters": 12534362069, + "num_samples": 32894020, + "num_queries": 799, + "num_documents": 32893221, + "num_relevant_docs": 8350, + "min_document_length": 16, + "average_document_length": 0.0009776482515956707, + "max_document_length": 122, + "unique_documents": 32893221, + "min_query_length": 1, + 
"average_query_length": 15687521.790988736, + "max_query_length": 36444, + "unique_queries": 799, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.911138923654568, + "max_relevant_docs_per_query": 16, + "unique_relevant_docs": 7921, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 3984898056, + "num_samples": 10374601, + "num_queries": 648, + "num_documents": 10373953, + "num_relevant_docs": 6443, + "min_document_length": 19, + "average_document_length": 0.0029591419972695076, + "max_document_length": 88, + "unique_documents": 10373953, + "min_query_length": 1, + "average_query_length": 6149486.663580247, + "max_query_length": 56999, + "unique_queries": 648, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 4.609567901234568, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 6410, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fa": { + "number_of_characters": 548173051, + "num_samples": 2207804, + "num_queries": 632, + "num_documents": 2207172, + "num_relevant_docs": 6571, + "min_document_length": 18, + "average_document_length": 0.011782951215401427, + "max_document_length": 82, + "unique_documents": 2207172, + "min_query_length": 1, + "average_query_length": 867321.2721518987, + "max_query_length": 36480, + "unique_queries": 632, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.079113924050633, + 
"max_relevant_docs_per_query": 20, + "unique_relevant_docs": 6405, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fi": { + "number_of_characters": 647319788, + "num_samples": 1884780, + "num_queries": 1271, + "num_documents": 1883509, + "num_relevant_docs": 12008, + "min_document_length": 14, + "average_document_length": 0.026071019570386975, + "max_document_length": 130, + "unique_documents": 1883509, + "min_query_length": 1, + "average_query_length": 509260.96223446104, + "max_query_length": 11549, + "unique_queries": 1271, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.925255704169945, + "max_relevant_docs_per_query": 16, + "unique_relevant_docs": 11365, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 4741061206, + "num_samples": 14637296, + "num_queries": 343, + "num_documents": 14636953, + "num_relevant_docs": 3429, + "min_document_length": 16, + "average_document_length": 0.0010283561066295698, + "max_document_length": 83, + "unique_documents": 14636953, + "min_query_length": 1, + "average_query_length": 13822291.994169096, + "max_query_length": 52598, + "unique_queries": 343, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.131195335276968, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 3407, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": 
null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hi": { + "number_of_characters": 179956335, + "num_samples": 506614, + "num_queries": 350, + "num_documents": 506264, + "num_relevant_docs": 3494, + "min_document_length": 24, + "average_document_length": 0.0368760172558191, + "max_document_length": 120, + "unique_documents": 506264, + "min_query_length": 1, + "average_query_length": 514107.61714285717, + "max_query_length": 44761, + "unique_queries": 350, + "none_queries": 0, + "min_relevant_docs_per_query": 6, + "average_relevant_docs_per_query": 2.1485714285714286, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 3342, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "id": { + "number_of_characters": 479789527, + "num_samples": 1447275, + "num_queries": 960, + "num_documents": 1446315, + "num_relevant_docs": 9668, + "min_document_length": 13, + "average_document_length": 0.025195064698907223, + "max_document_length": 93, + "unique_documents": 1446315, + "min_query_length": 1, + "average_query_length": 499742.7989583333, + "max_query_length": 39510, + "unique_queries": 960, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 3.216666666666667, + "max_relevant_docs_per_query": 17, + "unique_relevant_docs": 8286, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ja": { + "number_of_characters": 956943052, + "num_samples": 6954474, + "num_queries": 860, + "num_documents": 
6953614, + "num_relevant_docs": 8354, + "min_document_length": 7, + "average_document_length": 0.0021908032283644158, + "max_document_length": 48, + "unique_documents": 6953614, + "min_query_length": 1, + "average_query_length": 1112706.765116279, + "max_query_length": 25232, + "unique_queries": 860, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.0813953488372094, + "max_relevant_docs_per_query": 16, + "unique_relevant_docs": 8066, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ko": { + "number_of_characters": 247737159, + "num_samples": 1486965, + "num_queries": 213, + "num_documents": 1486752, + "num_relevant_docs": 3057, + "min_document_length": 5, + "average_document_length": 0.0030980284539721486, + "max_document_length": 92, + "unique_documents": 1486752, + "min_query_length": 1, + "average_query_length": 1163063.6291079812, + "max_query_length": 25243, + "unique_queries": 213, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.568075117370892, + "max_relevant_docs_per_query": 20, + "unique_relevant_docs": 2835, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ru": { + "number_of_characters": 2969123834, + "num_samples": 9545170, + "num_queries": 1252, + "num_documents": 9543918, + "num_relevant_docs": 13100, + "min_document_length": 15, + "average_document_length": 0.00578944622114314, + "max_document_length": 108, + "unique_documents": 9543918, + "min_query_length": 1, + "average_query_length": 
2371460.5271565495, + "max_query_length": 61639, + "unique_queries": 1252, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 2.8434504792332267, + "max_relevant_docs_per_query": 18, + "unique_relevant_docs": 12607, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "sw": { + "number_of_characters": 28413887, + "num_samples": 132406, + "num_queries": 482, + "num_documents": 131924, + "num_relevant_docs": 5092, + "min_document_length": 13, + "average_document_length": 0.14238500955095357, + "max_document_length": 80, + "unique_documents": 131924, + "min_query_length": 1, + "average_query_length": 58911.0020746888, + "max_query_length": 11185, + "unique_queries": 482, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.887966804979253, + "max_relevant_docs_per_query": 17, + "unique_relevant_docs": 3514, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "te": { + "number_of_characters": 197801286, + "num_samples": 518907, + "num_queries": 828, + "num_documents": 518079, + "num_relevant_docs": 1606, + "min_document_length": 14, + "average_document_length": 0.060911559820027446, + "max_document_length": 111, + "unique_documents": 518079, + "min_query_length": 1, + "average_query_length": 238852.32971014493, + "max_query_length": 17811, + "unique_queries": 828, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0314009661835748, + "max_relevant_docs_per_query": 11, + 
"unique_relevant_docs": 1457, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "th": { + "number_of_characters": 183360331, + "num_samples": 542899, + "num_queries": 733, + "num_documents": 542166, + "num_relevant_docs": 7573, + "min_document_length": 14, + "average_document_length": 0.0579674859729308, + "max_document_length": 176, + "unique_documents": 542166, + "min_query_length": 1, + "average_query_length": 250107.64392905866, + "max_query_length": 31243, + "unique_queries": 733, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.8321964529331514, + "max_relevant_docs_per_query": 15, + "unique_relevant_docs": 6868, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "yo": { + "number_of_characters": 7047836, + "num_samples": 49162, + "num_queries": 119, + "num_documents": 49043, + "num_relevant_docs": 1188, + "min_document_length": 25, + "average_document_length": 0.09145035988826132, + "max_document_length": 56, + "unique_documents": 49043, + "min_query_length": 1, + "average_query_length": 59187.82352941176, + "max_query_length": 10457, + "unique_queries": 119, + "none_queries": 0, + "min_relevant_docs_per_query": 9, + "average_relevant_docs_per_query": 1.2100840336134453, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 942, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zh": { + "number_of_characters": 560802279, + "num_samples": 4934761, + "num_queries": 393, + "num_documents": 4934368, + "num_relevant_docs": 3928, + "min_document_length": 7, + "average_document_length": 0.0008655617092199042, + "max_document_length": 22, + "unique_documents": 4934368, + "min_query_length": 1, + "average_query_length": 1426966.941475827, + "max_query_length": 84925, + "unique_queries": 393, + "none_queries": 0, + "min_relevant_docs_per_query": 8, + "average_relevant_docs_per_query": 2.5292620865139948, + "max_relevant_docs_per_query": 10, + "unique_relevant_docs": 3786, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MSMARCO.json b/mteb/descriptive_stats/Retrieval/MSMARCO.json new file mode 100644 index 0000000000..3a6908af62 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MSMARCO.json @@ -0,0 +1,86 @@ +{ + "train": { + "number_of_characters": 2994608051, + "num_samples": 9344762, + "num_queries": 502939, + "num_documents": 8841823, + "num_relevant_docs": 532751, + "min_document_length": 5, + "average_document_length": 1.8895562600608495, + "max_document_length": 215, + "unique_documents": 8841823, + "min_query_length": 4, + "average_query_length": 5920.9982304016985, + "max_query_length": 1670, + "unique_queries": 502939, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0592755781516248, + "max_relevant_docs_per_query": 7, + "unique_relevant_docs": 516472, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "dev": { + "number_of_characters": 2978133099, + "num_samples": 8848803, + "num_queries": 6980, + "num_documents": 8841823, + "num_relevant_docs": 7437, + "min_document_length": 9, + "average_document_length": 0.026258159657799075, + "max_document_length": 186, + "unique_documents": 8841823, + "min_query_length": 4, + "average_query_length": 426633.37091690546, + "max_query_length": 1670, + "unique_queries": 6980, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0654727793696275, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 7433, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "test": { + "number_of_characters": 2977902337, + "num_samples": 8841866, + "num_queries": 43, + "num_documents": 8841823, + "num_relevant_docs": 9260, + "min_document_length": 16, + "average_document_length": 0.00015924317869742472, + "max_document_length": 55, + "unique_documents": 8841823, + "min_query_length": 4, + "average_query_length": 69253509.97674419, + "max_query_length": 1670, + "unique_queries": 43, + "none_queries": 0, + "min_relevant_docs_per_query": 132, + "average_relevant_docs_per_query": 95.3953488372093, + "max_relevant_docs_per_query": 582, + "unique_relevant_docs": 9139, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Retrieval/MrTidyRetrieval.json b/mteb/descriptive_stats/Retrieval/MrTidyRetrieval.json new file mode 100644 index 0000000000..fd9d85feb0 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MrTidyRetrieval.json @@ -0,0 +1,340 @@ +{ + "test": { + "number_of_characters": 19085636965, + "num_samples": 58051987, + "num_queries": 8661, + "num_documents": 58043326, + "num_relevant_docs": 10105, + "min_document_length": 6, + "average_document_length": 0.005544547877907617, + "max_document_length": 144, + "unique_documents": 58043326, + "min_query_length": 1, + "average_query_length": 2203592.557556864, + "max_query_length": 61639, + "unique_queries": 8661, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1667243967209329, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 8926, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "bengali": { + "number_of_characters": 110965082, + "num_samples": 304170, + "num_queries": 111, + "num_documents": 304059, + "num_relevant_docs": 130, + "min_document_length": 20, + "average_document_length": 0.018641118993353262, + "max_document_length": 122, + "unique_documents": 304059, + "min_query_length": 1, + "average_query_length": 999634.3603603604, + "max_query_length": 16791, + "unique_queries": 111, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1711711711711712, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 116, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "english": { + "number_of_characters": 12550942597, + "num_samples": 32907844, + "num_queries": 744, + "num_documents": 32907100, + "num_relevant_docs": 935, + "min_document_length": 16, + "average_document_length": 0.0009153647693051043, + "max_document_length": 108, + "unique_documents": 32907100, + "min_query_length": 1, + "average_query_length": 16869506.014784947, + "max_query_length": 36444, + "unique_queries": 744, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.256720430107527, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 908, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "finnish": { + "number_of_characters": 656119952, + "num_samples": 1910011, + "num_queries": 1254, + "num_documents": 1908757, + "num_relevant_docs": 1451, + "min_document_length": 13, + "average_document_length": 0.024742803824688003, + "max_document_length": 89, + "unique_documents": 1908757, + "min_query_length": 1, + "average_query_length": 523183.990430622, + "max_query_length": 29374, + "unique_queries": 1254, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.157097288676236, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 1186, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "russian": { + "number_of_characters": 2994189913, + "num_samples": 9598499, + "num_queries": 995, + "num_documents": 9597504, + 
"num_relevant_docs": 1168, + "min_document_length": 14, + "average_document_length": 0.004856262628283353, + "max_document_length": 138, + "unique_documents": 9597504, + "min_query_length": 1, + "average_query_length": 3009189.2512562815, + "max_query_length": 61639, + "unique_queries": 995, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1738693467336683, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 1100, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "korean": { + "number_of_characters": 249708451, + "num_samples": 1496547, + "num_queries": 421, + "num_documents": 1496126, + "num_relevant_docs": 492, + "min_document_length": 6, + "average_document_length": 0.006410556330148664, + "max_document_length": 122, + "unique_documents": 1496126, + "min_query_length": 1, + "average_query_length": 593108.9311163896, + "max_query_length": 25243, + "unique_queries": 421, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.168646080760095, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 397, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "japanese": { + "number_of_characters": 971241388, + "num_samples": 7000747, + "num_queries": 720, + "num_documents": 7000027, + "num_relevant_docs": 923, + "min_document_length": 6, + "average_document_length": 0.0018598499691501189, + "max_document_length": 44, + "unique_documents": 7000027, + "min_query_length": 1, + "average_query_length": 
1348928.2902777777, + "max_query_length": 25232, + "unique_queries": 720, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.2819444444444446, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 880, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "telugu": { + "number_of_characters": 202749454, + "num_samples": 548870, + "num_queries": 646, + "num_documents": 548224, + "num_relevant_docs": 677, + "min_document_length": 13, + "average_document_length": 0.04451647501751109, + "max_document_length": 119, + "unique_documents": 548224, + "min_query_length": 1, + "average_query_length": 313815.8653250774, + "max_query_length": 17811, + "unique_queries": 646, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0479876160990713, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 600, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "thai": { + "number_of_characters": 192485963, + "num_samples": 570045, + "num_queries": 1190, + "num_documents": 568855, + "num_relevant_docs": 1368, + "min_document_length": 13, + "average_document_length": 0.08903850717669705, + "max_document_length": 144, + "unique_documents": 568855, + "min_query_length": 1, + "average_query_length": 161710.34705882354, + "max_query_length": 31244, + "unique_queries": 1190, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.149579831932773, + "max_relevant_docs_per_query": 3, + 
"unique_relevant_docs": 1163, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "swahili": { + "number_of_characters": 29636822, + "num_samples": 137359, + "num_queries": 670, + "num_documents": 136689, + "num_relevant_docs": 743, + "min_document_length": 15, + "average_document_length": 0.2054664237795287, + "max_document_length": 98, + "unique_documents": 136689, + "min_query_length": 1, + "average_query_length": 44192.1447761194, + "max_query_length": 11185, + "unique_queries": 670, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.108955223880597, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 552, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "arabic": { + "number_of_characters": 640057511, + "num_samples": 2107667, + "num_queries": 1081, + "num_documents": 2106586, + "num_relevant_docs": 1257, + "min_document_length": 12, + "average_document_length": 0.015663257991840828, + "max_document_length": 93, + "unique_documents": 2106586, + "min_query_length": 1, + "average_query_length": 592067.0814061054, + "max_query_length": 48538, + "unique_queries": 1081, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1628122109158188, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 1138, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "indonesian": { + "number_of_characters": 487539832, + "num_samples": 1470228, + "num_queries": 829, + "num_documents": 1469399, + "num_relevant_docs": 961, + "min_document_length": 17, + "average_document_length": 0.02276577022306399, + "max_document_length": 128, + "unique_documents": 1469399, + "min_query_length": 1, + "average_query_length": 588065.5971049457, + "max_query_length": 39510, + "unique_queries": 829, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1592279855247285, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 886, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MultiLongDocRetrieval.json b/mteb/descriptive_stats/Retrieval/MultiLongDocRetrieval.json new file mode 100644 index 0000000000..a7de31f155 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MultiLongDocRetrieval.json @@ -0,0 +1,790 @@ +{ + "dev": { + "number_of_characters": 6641969996, + "num_samples": 496309, + "num_queries": 2600, + "num_documents": 493709, + "num_relevant_docs": 2600, + "min_document_length": 3, + "average_document_length": 0.49106862544535346, + "max_document_length": 2041, + "unique_documents": 493709, + "min_query_length": 36, + "average_query_length": 2554510.5965384617, + "max_query_length": 471024, + "unique_queries": 2600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 2600, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "ar": { + "number_of_characters": 222400555, + "num_samples": 7807, + "num_queries": 200, + "num_documents": 7607, + "num_relevant_docs": 200, + "min_document_length": 6, + "average_document_length": 1.8212172998553964, + "max_document_length": 194, + "unique_documents": 7607, + "min_query_length": 2173, + "average_query_length": 1111933.505, + "max_query_length": 276627, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "de": { + "number_of_characters": 337742837, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 7, + "average_document_length": 3.0726, + "max_document_length": 2041, + "unique_documents": 10000, + "min_query_length": 104, + "average_query_length": 1688560.555, + "max_query_length": 186335, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "en": { + "number_of_characters": 2666569772, + "num_samples": 200200, + "num_queries": 200, + 
"num_documents": 200000, + "num_relevant_docs": 200, + "min_document_length": 16, + "average_document_length": 0.08122, + "max_document_length": 180, + "unique_documents": 200000, + "min_query_length": 2137, + "average_query_length": 13332767.64, + "max_query_length": 382998, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 349277698, + "num_samples": 9751, + "num_queries": 200, + "num_documents": 9551, + "num_relevant_docs": 200, + "min_document_length": 19, + "average_document_length": 2.5779499528845147, + "max_document_length": 305, + "unique_documents": 9551, + "min_query_length": 2657, + "average_query_length": 1746265.38, + "max_query_length": 471024, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 360123367, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 13, + "average_document_length": 2.8433, + "max_document_length": 1590, + "unique_documents": 10000, + "min_query_length": 2093, + "average_query_length": 1800474.67, + "max_query_length": 425370, + "unique_queries": 200, + 
"none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hi": { + "number_of_characters": 71144060, + "num_samples": 4006, + "num_queries": 200, + "num_documents": 3806, + "num_relevant_docs": 200, + "min_document_length": 4, + "average_document_length": 4.098528638991067, + "max_document_length": 318, + "unique_documents": 3806, + "min_query_length": 2426, + "average_query_length": 355642.305, + "max_query_length": 227264, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "it": { + "number_of_characters": 366359892, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 9, + "average_document_length": 1.9923, + "max_document_length": 950, + "unique_documents": 10000, + "min_query_length": 2491, + "average_query_length": 1831699.845, + "max_query_length": 312623, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ja": { + "number_of_characters": 144819833, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 3, + "average_document_length": 1.2325, + "max_document_length": 576, + "unique_documents": 10000, + "min_query_length": 1245, + "average_query_length": 724037.54, + "max_query_length": 234888, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ko": { + "number_of_characters": 85323582, + "num_samples": 6376, + "num_queries": 200, + "num_documents": 6176, + "num_relevant_docs": 200, + "min_document_length": 8, + "average_document_length": 1.9056023316062176, + "max_document_length": 664, + "unique_documents": 6176, + "min_query_length": 1490, + "average_query_length": 426559.065, + "max_query_length": 171299, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "pt": { + "number_of_characters": 211070508, + "num_samples": 6769, + "num_queries": 200, + "num_documents": 6569, + "num_relevant_docs": 200, + "min_document_length": 
7, + "average_document_length": 3.722788856751408, + "max_document_length": 506, + "unique_documents": 6569, + "min_query_length": 3078, + "average_query_length": 1055230.265, + "max_query_length": 400864, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ru": { + "number_of_characters": 359366331, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 8, + "average_document_length": 1.7575, + "max_document_length": 216, + "unique_documents": 10000, + "min_query_length": 2901, + "average_query_length": 1796743.78, + "max_query_length": 303226, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "th": { + "number_of_characters": 259954258, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 30, + "average_document_length": 2.1562, + "max_document_length": 1123, + "unique_documents": 10000, + "min_query_length": 36, + "average_query_length": 1299663.48, + "max_query_length": 183497, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zh": { + "number_of_characters": 1207817303, + "num_samples": 200200, + "num_queries": 200, + "num_documents": 200000, + "num_relevant_docs": 200, + "min_document_length": 5, + "average_document_length": 0.02679, + "max_document_length": 476, + "unique_documents": 200000, + "min_query_length": 1038, + "average_query_length": 6039059.725, + "max_query_length": 278468, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "test": { + "number_of_characters": 6642036446, + "num_samples": 497509, + "num_queries": 3800, + "num_documents": 493709, + "num_relevant_docs": 3800, + "min_document_length": 3, + "average_document_length": 0.6256620802942624, + "max_document_length": 2589, + "unique_documents": 493709, + "min_query_length": 36, + "average_query_length": 1747823.039736842, + "max_query_length": 471024, + "unique_queries": 3800, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3800, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "ar": { + "number_of_characters": 222401855, + "num_samples": 7807, + "num_queries": 200, + "num_documents": 7607, + "num_relevant_docs": 200, + "min_document_length": 7, + "average_document_length": 1.9921125279347969, + "max_document_length": 695, + "unique_documents": 7607, + "min_query_length": 2173, + "average_query_length": 1111933.505, + "max_query_length": 276627, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "de": { + "number_of_characters": 337736841, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 10, + "average_document_length": 2.473, + "max_document_length": 957, + "unique_documents": 10000, + "min_query_length": 104, + "average_query_length": 1688560.555, + "max_query_length": 186335, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "en": { + "number_of_characters": 2666618592, + "num_samples": 200800, + "num_queries": 800, + "num_documents": 200000, + "num_relevant_docs": 800, + 
"min_document_length": 18, + "average_document_length": 0.32532, + "max_document_length": 255, + "unique_documents": 200000, + "min_query_length": 2137, + "average_query_length": 3333191.91, + "max_query_length": 382998, + "unique_queries": 800, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 800, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 349279473, + "num_samples": 9751, + "num_queries": 200, + "num_documents": 9551, + "num_relevant_docs": 200, + "min_document_length": 40, + "average_document_length": 2.763794367081981, + "max_document_length": 480, + "unique_documents": 9551, + "min_query_length": 2657, + "average_query_length": 1746265.38, + "max_query_length": 471024, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 360124893, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 33, + "average_document_length": 2.9959, + "max_document_length": 2589, + "unique_documents": 10000, + "min_query_length": 2093, + "average_query_length": 1800474.67, + "max_query_length": 425370, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "hi": { + "number_of_characters": 71149213, + "num_samples": 4006, + "num_queries": 200, + "num_documents": 3806, + "num_relevant_docs": 200, + "min_document_length": 6, + "average_document_length": 5.452443510246979, + "max_document_length": 2022, + "unique_documents": 3806, + "min_query_length": 2426, + "average_query_length": 355642.305, + "max_query_length": 227264, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "it": { + "number_of_characters": 366362888, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 12, + "average_document_length": 2.2919, + "max_document_length": 1899, + "unique_documents": 10000, + "min_query_length": 2491, + "average_query_length": 1831699.845, + "max_query_length": 312623, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ja": { + "number_of_characters": 144818654, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 6, + "average_document_length": 1.1146, + "max_document_length": 416, + "unique_documents": 10000, + "min_query_length": 1245, + "average_query_length": 724037.54, + "max_query_length": 234888, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ko": { + "number_of_characters": 85323557, + "num_samples": 6376, + "num_queries": 200, + "num_documents": 6176, + "num_relevant_docs": 200, + "min_document_length": 8, + "average_document_length": 1.9015544041450778, + "max_document_length": 330, + "unique_documents": 6176, + "min_query_length": 1490, + "average_query_length": 426559.065, + "max_query_length": 171299, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "pt": { + "number_of_characters": 211068744, + "num_samples": 6769, + "num_queries": 200, + "num_documents": 6569, + "num_relevant_docs": 200, + "min_document_length": 4, + "average_document_length": 3.4542548333079615, + 
"max_document_length": 511, + "unique_documents": 6569, + "min_query_length": 3078, + "average_query_length": 1055230.265, + "max_query_length": 400864, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "ru": { + "number_of_characters": 359367730, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 12, + "average_document_length": 1.8974, + "max_document_length": 413, + "unique_documents": 10000, + "min_query_length": 2901, + "average_query_length": 1796743.78, + "max_query_length": 303226, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "th": { + "number_of_characters": 259952294, + "num_samples": 10200, + "num_queries": 200, + "num_documents": 10000, + "num_relevant_docs": 200, + "min_document_length": 11, + "average_document_length": 1.9598, + "max_document_length": 309, + "unique_documents": 10000, + "min_query_length": 36, + "average_query_length": 1299663.48, + "max_query_length": 183497, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 200, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zh": { + "number_of_characters": 1207831712, + "num_samples": 200800, + "num_queries": 800, + "num_documents": 200000, + "num_relevant_docs": 800, + "min_document_length": 3, + "average_document_length": 0.098835, + "max_document_length": 646, + "unique_documents": 200000, + "min_query_length": 1038, + "average_query_length": 1509764.93125, + "max_query_length": 278468, + "unique_queries": 800, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 800, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TopiOCQA.json b/mteb/descriptive_stats/Retrieval/TopiOCQA.json new file mode 100644 index 0000000000..bc3bca51ef --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/TopiOCQA.json @@ -0,0 +1,30 @@ +{ + "validation": { + "number_of_characters": 11369989152, + "num_samples": 25703106, + "num_queries": 2514, + "num_documents": 25700592, + "num_relevant_docs": 2514, + "min_document_length": 1, + "average_document_length": 0.0012305553117220023, + "max_document_length": 31, + "unique_documents": 25700592, + "min_query_length": 1, + "average_query_length": 4522656.136038186, + "max_query_length": 28038, + "unique_queries": 2514, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1940, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/XPQARetrieval.json b/mteb/descriptive_stats/Retrieval/XPQARetrieval.json index b00f01f22b..9b33a6cb7d 100644 --- a/mteb/descriptive_stats/Retrieval/XPQARetrieval.json +++ b/mteb/descriptive_stats/Retrieval/XPQARetrieval.json @@ -4,6 +4,7 @@ "num_samples": 81710, "num_queries": 27856, "num_documents": 53854, + "num_relevant_docs": 55424, "min_document_length": 3, "average_document_length": 20.861588739926468, "max_document_length": 298, @@ -12,6 +13,7 @@ "average_query_length": 150.2376866743251, "max_query_length": 4229, "unique_queries": 27856, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9896611143021252, "max_relevant_docs_per_query": 17, @@ -30,6 +32,7 @@ "num_samples": 2245, "num_queries": 750, "num_documents": 1495, + "num_relevant_docs": 1503, "min_document_length": 8, "average_document_length": 14.893645484949833, "max_document_length": 111, @@ -38,6 +41,7 @@ "average_query_length": 123.35466666666666, "max_query_length": 1200, "unique_queries": 750, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.004, "max_relevant_docs_per_query": 5, @@ -56,6 +60,7 @@ "num_samples": 2283, "num_queries": 750, "num_documents": 1533, + "num_relevant_docs": 1544, "min_document_length": 8, "average_document_length": 14.524461839530332, "max_document_length": 111, @@ -64,6 +69,7 @@ "average_query_length": 256.05066666666664, "max_query_length": 4229, "unique_queries": 750, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 
2.058666666666667, "max_relevant_docs_per_query": 5, @@ -82,6 +88,7 @@ "num_samples": 2237, "num_queries": 742, "num_documents": 1495, + "num_relevant_docs": 1502, "min_document_length": 11, "average_document_length": 19.614046822742473, "max_document_length": 162, @@ -90,6 +97,7 @@ "average_query_length": 124.68463611859838, "max_query_length": 1200, "unique_queries": 742, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.024258760107817, "max_relevant_docs_per_query": 5, @@ -108,6 +116,7 @@ "num_samples": 2014, "num_queries": 766, "num_documents": 1248, + "num_relevant_docs": 1250, "min_document_length": 17, "average_document_length": 34.076121794871796, "max_document_length": 144, @@ -116,6 +125,7 @@ "average_query_length": 113.31070496083551, "max_query_length": 383, "unique_queries": 766, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6318537859007833, "max_relevant_docs_per_query": 5, @@ -134,6 +144,7 @@ "num_samples": 2265, "num_queries": 766, "num_documents": 1499, + "num_relevant_docs": 1504, "min_document_length": 17, "average_document_length": 28.370246831220815, "max_document_length": 144, @@ -142,6 +153,7 @@ "average_query_length": 226.55483028720627, "max_query_length": 1130, "unique_queries": 766, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9634464751958225, "max_relevant_docs_per_query": 5, @@ -160,6 +172,7 @@ "num_samples": 2014, "num_queries": 766, "num_documents": 1248, + "num_relevant_docs": 1250, "min_document_length": 15, "average_document_length": 31.848557692307693, "max_document_length": 144, @@ -168,6 +181,7 @@ "average_query_length": 113.31070496083551, "max_query_length": 383, "unique_queries": 766, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6318537859007833, "max_relevant_docs_per_query": 5, @@ -186,6 +200,7 @@ "num_samples": 2734, "num_queries": 793, 
"num_documents": 1941, + "num_relevant_docs": 1942, "min_document_length": 12, "average_document_length": 19.08397733127254, "max_document_length": 140, @@ -194,6 +209,7 @@ "average_query_length": 167.11475409836066, "max_query_length": 266, "unique_queries": 793, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.4489281210592684, "max_relevant_docs_per_query": 5, @@ -212,6 +228,7 @@ "num_samples": 2729, "num_queries": 793, "num_documents": 1936, + "num_relevant_docs": 1961, "min_document_length": 12, "average_document_length": 19.13326446280992, "max_document_length": 140, @@ -220,6 +237,7 @@ "average_query_length": 301.3543505674653, "max_query_length": 1401, "unique_queries": 793, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.472887767969735, "max_relevant_docs_per_query": 5, @@ -238,6 +256,7 @@ "num_samples": 2734, "num_queries": 793, "num_documents": 1941, + "num_relevant_docs": 1942, "min_document_length": 12, "average_document_length": 19.287995878413188, "max_document_length": 133, @@ -246,6 +265,7 @@ "average_query_length": 167.11475409836066, "max_query_length": 266, "unique_queries": 793, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.4489281210592684, "max_relevant_docs_per_query": 5, @@ -264,6 +284,7 @@ "num_samples": 2297, "num_queries": 749, "num_documents": 1548, + "num_relevant_docs": 1550, "min_document_length": 12, "average_document_length": 27.120801033591732, "max_document_length": 110, @@ -272,6 +293,7 @@ "average_query_length": 159.1268357810414, "max_query_length": 359, "unique_queries": 749, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.069425901201602, "max_relevant_docs_per_query": 5, @@ -290,6 +312,7 @@ "num_samples": 2423, "num_queries": 749, "num_documents": 1674, + "num_relevant_docs": 1684, "min_document_length": 12, "average_document_length": 
25.079450418160096, "max_document_length": 110, @@ -298,6 +321,7 @@ "average_query_length": 306.890520694259, "max_query_length": 1798, "unique_queries": 749, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.248331108144192, "max_relevant_docs_per_query": 5, @@ -316,6 +340,7 @@ "num_samples": 2297, "num_queries": 749, "num_documents": 1548, + "num_relevant_docs": 1550, "min_document_length": 11, "average_document_length": 23.992894056847547, "max_document_length": 110, @@ -324,6 +349,7 @@ "average_query_length": 159.1268357810414, "max_query_length": 359, "unique_queries": 749, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.069425901201602, "max_relevant_docs_per_query": 5, @@ -342,6 +368,7 @@ "num_samples": 2176, "num_queries": 925, "num_documents": 1251, + "num_relevant_docs": 1286, "min_document_length": 8, "average_document_length": 24.753796962430055, "max_document_length": 97, @@ -350,6 +377,7 @@ "average_query_length": 63.84540540540541, "max_query_length": 246, "unique_queries": 925, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.3902702702702703, "max_relevant_docs_per_query": 5, @@ -368,6 +396,7 @@ "num_samples": 2431, "num_queries": 925, "num_documents": 1506, + "num_relevant_docs": 1670, "min_document_length": 8, "average_document_length": 20.562416998671978, "max_document_length": 97, @@ -376,6 +405,7 @@ "average_query_length": 173.6810810810811, "max_query_length": 2000, "unique_queries": 925, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.8054054054054054, "max_relevant_docs_per_query": 5, @@ -394,6 +424,7 @@ "num_samples": 2163, "num_queries": 912, "num_documents": 1251, + "num_relevant_docs": 1286, "min_document_length": 8, "average_document_length": 25.50519584332534, "max_document_length": 118, @@ -402,6 +433,7 @@ "average_query_length": 64.75548245614036, 
"max_query_length": 246, "unique_queries": 912, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.4100877192982457, "max_relevant_docs_per_query": 8, @@ -420,6 +452,7 @@ "num_samples": 1935, "num_queries": 663, "num_documents": 1272, + "num_relevant_docs": 1276, "min_document_length": 13, "average_document_length": 25.617924528301888, "max_document_length": 134, @@ -428,6 +461,7 @@ "average_query_length": 114.68778280542986, "max_query_length": 293, "unique_queries": 663, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9245852187028658, "max_relevant_docs_per_query": 5, @@ -446,6 +480,7 @@ "num_samples": 1964, "num_queries": 663, "num_documents": 1301, + "num_relevant_docs": 1316, "min_document_length": 13, "average_document_length": 25.046887009992314, "max_document_length": 134, @@ -454,6 +489,7 @@ "average_query_length": 241.5052790346908, "max_query_length": 1561, "unique_queries": 663, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9849170437405732, "max_relevant_docs_per_query": 5, @@ -472,6 +508,7 @@ "num_samples": 1935, "num_queries": 663, "num_documents": 1272, + "num_relevant_docs": 1276, "min_document_length": 11, "average_document_length": 25.56132075471698, "max_document_length": 131, @@ -480,6 +517,7 @@ "average_query_length": 114.68778280542986, "max_query_length": 293, "unique_queries": 663, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9245852187028658, "max_relevant_docs_per_query": 5, @@ -498,6 +536,7 @@ "num_samples": 2426, "num_queries": 825, "num_documents": 1601, + "num_relevant_docs": 1601, "min_document_length": 5, "average_document_length": 12.004996876951905, "max_document_length": 49, @@ -506,6 +545,7 @@ "average_query_length": 79.62424242424242, "max_query_length": 368, "unique_queries": 825, + "none_queries": 0, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.9406060606060607, "max_relevant_docs_per_query": 5, @@ -524,6 +564,7 @@ "num_samples": 2570, "num_queries": 825, "num_documents": 1745, + "num_relevant_docs": 1748, "min_document_length": 5, "average_document_length": 11.01432664756447, "max_document_length": 49, @@ -532,6 +573,7 @@ "average_query_length": 267.0690909090909, "max_query_length": 1116, "unique_queries": 825, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.1187878787878787, "max_relevant_docs_per_query": 5, @@ -550,6 +592,7 @@ "num_samples": 2423, "num_queries": 822, "num_documents": 1601, + "num_relevant_docs": 1601, "min_document_length": 13, "average_document_length": 26.398500936914427, "max_document_length": 154, @@ -558,6 +601,7 @@ "average_query_length": 79.91484184914842, "max_query_length": 368, "unique_queries": 822, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9476885644768855, "max_relevant_docs_per_query": 6, @@ -576,6 +620,7 @@ "num_samples": 1543, "num_queries": 654, "num_documents": 889, + "num_relevant_docs": 1023, "min_document_length": 4, "average_document_length": 16.050618672665916, "max_document_length": 149, @@ -584,6 +629,7 @@ "average_query_length": 42.448012232415905, "max_query_length": 231, "unique_queries": 654, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.5642201834862386, "max_relevant_docs_per_query": 5, @@ -602,6 +648,7 @@ "num_samples": 1823, "num_queries": 654, "num_documents": 1169, + "num_relevant_docs": 1277, "min_document_length": 4, "average_document_length": 12.206159110350727, "max_document_length": 149, @@ -610,6 +657,7 @@ "average_query_length": 200.93272171253824, "max_query_length": 1948, "unique_queries": 654, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.952599388379205, "max_relevant_docs_per_query": 5, @@ -628,6 +676,7 @@ 
"num_samples": 1503, "num_queries": 614, "num_documents": 889, + "num_relevant_docs": 1023, "min_document_length": 5, "average_document_length": 30.35658042744657, "max_document_length": 298, @@ -636,6 +685,7 @@ "average_query_length": 45.21335504885994, "max_query_length": 231, "unique_queries": 614, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6661237785016287, "max_relevant_docs_per_query": 9, @@ -654,6 +704,7 @@ "num_samples": 2364, "num_queries": 785, "num_documents": 1579, + "num_relevant_docs": 1633, "min_document_length": 8, "average_document_length": 26.707409753008232, "max_document_length": 150, @@ -662,6 +713,7 @@ "average_query_length": 101.9171974522293, "max_query_length": 219, "unique_queries": 785, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.080254777070064, "max_relevant_docs_per_query": 5, @@ -680,6 +732,7 @@ "num_samples": 2538, "num_queries": 785, "num_documents": 1753, + "num_relevant_docs": 1873, "min_document_length": 8, "average_document_length": 24.056474614945806, "max_document_length": 150, @@ -688,6 +741,7 @@ "average_query_length": 252.27388535031847, "max_query_length": 1459, "unique_queries": 785, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.385987261146497, "max_relevant_docs_per_query": 5, @@ -706,6 +760,7 @@ "num_samples": 2356, "num_queries": 777, "num_documents": 1579, + "num_relevant_docs": 1633, "min_document_length": 5, "average_document_length": 26.67067764407853, "max_document_length": 180, @@ -714,6 +769,7 @@ "average_query_length": 102.96653796653797, "max_query_length": 219, "unique_queries": 777, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.101673101673102, "max_relevant_docs_per_query": 6, @@ -732,6 +788,7 @@ "num_samples": 2422, "num_queries": 800, "num_documents": 1622, + "num_relevant_docs": 1712, "min_document_length": 9, 
"average_document_length": 21.005548705302097, "max_document_length": 126, @@ -740,6 +797,7 @@ "average_query_length": 154.05875, "max_query_length": 500, "unique_queries": 800, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.14, "max_relevant_docs_per_query": 5, @@ -758,6 +816,7 @@ "num_samples": 2439, "num_queries": 800, "num_documents": 1639, + "num_relevant_docs": 1775, "min_document_length": 9, "average_document_length": 20.787675411836485, "max_document_length": 126, @@ -766,6 +825,7 @@ "average_query_length": 228.2825, "max_query_length": 1206, "unique_queries": 800, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.21875, "max_relevant_docs_per_query": 5, @@ -784,6 +844,7 @@ "num_samples": 2419, "num_queries": 797, "num_documents": 1622, + "num_relevant_docs": 1712, "min_document_length": 9, "average_document_length": 22.887792848335387, "max_document_length": 136, @@ -792,6 +853,7 @@ "average_query_length": 154.63864491844416, "max_query_length": 500, "unique_queries": 797, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.148055207026349, "max_relevant_docs_per_query": 6, @@ -810,6 +872,7 @@ "num_samples": 2057, "num_queries": 782, "num_documents": 1275, + "num_relevant_docs": 1329, "min_document_length": 3, "average_document_length": 20.40392156862745, "max_document_length": 146, @@ -818,6 +881,7 @@ "average_query_length": 105.79923273657289, "max_query_length": 441, "unique_queries": 782, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6994884910485935, "max_relevant_docs_per_query": 5, @@ -836,6 +900,7 @@ "num_samples": 2266, "num_queries": 782, "num_documents": 1484, + "num_relevant_docs": 1584, "min_document_length": 3, "average_document_length": 17.53032345013477, "max_document_length": 146, @@ -844,6 +909,7 @@ "average_query_length": 184.0076726342711, "max_query_length": 1240, 
"unique_queries": 782, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.0255754475703327, "max_relevant_docs_per_query": 5, @@ -862,6 +928,7 @@ "num_samples": 2044, "num_queries": 769, "num_documents": 1275, + "num_relevant_docs": 1329, "min_document_length": 6, "average_document_length": 20.975686274509805, "max_document_length": 162, @@ -870,6 +937,7 @@ "average_query_length": 107.58777633289986, "max_query_length": 441, "unique_queries": 769, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.728218465539662, "max_relevant_docs_per_query": 17, @@ -888,6 +956,7 @@ "num_samples": 2529, "num_queries": 824, "num_documents": 1705, + "num_relevant_docs": 1707, "min_document_length": 5, "average_document_length": 5.901466275659824, "max_document_length": 29, @@ -896,6 +965,7 @@ "average_query_length": 43.36771844660194, "max_query_length": 236, "unique_queries": 824, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.0716019417475726, "max_relevant_docs_per_query": 5, @@ -914,6 +984,7 @@ "num_samples": 2587, "num_queries": 824, "num_documents": 1763, + "num_relevant_docs": 1865, "min_document_length": 5, "average_document_length": 5.7073170731707314, "max_document_length": 29, @@ -922,6 +993,7 @@ "average_query_length": 231.748786407767, "max_query_length": 965, "unique_queries": 824, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.2633495145631066, "max_relevant_docs_per_query": 5, @@ -940,6 +1012,7 @@ "num_samples": 2525, "num_queries": 820, "num_documents": 1705, + "num_relevant_docs": 1707, "min_document_length": 10, "average_document_length": 19.835777126099707, "max_document_length": 130, @@ -948,6 +1021,7 @@ "average_query_length": 43.579268292682926, "max_query_length": 236, "unique_queries": 820, + "none_queries": 0, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 
2.0817073170731706, "max_relevant_docs_per_query": 6, diff --git a/tests/test_TaskMetadata.py b/tests/test_TaskMetadata.py index b72539c3e0..40acaca430 100644 --- a/tests/test_TaskMetadata.py +++ b/tests/test_TaskMetadata.py @@ -531,26 +531,17 @@ def test_empty_descriptive_stat_in_new_datasets(task: AbsTask): # DON'T ADD NEW DATASETS TO THIS LIST # THIS IS ONLY INTENDED FOR HISTORIC DATASETS exceptions = [ - "FEVER", - "HotpotQA", - "MSMARCO", "MSMARCOv2", - "TopiOCQA", - "MIRACLRetrieval", - "MrTidyRetrieval", "BrightRetrieval", - "MultiLongDocRetrieval", "NeuCLIR2022Retrieval", "NeuCLIR2023Retrieval", "BibleNLPBitextMining", "FloresBitextMining", "FilipinoHateSpeechClassification", "SwissJudgementClassification", - "MultiEURLEXMultilabelClassification", "MindSmallReranking", "WebLINXCandidatesReranking", "VoyageMMarcoReranking", - "MIRACLReranking", ] if task.metadata.name.startswith("Mock"): diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index e4cf91e0d8..ad011dfcef 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -1130,6 +1130,7 @@ class MockRerankingTask(AbsTaskReranking): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1199,6 +1200,7 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "num_samples": 8, "num_queries": 4, "num_documents": 4, + "num_relevant_docs": 8, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1226,6 +1228,7 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1253,6 +1256,7 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "num_samples": 4, 
"num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1334,6 +1338,7 @@ class MockRetrievalTask(AbsTaskRetrieval): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1397,6 +1402,7 @@ class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): "num_samples": 8, "num_queries": 4, "num_documents": 4, + "num_relevant_docs": 8, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1424,6 +1430,7 @@ class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1451,6 +1458,7 @@ class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1715,6 +1723,7 @@ class MockInstructionRetrieval(AbsTaskRetrieval): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1783,6 +1792,7 @@ class MockInstructionReranking(AbsTaskReranking): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1856,6 +1866,7 @@ class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): "num_samples": 8, "num_queries": 4, "num_documents": 4, + "num_relevant_docs": 8, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1883,6 +1894,7 @@ class 
MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1910,6 +1922,7 @@ class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -1997,6 +2010,7 @@ class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): "num_samples": 8, "num_queries": 4, "num_documents": 4, + "num_relevant_docs": 8, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -2024,6 +2038,7 @@ class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, @@ -2051,6 +2066,7 @@ class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): "num_samples": 4, "num_queries": 2, "num_documents": 2, + "num_relevant_docs": 4, "min_document_length": 23, "average_document_length": 26.0, "max_document_length": 29, From a7a52143c50fc94e0748f4418c678d3ea82aece5 Mon Sep 17 00:00:00 2001 From: dokato Date: Sat, 23 Nov 2024 12:44:56 +0000 Subject: [PATCH 13/40] 1475 add descriptive stats to all tasks v2 (#1482) * add code for comupting number of qrels * BibleNLPBitextMining descriptive stats added * SwissJudgementClassification descriptive stats added * VoyageMMarcoReranking descriptive stats added * WebLINXCandidatesReranking descriptive stats added * MultiEURLEXMultilabelClassification descriptive stats added * MIRACLReranking descriptive stats added * MindSmallReranking descriptive stats added * updated test_TaskMetadata * fix test --------- Co-authored-by: Imene Kerboua 
Co-authored-by: Imene Kerboua <33312980+imenelydiaker@users.noreply.github.com> Co-authored-by: Roman Solomatin <36135455+Samoed@users.noreply.github.com> --- .../BitextMining/BibleNLPBitextMining.json | 21545 ++++++++++++++++ .../SwissJudgementClassification.json | 150 + .../Reranking/MindSmallReranking.json | 30 + .../Reranking/VoyageMMarcoReranking.json | 30 + .../Reranking/WebLINXCandidatesReranking.json | 170 + tests/test_TaskMetadata.py | 5 - 6 files changed, 21925 insertions(+), 5 deletions(-) create mode 100644 mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json create mode 100644 mteb/descriptive_stats/Classification/SwissJudgementClassification.json create mode 100644 mteb/descriptive_stats/Reranking/MindSmallReranking.json create mode 100644 mteb/descriptive_stats/Reranking/VoyageMMarcoReranking.json create mode 100644 mteb/descriptive_stats/Reranking/WebLINXCandidatesReranking.json diff --git a/mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json b/mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json new file mode 100644 index 0000000000..d7730746a6 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json @@ -0,0 +1,21545 @@ +{ + "train": { + "num_samples": 417452, + "number_of_characters": 132355840, + "unique_pairs": 416080, + "min_sentence1_length": 1, + "average_sentence1_length": 158.52821402221093, + "max_sentence1_length": 4949, + "unique_sentence1": 213216, + "min_sentence2_length": 1, + "average_sentence2_length": 158.52821402221093, + "max_sentence2_length": 4949, + "unique_sentence2": 213216, + "hf_subset_descriptive_stats": { + "eng_Latn-aai_Latn": { + "num_samples": 256, + "number_of_characters": 66320, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 146.66796875, + "max_sentence2_length": 322, + "unique_sentence2": 256 + }, 
+ "aai_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66320, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 146.66796875, + "max_sentence1_length": 322, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-aak_Arab": { + "num_samples": 256, + "number_of_characters": 103517, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 112.16015625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 292.203125, + "max_sentence2_length": 809, + "unique_sentence2": 256 + }, + "aak_Arab-eng_Latn": { + "num_samples": 256, + "number_of_characters": 103517, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 292.203125, + "max_sentence1_length": 809, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 112.16015625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-aau_Latn": { + "num_samples": 256, + "number_of_characters": 78838, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.42578125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 197.53515625, + "max_sentence2_length": 496, + "unique_sentence2": 256 + }, + "aau_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78838, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 197.53515625, + "max_sentence1_length": 496, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.42578125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-aaz_Latn": { + "num_samples": 256, + "number_of_characters": 101375, + "unique_pairs": 255, + "min_sentence1_length": 24, + 
"average_sentence1_length": 114.76953125, + "max_sentence1_length": 251, + "unique_sentence1": 255, + "min_sentence2_length": 33, + "average_sentence2_length": 281.2265625, + "max_sentence2_length": 1407, + "unique_sentence2": 255 + }, + "aaz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 101375, + "unique_pairs": 255, + "min_sentence1_length": 33, + "average_sentence1_length": 281.2265625, + "max_sentence1_length": 1407, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 114.76953125, + "max_sentence2_length": 251, + "unique_sentence2": 255 + }, + "eng_Latn-abt_Latn": { + "num_samples": 256, + "number_of_characters": 107325, + "unique_pairs": 255, + "min_sentence1_length": 1, + "average_sentence1_length": 146.171875, + "max_sentence1_length": 341, + "unique_sentence1": 255, + "min_sentence2_length": 29, + "average_sentence2_length": 273.06640625, + "max_sentence2_length": 758, + "unique_sentence2": 255 + }, + "abt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 107325, + "unique_pairs": 255, + "min_sentence1_length": 29, + "average_sentence1_length": 273.06640625, + "max_sentence1_length": 758, + "unique_sentence1": 255, + "min_sentence2_length": 1, + "average_sentence2_length": 146.171875, + "max_sentence2_length": 341, + "unique_sentence2": 255 + }, + "eng_Latn-abx_Latn": { + "num_samples": 256, + "number_of_characters": 76702, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.1796875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 185.4375, + "max_sentence2_length": 606, + "unique_sentence2": 256 + }, + "abx_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76702, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 185.4375, + "max_sentence1_length": 606, + "unique_sentence1": 256, + "min_sentence2_length": 24, + 
"average_sentence2_length": 114.1796875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-aby_Latn": { + "num_samples": 256, + "number_of_characters": 101648, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.16015625, + "max_sentence1_length": 228, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 282.90234375, + "max_sentence2_length": 931, + "unique_sentence2": 256 + }, + "aby_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 101648, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 282.90234375, + "max_sentence1_length": 931, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.16015625, + "max_sentence2_length": 228, + "unique_sentence2": 256 + }, + "eng_Latn-acf_Latn": { + "num_samples": 256, + "number_of_characters": 66720, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.02734375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 146.59765625, + "max_sentence2_length": 441, + "unique_sentence2": 256 + }, + "acf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66720, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 146.59765625, + "max_sentence1_length": 441, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.02734375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-acr_Latn": { + "num_samples": 256, + "number_of_characters": 88168, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 145.234375, + "max_sentence1_length": 341, + "unique_sentence1": 253, + "min_sentence2_length": 53, + "average_sentence2_length": 199.171875, + "max_sentence2_length": 474, + "unique_sentence2": 256 + }, + "acr_Latn-eng_Latn": { + "num_samples": 
256, + "number_of_characters": 88168, + "unique_pairs": 256, + "min_sentence1_length": 53, + "average_sentence1_length": 199.171875, + "max_sentence1_length": 474, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 145.234375, + "max_sentence2_length": 341, + "unique_sentence2": 253 + }, + "eng_Latn-acu_Latn": { + "num_samples": 256, + "number_of_characters": 92554, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.046875, + "max_sentence1_length": 238, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 249.4921875, + "max_sentence2_length": 641, + "unique_sentence2": 256 + }, + "acu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92554, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 249.4921875, + "max_sentence1_length": 641, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.046875, + "max_sentence2_length": 238, + "unique_sentence2": 256 + }, + "eng_Latn-adz_Latn": { + "num_samples": 256, + "number_of_characters": 65268, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 108.69140625, + "max_sentence1_length": 248, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 146.26171875, + "max_sentence2_length": 456, + "unique_sentence2": 256 + }, + "adz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65268, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 146.26171875, + "max_sentence1_length": 456, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 108.69140625, + "max_sentence2_length": 248, + "unique_sentence2": 256 + }, + "eng_Latn-aer_Latn": { + "num_samples": 256, + "number_of_characters": 147074, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.55859375, + 
"max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 48, + "average_sentence2_length": 463.94921875, + "max_sentence2_length": 1597, + "unique_sentence2": 256 + }, + "aer_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 147074, + "unique_pairs": 256, + "min_sentence1_length": 48, + "average_sentence1_length": 463.94921875, + "max_sentence1_length": 1597, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.55859375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-aey_Latn": { + "num_samples": 256, + "number_of_characters": 81800, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.3203125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 208.2109375, + "max_sentence2_length": 769, + "unique_sentence2": 256 + }, + "aey_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81800, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 208.2109375, + "max_sentence1_length": 769, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 111.3203125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-agd_Latn": { + "num_samples": 256, + "number_of_characters": 75440, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.18359375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 180.50390625, + "max_sentence2_length": 442, + "unique_sentence2": 256 + }, + "agd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75440, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 180.50390625, + "max_sentence1_length": 442, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.18359375, + 
"max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-agg_Latn": { + "num_samples": 256, + "number_of_characters": 92330, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.45703125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 247.20703125, + "max_sentence2_length": 852, + "unique_sentence2": 256 + }, + "agg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92330, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 247.20703125, + "max_sentence1_length": 852, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.45703125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-agm_Latn": { + "num_samples": 256, + "number_of_characters": 121086, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.86328125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 60, + "average_sentence2_length": 359.12890625, + "max_sentence2_length": 1291, + "unique_sentence2": 256 + }, + "agm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 121086, + "unique_pairs": 256, + "min_sentence1_length": 60, + "average_sentence1_length": 359.12890625, + "max_sentence1_length": 1291, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.86328125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-agn_Latn": { + "num_samples": 256, + "number_of_characters": 72743, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.05078125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 174.1015625, + "max_sentence2_length": 455, + "unique_sentence2": 256 + }, + "agn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72743, + 
"unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 174.1015625, + "max_sentence1_length": 455, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.05078125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-agr_Latn": { + "num_samples": 256, + "number_of_characters": 71589, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.0078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 164.63671875, + "max_sentence2_length": 402, + "unique_sentence2": 256 + }, + "agr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71589, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 164.63671875, + "max_sentence1_length": 402, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.0078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-agt_Latn": { + "num_samples": 256, + "number_of_characters": 94222, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 112.78125, + "max_sentence1_length": 271, + "unique_sentence1": 254, + "min_sentence2_length": 37, + "average_sentence2_length": 255.2734375, + "max_sentence2_length": 1289, + "unique_sentence2": 256 + }, + "agt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 94222, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 255.2734375, + "max_sentence1_length": 1289, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 112.78125, + "max_sentence2_length": 271, + "unique_sentence2": 254 + }, + "eng_Latn-agu_Latn": { + "num_samples": 256, + "number_of_characters": 77016, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.25, + "max_sentence1_length": 827, + "unique_sentence1": 256, + 
"min_sentence2_length": 42, + "average_sentence2_length": 186.59375, + "max_sentence2_length": 486, + "unique_sentence2": 256 + }, + "agu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77016, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 186.59375, + "max_sentence1_length": 486, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.25, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-aia_Latn": { + "num_samples": 256, + "number_of_characters": 83511, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.66015625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 211.5546875, + "max_sentence2_length": 753, + "unique_sentence2": 256 + }, + "aia_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83511, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 211.5546875, + "max_sentence1_length": 753, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.66015625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-aii_Syrc": { + "num_samples": 256, + "number_of_characters": 59217, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 118.921875, + "max_sentence2_length": 264, + "unique_sentence2": 256 + }, + "aii_Syrc-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59217, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 118.921875, + "max_sentence1_length": 264, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-aka_Latn": { + 
"num_samples": 256, + "number_of_characters": 56547, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 108.4921875, + "max_sentence2_length": 240, + "unique_sentence2": 255 + }, + "aka_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 56547, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 108.4921875, + "max_sentence1_length": 240, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ake_Latn": { + "num_samples": 256, + "number_of_characters": 65876, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.05078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 144.27734375, + "max_sentence2_length": 348, + "unique_sentence2": 256 + }, + "ake_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65876, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 144.27734375, + "max_sentence1_length": 348, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.05078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-alp_Latn": { + "num_samples": 256, + "number_of_characters": 78102, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.28515625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 193.80078125, + "max_sentence2_length": 605, + "unique_sentence2": 256 + }, + "alp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78102, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 
193.80078125, + "max_sentence1_length": 605, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.28515625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-alq_Latn": { + "num_samples": 256, + "number_of_characters": 83431, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.6328125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 210.26953125, + "max_sentence2_length": 629, + "unique_sentence2": 256 + }, + "alq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83431, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 210.26953125, + "max_sentence1_length": 629, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.6328125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-als_Latn": { + "num_samples": 256, + "number_of_characters": 58436, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 114.828125, + "max_sentence2_length": 265, + "unique_sentence2": 256 + }, + "als_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 58436, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 114.828125, + "max_sentence1_length": 265, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-aly_Latn": { + "num_samples": 256, + "number_of_characters": 108438, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 116.77734375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 57, + "average_sentence2_length": 306.80859375, + 
"max_sentence2_length": 2026, + "unique_sentence2": 256 + }, + "aly_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 108438, + "unique_pairs": 256, + "min_sentence1_length": 57, + "average_sentence1_length": 306.80859375, + "max_sentence1_length": 2026, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 116.77734375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ame_Latn": { + "num_samples": 256, + "number_of_characters": 119767, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.58984375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 358.25, + "max_sentence2_length": 1180, + "unique_sentence2": 256 + }, + "ame_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 119767, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 358.25, + "max_sentence1_length": 1180, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.58984375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-amf_Latn": { + "num_samples": 256, + "number_of_characters": 66084, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.21484375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 147.92578125, + "max_sentence2_length": 402, + "unique_sentence2": 256 + }, + "amf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66084, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 147.92578125, + "max_sentence1_length": 402, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.21484375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-amk_Latn": { + "num_samples": 256, + "number_of_characters": 82454, + 
"unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.08984375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 48, + "average_sentence2_length": 207.99609375, + "max_sentence2_length": 556, + "unique_sentence2": 256 + }, + "amk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82454, + "unique_pairs": 256, + "min_sentence1_length": 48, + "average_sentence1_length": 207.99609375, + "max_sentence1_length": 556, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.08984375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-amm_Latn": { + "num_samples": 256, + "number_of_characters": 83832, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 215.09375, + "max_sentence2_length": 714, + "unique_sentence2": 256 + }, + "amm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83832, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 215.09375, + "max_sentence1_length": 714, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-amn_Latn": { + "num_samples": 256, + "number_of_characters": 83291, + "unique_pairs": 255, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 29, + "average_sentence2_length": 180.0234375, + "max_sentence2_length": 484, + "unique_sentence2": 255 + }, + "amn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83291, + "unique_pairs": 255, + "min_sentence1_length": 29, + "average_sentence1_length": 180.0234375, + "max_sentence1_length": 484, + "unique_sentence1": 255, + 
"min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-amo_Latn": { + "num_samples": 256, + "number_of_characters": 57698, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.08203125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 7, + "average_sentence2_length": 111.30078125, + "max_sentence2_length": 294, + "unique_sentence2": 256 + }, + "amo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57698, + "unique_pairs": 256, + "min_sentence1_length": 7, + "average_sentence1_length": 111.30078125, + "max_sentence1_length": 294, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.08203125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-amp_Latn": { + "num_samples": 256, + "number_of_characters": 96272, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.0625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 264.0, + "max_sentence2_length": 1162, + "unique_sentence2": 256 + }, + "amp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 96272, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 264.0, + "max_sentence1_length": 1162, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.0625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-amr_Latn": { + "num_samples": 256, + "number_of_characters": 98557, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.26953125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 272.71875, + "max_sentence2_length": 805, + "unique_sentence2": 256 + }, + "amr_Latn-eng_Latn": { + 
"num_samples": 256, + "number_of_characters": 98557, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 272.71875, + "max_sentence1_length": 805, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.26953125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-amu_Latn": { + "num_samples": 256, + "number_of_characters": 76635, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.24609375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 189.109375, + "max_sentence2_length": 505, + "unique_sentence2": 256 + }, + "amu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76635, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 189.109375, + "max_sentence1_length": 505, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.24609375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-amx_Latn": { + "num_samples": 256, + "number_of_characters": 96369, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.9609375, + "max_sentence1_length": 243, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 265.48046875, + "max_sentence2_length": 925, + "unique_sentence2": 256 + }, + "amx_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 96369, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 265.48046875, + "max_sentence1_length": 925, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.9609375, + "max_sentence2_length": 243, + "unique_sentence2": 256 + }, + "eng_Latn-anh_Latn": { + "num_samples": 111, + "number_of_characters": 36533, + "unique_pairs": 111, + "min_sentence1_length": 50, + "average_sentence1_length": 
110.45045045045045, + "max_sentence1_length": 257, + "unique_sentence1": 110, + "min_sentence2_length": 58, + "average_sentence2_length": 218.67567567567568, + "max_sentence2_length": 1063, + "unique_sentence2": 111 + }, + "anh_Latn-eng_Latn": { + "num_samples": 111, + "number_of_characters": 36533, + "unique_pairs": 111, + "min_sentence1_length": 58, + "average_sentence1_length": 218.67567567567568, + "max_sentence1_length": 1063, + "unique_sentence1": 111, + "min_sentence2_length": 50, + "average_sentence2_length": 110.45045045045045, + "max_sentence2_length": 257, + "unique_sentence2": 110 + }, + "eng_Latn-anv_Latn": { + "num_samples": 256, + "number_of_characters": 68939, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.015625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 155.27734375, + "max_sentence2_length": 441, + "unique_sentence2": 256 + }, + "anv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68939, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 155.27734375, + "max_sentence1_length": 441, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.015625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-aoi_Latn": { + "num_samples": 256, + "number_of_characters": 139564, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 103.12890625, + "max_sentence1_length": 245, + "unique_sentence1": 256, + "min_sentence2_length": 84, + "average_sentence2_length": 442.04296875, + "max_sentence2_length": 1797, + "unique_sentence2": 256 + }, + "aoi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 139564, + "unique_pairs": 256, + "min_sentence1_length": 84, + "average_sentence1_length": 442.04296875, + "max_sentence1_length": 1797, + "unique_sentence1": 256, + "min_sentence2_length": 23, + 
"average_sentence2_length": 103.12890625, + "max_sentence2_length": 245, + "unique_sentence2": 256 + }, + "eng_Latn-aoj_Latn": { + "num_samples": 256, + "number_of_characters": 101320, + "unique_pairs": 256, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 44, + "average_sentence2_length": 250.44921875, + "max_sentence2_length": 607, + "unique_sentence2": 256 + }, + "aoj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 101320, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 250.44921875, + "max_sentence1_length": 607, + "unique_sentence1": 256, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-aom_Latn": { + "num_samples": 256, + "number_of_characters": 88120, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.6953125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 229.5234375, + "max_sentence2_length": 756, + "unique_sentence2": 256 + }, + "aom_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88120, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 229.5234375, + "max_sentence1_length": 756, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.6953125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-aon_Latn": { + "num_samples": 256, + "number_of_characters": 107978, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 110.48046875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 311.30859375, + "max_sentence2_length": 1125, + "unique_sentence2": 256 + }, + "aon_Latn-eng_Latn": { + "num_samples": 
256, + "number_of_characters": 107978, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 311.30859375, + "max_sentence1_length": 1125, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 110.48046875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-apb_Latn": { + "num_samples": 256, + "number_of_characters": 92614, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.05078125, + "max_sentence1_length": 246, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 249.72265625, + "max_sentence2_length": 1260, + "unique_sentence2": 256 + }, + "apb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92614, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 249.72265625, + "max_sentence1_length": 1260, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.05078125, + "max_sentence2_length": 246, + "unique_sentence2": 256 + }, + "eng_Latn-ape_Latn": { + "num_samples": 256, + "number_of_characters": 117019, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 146.4375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 69, + "average_sentence2_length": 310.66796875, + "max_sentence2_length": 772, + "unique_sentence2": 256 + }, + "ape_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 117019, + "unique_pairs": 256, + "min_sentence1_length": 69, + "average_sentence1_length": 310.66796875, + "max_sentence1_length": 772, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 146.4375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-apn_Latn": { + "num_samples": 256, + "number_of_characters": 144000, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.38671875, + 
"max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 61, + "average_sentence2_length": 448.11328125, + "max_sentence2_length": 1608, + "unique_sentence2": 256 + }, + "apn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 144000, + "unique_pairs": 256, + "min_sentence1_length": 61, + "average_sentence1_length": 448.11328125, + "max_sentence1_length": 1608, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.38671875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-apr_Latn": { + "num_samples": 256, + "number_of_characters": 97297, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.44140625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 54, + "average_sentence2_length": 269.625, + "max_sentence2_length": 956, + "unique_sentence2": 256 + }, + "apr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97297, + "unique_pairs": 256, + "min_sentence1_length": 54, + "average_sentence1_length": 269.625, + "max_sentence1_length": 956, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.44140625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-apu_Latn": { + "num_samples": 256, + "number_of_characters": 80398, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 198.68359375, + "max_sentence2_length": 719, + "unique_sentence2": 256 + }, + "apu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80398, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 198.68359375, + "max_sentence1_length": 719, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + 
"max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-apw_Latn": { + "num_samples": 256, + "number_of_characters": 67616, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 150.6875, + "max_sentence2_length": 321, + "unique_sentence2": 256 + }, + "apw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67616, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 150.6875, + "max_sentence1_length": 321, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-apz_Latn": { + "num_samples": 256, + "number_of_characters": 109019, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.77734375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 47, + "average_sentence2_length": 314.078125, + "max_sentence2_length": 1291, + "unique_sentence2": 256 + }, + "apz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 109019, + "unique_pairs": 256, + "min_sentence1_length": 47, + "average_sentence1_length": 314.078125, + "max_sentence1_length": 1291, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 111.77734375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-arb_Arab": { + "num_samples": 256, + "number_of_characters": 79287, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 54, + "average_sentence2_length": 162.15625, + "max_sentence2_length": 381, + "unique_sentence2": 255 + }, + "arb_Arab-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79287, + "unique_pairs": 255, 
+ "min_sentence1_length": 54, + "average_sentence1_length": 162.15625, + "max_sentence1_length": 381, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-are_Latn": { + "num_samples": 256, + "number_of_characters": 76005, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.390625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 182.50390625, + "max_sentence2_length": 559, + "unique_sentence2": 256 + }, + "are_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76005, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 182.50390625, + "max_sentence1_length": 559, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.390625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-arl_Latn": { + "num_samples": 256, + "number_of_characters": 103620, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.2421875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 55, + "average_sentence2_length": 294.5234375, + "max_sentence2_length": 1347, + "unique_sentence2": 256 + }, + "arl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 103620, + "unique_pairs": 256, + "min_sentence1_length": 55, + "average_sentence1_length": 294.5234375, + "max_sentence1_length": 1347, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.2421875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-arn_Latn": { + "num_samples": 256, + "number_of_characters": 72620, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 113.11328125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + 
"min_sentence2_length": 38, + "average_sentence2_length": 170.55859375, + "max_sentence2_length": 485, + "unique_sentence2": 256 + }, + "arn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72620, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 170.55859375, + "max_sentence1_length": 485, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 113.11328125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-arp_Latn": { + "num_samples": 93, + "number_of_characters": 21116, + "unique_pairs": 93, + "min_sentence1_length": 37, + "average_sentence1_length": 107.47311827956989, + "max_sentence1_length": 245, + "unique_sentence1": 92, + "min_sentence2_length": 34, + "average_sentence2_length": 119.58064516129032, + "max_sentence2_length": 272, + "unique_sentence2": 93 + }, + "arp_Latn-eng_Latn": { + "num_samples": 93, + "number_of_characters": 21116, + "unique_pairs": 93, + "min_sentence1_length": 34, + "average_sentence1_length": 119.58064516129032, + "max_sentence1_length": 272, + "unique_sentence1": 93, + "min_sentence2_length": 37, + "average_sentence2_length": 107.47311827956989, + "max_sentence2_length": 245, + "unique_sentence2": 92 + }, + "eng_Latn-asm_Beng": { + "num_samples": 256, + "number_of_characters": 60257, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 22, + "average_sentence2_length": 122.984375, + "max_sentence2_length": 307, + "unique_sentence2": 256 + }, + "asm_Beng-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60257, + "unique_pairs": 256, + "min_sentence1_length": 22, + "average_sentence1_length": 122.984375, + "max_sentence1_length": 307, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + 
}, + "eng_Latn-aso_Latn": { + "num_samples": 256, + "number_of_characters": 106955, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.3828125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 309.41015625, + "max_sentence2_length": 1257, + "unique_sentence2": 255 + }, + "aso_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 106955, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 309.41015625, + "max_sentence1_length": 1257, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 108.3828125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ata_Latn": { + "num_samples": 256, + "number_of_characters": 79821, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.84375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 198.95703125, + "max_sentence2_length": 571, + "unique_sentence2": 256 + }, + "ata_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79821, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 198.95703125, + "max_sentence1_length": 571, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.84375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-atb_Latn": { + "num_samples": 256, + "number_of_characters": 71974, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.2265625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 170.921875, + "max_sentence2_length": 439, + "unique_sentence2": 256 + }, + "atb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71974, + "unique_pairs": 256, + "min_sentence1_length": 30, + 
"average_sentence1_length": 170.921875, + "max_sentence1_length": 439, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.2265625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-atd_Latn": { + "num_samples": 256, + "number_of_characters": 88789, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.36328125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 235.46875, + "max_sentence2_length": 815, + "unique_sentence2": 256 + }, + "atd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88789, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 235.46875, + "max_sentence1_length": 815, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.36328125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-atg_Latn": { + "num_samples": 256, + "number_of_characters": 62871, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.75390625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 135.8359375, + "max_sentence2_length": 372, + "unique_sentence2": 256 + }, + "atg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62871, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 135.8359375, + "max_sentence1_length": 372, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.75390625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-att_Latn": { + "num_samples": 256, + "number_of_characters": 84332, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 38, + 
"average_sentence2_length": 217.02734375, + "max_sentence2_length": 622, + "unique_sentence2": 256 + }, + "att_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84332, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 217.02734375, + "max_sentence1_length": 622, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-auc_Latn": { + "num_samples": 256, + "number_of_characters": 96444, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 58, + "average_sentence2_length": 262.625, + "max_sentence2_length": 912, + "unique_sentence2": 256 + }, + "auc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 96444, + "unique_pairs": 256, + "min_sentence1_length": 58, + "average_sentence1_length": 262.625, + "max_sentence1_length": 912, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-aui_Latn": { + "num_samples": 256, + "number_of_characters": 74752, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 115.68359375, + "max_sentence1_length": 269, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 176.31640625, + "max_sentence2_length": 760, + "unique_sentence2": 256 + }, + "aui_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74752, + "unique_pairs": 256, + "min_sentence1_length": 56, + "average_sentence1_length": 176.31640625, + "max_sentence1_length": 760, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 115.68359375, + "max_sentence2_length": 269, + "unique_sentence2": 255 + }, + "eng_Latn-auy_Latn": { + "num_samples": 256, + 
"number_of_characters": 80390, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.5234375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 205.5, + "max_sentence2_length": 632, + "unique_sentence2": 256 + }, + "auy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80390, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 205.5, + "max_sentence1_length": 632, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.5234375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-avt_Latn": { + "num_samples": 256, + "number_of_characters": 79665, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.71484375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 200.4765625, + "max_sentence2_length": 657, + "unique_sentence2": 256 + }, + "avt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79665, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 200.4765625, + "max_sentence1_length": 657, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.71484375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-awb_Latn": { + "num_samples": 256, + "number_of_characters": 73673, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.54296875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 178.2421875, + "max_sentence2_length": 492, + "unique_sentence2": 256 + }, + "awb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73673, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 178.2421875, + "max_sentence1_length": 492, + 
"unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.54296875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-awk_Latn": { + "num_samples": 99, + "number_of_characters": 23125, + "unique_pairs": 99, + "min_sentence1_length": 37, + "average_sentence1_length": 106.98989898989899, + "max_sentence1_length": 245, + "unique_sentence1": 98, + "min_sentence2_length": 49, + "average_sentence2_length": 126.5959595959596, + "max_sentence2_length": 250, + "unique_sentence2": 99 + }, + "awk_Latn-eng_Latn": { + "num_samples": 99, + "number_of_characters": 23125, + "unique_pairs": 99, + "min_sentence1_length": 49, + "average_sentence1_length": 126.5959595959596, + "max_sentence1_length": 250, + "unique_sentence1": 99, + "min_sentence2_length": 37, + "average_sentence2_length": 106.98989898989899, + "max_sentence2_length": 245, + "unique_sentence2": 98 + }, + "eng_Latn-awx_Latn": { + "num_samples": 256, + "number_of_characters": 65407, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 120.97265625, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 17, + "average_sentence2_length": 134.5234375, + "max_sentence2_length": 439, + "unique_sentence2": 256 + }, + "awx_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65407, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 134.5234375, + "max_sentence1_length": 439, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 120.97265625, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "eng_Latn-azb_Arab": { + "num_samples": 256, + "number_of_characters": 56042, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.0859375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 17, + "average_sentence2_length": 108.828125, + "max_sentence2_length": 333, + 
"unique_sentence2": 255 + }, + "azb_Arab-eng_Latn": { + "num_samples": 256, + "number_of_characters": 56042, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 108.828125, + "max_sentence1_length": 333, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 110.0859375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-azg_Latn": { + "num_samples": 256, + "number_of_characters": 89539, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.5390625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 52, + "average_sentence2_length": 236.22265625, + "max_sentence2_length": 641, + "unique_sentence2": 256 + }, + "azg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89539, + "unique_pairs": 256, + "min_sentence1_length": 52, + "average_sentence1_length": 236.22265625, + "max_sentence1_length": 641, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.5390625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-azz_Latn": { + "num_samples": 256, + "number_of_characters": 100818, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.234375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 74, + "average_sentence2_length": 282.5859375, + "max_sentence2_length": 618, + "unique_sentence2": 256 + }, + "azz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 100818, + "unique_pairs": 256, + "min_sentence1_length": 74, + "average_sentence1_length": 282.5859375, + "max_sentence1_length": 618, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.234375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bao_Latn": { + "num_samples": 256, + "number_of_characters": 73239, + "unique_pairs": 256, + 
"min_sentence1_length": 24, + "average_sentence1_length": 109.05078125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 177.0390625, + "max_sentence2_length": 444, + "unique_sentence2": 256 + }, + "bao_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73239, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 177.0390625, + "max_sentence1_length": 444, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.05078125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bba_Latn": { + "num_samples": 256, + "number_of_characters": 59605, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.53125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 22, + "average_sentence2_length": 122.30078125, + "max_sentence2_length": 582, + "unique_sentence2": 256 + }, + "bba_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59605, + "unique_pairs": 256, + "min_sentence1_length": 22, + "average_sentence1_length": 122.30078125, + "max_sentence1_length": 582, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.53125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bbb_Latn": { + "num_samples": 256, + "number_of_characters": 92270, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.83984375, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 251.58984375, + "max_sentence2_length": 671, + "unique_sentence2": 256 + }, + "bbb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92270, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 251.58984375, + "max_sentence1_length": 671, + "unique_sentence1": 256, + 
"min_sentence2_length": 24, + "average_sentence2_length": 108.83984375, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "eng_Latn-bbr_Latn": { + "num_samples": 256, + "number_of_characters": 85199, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.89453125, + "max_sentence1_length": 216, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 220.9140625, + "max_sentence2_length": 1295, + "unique_sentence2": 256 + }, + "bbr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85199, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 220.9140625, + "max_sentence1_length": 1295, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.89453125, + "max_sentence2_length": 216, + "unique_sentence2": 256 + }, + "eng_Latn-bch_Latn": { + "num_samples": 256, + "number_of_characters": 78218, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.9765625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 194.5625, + "max_sentence2_length": 699, + "unique_sentence2": 256 + }, + "bch_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78218, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 194.5625, + "max_sentence1_length": 699, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.9765625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bco_Latn": { + "num_samples": 256, + "number_of_characters": 87515, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 111.3359375, + "max_sentence1_length": 243, + "unique_sentence1": 255, + "min_sentence2_length": 34, + "average_sentence2_length": 230.51953125, + "max_sentence2_length": 688, + "unique_sentence2": 256 + }, + 
"bco_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87515, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 230.51953125, + "max_sentence1_length": 688, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 111.3359375, + "max_sentence2_length": 243, + "unique_sentence2": 255 + }, + "eng_Latn-bdd_Latn": { + "num_samples": 256, + "number_of_characters": 70106, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.73828125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 164.11328125, + "max_sentence2_length": 432, + "unique_sentence2": 256 + }, + "bdd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70106, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 164.11328125, + "max_sentence1_length": 432, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.73828125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bea_Latn": { + "num_samples": 150, + "number_of_characters": 35820, + "unique_pairs": 150, + "min_sentence1_length": 45, + "average_sentence1_length": 114.08666666666667, + "max_sentence1_length": 257, + "unique_sentence1": 149, + "min_sentence2_length": 45, + "average_sentence2_length": 124.71333333333334, + "max_sentence2_length": 289, + "unique_sentence2": 150 + }, + "bea_Latn-eng_Latn": { + "num_samples": 150, + "number_of_characters": 35820, + "unique_pairs": 150, + "min_sentence1_length": 45, + "average_sentence1_length": 124.71333333333334, + "max_sentence1_length": 289, + "unique_sentence1": 150, + "min_sentence2_length": 45, + "average_sentence2_length": 114.08666666666667, + "max_sentence2_length": 257, + "unique_sentence2": 149 + }, + "eng_Latn-bef_Latn": { + "num_samples": 256, + "number_of_characters": 90500, + "unique_pairs": 256, + 
"min_sentence1_length": 32, + "average_sentence1_length": 110.01171875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 57, + "average_sentence2_length": 243.50390625, + "max_sentence2_length": 693, + "unique_sentence2": 256 + }, + "bef_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90500, + "unique_pairs": 256, + "min_sentence1_length": 57, + "average_sentence1_length": 243.50390625, + "max_sentence1_length": 693, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 110.01171875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bel_Cyrl": { + "num_samples": 256, + "number_of_characters": 70443, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 40, + "average_sentence2_length": 127.609375, + "max_sentence2_length": 264, + "unique_sentence2": 255 + }, + "bel_Cyrl-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70443, + "unique_pairs": 255, + "min_sentence1_length": 40, + "average_sentence1_length": 127.609375, + "max_sentence1_length": 264, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-ben_Beng": { + "num_samples": 256, + "number_of_characters": 58059, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 114.9375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 111.85546875, + "max_sentence2_length": 238, + "unique_sentence2": 256 + }, + "ben_Beng-eng_Latn": { + "num_samples": 256, + "number_of_characters": 58059, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 111.85546875, + "max_sentence1_length": 238, + "unique_sentence1": 256, + 
"min_sentence2_length": 21, + "average_sentence2_length": 114.9375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-beo_Latn": { + "num_samples": 256, + "number_of_characters": 74029, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.91796875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 1, + "average_sentence2_length": 176.2578125, + "max_sentence2_length": 515, + "unique_sentence2": 251 + }, + "beo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74029, + "unique_pairs": 256, + "min_sentence1_length": 1, + "average_sentence1_length": 176.2578125, + "max_sentence1_length": 515, + "unique_sentence1": 251, + "min_sentence2_length": 24, + "average_sentence2_length": 112.91796875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-beu_Latn": { + "num_samples": 256, + "number_of_characters": 99330, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 114.171875, + "max_sentence1_length": 257, + "unique_sentence1": 254, + "min_sentence2_length": 44, + "average_sentence2_length": 273.8359375, + "max_sentence2_length": 1204, + "unique_sentence2": 256 + }, + "beu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99330, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 273.8359375, + "max_sentence1_length": 1204, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 114.171875, + "max_sentence2_length": 257, + "unique_sentence2": 254 + }, + "eng_Latn-bgs_Latn": { + "num_samples": 256, + "number_of_characters": 72317, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.97265625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 167.515625, + "max_sentence2_length": 596, + "unique_sentence2": 256 + }, + "bgs_Latn-eng_Latn": { 
+ "num_samples": 256, + "number_of_characters": 72317, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 167.515625, + "max_sentence1_length": 596, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.97265625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-bgt_Latn": { + "num_samples": 256, + "number_of_characters": 88221, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.68359375, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 230.9296875, + "max_sentence2_length": 876, + "unique_sentence2": 256 + }, + "bgt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88221, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 230.9296875, + "max_sentence1_length": 876, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.68359375, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-bhg_Latn": { + "num_samples": 256, + "number_of_characters": 76245, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 111.7578125, + "max_sentence1_length": 243, + "unique_sentence1": 254, + "min_sentence2_length": 46, + "average_sentence2_length": 186.07421875, + "max_sentence2_length": 589, + "unique_sentence2": 256 + }, + "bhg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76245, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 186.07421875, + "max_sentence1_length": 589, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 111.7578125, + "max_sentence2_length": 243, + "unique_sentence2": 254 + }, + "eng_Latn-bhl_Latn": { + "num_samples": 256, + "number_of_characters": 97461, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.8671875, 
+ "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 60, + "average_sentence2_length": 268.83984375, + "max_sentence2_length": 1263, + "unique_sentence2": 256 + }, + "bhl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97461, + "unique_pairs": 256, + "min_sentence1_length": 60, + "average_sentence1_length": 268.83984375, + "max_sentence1_length": 1263, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 111.8671875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-big_Latn": { + "num_samples": 256, + "number_of_characters": 91431, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.85546875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 245.296875, + "max_sentence2_length": 1052, + "unique_sentence2": 256 + }, + "big_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 91431, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 245.296875, + "max_sentence1_length": 1052, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 111.85546875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bjk_Latn": { + "num_samples": 256, + "number_of_characters": 78424, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.4609375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 196.8828125, + "max_sentence2_length": 654, + "unique_sentence2": 256 + }, + "bjk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78424, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 196.8828125, + "max_sentence1_length": 654, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.4609375, + 
"max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bjp_Latn": { + "num_samples": 256, + "number_of_characters": 68218, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.14453125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 151.33203125, + "max_sentence2_length": 412, + "unique_sentence2": 256 + }, + "bjp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68218, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 151.33203125, + "max_sentence1_length": 412, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.14453125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-bjr_Latn": { + "num_samples": 256, + "number_of_characters": 107418, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.734375, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 306.8671875, + "max_sentence2_length": 1198, + "unique_sentence2": 256 + }, + "bjr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 107418, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 306.8671875, + "max_sentence1_length": 1198, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.734375, + "max_sentence2_length": 239, + "unique_sentence2": 256 + }, + "eng_Latn-bjv_Latn": { + "num_samples": 256, + "number_of_characters": 65683, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 141.203125, + "max_sentence2_length": 331, + "unique_sentence2": 256 + }, + "bjv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65683, + 
"unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 141.203125, + "max_sentence1_length": 331, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-bjz_Latn": { + "num_samples": 256, + "number_of_characters": 97546, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.48828125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 269.55078125, + "max_sentence2_length": 907, + "unique_sentence2": 256 + }, + "bjz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97546, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 269.55078125, + "max_sentence1_length": 907, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.48828125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bkd_Latn": { + "num_samples": 256, + "number_of_characters": 67869, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.11328125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 154.0, + "max_sentence2_length": 436, + "unique_sentence2": 255 + }, + "bkd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67869, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 154.0, + "max_sentence1_length": 436, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 111.11328125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bki_Latn": { + "num_samples": 256, + "number_of_characters": 97698, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.66015625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + 
"min_sentence2_length": 28, + "average_sentence2_length": 266.97265625, + "max_sentence2_length": 1015, + "unique_sentence2": 256 + }, + "bki_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97698, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 266.97265625, + "max_sentence1_length": 1015, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.66015625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-bkq_Latn": { + "num_samples": 256, + "number_of_characters": 89576, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.3125, + "max_sentence1_length": 216, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 237.59375, + "max_sentence2_length": 901, + "unique_sentence2": 256 + }, + "bkq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89576, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 237.59375, + "max_sentence1_length": 901, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.3125, + "max_sentence2_length": 216, + "unique_sentence2": 256 + }, + "eng_Latn-bkx_Latn": { + "num_samples": 128, + "number_of_characters": 42219, + "unique_pairs": 127, + "min_sentence1_length": 45, + "average_sentence1_length": 112.90625, + "max_sentence1_length": 257, + "unique_sentence1": 127, + "min_sentence2_length": 54, + "average_sentence2_length": 216.9296875, + "max_sentence2_length": 789, + "unique_sentence2": 127 + }, + "bkx_Latn-eng_Latn": { + "num_samples": 128, + "number_of_characters": 42219, + "unique_pairs": 127, + "min_sentence1_length": 54, + "average_sentence1_length": 216.9296875, + "max_sentence1_length": 789, + "unique_sentence1": 127, + "min_sentence2_length": 45, + "average_sentence2_length": 112.90625, + "max_sentence2_length": 257, + "unique_sentence2": 127 + }, + "eng_Latn-blw_Latn": { + 
"num_samples": 256, + "number_of_characters": 81606, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.12890625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 209.64453125, + "max_sentence2_length": 667, + "unique_sentence2": 256 + }, + "blw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81606, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 209.64453125, + "max_sentence1_length": 667, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.12890625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-blz_Latn": { + "num_samples": 256, + "number_of_characters": 71107, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.46875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 162.29296875, + "max_sentence2_length": 397, + "unique_sentence2": 256 + }, + "blz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71107, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 162.29296875, + "max_sentence1_length": 397, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.46875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-bmh_Latn": { + "num_samples": 256, + "number_of_characters": 86345, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.51171875, + "max_sentence1_length": 273, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 225.7734375, + "max_sentence2_length": 1302, + "unique_sentence2": 256 + }, + "bmh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 86345, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 225.7734375, + 
"max_sentence1_length": 1302, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.51171875, + "max_sentence2_length": 273, + "unique_sentence2": 256 + }, + "eng_Latn-bmk_Latn": { + "num_samples": 127, + "number_of_characters": 34118, + "unique_pairs": 127, + "min_sentence1_length": 39, + "average_sentence1_length": 111.0, + "max_sentence1_length": 257, + "unique_sentence1": 126, + "min_sentence2_length": 44, + "average_sentence2_length": 157.64566929133858, + "max_sentence2_length": 722, + "unique_sentence2": 127 + }, + "bmk_Latn-eng_Latn": { + "num_samples": 127, + "number_of_characters": 34118, + "unique_pairs": 127, + "min_sentence1_length": 44, + "average_sentence1_length": 157.64566929133858, + "max_sentence1_length": 722, + "unique_sentence1": 127, + "min_sentence2_length": 39, + "average_sentence2_length": 111.0, + "max_sentence2_length": 257, + "unique_sentence2": 126 + }, + "eng_Latn-bmr_Latn": { + "num_samples": 256, + "number_of_characters": 67617, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.98828125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 152.140625, + "max_sentence2_length": 348, + "unique_sentence2": 256 + }, + "bmr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67617, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 152.140625, + "max_sentence1_length": 348, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.98828125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-bmu_Latn": { + "num_samples": 256, + "number_of_characters": 84080, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.6640625, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 217.7734375, + 
"max_sentence2_length": 623, + "unique_sentence2": 256 + }, + "bmu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84080, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 217.7734375, + "max_sentence1_length": 623, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6640625, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-bnp_Latn": { + "num_samples": 256, + "number_of_characters": 70748, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.5703125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 164.7890625, + "max_sentence2_length": 753, + "unique_sentence2": 256 + }, + "bnp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70748, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 164.7890625, + "max_sentence1_length": 753, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.5703125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-boa_Latn": { + "num_samples": 256, + "number_of_characters": 80696, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 115.5859375, + "max_sentence1_length": 273, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 199.6328125, + "max_sentence2_length": 612, + "unique_sentence2": 256 + }, + "boa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80696, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 199.6328125, + "max_sentence1_length": 612, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 115.5859375, + "max_sentence2_length": 273, + "unique_sentence2": 256 + }, + "eng_Latn-boj_Latn": { + "num_samples": 256, + "number_of_characters": 93099, + 
"unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 43, + "average_sentence2_length": 218.3359375, + "max_sentence2_length": 614, + "unique_sentence2": 254 + }, + "boj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93099, + "unique_pairs": 254, + "min_sentence1_length": 43, + "average_sentence1_length": 218.3359375, + "max_sentence1_length": 614, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-bon_Latn": { + "num_samples": 256, + "number_of_characters": 81949, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 110.4765625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 209.63671875, + "max_sentence2_length": 808, + "unique_sentence2": 256 + }, + "bon_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81949, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 209.63671875, + "max_sentence1_length": 808, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 110.4765625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-box_Latn": { + "num_samples": 256, + "number_of_characters": 68162, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.04296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 151.21484375, + "max_sentence2_length": 408, + "unique_sentence2": 256 + }, + "box_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68162, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 151.21484375, + "max_sentence1_length": 408, + "unique_sentence1": 
256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.04296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-bpr_Latn": { + "num_samples": 256, + "number_of_characters": 60407, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.71484375, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 124.25, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "bpr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60407, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 124.25, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.71484375, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-bps_Latn": { + "num_samples": 256, + "number_of_characters": 66193, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.96875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 147.59765625, + "max_sentence2_length": 536, + "unique_sentence2": 256 + }, + "bps_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66193, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 147.59765625, + "max_sentence1_length": 536, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.96875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bqc_Latn": { + "num_samples": 256, + "number_of_characters": 49809, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.90625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 84.66015625, + "max_sentence2_length": 215, + "unique_sentence2": 256 + }, + "bqc_Latn-eng_Latn": { 
+ "num_samples": 256, + "number_of_characters": 49809, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 84.66015625, + "max_sentence1_length": 215, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.90625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bqp_Latn": { + "num_samples": 256, + "number_of_characters": 53337, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.90625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 22, + "average_sentence2_length": 98.44140625, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "bqp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 53337, + "unique_pairs": 256, + "min_sentence1_length": 22, + "average_sentence1_length": 98.44140625, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.90625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bre_Latn": { + "num_samples": 256, + "number_of_characters": 55085, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 107.06640625, + "max_sentence1_length": 245, + "unique_sentence1": 254, + "min_sentence2_length": 32, + "average_sentence2_length": 108.109375, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "bre_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 55085, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 108.109375, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 107.06640625, + "max_sentence2_length": 245, + "unique_sentence2": 254 + }, + "eng_Latn-bsj_Latn": { + "num_samples": 256, + "number_of_characters": 57602, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.875, + 
"max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.1328125, + "max_sentence2_length": 401, + "unique_sentence2": 256 + }, + "bsj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57602, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.1328125, + "max_sentence1_length": 401, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-bsn_Latn": { + "num_samples": 256, + "number_of_characters": 101102, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.49609375, + "max_sentence1_length": 230, + "unique_sentence1": 254, + "min_sentence2_length": 44, + "average_sentence2_length": 284.43359375, + "max_sentence2_length": 974, + "unique_sentence2": 256 + }, + "bsn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 101102, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 284.43359375, + "max_sentence1_length": 974, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.49609375, + "max_sentence2_length": 230, + "unique_sentence2": 254 + }, + "eng_Latn-bsp_Latn": { + "num_samples": 256, + "number_of_characters": 57985, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 113.30859375, + "max_sentence1_length": 245, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 113.1953125, + "max_sentence2_length": 276, + "unique_sentence2": 256 + }, + "bsp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57985, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 113.1953125, + "max_sentence1_length": 276, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 113.30859375, + 
"max_sentence2_length": 245, + "unique_sentence2": 256 + }, + "eng_Latn-bss_Latn": { + "num_samples": 256, + "number_of_characters": 68361, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 154.640625, + "max_sentence2_length": 497, + "unique_sentence2": 256 + }, + "bss_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68361, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 154.640625, + "max_sentence1_length": 497, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-buk_Latn": { + "num_samples": 256, + "number_of_characters": 77969, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.93359375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 194.6328125, + "max_sentence2_length": 586, + "unique_sentence2": 256 + }, + "buk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77969, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 194.6328125, + "max_sentence1_length": 586, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.93359375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bus_Latn": { + "num_samples": 256, + "number_of_characters": 53228, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.90625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 22, + "average_sentence2_length": 98.015625, + "max_sentence2_length": 239, + "unique_sentence2": 256 + }, + "bus_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 53228, + 
"unique_pairs": 256, + "min_sentence1_length": 22, + "average_sentence1_length": 98.015625, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.90625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bvd_Latn": { + "num_samples": 256, + "number_of_characters": 81383, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.15234375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 204.75, + "max_sentence2_length": 615, + "unique_sentence2": 256 + }, + "bvd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81383, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 204.75, + "max_sentence1_length": 615, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.15234375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-bvr_Latn": { + "num_samples": 256, + "number_of_characters": 107539, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.515625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 49, + "average_sentence2_length": 306.55859375, + "max_sentence2_length": 773, + "unique_sentence2": 256 + }, + "bvr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 107539, + "unique_pairs": 256, + "min_sentence1_length": 49, + "average_sentence1_length": 306.55859375, + "max_sentence1_length": 773, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.515625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-bxh_Latn": { + "num_samples": 256, + "number_of_characters": 63690, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 115.453125, + "max_sentence1_length": 257, + "unique_sentence1": 254, + 
"min_sentence2_length": 23, + "average_sentence2_length": 133.3359375, + "max_sentence2_length": 387, + "unique_sentence2": 256 + }, + "bxh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63690, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 133.3359375, + "max_sentence1_length": 387, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 115.453125, + "max_sentence2_length": 257, + "unique_sentence2": 254 + }, + "eng_Latn-byr_Latn": { + "num_samples": 256, + "number_of_characters": 85268, + "unique_pairs": 253, + "min_sentence1_length": 24, + "average_sentence1_length": 107.50390625, + "max_sentence1_length": 227, + "unique_sentence1": 250, + "min_sentence2_length": 33, + "average_sentence2_length": 225.57421875, + "max_sentence2_length": 556, + "unique_sentence2": 253 + }, + "byr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85268, + "unique_pairs": 253, + "min_sentence1_length": 33, + "average_sentence1_length": 225.57421875, + "max_sentence1_length": 556, + "unique_sentence1": 253, + "min_sentence2_length": 24, + "average_sentence2_length": 107.50390625, + "max_sentence2_length": 227, + "unique_sentence2": 250 + }, + "eng_Latn-byx_Latn": { + "num_samples": 256, + "number_of_characters": 98784, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.93359375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 276.94140625, + "max_sentence2_length": 1031, + "unique_sentence2": 256 + }, + "byx_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 98784, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 276.94140625, + "max_sentence1_length": 1031, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.93359375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + 
"eng_Latn-bzd_Latn": { + "num_samples": 256, + "number_of_characters": 80046, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.390625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 202.2890625, + "max_sentence2_length": 587, + "unique_sentence2": 256 + }, + "bzd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80046, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 202.2890625, + "max_sentence1_length": 587, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.390625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-bzh_Latn": { + "num_samples": 256, + "number_of_characters": 72250, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.41796875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 166.80859375, + "max_sentence2_length": 511, + "unique_sentence2": 256 + }, + "bzh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72250, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 166.80859375, + "max_sentence1_length": 511, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.41796875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-bzj_Latn": { + "num_samples": 256, + "number_of_characters": 65571, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.90625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 146.23046875, + "max_sentence2_length": 447, + "unique_sentence2": 256 + }, + "bzj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65571, + "unique_pairs": 256, + "min_sentence1_length": 30, + 
"average_sentence1_length": 146.23046875, + "max_sentence1_length": 447, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.90625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-caa_Latn": { + "num_samples": 256, + "number_of_characters": 88161, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 229.0078125, + "max_sentence2_length": 628, + "unique_sentence2": 256 + }, + "caa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88161, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 229.0078125, + "max_sentence1_length": 628, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cab_Latn": { + "num_samples": 256, + "number_of_characters": 69881, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.90234375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 163.0703125, + "max_sentence2_length": 518, + "unique_sentence2": 256 + }, + "cab_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69881, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 163.0703125, + "max_sentence1_length": 518, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.90234375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cac_Latn": { + "num_samples": 256, + "number_of_characters": 72972, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.96484375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 27, + 
"average_sentence2_length": 176.08203125, + "max_sentence2_length": 431, + "unique_sentence2": 256 + }, + "cac_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72972, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 176.08203125, + "max_sentence1_length": 431, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.96484375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-caf_Latn": { + "num_samples": 256, + "number_of_characters": 73709, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.8125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 174.11328125, + "max_sentence2_length": 433, + "unique_sentence2": 256 + }, + "caf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73709, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 174.11328125, + "max_sentence1_length": 433, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cak_Latn": { + "num_samples": 256, + "number_of_characters": 99811, + "unique_pairs": 256, + "min_sentence1_length": 56, + "average_sentence1_length": 147.15234375, + "max_sentence1_length": 341, + "unique_sentence1": 253, + "min_sentence2_length": 78, + "average_sentence2_length": 242.734375, + "max_sentence2_length": 584, + "unique_sentence2": 256 + }, + "cak_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99811, + "unique_pairs": 256, + "min_sentence1_length": 78, + "average_sentence1_length": 242.734375, + "max_sentence1_length": 584, + "unique_sentence1": 256, + "min_sentence2_length": 56, + "average_sentence2_length": 147.15234375, + "max_sentence2_length": 341, + "unique_sentence2": 253 + }, + "eng_Latn-cao_Latn": { + "num_samples": 256, + 
"number_of_characters": 73169, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.76953125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 175.046875, + "max_sentence2_length": 445, + "unique_sentence2": 256 + }, + "cao_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73169, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 175.046875, + "max_sentence1_length": 445, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.76953125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cap_Latn": { + "num_samples": 256, + "number_of_characters": 85803, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.47265625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 224.6953125, + "max_sentence2_length": 667, + "unique_sentence2": 256 + }, + "cap_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85803, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 224.6953125, + "max_sentence1_length": 667, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.47265625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-car_Latn": { + "num_samples": 256, + "number_of_characters": 67129, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.234375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 147.98828125, + "max_sentence2_length": 386, + "unique_sentence2": 256 + }, + "car_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67129, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 147.98828125, + 
"max_sentence1_length": 386, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.234375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cav_Latn": { + "num_samples": 256, + "number_of_characters": 109488, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.59765625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 313.08984375, + "max_sentence2_length": 1077, + "unique_sentence2": 256 + }, + "cav_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 109488, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 313.08984375, + "max_sentence1_length": 1077, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.59765625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cax_Latn": { + "num_samples": 256, + "number_of_characters": 74168, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.59375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 179.125, + "max_sentence2_length": 409, + "unique_sentence2": 256 + }, + "cax_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74168, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 179.125, + "max_sentence1_length": 409, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.59375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cbc_Latn": { + "num_samples": 256, + "number_of_characters": 102227, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.39453125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 61, + "average_sentence2_length": 284.9296875, + "max_sentence2_length": 
1210, + "unique_sentence2": 256 + }, + "cbc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 102227, + "unique_pairs": 256, + "min_sentence1_length": 61, + "average_sentence1_length": 284.9296875, + "max_sentence1_length": 1210, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.39453125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cbi_Latn": { + "num_samples": 256, + "number_of_characters": 77003, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 113.79296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 187.0, + "max_sentence2_length": 733, + "unique_sentence2": 256 + }, + "cbi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77003, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 187.0, + "max_sentence1_length": 733, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 113.79296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cbk_Latn": { + "num_samples": 256, + "number_of_characters": 79435, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.02734375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 196.265625, + "max_sentence2_length": 532, + "unique_sentence2": 256 + }, + "cbk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79435, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 196.265625, + "max_sentence1_length": 532, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.02734375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cbr_Latn": { + "num_samples": 256, + "number_of_characters": 82587, + "unique_pairs": 256, + 
"min_sentence1_length": 24, + "average_sentence1_length": 110.453125, + "max_sentence1_length": 238, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 212.15234375, + "max_sentence2_length": 814, + "unique_sentence2": 256 + }, + "cbr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82587, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 212.15234375, + "max_sentence1_length": 814, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.453125, + "max_sentence2_length": 238, + "unique_sentence2": 256 + }, + "eng_Latn-cbs_Latn": { + "num_samples": 256, + "number_of_characters": 92917, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 112.0546875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 250.90234375, + "max_sentence2_length": 1059, + "unique_sentence2": 256 + }, + "cbs_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92917, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 250.90234375, + "max_sentence1_length": 1059, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 112.0546875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cbt_Latn": { + "num_samples": 256, + "number_of_characters": 90064, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.8671875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 236.9453125, + "max_sentence2_length": 635, + "unique_sentence2": 256 + }, + "cbt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90064, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 236.9453125, + "max_sentence1_length": 635, + "unique_sentence1": 256, + 
"min_sentence2_length": 24, + "average_sentence2_length": 114.8671875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cbu_Latn": { + "num_samples": 256, + "number_of_characters": 96593, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 107.87890625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 269.4375, + "max_sentence2_length": 1004, + "unique_sentence2": 256 + }, + "cbu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 96593, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 269.4375, + "max_sentence1_length": 1004, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 107.87890625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cbv_Latn": { + "num_samples": 256, + "number_of_characters": 97921, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 271.37890625, + "max_sentence2_length": 927, + "unique_sentence2": 256 + }, + "cbv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97921, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 271.37890625, + "max_sentence1_length": 927, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 111.125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cco_Latn": { + "num_samples": 256, + "number_of_characters": 95658, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.796875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 63, + "average_sentence2_length": 259.8671875, + "max_sentence2_length": 671, + "unique_sentence2": 256 + }, + "cco_Latn-eng_Latn": { + 
"num_samples": 256, + "number_of_characters": 95658, + "unique_pairs": 256, + "min_sentence1_length": 63, + "average_sentence1_length": 259.8671875, + "max_sentence1_length": 671, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.796875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ceb_Latn": { + "num_samples": 256, + "number_of_characters": 64468, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 139.43359375, + "max_sentence2_length": 381, + "unique_sentence2": 256 + }, + "ceb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64468, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 139.43359375, + "max_sentence1_length": 381, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-cek_Latn": { + "num_samples": 256, + "number_of_characters": 65140, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.72265625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 138.73046875, + "max_sentence2_length": 301, + "unique_sentence2": 256 + }, + "cek_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65140, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 138.73046875, + "max_sentence1_length": 301, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.72265625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ces_Latn": { + "num_samples": 256, + "number_of_characters": 69637, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 
146.3203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 36, + "average_sentence2_length": 125.69921875, + "max_sentence2_length": 301, + "unique_sentence2": 255 + }, + "ces_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69637, + "unique_pairs": 255, + "min_sentence1_length": 36, + "average_sentence1_length": 125.69921875, + "max_sentence1_length": 301, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 146.3203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-cgc_Latn": { + "num_samples": 256, + "number_of_characters": 85774, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.75390625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 47, + "average_sentence2_length": 224.30078125, + "max_sentence2_length": 618, + "unique_sentence2": 256 + }, + "cgc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85774, + "unique_pairs": 256, + "min_sentence1_length": 47, + "average_sentence1_length": 224.30078125, + "max_sentence1_length": 618, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.75390625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cha_Latn": { + "num_samples": 256, + "number_of_characters": 56725, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 108.57421875, + "max_sentence1_length": 243, + "unique_sentence1": 253, + "min_sentence2_length": 35, + "average_sentence2_length": 113.0078125, + "max_sentence2_length": 258, + "unique_sentence2": 256 + }, + "cha_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 56725, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 113.0078125, + "max_sentence1_length": 258, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 108.57421875, + 
"max_sentence2_length": 243, + "unique_sentence2": 253 + }, + "eng_Latn-chd_Latn": { + "num_samples": 256, + "number_of_characters": 86545, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.58203125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 227.484375, + "max_sentence2_length": 789, + "unique_sentence2": 256 + }, + "chd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 86545, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 227.484375, + "max_sentence1_length": 789, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.58203125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-chf_Latn": { + "num_samples": 256, + "number_of_characters": 81471, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.53125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 202.71484375, + "max_sentence2_length": 987, + "unique_sentence2": 256 + }, + "chf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81471, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 202.71484375, + "max_sentence1_length": 987, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.53125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-chk_Latn": { + "num_samples": 256, + "number_of_characters": 63450, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 135.45703125, + "max_sentence2_length": 330, + "unique_sentence2": 256 + }, + "chk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63450, + 
"unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 135.45703125, + "max_sentence1_length": 330, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-chq_Latn": { + "num_samples": 256, + "number_of_characters": 73580, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.43359375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 176.98828125, + "max_sentence2_length": 602, + "unique_sentence2": 256 + }, + "chq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73580, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 176.98828125, + "max_sentence1_length": 602, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.43359375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-chz_Latn": { + "num_samples": 256, + "number_of_characters": 73728, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 172.62890625, + "max_sentence2_length": 439, + "unique_sentence2": 256 + }, + "chz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73728, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 172.62890625, + "max_sentence1_length": 439, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cjo_Latn": { + "num_samples": 256, + "number_of_characters": 101711, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.18359375, + "max_sentence1_length": 263, + 
"unique_sentence1": 256, + "min_sentence2_length": 49, + "average_sentence2_length": 285.125, + "max_sentence2_length": 1225, + "unique_sentence2": 256 + }, + "cjo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 101711, + "unique_pairs": 256, + "min_sentence1_length": 49, + "average_sentence1_length": 285.125, + "max_sentence1_length": 1225, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.18359375, + "max_sentence2_length": 263, + "unique_sentence2": 256 + }, + "eng_Latn-cjv_Latn": { + "num_samples": 256, + "number_of_characters": 91766, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 50, + "average_sentence2_length": 248.8359375, + "max_sentence2_length": 908, + "unique_sentence2": 256 + }, + "cjv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 91766, + "unique_pairs": 256, + "min_sentence1_length": 50, + "average_sentence1_length": 248.8359375, + "max_sentence1_length": 908, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ckb_Arab": { + "num_samples": 256, + "number_of_characters": 55658, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 20, + "average_sentence2_length": 102.04296875, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "ckb_Arab-eng_Latn": { + "num_samples": 256, + "number_of_characters": 55658, + "unique_pairs": 256, + "min_sentence1_length": 20, + "average_sentence1_length": 102.04296875, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + 
"eng_Latn-cle_Latn": { + "num_samples": 256, + "number_of_characters": 79316, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 52, + "average_sentence2_length": 197.43359375, + "max_sentence2_length": 439, + "unique_sentence2": 256 + }, + "cle_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79316, + "unique_pairs": 256, + "min_sentence1_length": 52, + "average_sentence1_length": 197.43359375, + "max_sentence1_length": 439, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-clu_Latn": { + "num_samples": 256, + "number_of_characters": 74521, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.1640625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 177.93359375, + "max_sentence2_length": 843, + "unique_sentence2": 256 + }, + "clu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74521, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 177.93359375, + "max_sentence1_length": 843, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.1640625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cme_Latn": { + "num_samples": 256, + "number_of_characters": 66267, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.1875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 143.66796875, + "max_sentence2_length": 436, + "unique_sentence2": 255 + }, + "cme_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66267, + "unique_pairs": 256, + "min_sentence1_length": 25, + 
"average_sentence1_length": 143.66796875, + "max_sentence1_length": 436, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 115.1875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cmn_Hans": { + "num_samples": 256, + "number_of_characters": 47494, + "unique_pairs": 255, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 12, + "average_sentence2_length": 40.19140625, + "max_sentence2_length": 106, + "unique_sentence2": 255 + }, + "cmn_Hans-eng_Latn": { + "num_samples": 256, + "number_of_characters": 47494, + "unique_pairs": 255, + "min_sentence1_length": 12, + "average_sentence1_length": 40.19140625, + "max_sentence1_length": 106, + "unique_sentence1": 255, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-cni_Latn": { + "num_samples": 256, + "number_of_characters": 90170, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.59765625, + "max_sentence1_length": 263, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 240.62890625, + "max_sentence2_length": 1132, + "unique_sentence2": 256 + }, + "cni_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90170, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 240.62890625, + "max_sentence1_length": 1132, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.59765625, + "max_sentence2_length": 263, + "unique_sentence2": 256 + }, + "eng_Latn-cnl_Latn": { + "num_samples": 256, + "number_of_characters": 88073, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.390625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 34, + 
"average_sentence2_length": 229.64453125, + "max_sentence2_length": 668, + "unique_sentence2": 256 + }, + "cnl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88073, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 229.64453125, + "max_sentence1_length": 668, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.390625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cnt_Latn": { + "num_samples": 256, + "number_of_characters": 74810, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.20703125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 177.01953125, + "max_sentence2_length": 384, + "unique_sentence2": 256 + }, + "cnt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74810, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 177.01953125, + "max_sentence1_length": 384, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.20703125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cof_Latn": { + "num_samples": 256, + "number_of_characters": 100652, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.8203125, + "max_sentence1_length": 256, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 281.3515625, + "max_sentence2_length": 1760, + "unique_sentence2": 256 + }, + "cof_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 100652, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 281.3515625, + "max_sentence1_length": 1760, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 111.8203125, + "max_sentence2_length": 256, + "unique_sentence2": 256 + }, + "eng_Latn-con_Latn": { + "num_samples": 
256, + "number_of_characters": 76257, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.8359375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 187.04296875, + "max_sentence2_length": 681, + "unique_sentence2": 256 + }, + "con_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76257, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 187.04296875, + "max_sentence1_length": 681, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8359375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cop_Copt": { + "num_samples": 256, + "number_of_characters": 72668, + "unique_pairs": 256, + "min_sentence1_length": 56, + "average_sentence1_length": 146.3203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 34, + "average_sentence2_length": 137.5390625, + "max_sentence2_length": 279, + "unique_sentence2": 256 + }, + "cop_Copt-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72668, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 137.5390625, + "max_sentence1_length": 279, + "unique_sentence1": 256, + "min_sentence2_length": 56, + "average_sentence2_length": 146.3203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-cot_Latn": { + "num_samples": 256, + "number_of_characters": 98864, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.234375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 271.953125, + "max_sentence2_length": 882, + "unique_sentence2": 256 + }, + "cot_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 98864, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 271.953125, + 
"max_sentence1_length": 882, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.234375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cpa_Latn": { + "num_samples": 256, + "number_of_characters": 72477, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 167.7421875, + "max_sentence2_length": 387, + "unique_sentence2": 256 + }, + "cpa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72477, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 167.7421875, + "max_sentence1_length": 387, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cpb_Latn": { + "num_samples": 256, + "number_of_characters": 70765, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.8125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 162.61328125, + "max_sentence2_length": 426, + "unique_sentence2": 256 + }, + "cpb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70765, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 162.61328125, + "max_sentence1_length": 426, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cpc_Latn": { + "num_samples": 256, + "number_of_characters": 71670, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.8125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 166.1484375, + "max_sentence2_length": 431, 
+ "unique_sentence2": 256 + }, + "cpc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71670, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 166.1484375, + "max_sentence1_length": 431, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cpu_Latn": { + "num_samples": 256, + "number_of_characters": 73281, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.8125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 172.44140625, + "max_sentence2_length": 441, + "unique_sentence2": 255 + }, + "cpu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73281, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 172.44140625, + "max_sentence1_length": 441, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cpy_Latn": { + "num_samples": 256, + "number_of_characters": 75504, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 181.5, + "max_sentence2_length": 453, + "unique_sentence2": 256 + }, + "cpy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75504, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 181.5, + "max_sentence1_length": 453, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-crn_Latn": { + "num_samples": 256, + "number_of_characters": 108452, + "unique_pairs": 256, + "min_sentence1_length": 21, + 
"average_sentence1_length": 108.921875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 54, + "average_sentence2_length": 314.71875, + "max_sentence2_length": 1123, + "unique_sentence2": 256 + }, + "crn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 108452, + "unique_pairs": 256, + "min_sentence1_length": 54, + "average_sentence1_length": 314.71875, + "max_sentence1_length": 1123, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 108.921875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-crx_Latn": { + "num_samples": 256, + "number_of_characters": 74100, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.8125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 175.640625, + "max_sentence2_length": 457, + "unique_sentence2": 256 + }, + "crx_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74100, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 175.640625, + "max_sentence1_length": 457, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cso_Latn": { + "num_samples": 256, + "number_of_characters": 84779, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.13671875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 47, + "average_sentence2_length": 218.03125, + "max_sentence2_length": 535, + "unique_sentence2": 256 + }, + "cso_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84779, + "unique_pairs": 256, + "min_sentence1_length": 47, + "average_sentence1_length": 218.03125, + "max_sentence1_length": 535, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 
113.13671875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-csy_Latn": { + "num_samples": 256, + "number_of_characters": 62242, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 130.73828125, + "max_sentence2_length": 283, + "unique_sentence2": 256 + }, + "csy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62242, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 130.73828125, + "max_sentence1_length": 283, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-cta_Latn": { + "num_samples": 256, + "number_of_characters": 100733, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.4296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 279.05859375, + "max_sentence2_length": 909, + "unique_sentence2": 256 + }, + "cta_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 100733, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 279.05859375, + "max_sentence1_length": 909, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.4296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cth_Latn": { + "num_samples": 256, + "number_of_characters": 63569, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 135.921875, + "max_sentence2_length": 291, + "unique_sentence2": 256 + }, + "cth_Latn-eng_Latn": { + "num_samples": 256, + 
"number_of_characters": 63569, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 135.921875, + "max_sentence1_length": 291, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ctp_Latn": { + "num_samples": 256, + "number_of_characters": 96284, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.09765625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 266.01171875, + "max_sentence2_length": 864, + "unique_sentence2": 256 + }, + "ctp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 96284, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 266.01171875, + "max_sentence1_length": 864, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.09765625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ctu_Latn": { + "num_samples": 256, + "number_of_characters": 98784, + "unique_pairs": 254, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 61, + "average_sentence2_length": 238.31640625, + "max_sentence2_length": 670, + "unique_sentence2": 254 + }, + "ctu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 98784, + "unique_pairs": 254, + "min_sentence1_length": 61, + "average_sentence1_length": 238.31640625, + "max_sentence1_length": 670, + "unique_sentence1": 254, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-cub_Latn": { + "num_samples": 256, + "number_of_characters": 99401, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.7109375, + 
"max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 276.57421875, + "max_sentence2_length": 1218, + "unique_sentence2": 256 + }, + "cub_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99401, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 276.57421875, + "max_sentence1_length": 1218, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.7109375, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "eng_Latn-cuc_Latn": { + "num_samples": 256, + "number_of_characters": 69143, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.234375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 160.85546875, + "max_sentence2_length": 404, + "unique_sentence2": 256 + }, + "cuc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69143, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 160.85546875, + "max_sentence1_length": 404, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.234375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cui_Latn": { + "num_samples": 256, + "number_of_characters": 113631, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.5078125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 79, + "average_sentence2_length": 333.36328125, + "max_sentence2_length": 1448, + "unique_sentence2": 256 + }, + "cui_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 113631, + "unique_pairs": 256, + "min_sentence1_length": 79, + "average_sentence1_length": 333.36328125, + "max_sentence1_length": 1448, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.5078125, + 
"max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-cuk_Latn": { + "num_samples": 256, + "number_of_characters": 88234, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 146.3203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 63, + "average_sentence2_length": 198.34375, + "max_sentence2_length": 513, + "unique_sentence2": 255 + }, + "cuk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88234, + "unique_pairs": 255, + "min_sentence1_length": 63, + "average_sentence1_length": 198.34375, + "max_sentence1_length": 513, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 146.3203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-cut_Latn": { + "num_samples": 256, + "number_of_characters": 75616, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.66015625, + "max_sentence1_length": 216, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 185.71484375, + "max_sentence2_length": 531, + "unique_sentence2": 256 + }, + "cut_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75616, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 185.71484375, + "max_sentence1_length": 531, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.66015625, + "max_sentence2_length": 216, + "unique_sentence2": 256 + }, + "eng_Latn-cux_Latn": { + "num_samples": 256, + "number_of_characters": 89392, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 233.81640625, + "max_sentence2_length": 599, + "unique_sentence2": 256 + }, + "cux_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89392, + 
"unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 233.81640625, + "max_sentence1_length": 599, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-cwe_Latn": { + "num_samples": 256, + "number_of_characters": 62866, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 133.17578125, + "max_sentence2_length": 317, + "unique_sentence2": 256 + }, + "cwe_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62866, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 133.17578125, + "max_sentence1_length": 317, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-cya_Latn": { + "num_samples": 256, + "number_of_characters": 71441, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 163.6953125, + "max_sentence2_length": 346, + "unique_sentence2": 256 + }, + "cya_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71441, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 163.6953125, + "max_sentence1_length": 346, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-daa_Latn": { + "num_samples": 256, + "number_of_characters": 67869, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.8984375, + "max_sentence1_length": 827, + "unique_sentence1": 
256, + "min_sentence2_length": 38, + "average_sentence2_length": 151.21484375, + "max_sentence2_length": 365, + "unique_sentence2": 256 + }, + "daa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67869, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 151.21484375, + "max_sentence1_length": 365, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8984375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-dad_Latn": { + "num_samples": 256, + "number_of_characters": 62202, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 128.60546875, + "max_sentence2_length": 308, + "unique_sentence2": 256 + }, + "dad_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62202, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 128.60546875, + "max_sentence1_length": 308, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-dah_Latn": { + "num_samples": 256, + "number_of_characters": 98779, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 110.046875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 47, + "average_sentence2_length": 275.80859375, + "max_sentence2_length": 796, + "unique_sentence2": 256 + }, + "dah_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 98779, + "unique_pairs": 256, + "min_sentence1_length": 47, + "average_sentence1_length": 275.80859375, + "max_sentence1_length": 796, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 110.046875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + 
"eng_Latn-dan_Latn": { + "num_samples": 256, + "number_of_characters": 56678, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 109.00390625, + "max_sentence2_length": 226, + "unique_sentence2": 256 + }, + "dan_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 56678, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 109.00390625, + "max_sentence1_length": 226, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ded_Latn": { + "num_samples": 256, + "number_of_characters": 73870, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.8671875, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 178.6875, + "max_sentence2_length": 777, + "unique_sentence2": 256 + }, + "ded_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73870, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 178.6875, + "max_sentence1_length": 777, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.8671875, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-deu_Latn": { + "num_samples": 256, + "number_of_characters": 77912, + "unique_pairs": 254, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 55, + "average_sentence2_length": 156.78515625, + "max_sentence2_length": 392, + "unique_sentence2": 254 + }, + "deu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77912, + "unique_pairs": 254, + "min_sentence1_length": 55, + 
"average_sentence1_length": 156.78515625, + "max_sentence1_length": 392, + "unique_sentence1": 254, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-dgc_Latn": { + "num_samples": 256, + "number_of_characters": 70417, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.234375, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 164.83203125, + "max_sentence2_length": 394, + "unique_sentence2": 256 + }, + "dgc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70417, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 164.83203125, + "max_sentence1_length": 394, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.234375, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "eng_Latn-dgr_Latn": { + "num_samples": 256, + "number_of_characters": 69339, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.671875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 160.18359375, + "max_sentence2_length": 492, + "unique_sentence2": 256 + }, + "dgr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69339, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 160.18359375, + "max_sentence1_length": 492, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.671875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-dgz_Latn": { + "num_samples": 256, + "number_of_characters": 73488, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.01171875, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 37, + 
"average_sentence2_length": 175.05078125, + "max_sentence2_length": 597, + "unique_sentence2": 256 + }, + "dgz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73488, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 175.05078125, + "max_sentence1_length": 597, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.01171875, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-dhg_Latn": { + "num_samples": 256, + "number_of_characters": 96890, + "unique_pairs": 249, + "min_sentence1_length": 28, + "average_sentence1_length": 110.27734375, + "max_sentence1_length": 215, + "unique_sentence1": 239, + "min_sentence2_length": 52, + "average_sentence2_length": 268.19921875, + "max_sentence2_length": 1116, + "unique_sentence2": 249 + }, + "dhg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 96890, + "unique_pairs": 249, + "min_sentence1_length": 52, + "average_sentence1_length": 268.19921875, + "max_sentence1_length": 1116, + "unique_sentence1": 249, + "min_sentence2_length": 28, + "average_sentence2_length": 110.27734375, + "max_sentence2_length": 215, + "unique_sentence2": 239 + }, + "eng_Latn-dif_Latn": { + "num_samples": 256, + "number_of_characters": 63423, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 135.3515625, + "max_sentence2_length": 345, + "unique_sentence2": 256 + }, + "dif_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63423, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 135.3515625, + "max_sentence1_length": 345, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-dik_Latn": { + 
"num_samples": 256, + "number_of_characters": 61958, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.18359375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 126.83984375, + "max_sentence2_length": 350, + "unique_sentence2": 256 + }, + "dik_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61958, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 126.83984375, + "max_sentence1_length": 350, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.18359375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-dji_Latn": { + "num_samples": 190, + "number_of_characters": 71721, + "unique_pairs": 190, + "min_sentence1_length": 26, + "average_sentence1_length": 121.51578947368421, + "max_sentence1_length": 259, + "unique_sentence1": 190, + "min_sentence2_length": 50, + "average_sentence2_length": 255.96315789473684, + "max_sentence2_length": 933, + "unique_sentence2": 190 + }, + "dji_Latn-eng_Latn": { + "num_samples": 190, + "number_of_characters": 71721, + "unique_pairs": 190, + "min_sentence1_length": 50, + "average_sentence1_length": 255.96315789473684, + "max_sentence1_length": 933, + "unique_sentence1": 190, + "min_sentence2_length": 26, + "average_sentence2_length": 121.51578947368421, + "max_sentence2_length": 259, + "unique_sentence2": 190 + }, + "eng_Latn-djk_Latn": { + "num_samples": 256, + "number_of_characters": 75836, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.34765625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 181.88671875, + "max_sentence2_length": 717, + "unique_sentence2": 256 + }, + "djk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75836, + "unique_pairs": 256, + "min_sentence1_length": 25, + 
"average_sentence1_length": 181.88671875, + "max_sentence1_length": 717, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.34765625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-djr_Latn": { + "num_samples": 256, + "number_of_characters": 132660, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.65234375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 59, + "average_sentence2_length": 406.55078125, + "max_sentence2_length": 1457, + "unique_sentence2": 256 + }, + "djr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 132660, + "unique_pairs": 256, + "min_sentence1_length": 59, + "average_sentence1_length": 406.55078125, + "max_sentence1_length": 1457, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.65234375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-dob_Latn": { + "num_samples": 256, + "number_of_characters": 69579, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.67578125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 162.1171875, + "max_sentence2_length": 427, + "unique_sentence2": 256 + }, + "dob_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69579, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 162.1171875, + "max_sentence1_length": 427, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.67578125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-dop_Latn": { + "num_samples": 256, + "number_of_characters": 64281, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.33203125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + 
"average_sentence2_length": 140.765625, + "max_sentence2_length": 333, + "unique_sentence2": 256 + }, + "dop_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64281, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 140.765625, + "max_sentence1_length": 333, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.33203125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-dov_Latn": { + "num_samples": 256, + "number_of_characters": 58684, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 19, + "average_sentence2_length": 116.83984375, + "max_sentence2_length": 294, + "unique_sentence2": 256 + }, + "dov_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 58684, + "unique_pairs": 256, + "min_sentence1_length": 19, + "average_sentence1_length": 116.83984375, + "max_sentence1_length": 294, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-dwr_Latn": { + "num_samples": 256, + "number_of_characters": 81700, + "unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 46, + "average_sentence2_length": 173.80859375, + "max_sentence2_length": 480, + "unique_sentence2": 254 + }, + "dwr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81700, + "unique_pairs": 254, + "min_sentence1_length": 46, + "average_sentence1_length": 173.80859375, + "max_sentence1_length": 480, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-dww_Latn": { + "num_samples": 256, 
+ "number_of_characters": 75606, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 110.01953125, + "max_sentence1_length": 231, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 185.31640625, + "max_sentence2_length": 606, + "unique_sentence2": 255 + }, + "dww_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75606, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 185.31640625, + "max_sentence1_length": 606, + "unique_sentence1": 255, + "min_sentence2_length": 31, + "average_sentence2_length": 110.01953125, + "max_sentence2_length": 231, + "unique_sentence2": 256 + }, + "eng_Latn-dwy_Latn": { + "num_samples": 133, + "number_of_characters": 56723, + "unique_pairs": 133, + "min_sentence1_length": 42, + "average_sentence1_length": 113.89473684210526, + "max_sentence1_length": 257, + "unique_sentence1": 132, + "min_sentence2_length": 3, + "average_sentence2_length": 312.593984962406, + "max_sentence2_length": 1213, + "unique_sentence2": 132 + }, + "dwy_Latn-eng_Latn": { + "num_samples": 133, + "number_of_characters": 56723, + "unique_pairs": 133, + "min_sentence1_length": 3, + "average_sentence1_length": 312.593984962406, + "max_sentence1_length": 1213, + "unique_sentence1": 132, + "min_sentence2_length": 42, + "average_sentence2_length": 113.89473684210526, + "max_sentence2_length": 257, + "unique_sentence2": 132 + }, + "eng_Latn-ebk_Latn": { + "num_samples": 256, + "number_of_characters": 75795, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.203125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 184.87109375, + "max_sentence2_length": 492, + "unique_sentence2": 256 + }, + "ebk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75795, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 
184.87109375, + "max_sentence1_length": 492, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.203125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-eko_Latn": { + "num_samples": 256, + "number_of_characters": 57909, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 105.97265625, + "max_sentence1_length": 217, + "unique_sentence1": 254, + "min_sentence2_length": 17, + "average_sentence2_length": 120.234375, + "max_sentence2_length": 280, + "unique_sentence2": 256 + }, + "eko_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57909, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 120.234375, + "max_sentence1_length": 280, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 105.97265625, + "max_sentence2_length": 217, + "unique_sentence2": 254 + }, + "eng_Latn-emi_Latn": { + "num_samples": 256, + "number_of_characters": 70274, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.42578125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 160.08203125, + "max_sentence2_length": 475, + "unique_sentence2": 256 + }, + "emi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70274, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 160.08203125, + "max_sentence1_length": 475, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.42578125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-emp_Latn": { + "num_samples": 256, + "number_of_characters": 75909, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.54296875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 185.9765625, + 
"max_sentence2_length": 529, + "unique_sentence2": 256 + }, + "emp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75909, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 185.9765625, + "max_sentence1_length": 529, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.54296875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-enq_Latn": { + "num_samples": 256, + "number_of_characters": 89350, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.29296875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 52, + "average_sentence2_length": 234.73046875, + "max_sentence2_length": 800, + "unique_sentence2": 255 + }, + "enq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89350, + "unique_pairs": 256, + "min_sentence1_length": 52, + "average_sentence1_length": 234.73046875, + "max_sentence1_length": 800, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 114.29296875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-epo_Latn": { + "num_samples": 256, + "number_of_characters": 54872, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 100.90625, + "max_sentence2_length": 240, + "unique_sentence2": 256 + }, + "epo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 54872, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 100.90625, + "max_sentence1_length": 240, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-eri_Latn": { + "num_samples": 256, + "number_of_characters": 86538, + "unique_pairs": 
256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.0703125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 223.96875, + "max_sentence2_length": 854, + "unique_sentence2": 256 + }, + "eri_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 86538, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 223.96875, + "max_sentence1_length": 854, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.0703125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ese_Latn": { + "num_samples": 256, + "number_of_characters": 104310, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.25, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 58, + "average_sentence2_length": 298.2109375, + "max_sentence2_length": 934, + "unique_sentence2": 256 + }, + "ese_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 104310, + "unique_pairs": 256, + "min_sentence1_length": 58, + "average_sentence1_length": 298.2109375, + "max_sentence1_length": 934, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.25, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-esk_Latn": { + "num_samples": 256, + "number_of_characters": 69438, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.61328125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 156.62890625, + "max_sentence2_length": 463, + "unique_sentence2": 256 + }, + "esk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69438, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 156.62890625, + "max_sentence1_length": 463, + "unique_sentence1": 256, + "min_sentence2_length": 
31, + "average_sentence2_length": 114.61328125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-etr_Latn": { + "num_samples": 256, + "number_of_characters": 69080, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 109.7265625, + "max_sentence1_length": 243, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 160.1171875, + "max_sentence2_length": 424, + "unique_sentence2": 256 + }, + "etr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69080, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 160.1171875, + "max_sentence1_length": 424, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 109.7265625, + "max_sentence2_length": 243, + "unique_sentence2": 256 + }, + "eng_Latn-ewe_Latn": { + "num_samples": 256, + "number_of_characters": 62734, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 129.68359375, + "max_sentence2_length": 305, + "unique_sentence2": 256 + }, + "ewe_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62734, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 129.68359375, + "max_sentence1_length": 305, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-faa_Latn": { + "num_samples": 256, + "number_of_characters": 98521, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.94921875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 58, + "average_sentence2_length": 272.8984375, + "max_sentence2_length": 925, + "unique_sentence2": 256 + }, + "faa_Latn-eng_Latn": { + 
"num_samples": 256, + "number_of_characters": 98521, + "unique_pairs": 256, + "min_sentence1_length": 58, + "average_sentence1_length": 272.8984375, + "max_sentence1_length": 925, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.94921875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-fai_Latn": { + "num_samples": 256, + "number_of_characters": 77632, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.20703125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 193.04296875, + "max_sentence2_length": 578, + "unique_sentence2": 256 + }, + "fai_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77632, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 193.04296875, + "max_sentence1_length": 578, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.20703125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-far_Latn": { + "num_samples": 256, + "number_of_characters": 77337, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.6328125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 192.46484375, + "max_sentence2_length": 640, + "unique_sentence2": 256 + }, + "far_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77337, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 192.46484375, + "max_sentence1_length": 640, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.6328125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ffm_Latn": { + "num_samples": 256, + "number_of_characters": 60526, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.828125, 
+ "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 1, + "average_sentence2_length": 122.6015625, + "max_sentence2_length": 361, + "unique_sentence2": 256 + }, + "ffm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60526, + "unique_pairs": 256, + "min_sentence1_length": 1, + "average_sentence1_length": 122.6015625, + "max_sentence1_length": 361, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.828125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-for_Latn": { + "num_samples": 256, + "number_of_characters": 68231, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.94921875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 156.578125, + "max_sentence2_length": 363, + "unique_sentence2": 256 + }, + "for_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68231, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 156.578125, + "max_sentence1_length": 363, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.94921875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-fra_Latn": { + "num_samples": 256, + "number_of_characters": 77098, + "unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 44, + "average_sentence2_length": 155.83203125, + "max_sentence2_length": 386, + "unique_sentence2": 254 + }, + "fra_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77098, + "unique_pairs": 254, + "min_sentence1_length": 44, + "average_sentence1_length": 155.83203125, + "max_sentence1_length": 386, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + 
"max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-fue_Latn": { + "num_samples": 256, + "number_of_characters": 57793, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 116.5546875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 109.19921875, + "max_sentence2_length": 247, + "unique_sentence2": 256 + }, + "fue_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57793, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 109.19921875, + "max_sentence1_length": 247, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 116.5546875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-fuf_Latn": { + "num_samples": 256, + "number_of_characters": 59597, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.234375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 119.56640625, + "max_sentence2_length": 264, + "unique_sentence2": 256 + }, + "fuf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59597, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 119.56640625, + "max_sentence1_length": 264, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.234375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-fuh_Latn": { + "num_samples": 256, + "number_of_characters": 60407, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.3046875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 1, + "average_sentence2_length": 120.66015625, + "max_sentence2_length": 295, + "unique_sentence2": 256 + }, + "fuh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60407, + 
"unique_pairs": 256, + "min_sentence1_length": 1, + "average_sentence1_length": 120.66015625, + "max_sentence1_length": 295, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.3046875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-gah_Latn": { + "num_samples": 256, + "number_of_characters": 76721, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.3125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 189.37890625, + "max_sentence2_length": 539, + "unique_sentence2": 256 + }, + "gah_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76721, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 189.37890625, + "max_sentence1_length": 539, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.3125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gai_Latn": { + "num_samples": 256, + "number_of_characters": 90020, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.75390625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 241.88671875, + "max_sentence2_length": 919, + "unique_sentence2": 256 + }, + "gai_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90020, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 241.88671875, + "max_sentence1_length": 919, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.75390625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gam_Latn": { + "num_samples": 256, + "number_of_characters": 75523, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.83984375, + "max_sentence1_length": 376, + "unique_sentence1": 256, + 
"min_sentence2_length": 40, + "average_sentence2_length": 183.171875, + "max_sentence2_length": 607, + "unique_sentence2": 256 + }, + "gam_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75523, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 183.171875, + "max_sentence1_length": 607, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.83984375, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-gaw_Latn": { + "num_samples": 256, + "number_of_characters": 79263, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.5, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 200.12109375, + "max_sentence2_length": 771, + "unique_sentence2": 256 + }, + "gaw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79263, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 200.12109375, + "max_sentence1_length": 771, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.5, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gdn_Latn": { + "num_samples": 256, + "number_of_characters": 103392, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.8984375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 52, + "average_sentence2_length": 292.9765625, + "max_sentence2_length": 1210, + "unique_sentence2": 256 + }, + "gdn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 103392, + "unique_pairs": 256, + "min_sentence1_length": 52, + "average_sentence1_length": 292.9765625, + "max_sentence1_length": 1210, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8984375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gdr_Latn": { + 
"num_samples": 256, + "number_of_characters": 84347, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.28515625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 219.1953125, + "max_sentence2_length": 620, + "unique_sentence2": 256 + }, + "gdr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84347, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 219.1953125, + "max_sentence1_length": 620, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.28515625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-geb_Latn": { + "num_samples": 256, + "number_of_characters": 93197, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 115.3046875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 248.74609375, + "max_sentence2_length": 830, + "unique_sentence2": 256 + }, + "geb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93197, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 248.74609375, + "max_sentence1_length": 830, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 115.3046875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-gfk_Latn": { + "num_samples": 256, + "number_of_characters": 92557, + "unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 146.45703125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 41, + "average_sentence2_length": 215.09375, + "max_sentence2_length": 553, + "unique_sentence2": 254 + }, + "gfk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92557, + "unique_pairs": 254, + "min_sentence1_length": 41, + "average_sentence1_length": 215.09375, + 
"max_sentence1_length": 553, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 146.45703125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-ghs_Latn": { + "num_samples": 256, + "number_of_characters": 82265, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 107.6953125, + "max_sentence1_length": 248, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 213.65234375, + "max_sentence2_length": 845, + "unique_sentence2": 256 + }, + "ghs_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82265, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 213.65234375, + "max_sentence1_length": 845, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 107.6953125, + "max_sentence2_length": 248, + "unique_sentence2": 256 + }, + "eng_Latn-glk_Arab": { + "num_samples": 93, + "number_of_characters": 18109, + "unique_pairs": 93, + "min_sentence1_length": 40, + "average_sentence1_length": 101.6774193548387, + "max_sentence1_length": 209, + "unique_sentence1": 93, + "min_sentence2_length": 31, + "average_sentence2_length": 93.04301075268818, + "max_sentence2_length": 205, + "unique_sentence2": 93 + }, + "glk_Arab-eng_Latn": { + "num_samples": 93, + "number_of_characters": 18109, + "unique_pairs": 93, + "min_sentence1_length": 31, + "average_sentence1_length": 93.04301075268818, + "max_sentence1_length": 205, + "unique_sentence1": 93, + "min_sentence2_length": 40, + "average_sentence2_length": 101.6774193548387, + "max_sentence2_length": 209, + "unique_sentence2": 93 + }, + "eng_Latn-gmv_Latn": { + "num_samples": 256, + "number_of_characters": 76193, + "unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 41, + "average_sentence2_length": 152.296875, + 
"max_sentence2_length": 399, + "unique_sentence2": 254 + }, + "gmv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76193, + "unique_pairs": 254, + "min_sentence1_length": 41, + "average_sentence1_length": 152.296875, + "max_sentence1_length": 399, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-gng_Latn": { + "num_samples": 256, + "number_of_characters": 61253, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.6640625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 125.60546875, + "max_sentence2_length": 372, + "unique_sentence2": 256 + }, + "gng_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61253, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 125.60546875, + "max_sentence1_length": 372, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.6640625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-gnn_Latn": { + "num_samples": 256, + "number_of_characters": 161744, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.2265625, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 64, + "average_sentence2_length": 520.5859375, + "max_sentence2_length": 2125, + "unique_sentence2": 256 + }, + "gnn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 161744, + "unique_pairs": 256, + "min_sentence1_length": 64, + "average_sentence1_length": 520.5859375, + "max_sentence1_length": 2125, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.2265625, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "eng_Latn-gnw_Latn": { + "num_samples": 256, + "number_of_characters": 72479, + 
"unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.3984375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 167.72265625, + "max_sentence2_length": 575, + "unique_sentence2": 256 + }, + "gnw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72479, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 167.72265625, + "max_sentence1_length": 575, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.3984375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-gof_Latn": { + "num_samples": 256, + "number_of_characters": 73342, + "unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 34, + "average_sentence2_length": 141.16015625, + "max_sentence2_length": 385, + "unique_sentence2": 254 + }, + "gof_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73342, + "unique_pairs": 254, + "min_sentence1_length": 34, + "average_sentence1_length": 141.16015625, + "max_sentence1_length": 385, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-grc_Grek": { + "num_samples": 256, + "number_of_characters": 74555, + "unique_pairs": 254, + "min_sentence1_length": 56, + "average_sentence1_length": 146.3203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 44, + "average_sentence2_length": 144.91015625, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "grc_Grek-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74555, + "unique_pairs": 254, + "min_sentence1_length": 44, + "average_sentence1_length": 144.91015625, + "max_sentence1_length": 341, + "unique_sentence1": 
254, + "min_sentence2_length": 56, + "average_sentence2_length": 146.3203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-gub_Latn": { + "num_samples": 256, + "number_of_characters": 102019, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 110.65625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 287.85546875, + "max_sentence2_length": 1950, + "unique_sentence2": 256 + }, + "gub_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 102019, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 287.85546875, + "max_sentence1_length": 1950, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 110.65625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-guh_Latn": { + "num_samples": 256, + "number_of_characters": 113944, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.51953125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 334.57421875, + "max_sentence2_length": 1331, + "unique_sentence2": 256 + }, + "guh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 113944, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 334.57421875, + "max_sentence1_length": 1331, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.51953125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gui_Latn": { + "num_samples": 256, + "number_of_characters": 72987, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.3984375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 169.70703125, + "max_sentence2_length": 557, + "unique_sentence2": 256 + }, + 
"gui_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72987, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 169.70703125, + "max_sentence1_length": 557, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.3984375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-guj_Gujr": { + "num_samples": 256, + "number_of_characters": 55669, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 19, + "average_sentence2_length": 105.0625, + "max_sentence2_length": 237, + "unique_sentence2": 256 + }, + "guj_Gujr-eng_Latn": { + "num_samples": 256, + "number_of_characters": 55669, + "unique_pairs": 256, + "min_sentence1_length": 19, + "average_sentence1_length": 105.0625, + "max_sentence1_length": 237, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-gul_Latn": { + "num_samples": 256, + "number_of_characters": 70252, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 159.125, + "max_sentence2_length": 391, + "unique_sentence2": 256 + }, + "gul_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70252, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 159.125, + "max_sentence1_length": 391, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-gum_Latn": { + "num_samples": 256, + "number_of_characters": 75071, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 
109.81640625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 183.4296875, + "max_sentence2_length": 417, + "unique_sentence2": 256 + }, + "gum_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75071, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 183.4296875, + "max_sentence1_length": 417, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.81640625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gun_Latn": { + "num_samples": 256, + "number_of_characters": 62943, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 17, + "average_sentence2_length": 133.4765625, + "max_sentence2_length": 314, + "unique_sentence2": 256 + }, + "gun_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62943, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 133.4765625, + "max_sentence1_length": 314, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-guo_Latn": { + "num_samples": 256, + "number_of_characters": 88369, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.98046875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 233.2109375, + "max_sentence2_length": 700, + "unique_sentence2": 256 + }, + "guo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88369, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 233.2109375, + "max_sentence1_length": 700, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.98046875, + 
"max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gup_Latn": { + "num_samples": 256, + "number_of_characters": 105600, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.01171875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 75, + "average_sentence2_length": 300.48828125, + "max_sentence2_length": 1198, + "unique_sentence2": 256 + }, + "gup_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 105600, + "unique_pairs": 256, + "min_sentence1_length": 75, + "average_sentence1_length": 300.48828125, + "max_sentence1_length": 1198, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.01171875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gux_Latn": { + "num_samples": 256, + "number_of_characters": 77107, + "unique_pairs": 256, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 45, + "average_sentence2_length": 155.8671875, + "max_sentence2_length": 379, + "unique_sentence2": 256 + }, + "gux_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77107, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 155.8671875, + "max_sentence1_length": 379, + "unique_sentence1": 256, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-gvc_Latn": { + "num_samples": 256, + "number_of_characters": 93358, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.828125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 251.8515625, + "max_sentence2_length": 832, + "unique_sentence2": 256 + }, + "gvc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93358, + 
"unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 251.8515625, + "max_sentence1_length": 832, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.828125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gvf_Latn": { + "num_samples": 256, + "number_of_characters": 87272, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.30078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 225.60546875, + "max_sentence2_length": 783, + "unique_sentence2": 256 + }, + "gvf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87272, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 225.60546875, + "max_sentence1_length": 783, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.30078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-gvn_Latn": { + "num_samples": 256, + "number_of_characters": 91252, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.87890625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 244.57421875, + "max_sentence2_length": 1121, + "unique_sentence2": 256 + }, + "gvn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 91252, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 244.57421875, + "max_sentence1_length": 1121, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.87890625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gvs_Latn": { + "num_samples": 256, + "number_of_characters": 78026, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.875, + "max_sentence1_length": 227, + "unique_sentence1": 
256, + "min_sentence2_length": 34, + "average_sentence2_length": 192.9140625, + "max_sentence2_length": 663, + "unique_sentence2": 256 + }, + "gvs_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78026, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 192.9140625, + "max_sentence1_length": 663, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gwi_Latn": { + "num_samples": 256, + "number_of_characters": 82247, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.51171875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 61, + "average_sentence2_length": 209.765625, + "max_sentence2_length": 749, + "unique_sentence2": 256 + }, + "gwi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82247, + "unique_pairs": 256, + "min_sentence1_length": 61, + "average_sentence1_length": 209.765625, + "max_sentence1_length": 749, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.51171875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-gym_Latn": { + "num_samples": 256, + "number_of_characters": 94904, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 115.24609375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 255.47265625, + "max_sentence2_length": 810, + "unique_sentence2": 256 + }, + "gym_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 94904, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 255.47265625, + "max_sentence1_length": 810, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 115.24609375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + 
"eng_Latn-gyr_Latn": { + "num_samples": 256, + "number_of_characters": 71681, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.3203125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 165.68359375, + "max_sentence2_length": 643, + "unique_sentence2": 256 + }, + "gyr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71681, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 165.68359375, + "max_sentence1_length": 643, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.3203125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-hat_Latn": { + "num_samples": 256, + "number_of_characters": 72750, + "unique_pairs": 254, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 39, + "average_sentence2_length": 136.62109375, + "max_sentence2_length": 328, + "unique_sentence2": 254 + }, + "hat_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72750, + "unique_pairs": 254, + "min_sentence1_length": 39, + "average_sentence1_length": 136.62109375, + "max_sentence1_length": 328, + "unique_sentence1": 254, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-hau_Latn": { + "num_samples": 256, + "number_of_characters": 74783, + "unique_pairs": 254, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 44, + "average_sentence2_length": 144.5625, + "max_sentence2_length": 317, + "unique_sentence2": 254 + }, + "hau_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74783, + "unique_pairs": 254, + "min_sentence1_length": 44, + 
"average_sentence1_length": 144.5625, + "max_sentence1_length": 317, + "unique_sentence1": 254, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-haw_Latn": { + "num_samples": 256, + "number_of_characters": 61274, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 20, + "average_sentence2_length": 125.9140625, + "max_sentence2_length": 289, + "unique_sentence2": 256 + }, + "haw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61274, + "unique_pairs": 256, + "min_sentence1_length": 20, + "average_sentence1_length": 125.9140625, + "max_sentence1_length": 289, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-hbo_Hebr": { + "num_samples": 256, + "number_of_characters": 74575, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 151.23828125, + "max_sentence1_length": 305, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 140.0703125, + "max_sentence2_length": 249, + "unique_sentence2": 255 + }, + "hbo_Hebr-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74575, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 140.0703125, + "max_sentence1_length": 249, + "unique_sentence1": 255, + "min_sentence2_length": 32, + "average_sentence2_length": 151.23828125, + "max_sentence2_length": 305, + "unique_sentence2": 256 + }, + "eng_Latn-hch_Latn": { + "num_samples": 256, + "number_of_characters": 68309, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.94921875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 22, + "average_sentence2_length": 
153.8828125, + "max_sentence2_length": 334, + "unique_sentence2": 256 + }, + "hch_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68309, + "unique_pairs": 256, + "min_sentence1_length": 22, + "average_sentence1_length": 153.8828125, + "max_sentence1_length": 334, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.94921875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-heb_Hebr": { + "num_samples": 256, + "number_of_characters": 45939, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 8, + "average_sentence2_length": 66.01171875, + "max_sentence2_length": 145, + "unique_sentence2": 256 + }, + "heb_Hebr-eng_Latn": { + "num_samples": 256, + "number_of_characters": 45939, + "unique_pairs": 256, + "min_sentence1_length": 8, + "average_sentence1_length": 66.01171875, + "max_sentence1_length": 145, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-heg_Latn": { + "num_samples": 256, + "number_of_characters": 99334, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.07421875, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 272.94921875, + "max_sentence2_length": 1307, + "unique_sentence2": 256 + }, + "heg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99334, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 272.94921875, + "max_sentence1_length": 1307, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.07421875, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "eng_Latn-hin_Deva": { + "num_samples": 256, + "number_of_characters": 57607, 
+ "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 17, + "average_sentence2_length": 112.6328125, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "hin_Deva-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57607, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 112.6328125, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-hix_Latn": { + "num_samples": 256, + "number_of_characters": 111196, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.28515625, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 326.07421875, + "max_sentence2_length": 1365, + "unique_sentence2": 255 + }, + "hix_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 111196, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 326.07421875, + "max_sentence1_length": 1365, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 108.28515625, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-hla_Latn": { + "num_samples": 256, + "number_of_characters": 84151, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.8203125, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 215.89453125, + "max_sentence2_length": 721, + "unique_sentence2": 256 + }, + "hla_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84151, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 215.89453125, + "max_sentence1_length": 721, + 
"unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.8203125, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "eng_Latn-hlt_Latn": { + "num_samples": 256, + "number_of_characters": 77827, + "unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 41, + "average_sentence2_length": 158.6796875, + "max_sentence2_length": 362, + "unique_sentence2": 254 + }, + "hlt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77827, + "unique_pairs": 254, + "min_sentence1_length": 41, + "average_sentence1_length": 158.6796875, + "max_sentence1_length": 362, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-hmo_Latn": { + "num_samples": 256, + "number_of_characters": 78095, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 189.68359375, + "max_sentence2_length": 389, + "unique_sentence2": 256 + }, + "hmo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78095, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 189.68359375, + "max_sentence1_length": 389, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-hns_Latn": { + "num_samples": 256, + "number_of_characters": 74854, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.45703125, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 179.94140625, + "max_sentence2_length": 917, + "unique_sentence2": 256 + 
}, + "hns_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74854, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 179.94140625, + "max_sentence1_length": 917, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.45703125, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-hop_Latn": { + "num_samples": 256, + "number_of_characters": 65844, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.4609375, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 145.7421875, + "max_sentence2_length": 568, + "unique_sentence2": 256 + }, + "hop_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65844, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 145.7421875, + "max_sentence1_length": 568, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.4609375, + "max_sentence2_length": 239, + "unique_sentence2": 256 + }, + "eng_Latn-hot_Latn": { + "num_samples": 256, + "number_of_characters": 71847, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.5078125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 172.14453125, + "max_sentence2_length": 638, + "unique_sentence2": 256 + }, + "hot_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71847, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 172.14453125, + "max_sentence1_length": 638, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.5078125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-hrv_Latn": { + "num_samples": 256, + "number_of_characters": 51252, + "unique_pairs": 256, + "min_sentence1_length": 24, + 
"average_sentence1_length": 112.0703125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 1, + "average_sentence2_length": 88.1328125, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "hrv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 51252, + "unique_pairs": 256, + "min_sentence1_length": 1, + "average_sentence1_length": 88.1328125, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.0703125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-hto_Latn": { + "num_samples": 256, + "number_of_characters": 81912, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.77734375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 206.19140625, + "max_sentence2_length": 990, + "unique_sentence2": 256 + }, + "hto_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81912, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 206.19140625, + "max_sentence1_length": 990, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.77734375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-hub_Latn": { + "num_samples": 256, + "number_of_characters": 76256, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.31640625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 189.55859375, + "max_sentence2_length": 946, + "unique_sentence2": 256 + }, + "hub_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76256, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 189.55859375, + "max_sentence1_length": 946, + "unique_sentence1": 256, + "min_sentence2_length": 24, + 
"average_sentence2_length": 108.31640625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-hui_Latn": { + "num_samples": 256, + "number_of_characters": 82305, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 108.33203125, + "max_sentence1_length": 242, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 213.171875, + "max_sentence2_length": 597, + "unique_sentence2": 256 + }, + "hui_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82305, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 213.171875, + "max_sentence1_length": 597, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 108.33203125, + "max_sentence2_length": 242, + "unique_sentence2": 256 + }, + "eng_Latn-hun_Latn": { + "num_samples": 256, + "number_of_characters": 57058, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4453125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.4375, + "max_sentence2_length": 305, + "unique_sentence2": 256 + }, + "hun_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57058, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.4375, + "max_sentence1_length": 305, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4453125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-hus_Latn": { + "num_samples": 256, + "number_of_characters": 92588, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 60, + "average_sentence2_length": 214.11328125, + "max_sentence2_length": 558, + "unique_sentence2": 255 + }, + "hus_Latn-eng_Latn": { + "num_samples": 256, + 
"number_of_characters": 92588, + "unique_pairs": 255, + "min_sentence1_length": 60, + "average_sentence1_length": 214.11328125, + "max_sentence1_length": 558, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-huu_Latn": { + "num_samples": 256, + "number_of_characters": 77703, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.05859375, + "max_sentence1_length": 228, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 193.46875, + "max_sentence2_length": 812, + "unique_sentence2": 256 + }, + "huu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77703, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 193.46875, + "max_sentence1_length": 812, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.05859375, + "max_sentence2_length": 228, + "unique_sentence2": 256 + }, + "eng_Latn-huv_Latn": { + "num_samples": 256, + "number_of_characters": 73889, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 173.2578125, + "max_sentence2_length": 400, + "unique_sentence2": 256 + }, + "huv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73889, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 173.2578125, + "max_sentence1_length": 400, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-hvn_Latn": { + "num_samples": 126, + "number_of_characters": 41044, + "unique_pairs": 125, + "min_sentence1_length": 45, + "average_sentence1_length": 113.0952380952381, + 
"max_sentence1_length": 257, + "unique_sentence1": 125, + "min_sentence2_length": 61, + "average_sentence2_length": 212.65079365079364, + "max_sentence2_length": 763, + "unique_sentence2": 125 + }, + "hvn_Latn-eng_Latn": { + "num_samples": 126, + "number_of_characters": 41044, + "unique_pairs": 125, + "min_sentence1_length": 61, + "average_sentence1_length": 212.65079365079364, + "max_sentence1_length": 763, + "unique_sentence1": 125, + "min_sentence2_length": 45, + "average_sentence2_length": 113.0952380952381, + "max_sentence2_length": 257, + "unique_sentence2": 125 + }, + "eng_Latn-ian_Latn": { + "num_samples": 256, + "number_of_characters": 87736, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.0234375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 230.6953125, + "max_sentence2_length": 718, + "unique_sentence2": 256 + }, + "ian_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87736, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 230.6953125, + "max_sentence1_length": 718, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.0234375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ign_Latn": { + "num_samples": 256, + "number_of_characters": 122093, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.6796875, + "max_sentence1_length": 271, + "unique_sentence1": 256, + "min_sentence2_length": 58, + "average_sentence2_length": 362.24609375, + "max_sentence2_length": 1229, + "unique_sentence2": 256 + }, + "ign_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 122093, + "unique_pairs": 256, + "min_sentence1_length": 58, + "average_sentence1_length": 362.24609375, + "max_sentence1_length": 1229, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.6796875, 
+ "max_sentence2_length": 271, + "unique_sentence2": 256 + }, + "eng_Latn-ikk_Latn": { + "num_samples": 256, + "number_of_characters": 62154, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 130.39453125, + "max_sentence2_length": 338, + "unique_sentence2": 256 + }, + "ikk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62154, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 130.39453125, + "max_sentence1_length": 338, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ikw_Latn": { + "num_samples": 256, + "number_of_characters": 58846, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 117.47265625, + "max_sentence2_length": 288, + "unique_sentence2": 256 + }, + "ikw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 58846, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 117.47265625, + "max_sentence1_length": 288, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ilo_Latn": { + "num_samples": 256, + "number_of_characters": 64722, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 140.42578125, + "max_sentence2_length": 314, + "unique_sentence2": 256 + }, + "ilo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64722, + 
"unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 140.42578125, + "max_sentence1_length": 314, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-imo_Latn": { + "num_samples": 256, + "number_of_characters": 106217, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.90234375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 61, + "average_sentence2_length": 304.0078125, + "max_sentence2_length": 1169, + "unique_sentence2": 256 + }, + "imo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 106217, + "unique_pairs": 256, + "min_sentence1_length": 61, + "average_sentence1_length": 304.0078125, + "max_sentence1_length": 1169, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.90234375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-inb_Latn": { + "num_samples": 256, + "number_of_characters": 79253, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.0390625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 51, + "average_sentence2_length": 195.54296875, + "max_sentence2_length": 520, + "unique_sentence2": 256 + }, + "inb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79253, + "unique_pairs": 256, + "min_sentence1_length": 51, + "average_sentence1_length": 195.54296875, + "max_sentence1_length": 520, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.0390625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ind_Latn": { + "num_samples": 256, + "number_of_characters": 83986, + "unique_pairs": 251, + "min_sentence1_length": 35, + "average_sentence1_length": 146.52734375, + "max_sentence1_length": 341, + 
"unique_sentence1": 245, + "min_sentence2_length": 41, + "average_sentence2_length": 181.54296875, + "max_sentence2_length": 504, + "unique_sentence2": 251 + }, + "ind_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83986, + "unique_pairs": 251, + "min_sentence1_length": 41, + "average_sentence1_length": 181.54296875, + "max_sentence1_length": 504, + "unique_sentence1": 251, + "min_sentence2_length": 35, + "average_sentence2_length": 146.52734375, + "max_sentence2_length": 341, + "unique_sentence2": 245 + }, + "eng_Latn-ino_Latn": { + "num_samples": 256, + "number_of_characters": 104722, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.29296875, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 54, + "average_sentence2_length": 299.77734375, + "max_sentence2_length": 940, + "unique_sentence2": 256 + }, + "ino_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 104722, + "unique_pairs": 256, + "min_sentence1_length": 54, + "average_sentence1_length": 299.77734375, + "max_sentence1_length": 940, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.29296875, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-iou_Latn": { + "num_samples": 256, + "number_of_characters": 84753, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.2890625, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 218.77734375, + "max_sentence2_length": 810, + "unique_sentence2": 256 + }, + "iou_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84753, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 218.77734375, + "max_sentence1_length": 810, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.2890625, + "max_sentence2_length": 239, + 
"unique_sentence2": 256 + }, + "eng_Latn-ipi_Latn": { + "num_samples": 256, + "number_of_characters": 182068, + "unique_pairs": 255, + "min_sentence1_length": 24, + "average_sentence1_length": 116.83203125, + "max_sentence1_length": 273, + "unique_sentence1": 255, + "min_sentence2_length": 113, + "average_sentence2_length": 594.37109375, + "max_sentence2_length": 1750, + "unique_sentence2": 255 + }, + "ipi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 182068, + "unique_pairs": 255, + "min_sentence1_length": 113, + "average_sentence1_length": 594.37109375, + "max_sentence1_length": 1750, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 116.83203125, + "max_sentence2_length": 273, + "unique_sentence2": 255 + }, + "eng_Latn-isn_Latn": { + "num_samples": 256, + "number_of_characters": 60001, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 121.984375, + "max_sentence2_length": 325, + "unique_sentence2": 256 + }, + "isn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60001, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 121.984375, + "max_sentence1_length": 325, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ita_Latn": { + "num_samples": 256, + "number_of_characters": 75118, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 146.515625, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 40, + "average_sentence2_length": 146.9140625, + "max_sentence2_length": 381, + "unique_sentence2": 255 + }, + "ita_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75118, + "unique_pairs": 255, + 
"min_sentence1_length": 40, + "average_sentence1_length": 146.9140625, + "max_sentence1_length": 381, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 146.515625, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-iws_Latn": { + "num_samples": 256, + "number_of_characters": 104782, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 296.91015625, + "max_sentence2_length": 854, + "unique_sentence2": 256 + }, + "iws_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 104782, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 296.91015625, + "max_sentence1_length": 854, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ixl_Latn": { + "num_samples": 256, + "number_of_characters": 100093, + "unique_pairs": 254, + "min_sentence1_length": 35, + "average_sentence1_length": 146.31640625, + "max_sentence1_length": 341, + "unique_sentence1": 253, + "min_sentence2_length": 47, + "average_sentence2_length": 244.671875, + "max_sentence2_length": 565, + "unique_sentence2": 254 + }, + "ixl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 100093, + "unique_pairs": 254, + "min_sentence1_length": 47, + "average_sentence1_length": 244.671875, + "max_sentence1_length": 565, + "unique_sentence1": 254, + "min_sentence2_length": 35, + "average_sentence2_length": 146.31640625, + "max_sentence2_length": 341, + "unique_sentence2": 253 + }, + "eng_Latn-jac_Latn": { + "num_samples": 256, + "number_of_characters": 76567, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 116.2421875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + 
"min_sentence2_length": 34, + "average_sentence2_length": 182.84765625, + "max_sentence2_length": 573, + "unique_sentence2": 256 + }, + "jac_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76567, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 182.84765625, + "max_sentence1_length": 573, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 116.2421875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-jae_Latn": { + "num_samples": 256, + "number_of_characters": 61746, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.0625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 127.1328125, + "max_sentence2_length": 294, + "unique_sentence2": 256 + }, + "jae_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61746, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 127.1328125, + "max_sentence1_length": 294, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.0625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-jao_Latn": { + "num_samples": 127, + "number_of_characters": 67440, + "unique_pairs": 127, + "min_sentence1_length": 34, + "average_sentence1_length": 116.4724409448819, + "max_sentence1_length": 248, + "unique_sentence1": 127, + "min_sentence2_length": 59, + "average_sentence2_length": 414.5511811023622, + "max_sentence2_length": 1292, + "unique_sentence2": 127 + }, + "jao_Latn-eng_Latn": { + "num_samples": 127, + "number_of_characters": 67440, + "unique_pairs": 127, + "min_sentence1_length": 59, + "average_sentence1_length": 414.5511811023622, + "max_sentence1_length": 1292, + "unique_sentence1": 127, + "min_sentence2_length": 34, + "average_sentence2_length": 116.4724409448819, + "max_sentence2_length": 248, + "unique_sentence2": 127 + }, 
+ "eng_Latn-jic_Latn": { + "num_samples": 256, + "number_of_characters": 89796, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.5078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 236.2578125, + "max_sentence2_length": 691, + "unique_sentence2": 256 + }, + "jic_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89796, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 236.2578125, + "max_sentence1_length": 691, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.5078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-jid_Latn": { + "num_samples": 256, + "number_of_characters": 54301, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.1484375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 16, + "average_sentence2_length": 96.96484375, + "max_sentence2_length": 269, + "unique_sentence2": 256 + }, + "jid_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 54301, + "unique_pairs": 256, + "min_sentence1_length": 16, + "average_sentence1_length": 96.96484375, + "max_sentence1_length": 269, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.1484375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-jiv_Latn": { + "num_samples": 256, + "number_of_characters": 73161, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.49609375, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 175.2890625, + "max_sentence2_length": 584, + "unique_sentence2": 256 + }, + "jiv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73161, + "unique_pairs": 256, + "min_sentence1_length": 38, + 
"average_sentence1_length": 175.2890625, + "max_sentence1_length": 584, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.49609375, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-jni_Latn": { + "num_samples": 256, + "number_of_characters": 57522, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 111.0703125, + "max_sentence2_length": 255, + "unique_sentence2": 256 + }, + "jni_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57522, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 111.0703125, + "max_sentence1_length": 255, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-jpn_Jpan": { + "num_samples": 256, + "number_of_characters": 43036, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.05859375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 13, + "average_sentence2_length": 57.05078125, + "max_sentence2_length": 116, + "unique_sentence2": 256 + }, + "jpn_Jpan-eng_Latn": { + "num_samples": 256, + "number_of_characters": 43036, + "unique_pairs": 256, + "min_sentence1_length": 13, + "average_sentence1_length": 57.05078125, + "max_sentence1_length": 116, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.05859375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-jvn_Latn": { + "num_samples": 256, + "number_of_characters": 87874, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 112.91015625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 
230.34765625, + "max_sentence2_length": 904, + "unique_sentence2": 256 + }, + "jvn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87874, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 230.34765625, + "max_sentence1_length": 904, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 112.91015625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kan_Knda": { + "num_samples": 256, + "number_of_characters": 61165, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.9609375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 126.96484375, + "max_sentence2_length": 407, + "unique_sentence2": 256 + }, + "kan_Knda-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61165, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 126.96484375, + "max_sentence1_length": 407, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.9609375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kaq_Latn": { + "num_samples": 256, + "number_of_characters": 72285, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.203125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 171.16015625, + "max_sentence2_length": 551, + "unique_sentence2": 256 + }, + "kaq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72285, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 171.16015625, + "max_sentence1_length": 551, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.203125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kbc_Latn": { + "num_samples": 256, + "number_of_characters": 
97126, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.796875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 51, + "average_sentence2_length": 264.6015625, + "max_sentence2_length": 948, + "unique_sentence2": 256 + }, + "kbc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97126, + "unique_pairs": 256, + "min_sentence1_length": 51, + "average_sentence1_length": 264.6015625, + "max_sentence1_length": 948, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.796875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kbh_Latn": { + "num_samples": 256, + "number_of_characters": 93237, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.74609375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 253.4609375, + "max_sentence2_length": 903, + "unique_sentence2": 256 + }, + "kbh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93237, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 253.4609375, + "max_sentence1_length": 903, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.74609375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kbm_Latn": { + "num_samples": 256, + "number_of_characters": 81788, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.171875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 209.3125, + "max_sentence2_length": 748, + "unique_sentence2": 256 + }, + "kbm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81788, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 209.3125, + "max_sentence1_length": 748, + "unique_sentence1": 256, 
+ "min_sentence2_length": 24, + "average_sentence2_length": 110.171875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kbq_Latn": { + "num_samples": 256, + "number_of_characters": 68456, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.42578125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 151.98046875, + "max_sentence2_length": 381, + "unique_sentence2": 256 + }, + "kbq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68456, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 151.98046875, + "max_sentence1_length": 381, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.42578125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kdc_Latn": { + "num_samples": 256, + "number_of_characters": 61346, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 127.23828125, + "max_sentence2_length": 285, + "unique_sentence2": 256 + }, + "kdc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61346, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 127.23828125, + "max_sentence1_length": 285, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-kde_Latn": { + "num_samples": 256, + "number_of_characters": 68320, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.59375, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 155.28125, + "max_sentence2_length": 424, + "unique_sentence2": 255 + }, + 
"kde_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68320, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 155.28125, + "max_sentence1_length": 424, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 111.59375, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-kdl_Latn": { + "num_samples": 256, + "number_of_characters": 74999, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.4921875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 181.47265625, + "max_sentence2_length": 664, + "unique_sentence2": 256 + }, + "kdl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74999, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 181.47265625, + "max_sentence1_length": 664, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.4921875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kek_Latn": { + "num_samples": 256, + "number_of_characters": 76150, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 182.1640625, + "max_sentence2_length": 517, + "unique_sentence2": 256 + }, + "kek_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76150, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 182.1640625, + "max_sentence1_length": 517, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ken_Latn": { + "num_samples": 256, + "number_of_characters": 67584, + "unique_pairs": 256, + "min_sentence1_length": 24, + 
"average_sentence1_length": 111.41796875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 152.58203125, + "max_sentence2_length": 419, + "unique_sentence2": 256 + }, + "ken_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67584, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 152.58203125, + "max_sentence1_length": 419, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.41796875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kew_Latn": { + "num_samples": 256, + "number_of_characters": 74818, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.6328125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 182.625, + "max_sentence2_length": 531, + "unique_sentence2": 256 + }, + "kew_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74818, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 182.625, + "max_sentence1_length": 531, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.6328125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kgf_Latn": { + "num_samples": 256, + "number_of_characters": 66689, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.9609375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 150.54296875, + "max_sentence2_length": 389, + "unique_sentence2": 256 + }, + "kgf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66689, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 150.54296875, + "max_sentence1_length": 389, + "unique_sentence1": 256, + "min_sentence2_length": 24, + 
"average_sentence2_length": 109.9609375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kgk_Latn": { + "num_samples": 256, + "number_of_characters": 117007, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.0625, + "max_sentence1_length": 271, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 345.99609375, + "max_sentence2_length": 1240, + "unique_sentence2": 256 + }, + "kgk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 117007, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 345.99609375, + "max_sentence1_length": 1240, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.0625, + "max_sentence2_length": 271, + "unique_sentence2": 256 + }, + "eng_Latn-kgp_Latn": { + "num_samples": 256, + "number_of_characters": 74740, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.8984375, + "max_sentence1_length": 228, + "unique_sentence1": 256, + "min_sentence2_length": 17, + "average_sentence2_length": 180.0546875, + "max_sentence2_length": 529, + "unique_sentence2": 256 + }, + "kgp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74740, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 180.0546875, + "max_sentence1_length": 529, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.8984375, + "max_sentence2_length": 228, + "unique_sentence2": 256 + }, + "eng_Latn-khs_Latn": { + "num_samples": 256, + "number_of_characters": 99038, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.5859375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 52, + "average_sentence2_length": 277.28125, + "max_sentence2_length": 953, + "unique_sentence2": 256 + }, + "khs_Latn-eng_Latn": { + "num_samples": 256, + 
"number_of_characters": 99038, + "unique_pairs": 256, + "min_sentence1_length": 52, + "average_sentence1_length": 277.28125, + "max_sentence1_length": 953, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.5859375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-khz_Latn": { + "num_samples": 256, + "number_of_characters": 71876, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.4921875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 166.2734375, + "max_sentence2_length": 750, + "unique_sentence2": 256 + }, + "khz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71876, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 166.2734375, + "max_sentence1_length": 750, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.4921875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kik_Latn": { + "num_samples": 256, + "number_of_characters": 62010, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.34765625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 132.87890625, + "max_sentence2_length": 353, + "unique_sentence2": 256 + }, + "kik_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62010, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 132.87890625, + "max_sentence1_length": 353, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.34765625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kiw_Latn": { + "num_samples": 83, + "number_of_characters": 19848, + "unique_pairs": 83, + "min_sentence1_length": 45, + "average_sentence1_length": 105.37349397590361, + 
"max_sentence1_length": 245, + "unique_sentence1": 83, + "min_sentence2_length": 38, + "average_sentence2_length": 133.75903614457832, + "max_sentence2_length": 262, + "unique_sentence2": 83 + }, + "kiw_Latn-eng_Latn": { + "num_samples": 83, + "number_of_characters": 19848, + "unique_pairs": 83, + "min_sentence1_length": 38, + "average_sentence1_length": 133.75903614457832, + "max_sentence1_length": 262, + "unique_sentence1": 83, + "min_sentence2_length": 45, + "average_sentence2_length": 105.37349397590361, + "max_sentence2_length": 245, + "unique_sentence2": 83 + }, + "eng_Latn-kiz_Latn": { + "num_samples": 256, + "number_of_characters": 62935, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.8828125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 16, + "average_sentence2_length": 131.95703125, + "max_sentence2_length": 386, + "unique_sentence2": 256 + }, + "kiz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62935, + "unique_pairs": 256, + "min_sentence1_length": 16, + "average_sentence1_length": 131.95703125, + "max_sentence1_length": 386, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8828125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kje_Latn": { + "num_samples": 256, + "number_of_characters": 85569, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.15234375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 224.1015625, + "max_sentence2_length": 857, + "unique_sentence2": 256 + }, + "kje_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85569, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 224.1015625, + "max_sentence1_length": 857, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.15234375, + 
"max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kjs_Latn": { + "num_samples": 256, + "number_of_characters": 76152, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.6328125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 187.8359375, + "max_sentence2_length": 530, + "unique_sentence2": 256 + }, + "kjs_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76152, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 187.8359375, + "max_sentence1_length": 530, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.6328125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kkc_Latn": { + "num_samples": 256, + "number_of_characters": 88358, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.05859375, + "max_sentence1_length": 243, + "unique_sentence1": 254, + "min_sentence2_length": 33, + "average_sentence2_length": 231.08984375, + "max_sentence2_length": 836, + "unique_sentence2": 256 + }, + "kkc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88358, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 231.08984375, + "max_sentence1_length": 836, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.05859375, + "max_sentence2_length": 243, + "unique_sentence2": 254 + }, + "eng_Latn-kkl_Latn": { + "num_samples": 256, + "number_of_characters": 121932, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 111.1640625, + "max_sentence1_length": 243, + "unique_sentence1": 254, + "min_sentence2_length": 44, + "average_sentence2_length": 365.1328125, + "max_sentence2_length": 1372, + "unique_sentence2": 256 + }, + "kkl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 121932, + 
"unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 365.1328125, + "max_sentence1_length": 1372, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 111.1640625, + "max_sentence2_length": 243, + "unique_sentence2": 254 + }, + "eng_Latn-klt_Latn": { + "num_samples": 256, + "number_of_characters": 81192, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.5546875, + "max_sentence1_length": 243, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 203.6015625, + "max_sentence2_length": 765, + "unique_sentence2": 256 + }, + "klt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81192, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 203.6015625, + "max_sentence1_length": 765, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.5546875, + "max_sentence2_length": 243, + "unique_sentence2": 256 + }, + "eng_Latn-klv_Latn": { + "num_samples": 256, + "number_of_characters": 71510, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.8046875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 168.53125, + "max_sentence2_length": 465, + "unique_sentence2": 256 + }, + "klv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71510, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 168.53125, + "max_sentence1_length": 465, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8046875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kmg_Latn": { + "num_samples": 256, + "number_of_characters": 59356, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + 
"min_sentence2_length": 28, + "average_sentence2_length": 119.46484375, + "max_sentence2_length": 253, + "unique_sentence2": 256 + }, + "kmg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59356, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 119.46484375, + "max_sentence1_length": 253, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-kmh_Latn": { + "num_samples": 256, + "number_of_characters": 88089, + "unique_pairs": 256, + "min_sentence1_length": 1, + "average_sentence1_length": 145.40625, + "max_sentence1_length": 341, + "unique_sentence1": 255, + "min_sentence2_length": 37, + "average_sentence2_length": 198.69140625, + "max_sentence2_length": 544, + "unique_sentence2": 256 + }, + "kmh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88089, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 198.69140625, + "max_sentence1_length": 544, + "unique_sentence1": 256, + "min_sentence2_length": 1, + "average_sentence2_length": 145.40625, + "max_sentence2_length": 341, + "unique_sentence2": 255 + }, + "eng_Latn-kmk_Latn": { + "num_samples": 256, + "number_of_characters": 72246, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.44921875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 166.76171875, + "max_sentence2_length": 763, + "unique_sentence2": 256 + }, + "kmk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72246, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 166.76171875, + "max_sentence1_length": 763, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.44921875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + 
"eng_Latn-kmo_Latn": { + "num_samples": 256, + "number_of_characters": 79605, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 112.0859375, + "max_sentence1_length": 248, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 198.87109375, + "max_sentence2_length": 627, + "unique_sentence2": 256 + }, + "kmo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79605, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 198.87109375, + "max_sentence1_length": 627, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 112.0859375, + "max_sentence2_length": 248, + "unique_sentence2": 256 + }, + "eng_Latn-kms_Latn": { + "num_samples": 256, + "number_of_characters": 81253, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.30078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 203.09375, + "max_sentence2_length": 631, + "unique_sentence2": 256 + }, + "kms_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81253, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 203.09375, + "max_sentence1_length": 631, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.30078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kmu_Latn": { + "num_samples": 256, + "number_of_characters": 93633, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.50390625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 53, + "average_sentence2_length": 256.25, + "max_sentence2_length": 873, + "unique_sentence2": 256 + }, + "kmu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93633, + "unique_pairs": 256, + "min_sentence1_length": 53, + 
"average_sentence1_length": 256.25, + "max_sentence1_length": 873, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.50390625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kne_Latn": { + "num_samples": 256, + "number_of_characters": 76340, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.421875, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 187.78125, + "max_sentence2_length": 678, + "unique_sentence2": 256 + }, + "kne_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76340, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 187.78125, + "max_sentence1_length": 678, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.421875, + "max_sentence2_length": 239, + "unique_sentence2": 256 + }, + "eng_Latn-knf_Latn": { + "num_samples": 256, + "number_of_characters": 59366, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.7578125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 17, + "average_sentence2_length": 122.140625, + "max_sentence2_length": 328, + "unique_sentence2": 256 + }, + "knf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59366, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 122.140625, + "max_sentence1_length": 328, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.7578125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-knj_Latn": { + "num_samples": 256, + "number_of_characters": 77826, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.7578125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 193.25, 
+ "max_sentence2_length": 528, + "unique_sentence2": 256 + }, + "knj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77826, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 193.25, + "max_sentence1_length": 528, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.7578125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-knv_Latn": { + "num_samples": 256, + "number_of_characters": 117422, + "unique_pairs": 256, + "min_sentence1_length": 56, + "average_sentence1_length": 147.7265625, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 75, + "average_sentence2_length": 310.953125, + "max_sentence2_length": 789, + "unique_sentence2": 256 + }, + "knv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 117422, + "unique_pairs": 256, + "min_sentence1_length": 75, + "average_sentence1_length": 310.953125, + "max_sentence1_length": 789, + "unique_sentence1": 256, + "min_sentence2_length": 56, + "average_sentence2_length": 147.7265625, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-kos_Latn": { + "num_samples": 256, + "number_of_characters": 62039, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 129.9453125, + "max_sentence2_length": 322, + "unique_sentence2": 256 + }, + "kos_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62039, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 129.9453125, + "max_sentence1_length": 322, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-kpf_Latn": { + "num_samples": 256, + "number_of_characters": 64909, + 
"unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.43359375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 143.1171875, + "max_sentence2_length": 520, + "unique_sentence2": 256 + }, + "kpf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64909, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 143.1171875, + "max_sentence1_length": 520, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.43359375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kpg_Latn": { + "num_samples": 256, + "number_of_characters": 70883, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.41015625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 165.4765625, + "max_sentence2_length": 502, + "unique_sentence2": 256 + }, + "kpg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70883, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 165.4765625, + "max_sentence1_length": 502, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.41015625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kpj_Latn": { + "num_samples": 256, + "number_of_characters": 70346, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.46875, + "max_sentence1_length": 249, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 163.3203125, + "max_sentence2_length": 583, + "unique_sentence2": 256 + }, + "kpj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70346, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 163.3203125, + "max_sentence1_length": 583, + "unique_sentence1": 256, 
+ "min_sentence2_length": 24, + "average_sentence2_length": 111.46875, + "max_sentence2_length": 249, + "unique_sentence2": 256 + }, + "eng_Latn-kpr_Latn": { + "num_samples": 256, + "number_of_characters": 100685, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.37109375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 281.9296875, + "max_sentence2_length": 1215, + "unique_sentence2": 256 + }, + "kpr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 100685, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 281.9296875, + "max_sentence1_length": 1215, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.37109375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kpw_Latn": { + "num_samples": 256, + "number_of_characters": 80446, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.72265625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 204.51953125, + "max_sentence2_length": 617, + "unique_sentence2": 256 + }, + "kpw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80446, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 204.51953125, + "max_sentence1_length": 617, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.72265625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kpx_Latn": { + "num_samples": 256, + "number_of_characters": 75284, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.4765625, + "max_sentence1_length": 216, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 183.6015625, + "max_sentence2_length": 549, + "unique_sentence2": 256 + }, + 
"kpx_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75284, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 183.6015625, + "max_sentence1_length": 549, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.4765625, + "max_sentence2_length": 216, + "unique_sentence2": 256 + }, + "eng_Latn-kqa_Latn": { + "num_samples": 63, + "number_of_characters": 21102, + "unique_pairs": 63, + "min_sentence1_length": 42, + "average_sentence1_length": 105.74603174603175, + "max_sentence1_length": 257, + "unique_sentence1": 63, + "min_sentence2_length": 75, + "average_sentence2_length": 229.20634920634922, + "max_sentence2_length": 718, + "unique_sentence2": 63 + }, + "kqa_Latn-eng_Latn": { + "num_samples": 63, + "number_of_characters": 21102, + "unique_pairs": 63, + "min_sentence1_length": 75, + "average_sentence1_length": 229.20634920634922, + "max_sentence1_length": 718, + "unique_sentence1": 63, + "min_sentence2_length": 42, + "average_sentence2_length": 105.74603174603175, + "max_sentence2_length": 257, + "unique_sentence2": 63 + }, + "eng_Latn-kqc_Latn": { + "num_samples": 256, + "number_of_characters": 64803, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.890625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 137.24609375, + "max_sentence2_length": 357, + "unique_sentence2": 255 + }, + "kqc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64803, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 137.24609375, + "max_sentence1_length": 357, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 115.890625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kqf_Latn": { + "num_samples": 256, + "number_of_characters": 67325, + "unique_pairs": 256, + "min_sentence1_length": 38, + 
"average_sentence1_length": 114.31640625, + "max_sentence1_length": 257, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 148.671875, + "max_sentence2_length": 670, + "unique_sentence2": 256 + }, + "kqf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67325, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 148.671875, + "max_sentence1_length": 670, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 114.31640625, + "max_sentence2_length": 257, + "unique_sentence2": 256 + }, + "eng_Latn-kql_Latn": { + "num_samples": 140, + "number_of_characters": 38320, + "unique_pairs": 140, + "min_sentence1_length": 45, + "average_sentence1_length": 112.87857142857143, + "max_sentence1_length": 257, + "unique_sentence1": 139, + "min_sentence2_length": 34, + "average_sentence2_length": 160.8357142857143, + "max_sentence2_length": 501, + "unique_sentence2": 140 + }, + "kql_Latn-eng_Latn": { + "num_samples": 140, + "number_of_characters": 38320, + "unique_pairs": 140, + "min_sentence1_length": 34, + "average_sentence1_length": 160.8357142857143, + "max_sentence1_length": 501, + "unique_sentence1": 140, + "min_sentence2_length": 45, + "average_sentence2_length": 112.87857142857143, + "max_sentence2_length": 257, + "unique_sentence2": 139 + }, + "eng_Latn-kqw_Latn": { + "num_samples": 256, + "number_of_characters": 70886, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.9453125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 166.953125, + "max_sentence2_length": 437, + "unique_sentence2": 256 + }, + "kqw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70886, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 166.953125, + "max_sentence1_length": 437, + "unique_sentence1": 256, + "min_sentence2_length": 24, + 
"average_sentence2_length": 109.9453125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ksd_Latn": { + "num_samples": 256, + "number_of_characters": 64976, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 138.44140625, + "max_sentence2_length": 387, + "unique_sentence2": 256 + }, + "ksd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64976, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 138.44140625, + "max_sentence1_length": 387, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ksj_Latn": { + "num_samples": 256, + "number_of_characters": 68059, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.5078125, + "max_sentence1_length": 245, + "unique_sentence1": 254, + "min_sentence2_length": 43, + "average_sentence2_length": 153.34765625, + "max_sentence2_length": 471, + "unique_sentence2": 256 + }, + "ksj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68059, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 153.34765625, + "max_sentence1_length": 471, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.5078125, + "max_sentence2_length": 245, + "unique_sentence2": 254 + }, + "eng_Latn-ksr_Latn": { + "num_samples": 256, + "number_of_characters": 85498, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.6640625, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 223.3125, + "max_sentence2_length": 652, + "unique_sentence2": 256 + }, + "ksr_Latn-eng_Latn": { + "num_samples": 256, + 
"number_of_characters": 85498, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 223.3125, + "max_sentence1_length": 652, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6640625, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ktm_Latn": { + "num_samples": 256, + "number_of_characters": 70124, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 113.71484375, + "max_sentence1_length": 257, + "unique_sentence1": 255, + "min_sentence2_length": 36, + "average_sentence2_length": 160.20703125, + "max_sentence2_length": 676, + "unique_sentence2": 256 + }, + "ktm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70124, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 160.20703125, + "max_sentence1_length": 676, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 113.71484375, + "max_sentence2_length": 257, + "unique_sentence2": 255 + }, + "eng_Latn-kto_Latn": { + "num_samples": 256, + "number_of_characters": 92436, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.33984375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 250.73828125, + "max_sentence2_length": 1025, + "unique_sentence2": 256 + }, + "kto_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92436, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 250.73828125, + "max_sentence1_length": 1025, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.33984375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kud_Latn": { + "num_samples": 256, + "number_of_characters": 71861, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 116.34765625, + 
"max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 164.359375, + "max_sentence2_length": 449, + "unique_sentence2": 256 + }, + "kud_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71861, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 164.359375, + "max_sentence1_length": 449, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 116.34765625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kue_Latn": { + "num_samples": 256, + "number_of_characters": 72720, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 52, + "average_sentence2_length": 171.66796875, + "max_sentence2_length": 416, + "unique_sentence2": 256 + }, + "kue_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72720, + "unique_pairs": 256, + "min_sentence1_length": 52, + "average_sentence1_length": 171.66796875, + "max_sentence1_length": 416, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-kup_Latn": { + "num_samples": 256, + "number_of_characters": 103764, + "unique_pairs": 255, + "min_sentence1_length": 24, + "average_sentence1_length": 110.52734375, + "max_sentence1_length": 232, + "unique_sentence1": 255, + "min_sentence2_length": 72, + "average_sentence2_length": 294.80078125, + "max_sentence2_length": 801, + "unique_sentence2": 255 + }, + "kup_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 103764, + "unique_pairs": 255, + "min_sentence1_length": 72, + "average_sentence1_length": 294.80078125, + "max_sentence1_length": 801, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 110.52734375, + 
"max_sentence2_length": 232, + "unique_sentence2": 255 + }, + "eng_Latn-kvg_Latn": { + "num_samples": 256, + "number_of_characters": 74370, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.64453125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 179.86328125, + "max_sentence2_length": 422, + "unique_sentence2": 256 + }, + "kvg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74370, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 179.86328125, + "max_sentence1_length": 422, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.64453125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kvn_Latn": { + "num_samples": 256, + "number_of_characters": 75857, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.171875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 51, + "average_sentence2_length": 182.14453125, + "max_sentence2_length": 451, + "unique_sentence2": 256 + }, + "kvn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75857, + "unique_pairs": 256, + "min_sentence1_length": 51, + "average_sentence1_length": 182.14453125, + "max_sentence1_length": 451, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.171875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kwd_Latn": { + "num_samples": 256, + "number_of_characters": 85451, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.453125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 48, + "average_sentence2_length": 221.33984375, + "max_sentence2_length": 826, + "unique_sentence2": 256 + }, + "kwd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85451, + 
"unique_pairs": 256, + "min_sentence1_length": 48, + "average_sentence1_length": 221.33984375, + "max_sentence1_length": 826, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.453125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kwf_Latn": { + "num_samples": 256, + "number_of_characters": 83989, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.73046875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 214.3515625, + "max_sentence2_length": 719, + "unique_sentence2": 256 + }, + "kwf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83989, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 214.3515625, + "max_sentence1_length": 719, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.73046875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kwi_Latn": { + "num_samples": 256, + "number_of_characters": 74914, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.2109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 178.421875, + "max_sentence2_length": 526, + "unique_sentence2": 256 + }, + "kwi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74914, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 178.421875, + "max_sentence1_length": 526, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.2109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kwj_Latn": { + "num_samples": 256, + "number_of_characters": 84588, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.15234375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + 
"min_sentence2_length": 49, + "average_sentence2_length": 220.26953125, + "max_sentence2_length": 697, + "unique_sentence2": 256 + }, + "kwj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84588, + "unique_pairs": 256, + "min_sentence1_length": 49, + "average_sentence1_length": 220.26953125, + "max_sentence1_length": 697, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.15234375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kyc_Latn": { + "num_samples": 256, + "number_of_characters": 75954, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 114.67578125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 182.01953125, + "max_sentence2_length": 766, + "unique_sentence2": 256 + }, + "kyc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75954, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 182.01953125, + "max_sentence1_length": 766, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 114.67578125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-kyf_Latn": { + "num_samples": 256, + "number_of_characters": 65747, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4921875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 143.33203125, + "max_sentence2_length": 476, + "unique_sentence2": 256 + }, + "kyf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65747, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 143.33203125, + "max_sentence1_length": 476, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4921875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + 
"eng_Latn-kyg_Latn": { + "num_samples": 256, + "number_of_characters": 88349, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.09375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 56, + "average_sentence2_length": 234.01953125, + "max_sentence2_length": 726, + "unique_sentence2": 256 + }, + "kyg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88349, + "unique_pairs": 256, + "min_sentence1_length": 56, + "average_sentence1_length": 234.01953125, + "max_sentence1_length": 726, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.09375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kyq_Latn": { + "num_samples": 256, + "number_of_characters": 70837, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.11328125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 166.59375, + "max_sentence2_length": 586, + "unique_sentence2": 255 + }, + "kyq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70837, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 166.59375, + "max_sentence1_length": 586, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 110.11328125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-kyz_Latn": { + "num_samples": 256, + "number_of_characters": 133151, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 113.44921875, + "max_sentence1_length": 246, + "unique_sentence1": 256, + "min_sentence2_length": 50, + "average_sentence2_length": 406.671875, + "max_sentence2_length": 1885, + "unique_sentence2": 256 + }, + "kyz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 133151, + "unique_pairs": 256, + "min_sentence1_length": 50, + 
"average_sentence1_length": 406.671875, + "max_sentence1_length": 1885, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 113.44921875, + "max_sentence2_length": 246, + "unique_sentence2": 256 + }, + "eng_Latn-kze_Latn": { + "num_samples": 256, + "number_of_characters": 81619, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.52734375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 210.296875, + "max_sentence2_length": 632, + "unique_sentence2": 256 + }, + "kze_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81619, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 210.296875, + "max_sentence1_length": 632, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.52734375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-lac_Latn": { + "num_samples": 256, + "number_of_characters": 93123, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 112.69921875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 251.0625, + "max_sentence2_length": 821, + "unique_sentence2": 256 + }, + "lac_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93123, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 251.0625, + "max_sentence1_length": 821, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 112.69921875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-lat_Latn": { + "num_samples": 256, + "number_of_characters": 59581, + "unique_pairs": 256, + "min_sentence1_length": 20, + "average_sentence1_length": 122.43359375, + "max_sentence1_length": 422, + "unique_sentence1": 256, + "min_sentence2_length": 20, + 
"average_sentence2_length": 110.3046875, + "max_sentence2_length": 365, + "unique_sentence2": 256 + }, + "lat_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59581, + "unique_pairs": 256, + "min_sentence1_length": 20, + "average_sentence1_length": 110.3046875, + "max_sentence1_length": 365, + "unique_sentence1": 256, + "min_sentence2_length": 20, + "average_sentence2_length": 122.43359375, + "max_sentence2_length": 422, + "unique_sentence2": 256 + }, + "eng_Latn-lbb_Latn": { + "num_samples": 256, + "number_of_characters": 71416, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.8828125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 168.0859375, + "max_sentence2_length": 456, + "unique_sentence2": 256 + }, + "lbb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71416, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 168.0859375, + "max_sentence1_length": 456, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8828125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-lbk_Latn": { + "num_samples": 256, + "number_of_characters": 72807, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.3125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 171.08984375, + "max_sentence2_length": 529, + "unique_sentence2": 256 + }, + "lbk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72807, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 171.08984375, + "max_sentence1_length": 529, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.3125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-lcm_Latn": { + "num_samples": 256, + 
"number_of_characters": 68451, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 154.9921875, + "max_sentence2_length": 347, + "unique_sentence2": 256 + }, + "lcm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68451, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 154.9921875, + "max_sentence1_length": 347, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-leu_Latn": { + "num_samples": 256, + "number_of_characters": 78014, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.5546875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 189.1875, + "max_sentence2_length": 676, + "unique_sentence2": 256 + }, + "leu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78014, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 189.1875, + "max_sentence1_length": 676, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.5546875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-lex_Latn": { + "num_samples": 256, + "number_of_characters": 92773, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.51171875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 247.8828125, + "max_sentence2_length": 675, + "unique_sentence2": 256 + }, + "lex_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92773, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 247.8828125, + "max_sentence1_length": 675, 
+ "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.51171875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-lgl_Latn": { + "num_samples": 256, + "number_of_characters": 77049, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.73046875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 187.2421875, + "max_sentence2_length": 651, + "unique_sentence2": 256 + }, + "lgl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77049, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 187.2421875, + "max_sentence1_length": 651, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.73046875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-lid_Latn": { + "num_samples": 256, + "number_of_characters": 90128, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.03515625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 241.02734375, + "max_sentence2_length": 922, + "unique_sentence2": 256 + }, + "lid_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90128, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 241.02734375, + "max_sentence1_length": 922, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.03515625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-lif_Deva": { + "num_samples": 256, + "number_of_characters": 99454, + "unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 65, + "average_sentence2_length": 243.16015625, + "max_sentence2_length": 598, + 
"unique_sentence2": 254 + }, + "lif_Deva-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99454, + "unique_pairs": 254, + "min_sentence1_length": 65, + "average_sentence1_length": 243.16015625, + "max_sentence1_length": 598, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-lin_Latn": { + "num_samples": 256, + "number_of_characters": 64220, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 138.46484375, + "max_sentence2_length": 325, + "unique_sentence2": 256 + }, + "lin_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64220, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 138.46484375, + "max_sentence1_length": 325, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-lit_Latn": { + "num_samples": 256, + "number_of_characters": 56899, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.9921875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 108.26953125, + "max_sentence2_length": 264, + "unique_sentence2": 256 + }, + "lit_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 56899, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 108.26953125, + "max_sentence1_length": 264, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.9921875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-llg_Latn": { + "num_samples": 256, + "number_of_characters": 99325, + "unique_pairs": 255, + 
"min_sentence1_length": 24, + "average_sentence1_length": 114.48046875, + "max_sentence1_length": 251, + "unique_sentence1": 255, + "min_sentence2_length": 28, + "average_sentence2_length": 273.5078125, + "max_sentence2_length": 1339, + "unique_sentence2": 255 + }, + "llg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99325, + "unique_pairs": 255, + "min_sentence1_length": 28, + "average_sentence1_length": 273.5078125, + "max_sentence1_length": 1339, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 114.48046875, + "max_sentence2_length": 251, + "unique_sentence2": 255 + }, + "eng_Latn-lug_Latn": { + "num_samples": 256, + "number_of_characters": 59095, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 118.4453125, + "max_sentence2_length": 288, + "unique_sentence2": 256 + }, + "lug_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59095, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 118.4453125, + "max_sentence1_length": 288, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-luo_Latn": { + "num_samples": 256, + "number_of_characters": 59092, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 115.45703125, + "max_sentence2_length": 312, + "unique_sentence2": 256 + }, + "luo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59092, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 115.45703125, + "max_sentence1_length": 312, + "unique_sentence1": 256, + 
"min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-lww_Latn": { + "num_samples": 256, + "number_of_characters": 95919, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.66015625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 260.0234375, + "max_sentence2_length": 992, + "unique_sentence2": 256 + }, + "lww_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 95919, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 260.0234375, + "max_sentence1_length": 992, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.66015625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-maa_Latn": { + "num_samples": 256, + "number_of_characters": 75650, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.65234375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 181.85546875, + "max_sentence2_length": 479, + "unique_sentence2": 256 + }, + "maa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75650, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 181.85546875, + "max_sentence1_length": 479, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.65234375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-maj_Latn": { + "num_samples": 256, + "number_of_characters": 70081, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 161.359375, + "max_sentence2_length": 353, + "unique_sentence2": 256 + }, + 
"maj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70081, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 161.359375, + "max_sentence1_length": 353, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-mal_Mlym": { + "num_samples": 256, + "number_of_characters": 77790, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 147.140625, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 52, + "average_sentence2_length": 156.7265625, + "max_sentence2_length": 376, + "unique_sentence2": 255 + }, + "mal_Mlym-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77790, + "unique_pairs": 255, + "min_sentence1_length": 52, + "average_sentence1_length": 156.7265625, + "max_sentence1_length": 376, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 147.140625, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-mam_Latn": { + "num_samples": 256, + "number_of_characters": 82515, + "unique_pairs": 255, + "min_sentence1_length": 35, + "average_sentence1_length": 145.8828125, + "max_sentence1_length": 341, + "unique_sentence1": 252, + "min_sentence2_length": 46, + "average_sentence2_length": 176.44140625, + "max_sentence2_length": 399, + "unique_sentence2": 255 + }, + "mam_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82515, + "unique_pairs": 255, + "min_sentence1_length": 46, + "average_sentence1_length": 176.44140625, + "max_sentence1_length": 399, + "unique_sentence1": 255, + "min_sentence2_length": 35, + "average_sentence2_length": 145.8828125, + "max_sentence2_length": 341, + "unique_sentence2": 252 + }, + "eng_Latn-maq_Latn": { + "num_samples": 256, + "number_of_characters": 77694, + "unique_pairs": 256, + "min_sentence1_length": 24, + 
"average_sentence1_length": 109.89453125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 193.59765625, + "max_sentence2_length": 460, + "unique_sentence2": 256 + }, + "maq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77694, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 193.59765625, + "max_sentence1_length": 460, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.89453125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mar_Deva": { + "num_samples": 256, + "number_of_characters": 58184, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.921875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 118.359375, + "max_sentence2_length": 295, + "unique_sentence2": 256 + }, + "mar_Deva-eng_Latn": { + "num_samples": 256, + "number_of_characters": 58184, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 118.359375, + "max_sentence1_length": 295, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.921875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mau_Latn": { + "num_samples": 256, + "number_of_characters": 73001, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 169.7890625, + "max_sentence2_length": 442, + "unique_sentence2": 256 + }, + "mau_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73001, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 169.7890625, + "max_sentence1_length": 442, + "unique_sentence1": 256, + "min_sentence2_length": 24, + 
"average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mav_Latn": { + "num_samples": 256, + "number_of_characters": 115246, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 337.78515625, + "max_sentence2_length": 1352, + "unique_sentence2": 256 + }, + "mav_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 115246, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 337.78515625, + "max_sentence1_length": 1352, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-maz_Latn": { + "num_samples": 256, + "number_of_characters": 86514, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.00390625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 224.94140625, + "max_sentence2_length": 949, + "unique_sentence2": 256 + }, + "maz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 86514, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 224.94140625, + "max_sentence1_length": 949, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.00390625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mbb_Latn": { + "num_samples": 256, + "number_of_characters": 96246, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 128.66015625, + "max_sentence1_length": 422, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 247.30078125, + "max_sentence2_length": 815, + "unique_sentence2": 256 + }, + "mbb_Latn-eng_Latn": { + 
"num_samples": 256, + "number_of_characters": 96246, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 247.30078125, + "max_sentence1_length": 815, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 128.66015625, + "max_sentence2_length": 422, + "unique_sentence2": 256 + }, + "eng_Latn-mbc_Latn": { + "num_samples": 256, + "number_of_characters": 89184, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.953125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 51, + "average_sentence2_length": 235.421875, + "max_sentence2_length": 640, + "unique_sentence2": 256 + }, + "mbc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89184, + "unique_pairs": 256, + "min_sentence1_length": 51, + "average_sentence1_length": 235.421875, + "max_sentence1_length": 640, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.953125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-mbh_Latn": { + "num_samples": 256, + "number_of_characters": 83848, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.6328125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 216.8984375, + "max_sentence2_length": 828, + "unique_sentence2": 256 + }, + "mbh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83848, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 216.8984375, + "max_sentence1_length": 828, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6328125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-mbj_Latn": { + "num_samples": 256, + "number_of_characters": 85975, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 116.0703125, + 
"max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 219.76953125, + "max_sentence2_length": 796, + "unique_sentence2": 256 + }, + "mbj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85975, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 219.76953125, + "max_sentence1_length": 796, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 116.0703125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mbl_Latn": { + "num_samples": 256, + "number_of_characters": 72932, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.20703125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 169.68359375, + "max_sentence2_length": 515, + "unique_sentence2": 256 + }, + "mbl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72932, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 169.68359375, + "max_sentence1_length": 515, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.20703125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mbs_Latn": { + "num_samples": 256, + "number_of_characters": 79065, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.09765625, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 196.75, + "max_sentence2_length": 742, + "unique_sentence2": 256 + }, + "mbs_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79065, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 196.75, + "max_sentence1_length": 742, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.09765625, + "max_sentence2_length": 
376, + "unique_sentence2": 256 + }, + "eng_Latn-mbt_Latn": { + "num_samples": 256, + "number_of_characters": 71671, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.73046875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 168.234375, + "max_sentence2_length": 461, + "unique_sentence2": 256 + }, + "mbt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71671, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 168.234375, + "max_sentence1_length": 461, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.73046875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mca_Latn": { + "num_samples": 256, + "number_of_characters": 78530, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 191.4609375, + "max_sentence2_length": 448, + "unique_sentence2": 256 + }, + "mca_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78530, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 191.4609375, + "max_sentence1_length": 448, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mcb_Latn": { + "num_samples": 256, + "number_of_characters": 93643, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.671875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 250.12109375, + "max_sentence2_length": 799, + "unique_sentence2": 256 + }, + "mcb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93643, + "unique_pairs": 256, + 
"min_sentence1_length": 36, + "average_sentence1_length": 250.12109375, + "max_sentence1_length": 799, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.671875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mcd_Latn": { + "num_samples": 256, + "number_of_characters": 117035, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.93359375, + "max_sentence1_length": 271, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 341.234375, + "max_sentence2_length": 1750, + "unique_sentence2": 256 + }, + "mcd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 117035, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 341.234375, + "max_sentence1_length": 1750, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.93359375, + "max_sentence2_length": 271, + "unique_sentence2": 256 + }, + "eng_Latn-mcf_Latn": { + "num_samples": 256, + "number_of_characters": 89852, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.546875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 238.4375, + "max_sentence2_length": 842, + "unique_sentence2": 256 + }, + "mcf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89852, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 238.4375, + "max_sentence1_length": 842, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.546875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mco_Latn": { + "num_samples": 256, + "number_of_characters": 91293, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 112.02734375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 
34, + "average_sentence2_length": 244.5859375, + "max_sentence2_length": 778, + "unique_sentence2": 256 + }, + "mco_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 91293, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 244.5859375, + "max_sentence1_length": 778, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 112.02734375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mcp_Latn": { + "num_samples": 256, + "number_of_characters": 68564, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 51, + "average_sentence2_length": 155.43359375, + "max_sentence2_length": 476, + "unique_sentence2": 256 + }, + "mcp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68564, + "unique_pairs": 256, + "min_sentence1_length": 51, + "average_sentence1_length": 155.43359375, + "max_sentence1_length": 476, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-mcq_Latn": { + "num_samples": 256, + "number_of_characters": 65402, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 142.0390625, + "max_sentence2_length": 379, + "unique_sentence2": 256 + }, + "mcq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65402, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 142.0390625, + "max_sentence1_length": 379, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mcr_Latn": { + "num_samples": 256, 
+ "number_of_characters": 91248, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 110.4453125, + "max_sentence1_length": 225, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 245.9921875, + "max_sentence2_length": 845, + "unique_sentence2": 256 + }, + "mcr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 91248, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 245.9921875, + "max_sentence1_length": 845, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 110.4453125, + "max_sentence2_length": 225, + "unique_sentence2": 256 + }, + "eng_Latn-mdy_Latn": { + "num_samples": 256, + "number_of_characters": 50138, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.859375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 17, + "average_sentence2_length": 80.9921875, + "max_sentence2_length": 240, + "unique_sentence2": 256 + }, + "mdy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 50138, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 80.9921875, + "max_sentence1_length": 240, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.859375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-med_Latn": { + "num_samples": 256, + "number_of_characters": 87610, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 110.2578125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 231.96875, + "max_sentence2_length": 914, + "unique_sentence2": 256 + }, + "med_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87610, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 231.96875, + "max_sentence1_length": 914, + 
"unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 110.2578125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mee_Latn": { + "num_samples": 256, + "number_of_characters": 87505, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.90234375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 226.9140625, + "max_sentence2_length": 624, + "unique_sentence2": 256 + }, + "mee_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87505, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 226.9140625, + "max_sentence1_length": 624, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.90234375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mek_Latn": { + "num_samples": 256, + "number_of_characters": 101586, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.62890625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 285.19140625, + "max_sentence2_length": 880, + "unique_sentence2": 256 + }, + "mek_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 101586, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 285.19140625, + "max_sentence1_length": 880, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.62890625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-meq_Latn": { + "num_samples": 256, + "number_of_characters": 78280, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.39453125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 190.38671875, + "max_sentence2_length": 727, + 
"unique_sentence2": 256 + }, + "meq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78280, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 190.38671875, + "max_sentence1_length": 727, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.39453125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-met_Latn": { + "num_samples": 256, + "number_of_characters": 71606, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.3359375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 165.375, + "max_sentence2_length": 717, + "unique_sentence2": 256 + }, + "met_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71606, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 165.375, + "max_sentence1_length": 717, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.3359375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-meu_Latn": { + "num_samples": 256, + "number_of_characters": 60875, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.01171875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 123.78125, + "max_sentence2_length": 301, + "unique_sentence2": 256 + }, + "meu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60875, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 123.78125, + "max_sentence1_length": 301, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.01171875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mgc_Latn": { + "num_samples": 256, + "number_of_characters": 56490, + "unique_pairs": 256, + "min_sentence1_length": 38, 
+ "average_sentence1_length": 109.9375, + "max_sentence1_length": 243, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 110.7265625, + "max_sentence2_length": 313, + "unique_sentence2": 256 + }, + "mgc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 56490, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 110.7265625, + "max_sentence1_length": 313, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 109.9375, + "max_sentence2_length": 243, + "unique_sentence2": 256 + }, + "eng_Latn-mgh_Latn": { + "num_samples": 256, + "number_of_characters": 60847, + "unique_pairs": 235, + "min_sentence1_length": 28, + "average_sentence1_length": 108.90234375, + "max_sentence1_length": 210, + "unique_sentence1": 228, + "min_sentence2_length": 29, + "average_sentence2_length": 128.78125, + "max_sentence2_length": 284, + "unique_sentence2": 235 + }, + "mgh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60847, + "unique_pairs": 235, + "min_sentence1_length": 29, + "average_sentence1_length": 128.78125, + "max_sentence1_length": 284, + "unique_sentence1": 235, + "min_sentence2_length": 28, + "average_sentence2_length": 108.90234375, + "max_sentence2_length": 210, + "unique_sentence2": 228 + }, + "eng_Latn-mgw_Latn": { + "num_samples": 209, + "number_of_characters": 49488, + "unique_pairs": 209, + "min_sentence1_length": 37, + "average_sentence1_length": 111.75598086124401, + "max_sentence1_length": 243, + "unique_sentence1": 209, + "min_sentence2_length": 39, + "average_sentence2_length": 125.02870813397129, + "max_sentence2_length": 322, + "unique_sentence2": 209 + }, + "mgw_Latn-eng_Latn": { + "num_samples": 209, + "number_of_characters": 49488, + "unique_pairs": 209, + "min_sentence1_length": 39, + "average_sentence1_length": 125.02870813397129, + "max_sentence1_length": 322, + "unique_sentence1": 209, + "min_sentence2_length": 37, + 
"average_sentence2_length": 111.75598086124401, + "max_sentence2_length": 243, + "unique_sentence2": 209 + }, + "eng_Latn-mhl_Latn": { + "num_samples": 256, + "number_of_characters": 82464, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.6640625, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 211.4609375, + "max_sentence2_length": 653, + "unique_sentence2": 256 + }, + "mhl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82464, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 211.4609375, + "max_sentence1_length": 653, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6640625, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-mib_Latn": { + "num_samples": 256, + "number_of_characters": 71241, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.32421875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 162.9609375, + "max_sentence2_length": 367, + "unique_sentence2": 256 + }, + "mib_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71241, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 162.9609375, + "max_sentence1_length": 367, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.32421875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mic_Latn": { + "num_samples": 256, + "number_of_characters": 78794, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.13671875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 196.65234375, + "max_sentence2_length": 1958, + "unique_sentence2": 256 + }, + "mic_Latn-eng_Latn": { + 
"num_samples": 256, + "number_of_characters": 78794, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 196.65234375, + "max_sentence1_length": 1958, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.13671875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mie_Latn": { + "num_samples": 256, + "number_of_characters": 71348, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 163.33203125, + "max_sentence2_length": 380, + "unique_sentence2": 256 + }, + "mie_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71348, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 163.33203125, + "max_sentence1_length": 380, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mig_Latn": { + "num_samples": 256, + "number_of_characters": 66683, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 145.109375, + "max_sentence2_length": 302, + "unique_sentence2": 256 + }, + "mig_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66683, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 145.109375, + "max_sentence1_length": 302, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mih_Latn": { + "num_samples": 256, + "number_of_characters": 74833, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.421875, 
+ "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 177.89453125, + "max_sentence2_length": 571, + "unique_sentence2": 256 + }, + "mih_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74833, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 177.89453125, + "max_sentence1_length": 571, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.421875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mil_Latn": { + "num_samples": 256, + "number_of_characters": 86500, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.60546875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 50, + "average_sentence2_length": 227.28515625, + "max_sentence2_length": 613, + "unique_sentence2": 256 + }, + "mil_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 86500, + "unique_pairs": 256, + "min_sentence1_length": 50, + "average_sentence1_length": 227.28515625, + "max_sentence1_length": 613, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.60546875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mio_Latn": { + "num_samples": 256, + "number_of_characters": 74714, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.41015625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 181.44140625, + "max_sentence2_length": 444, + "unique_sentence2": 256 + }, + "mio_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74714, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 181.44140625, + "max_sentence1_length": 444, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.41015625, + 
"max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mir_Latn": { + "num_samples": 256, + "number_of_characters": 100394, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.8046875, + "max_sentence1_length": 246, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 281.359375, + "max_sentence2_length": 1042, + "unique_sentence2": 256 + }, + "mir_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 100394, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 281.359375, + "max_sentence1_length": 1042, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8046875, + "max_sentence2_length": 246, + "unique_sentence2": 256 + }, + "eng_Latn-mit_Latn": { + "num_samples": 256, + "number_of_characters": 84962, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.02734375, + "max_sentence1_length": 229, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 220.85546875, + "max_sentence2_length": 664, + "unique_sentence2": 256 + }, + "mit_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84962, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 220.85546875, + "max_sentence1_length": 664, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.02734375, + "max_sentence2_length": 229, + "unique_sentence2": 256 + }, + "eng_Latn-miz_Latn": { + "num_samples": 256, + "number_of_characters": 68049, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 150.4453125, + "max_sentence2_length": 340, + "unique_sentence2": 256 + }, + "miz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68049, + 
"unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 150.4453125, + "max_sentence1_length": 340, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mjc_Latn": { + "num_samples": 256, + "number_of_characters": 78332, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.140625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 191.84375, + "max_sentence2_length": 545, + "unique_sentence2": 256 + }, + "mjc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78332, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 191.84375, + "max_sentence1_length": 545, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.140625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mkj_Latn": { + "num_samples": 256, + "number_of_characters": 59576, + "unique_pairs": 255, + "min_sentence1_length": 37, + "average_sentence1_length": 109.86328125, + "max_sentence1_length": 257, + "unique_sentence1": 254, + "min_sentence2_length": 39, + "average_sentence2_length": 122.85546875, + "max_sentence2_length": 319, + "unique_sentence2": 255 + }, + "mkj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59576, + "unique_pairs": 255, + "min_sentence1_length": 39, + "average_sentence1_length": 122.85546875, + "max_sentence1_length": 319, + "unique_sentence1": 255, + "min_sentence2_length": 37, + "average_sentence2_length": 109.86328125, + "max_sentence2_length": 257, + "unique_sentence2": 254 + }, + "eng_Latn-mkl_Latn": { + "num_samples": 256, + "number_of_characters": 61966, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.32421875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + 
"min_sentence2_length": 19, + "average_sentence2_length": 127.73046875, + "max_sentence2_length": 372, + "unique_sentence2": 256 + }, + "mkl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61966, + "unique_pairs": 256, + "min_sentence1_length": 19, + "average_sentence1_length": 127.73046875, + "max_sentence1_length": 372, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.32421875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mkn_Latn": { + "num_samples": 256, + "number_of_characters": 100840, + "unique_pairs": 255, + "min_sentence1_length": 24, + "average_sentence1_length": 114.76953125, + "max_sentence1_length": 251, + "unique_sentence1": 255, + "min_sentence2_length": 37, + "average_sentence2_length": 279.13671875, + "max_sentence2_length": 1299, + "unique_sentence2": 255 + }, + "mkn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 100840, + "unique_pairs": 255, + "min_sentence1_length": 37, + "average_sentence1_length": 279.13671875, + "max_sentence1_length": 1299, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 114.76953125, + "max_sentence2_length": 251, + "unique_sentence2": 255 + }, + "eng_Latn-mks_Latn": { + "num_samples": 256, + "number_of_characters": 83708, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 211.61328125, + "max_sentence2_length": 521, + "unique_sentence2": 256 + }, + "mks_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83708, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 211.61328125, + "max_sentence1_length": 521, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + 
"eng_Latn-mle_Latn": { + "num_samples": 256, + "number_of_characters": 113546, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.97265625, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 332.56640625, + "max_sentence2_length": 1304, + "unique_sentence2": 256 + }, + "mle_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 113546, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 332.56640625, + "max_sentence1_length": 1304, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.97265625, + "max_sentence2_length": 239, + "unique_sentence2": 256 + }, + "eng_Latn-mlh_Latn": { + "num_samples": 256, + "number_of_characters": 82464, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.6640625, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 211.4609375, + "max_sentence2_length": 653, + "unique_sentence2": 256 + }, + "mlh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82464, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 211.4609375, + "max_sentence1_length": 653, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6640625, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-mlp_Latn": { + "num_samples": 256, + "number_of_characters": 89136, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.23828125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 50, + "average_sentence2_length": 232.94921875, + "max_sentence2_length": 748, + "unique_sentence2": 256 + }, + "mlp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89136, + "unique_pairs": 256, + "min_sentence1_length": 50, + 
"average_sentence1_length": 232.94921875, + "max_sentence1_length": 748, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.23828125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mmo_Latn": { + "num_samples": 256, + "number_of_characters": 80208, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 53, + "average_sentence2_length": 200.91796875, + "max_sentence2_length": 528, + "unique_sentence2": 256 + }, + "mmo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80208, + "unique_pairs": 256, + "min_sentence1_length": 53, + "average_sentence1_length": 200.91796875, + "max_sentence1_length": 528, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-mmx_Latn": { + "num_samples": 256, + "number_of_characters": 74674, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.87890625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 180.81640625, + "max_sentence2_length": 520, + "unique_sentence2": 256 + }, + "mmx_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74674, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 180.81640625, + "max_sentence1_length": 520, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.87890625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mna_Latn": { + "num_samples": 256, + "number_of_characters": 88152, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 110.01953125, + "max_sentence1_length": 231, + "unique_sentence1": 256, + "min_sentence2_length": 49, + 
"average_sentence2_length": 234.32421875, + "max_sentence2_length": 760, + "unique_sentence2": 256 + }, + "mna_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88152, + "unique_pairs": 256, + "min_sentence1_length": 49, + "average_sentence1_length": 234.32421875, + "max_sentence1_length": 760, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 110.01953125, + "max_sentence2_length": 231, + "unique_sentence2": 256 + }, + "eng_Latn-mop_Latn": { + "num_samples": 256, + "number_of_characters": 80222, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.30078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 200.06640625, + "max_sentence2_length": 757, + "unique_sentence2": 256 + }, + "mop_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80222, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 200.06640625, + "max_sentence1_length": 757, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.30078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mox_Latn": { + "num_samples": 256, + "number_of_characters": 85990, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.58984375, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 225.30859375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "mox_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85990, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 225.30859375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.58984375, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "eng_Latn-mph_Latn": { + 
"num_samples": 83, + "number_of_characters": 40103, + "unique_pairs": 83, + "min_sentence1_length": 42, + "average_sentence1_length": 104.98795180722891, + "max_sentence1_length": 207, + "unique_sentence1": 82, + "min_sentence2_length": 117, + "average_sentence2_length": 378.1807228915663, + "max_sentence2_length": 1106, + "unique_sentence2": 83 + }, + "mph_Latn-eng_Latn": { + "num_samples": 83, + "number_of_characters": 40103, + "unique_pairs": 83, + "min_sentence1_length": 117, + "average_sentence1_length": 378.1807228915663, + "max_sentence1_length": 1106, + "unique_sentence1": 83, + "min_sentence2_length": 42, + "average_sentence2_length": 104.98795180722891, + "max_sentence2_length": 207, + "unique_sentence2": 82 + }, + "eng_Latn-mpj_Latn": { + "num_samples": 256, + "number_of_characters": 115208, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.93359375, + "max_sentence1_length": 246, + "unique_sentence1": 256, + "min_sentence2_length": 57, + "average_sentence2_length": 339.09765625, + "max_sentence2_length": 1893, + "unique_sentence2": 256 + }, + "mpj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 115208, + "unique_pairs": 256, + "min_sentence1_length": 57, + "average_sentence1_length": 339.09765625, + "max_sentence1_length": 1893, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.93359375, + "max_sentence2_length": 246, + "unique_sentence2": 256 + }, + "eng_Latn-mpm_Latn": { + "num_samples": 256, + "number_of_characters": 87995, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.17578125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 234.5546875, + "max_sentence2_length": 727, + "unique_sentence2": 256 + }, + "mpm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87995, + "unique_pairs": 256, + "min_sentence1_length": 42, + 
"average_sentence1_length": 234.5546875, + "max_sentence1_length": 727, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.17578125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mpp_Latn": { + "num_samples": 256, + "number_of_characters": 85326, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.265625, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 223.0390625, + "max_sentence2_length": 978, + "unique_sentence2": 256 + }, + "mpp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85326, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 223.0390625, + "max_sentence1_length": 978, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.265625, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-mps_Latn": { + "num_samples": 256, + "number_of_characters": 106083, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.17578125, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 305.2109375, + "max_sentence2_length": 1272, + "unique_sentence2": 256 + }, + "mps_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 106083, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 305.2109375, + "max_sentence1_length": 1272, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.17578125, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-mpt_Latn": { + "num_samples": 256, + "number_of_characters": 99802, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.9140625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 39, + 
"average_sentence2_length": 276.9375, + "max_sentence2_length": 1274, + "unique_sentence2": 256 + }, + "mpt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99802, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 276.9375, + "max_sentence1_length": 1274, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.9140625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mpx_Latn": { + "num_samples": 256, + "number_of_characters": 74185, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.59375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 175.19140625, + "max_sentence2_length": 568, + "unique_sentence2": 256 + }, + "mpx_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74185, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 175.19140625, + "max_sentence1_length": 568, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.59375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mqb_Latn": { + "num_samples": 256, + "number_of_characters": 79139, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.796875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 194.33984375, + "max_sentence2_length": 670, + "unique_sentence2": 256 + }, + "mqb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79139, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 194.33984375, + "max_sentence1_length": 670, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.796875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mqj_Latn": { + "num_samples": 256, + 
"number_of_characters": 73654, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.84375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 176.8671875, + "max_sentence2_length": 452, + "unique_sentence2": 256 + }, + "mqj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73654, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 176.8671875, + "max_sentence1_length": 452, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.84375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-msb_Latn": { + "num_samples": 256, + "number_of_characters": 69696, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.91015625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 157.33984375, + "max_sentence2_length": 394, + "unique_sentence2": 256 + }, + "msb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69696, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 157.33984375, + "max_sentence1_length": 394, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.91015625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-msc_Latn": { + "num_samples": 256, + "number_of_characters": 62775, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 117.7265625, + "max_sentence1_length": 248, + "unique_sentence1": 256, + "min_sentence2_length": 20, + "average_sentence2_length": 127.48828125, + "max_sentence2_length": 336, + "unique_sentence2": 256 + }, + "msc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62775, + "unique_pairs": 256, + "min_sentence1_length": 20, + "average_sentence1_length": 127.48828125, + "max_sentence1_length": 
336, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 117.7265625, + "max_sentence2_length": 248, + "unique_sentence2": 256 + }, + "eng_Latn-msk_Latn": { + "num_samples": 256, + "number_of_characters": 87552, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.06640625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 232.93359375, + "max_sentence2_length": 837, + "unique_sentence2": 256 + }, + "msk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87552, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 232.93359375, + "max_sentence1_length": 837, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.06640625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-msm_Latn": { + "num_samples": 256, + "number_of_characters": 74633, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 180.16015625, + "max_sentence2_length": 673, + "unique_sentence2": 256 + }, + "msm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74633, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 180.16015625, + "max_sentence1_length": 673, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-msy_Latn": { + "num_samples": 256, + "number_of_characters": 95549, + "unique_pairs": 255, + "min_sentence1_length": 1, + "average_sentence1_length": 146.0, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 31, + "average_sentence2_length": 227.23828125, + "max_sentence2_length": 585, + "unique_sentence2": 255 
+ }, + "msy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 95549, + "unique_pairs": 255, + "min_sentence1_length": 31, + "average_sentence1_length": 227.23828125, + "max_sentence1_length": 585, + "unique_sentence1": 255, + "min_sentence2_length": 1, + "average_sentence2_length": 146.0, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-mti_Latn": { + "num_samples": 256, + "number_of_characters": 61653, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.96875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 3, + "average_sentence2_length": 126.86328125, + "max_sentence2_length": 327, + "unique_sentence2": 256 + }, + "mti_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61653, + "unique_pairs": 256, + "min_sentence1_length": 3, + "average_sentence1_length": 126.86328125, + "max_sentence1_length": 327, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.96875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mto_Latn": { + "num_samples": 256, + "number_of_characters": 86441, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.54296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 57, + "average_sentence2_length": 224.1171875, + "max_sentence2_length": 552, + "unique_sentence2": 256 + }, + "mto_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 86441, + "unique_pairs": 256, + "min_sentence1_length": 57, + "average_sentence1_length": 224.1171875, + "max_sentence1_length": 552, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.54296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mux_Latn": { + "num_samples": 256, + "number_of_characters": 123440, + "unique_pairs": 256, + "min_sentence1_length": 24, + 
"average_sentence1_length": 109.97265625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 47, + "average_sentence2_length": 372.21484375, + "max_sentence2_length": 1454, + "unique_sentence2": 256 + }, + "mux_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 123440, + "unique_pairs": 256, + "min_sentence1_length": 47, + "average_sentence1_length": 372.21484375, + "max_sentence1_length": 1454, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.97265625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-muy_Latn": { + "num_samples": 256, + "number_of_characters": 76268, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.53125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 184.390625, + "max_sentence2_length": 416, + "unique_sentence2": 256 + }, + "muy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76268, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 184.390625, + "max_sentence1_length": 416, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.53125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mva_Latn": { + "num_samples": 256, + "number_of_characters": 76608, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.2109375, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 189.0390625, + "max_sentence2_length": 523, + "unique_sentence2": 256 + }, + "mva_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76608, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 189.0390625, + "max_sentence1_length": 523, + "unique_sentence1": 256, + "min_sentence2_length": 24, + 
"average_sentence2_length": 110.2109375, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-mvn_Latn": { + "num_samples": 256, + "number_of_characters": 93961, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.421875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 255.61328125, + "max_sentence2_length": 1123, + "unique_sentence2": 256 + }, + "mvn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93961, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 255.61328125, + "max_sentence1_length": 1123, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.421875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-mwc_Latn": { + "num_samples": 256, + "number_of_characters": 62114, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 112.6875, + "max_sentence1_length": 245, + "unique_sentence1": 255, + "min_sentence2_length": 38, + "average_sentence2_length": 129.9453125, + "max_sentence2_length": 273, + "unique_sentence2": 256 + }, + "mwc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62114, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 129.9453125, + "max_sentence1_length": 273, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 112.6875, + "max_sentence2_length": 245, + "unique_sentence2": 255 + }, + "eng_Latn-mwe_Latn": { + "num_samples": 256, + "number_of_characters": 64571, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 139.8359375, + "max_sentence2_length": 343, + "unique_sentence2": 256 + }, + "mwe_Latn-eng_Latn": { + "num_samples": 256, + 
"number_of_characters": 64571, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 139.8359375, + "max_sentence1_length": 343, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-mwf_Latn": { + "num_samples": 256, + "number_of_characters": 121128, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 105.94140625, + "max_sentence1_length": 245, + "unique_sentence1": 256, + "min_sentence2_length": 53, + "average_sentence2_length": 367.21484375, + "max_sentence2_length": 1738, + "unique_sentence2": 256 + }, + "mwf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 121128, + "unique_pairs": 256, + "min_sentence1_length": 53, + "average_sentence1_length": 367.21484375, + "max_sentence1_length": 1738, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 105.94140625, + "max_sentence2_length": 245, + "unique_sentence2": 256 + }, + "eng_Latn-mwp_Latn": { + "num_samples": 256, + "number_of_characters": 77799, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.61328125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 47, + "average_sentence2_length": 192.2890625, + "max_sentence2_length": 634, + "unique_sentence2": 256 + }, + "mwp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77799, + "unique_pairs": 256, + "min_sentence1_length": 47, + "average_sentence1_length": 192.2890625, + "max_sentence1_length": 634, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 111.61328125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mxb_Latn": { + "num_samples": 256, + "number_of_characters": 77282, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + 
"max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 186.51171875, + "max_sentence2_length": 503, + "unique_sentence2": 256 + }, + "mxb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77282, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 186.51171875, + "max_sentence1_length": 503, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mxp_Latn": { + "num_samples": 256, + "number_of_characters": 74553, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 178.78515625, + "max_sentence2_length": 465, + "unique_sentence2": 256 + }, + "mxp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74553, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 178.78515625, + "max_sentence1_length": 465, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mxq_Latn": { + "num_samples": 256, + "number_of_characters": 71177, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 162.6640625, + "max_sentence2_length": 395, + "unique_sentence2": 256 + }, + "mxq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71177, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 162.6640625, + "max_sentence1_length": 395, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + 
"max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mxt_Latn": { + "num_samples": 256, + "number_of_characters": 76978, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.390625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 185.3046875, + "max_sentence2_length": 540, + "unique_sentence2": 256 + }, + "mxt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76978, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 185.3046875, + "max_sentence1_length": 540, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.390625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mya_Latn": { + "num_samples": 256, + "number_of_characters": 80444, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 146.3203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 167.9140625, + "max_sentence2_length": 392, + "unique_sentence2": 255 + }, + "mya_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80444, + "unique_pairs": 255, + "min_sentence1_length": 1, + "average_sentence1_length": 167.9140625, + "max_sentence1_length": 392, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 146.3203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-myk_Latn": { + "num_samples": 256, + "number_of_characters": 68477, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 155.09375, + "max_sentence2_length": 372, + "unique_sentence2": 256 + }, + "myk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68477, + "unique_pairs": 
256, + "min_sentence1_length": 41, + "average_sentence1_length": 155.09375, + "max_sentence1_length": 372, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-myu_Latn": { + "num_samples": 256, + "number_of_characters": 93550, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.890625, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 254.5390625, + "max_sentence2_length": 1078, + "unique_sentence2": 256 + }, + "myu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93550, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 254.5390625, + "max_sentence1_length": 1078, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.890625, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-myw_Latn": { + "num_samples": 256, + "number_of_characters": 93048, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 116.5234375, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 246.9453125, + "max_sentence2_length": 663, + "unique_sentence2": 256 + }, + "myw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93048, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 246.9453125, + "max_sentence1_length": 663, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 116.5234375, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "eng_Latn-myy_Latn": { + "num_samples": 256, + "number_of_characters": 90856, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.23828125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + 
"min_sentence2_length": 37, + "average_sentence2_length": 241.66796875, + "max_sentence2_length": 1002, + "unique_sentence2": 256 + }, + "myy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90856, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 241.66796875, + "max_sentence1_length": 1002, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.23828125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-mzz_Latn": { + "num_samples": 125, + "number_of_characters": 35308, + "unique_pairs": 125, + "min_sentence1_length": 45, + "average_sentence1_length": 113.664, + "max_sentence1_length": 257, + "unique_sentence1": 124, + "min_sentence2_length": 43, + "average_sentence2_length": 168.8, + "max_sentence2_length": 597, + "unique_sentence2": 125 + }, + "mzz_Latn-eng_Latn": { + "num_samples": 125, + "number_of_characters": 35308, + "unique_pairs": 125, + "min_sentence1_length": 43, + "average_sentence1_length": 168.8, + "max_sentence1_length": 597, + "unique_sentence1": 125, + "min_sentence2_length": 45, + "average_sentence2_length": 113.664, + "max_sentence2_length": 257, + "unique_sentence2": 124 + }, + "eng_Latn-nab_Latn": { + "num_samples": 256, + "number_of_characters": 128025, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.921875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 51, + "average_sentence2_length": 389.17578125, + "max_sentence2_length": 1402, + "unique_sentence2": 256 + }, + "nab_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 128025, + "unique_pairs": 256, + "min_sentence1_length": 51, + "average_sentence1_length": 389.17578125, + "max_sentence1_length": 1402, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.921875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-naf_Latn": { + 
"num_samples": 256, + "number_of_characters": 85981, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.5390625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 223.32421875, + "max_sentence2_length": 882, + "unique_sentence2": 256 + }, + "naf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85981, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 223.32421875, + "max_sentence1_length": 882, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.5390625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-nak_Latn": { + "num_samples": 256, + "number_of_characters": 93521, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.59765625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 53, + "average_sentence2_length": 254.71875, + "max_sentence2_length": 1236, + "unique_sentence2": 256 + }, + "nak_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93521, + "unique_pairs": 256, + "min_sentence1_length": 53, + "average_sentence1_length": 254.71875, + "max_sentence1_length": 1236, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.59765625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-nas_Latn": { + "num_samples": 256, + "number_of_characters": 73089, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.05078125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 175.453125, + "max_sentence2_length": 465, + "unique_sentence2": 256 + }, + "nas_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73089, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 175.453125, + 
"max_sentence1_length": 465, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.05078125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-nbq_Latn": { + "num_samples": 256, + "number_of_characters": 94201, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.86328125, + "max_sentence1_length": 243, + "unique_sentence1": 255, + "min_sentence2_length": 33, + "average_sentence2_length": 255.109375, + "max_sentence2_length": 973, + "unique_sentence2": 256 + }, + "nbq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 94201, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 255.109375, + "max_sentence1_length": 973, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.86328125, + "max_sentence2_length": 243, + "unique_sentence2": 255 + }, + "eng_Latn-nca_Latn": { + "num_samples": 256, + "number_of_characters": 68902, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.53125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 157.6171875, + "max_sentence2_length": 395, + "unique_sentence2": 256 + }, + "nca_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68902, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 157.6171875, + "max_sentence1_length": 395, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.53125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-nch_Latn": { + "num_samples": 256, + "number_of_characters": 82032, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.34375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 207.09375, + "max_sentence2_length": 519, + 
"unique_sentence2": 256 + }, + "nch_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82032, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 207.09375, + "max_sentence1_length": 519, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.34375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ncj_Latn": { + "num_samples": 256, + "number_of_characters": 72693, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.16015625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 173.796875, + "max_sentence2_length": 460, + "unique_sentence2": 256 + }, + "ncj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72693, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 173.796875, + "max_sentence1_length": 460, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.16015625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ncl_Latn": { + "num_samples": 256, + "number_of_characters": 103749, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 115.09375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 290.17578125, + "max_sentence2_length": 794, + "unique_sentence2": 256 + }, + "ncl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 103749, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 290.17578125, + "max_sentence1_length": 794, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 115.09375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ncu_Latn": { + "num_samples": 256, + "number_of_characters": 97291, + "unique_pairs": 256, + "min_sentence1_length": 
24, + "average_sentence1_length": 110.92578125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 57, + "average_sentence2_length": 269.1171875, + "max_sentence2_length": 1008, + "unique_sentence2": 256 + }, + "ncu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97291, + "unique_pairs": 256, + "min_sentence1_length": 57, + "average_sentence1_length": 269.1171875, + "max_sentence1_length": 1008, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.92578125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ndg_Latn": { + "num_samples": 256, + "number_of_characters": 58109, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.41796875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 116.5703125, + "max_sentence2_length": 277, + "unique_sentence2": 256 + }, + "ndg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 58109, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 116.5703125, + "max_sentence1_length": 277, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.41796875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ndj_Latn": { + "num_samples": 256, + "number_of_characters": 62460, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 131.58984375, + "max_sentence2_length": 321, + "unique_sentence2": 256 + }, + "ndj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62460, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 131.58984375, + "max_sentence1_length": 321, + "unique_sentence1": 256, + "min_sentence2_length": 24, + 
"average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-nfa_Latn": { + "num_samples": 256, + "number_of_characters": 97436, + "unique_pairs": 255, + "min_sentence1_length": 24, + "average_sentence1_length": 112.23046875, + "max_sentence1_length": 227, + "unique_sentence1": 255, + "min_sentence2_length": 31, + "average_sentence2_length": 268.37890625, + "max_sentence2_length": 1222, + "unique_sentence2": 255 + }, + "nfa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97436, + "unique_pairs": 255, + "min_sentence1_length": 31, + "average_sentence1_length": 268.37890625, + "max_sentence1_length": 1222, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 112.23046875, + "max_sentence2_length": 227, + "unique_sentence2": 255 + }, + "eng_Latn-ngp_Latn": { + "num_samples": 256, + "number_of_characters": 61761, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 128.859375, + "max_sentence2_length": 319, + "unique_sentence2": 256 + }, + "ngp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61761, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 128.859375, + "max_sentence1_length": 319, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ngu_Latn": { + "num_samples": 256, + "number_of_characters": 77838, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.703125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 193.3515625, + "max_sentence2_length": 601, + "unique_sentence2": 256 + }, + "ngu_Latn-eng_Latn": { + "num_samples": 
256, + "number_of_characters": 77838, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 193.3515625, + "max_sentence1_length": 601, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.703125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-nhe_Latn": { + "num_samples": 256, + "number_of_characters": 102593, + "unique_pairs": 254, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 80, + "average_sentence2_length": 253.1953125, + "max_sentence2_length": 615, + "unique_sentence2": 254 + }, + "nhe_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 102593, + "unique_pairs": 254, + "min_sentence1_length": 80, + "average_sentence1_length": 253.1953125, + "max_sentence1_length": 615, + "unique_sentence1": 254, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-nhg_Latn": { + "num_samples": 256, + "number_of_characters": 74655, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.359375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 177.26171875, + "max_sentence2_length": 664, + "unique_sentence2": 256 + }, + "nhg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74655, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 177.26171875, + "max_sentence1_length": 664, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.359375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-nhi_Latn": { + "num_samples": 256, + "number_of_characters": 72708, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + 
"max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 171.62109375, + "max_sentence2_length": 418, + "unique_sentence2": 256 + }, + "nhi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72708, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 171.62109375, + "max_sentence1_length": 418, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-nho_Latn": { + "num_samples": 256, + "number_of_characters": 77360, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 113.66796875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 188.51953125, + "max_sentence2_length": 474, + "unique_sentence2": 256 + }, + "nho_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77360, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 188.51953125, + "max_sentence1_length": 474, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 113.66796875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-nhr_Latn": { + "num_samples": 256, + "number_of_characters": 69514, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.19140625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 156.34765625, + "max_sentence2_length": 465, + "unique_sentence2": 256 + }, + "nhr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69514, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 156.34765625, + "max_sentence1_length": 465, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.19140625, + 
"max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-nhu_Latn": { + "num_samples": 256, + "number_of_characters": 69059, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 157.3671875, + "max_sentence2_length": 507, + "unique_sentence2": 256 + }, + "nhu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69059, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 157.3671875, + "max_sentence1_length": 507, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-nhw_Latn": { + "num_samples": 256, + "number_of_characters": 83398, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.34375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 50, + "average_sentence2_length": 212.4296875, + "max_sentence2_length": 531, + "unique_sentence2": 256 + }, + "nhw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83398, + "unique_pairs": 256, + "min_sentence1_length": 50, + "average_sentence1_length": 212.4296875, + "max_sentence1_length": 531, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.34375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-nhy_Latn": { + "num_samples": 256, + "number_of_characters": 78387, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.60546875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 47, + "average_sentence2_length": 190.59375, + "max_sentence2_length": 450, + "unique_sentence2": 256 + }, + "nhy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78387, + "unique_pairs": 
256, + "min_sentence1_length": 47, + "average_sentence1_length": 190.59375, + "max_sentence1_length": 450, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.60546875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-nif_Latn": { + "num_samples": 256, + "number_of_characters": 69676, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 106.3046875, + "max_sentence1_length": 245, + "unique_sentence1": 255, + "min_sentence2_length": 48, + "average_sentence2_length": 165.8671875, + "max_sentence2_length": 641, + "unique_sentence2": 256 + }, + "nif_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69676, + "unique_pairs": 256, + "min_sentence1_length": 48, + "average_sentence1_length": 165.8671875, + "max_sentence1_length": 641, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 106.3046875, + "max_sentence2_length": 245, + "unique_sentence2": 255 + }, + "eng_Latn-nii_Latn": { + "num_samples": 256, + "number_of_characters": 93801, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.26953125, + "max_sentence1_length": 273, + "unique_sentence1": 256, + "min_sentence2_length": 59, + "average_sentence2_length": 255.140625, + "max_sentence2_length": 984, + "unique_sentence2": 256 + }, + "nii_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93801, + "unique_pairs": 256, + "min_sentence1_length": 59, + "average_sentence1_length": 255.140625, + "max_sentence1_length": 984, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.26953125, + "max_sentence2_length": 273, + "unique_sentence2": 256 + }, + "eng_Latn-nin_Latn": { + "num_samples": 256, + "number_of_characters": 69111, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + 
"min_sentence2_length": 41, + "average_sentence2_length": 157.5703125, + "max_sentence2_length": 411, + "unique_sentence2": 256 + }, + "nin_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69111, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 157.5703125, + "max_sentence1_length": 411, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-nko_Latn": { + "num_samples": 256, + "number_of_characters": 62040, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.01171875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 130.33203125, + "max_sentence2_length": 394, + "unique_sentence2": 256 + }, + "nko_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62040, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 130.33203125, + "max_sentence1_length": 394, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.01171875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-nld_Latn": { + "num_samples": 256, + "number_of_characters": 60088, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.88671875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 119.83203125, + "max_sentence2_length": 226, + "unique_sentence2": 256 + }, + "nld_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60088, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 119.83203125, + "max_sentence1_length": 226, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.88671875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + 
"eng_Latn-nlg_Latn": { + "num_samples": 256, + "number_of_characters": 75477, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.23046875, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 182.6015625, + "max_sentence2_length": 525, + "unique_sentence2": 256 + }, + "nlg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75477, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 182.6015625, + "max_sentence1_length": 525, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.23046875, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-nna_Latn": { + "num_samples": 256, + "number_of_characters": 111717, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 117.91796875, + "max_sentence1_length": 257, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 318.4765625, + "max_sentence2_length": 2306, + "unique_sentence2": 256 + }, + "nna_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 111717, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 318.4765625, + "max_sentence1_length": 2306, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 117.91796875, + "max_sentence2_length": 257, + "unique_sentence2": 256 + }, + "eng_Latn-nnq_Latn": { + "num_samples": 256, + "number_of_characters": 60932, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 125.62109375, + "max_sentence2_length": 323, + "unique_sentence2": 256 + }, + "nnq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60932, + "unique_pairs": 256, + "min_sentence1_length": 25, + 
"average_sentence1_length": 125.62109375, + "max_sentence1_length": 323, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-noa_Latn": { + "num_samples": 256, + "number_of_characters": 106140, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 147.40625, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 42, + "average_sentence2_length": 267.203125, + "max_sentence2_length": 709, + "unique_sentence2": 256 + }, + "noa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 106140, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 267.203125, + "max_sentence1_length": 709, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 147.40625, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-nop_Latn": { + "num_samples": 256, + "number_of_characters": 74301, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.6640625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 176.57421875, + "max_sentence2_length": 511, + "unique_sentence2": 256 + }, + "nop_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74301, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 176.57421875, + "max_sentence1_length": 511, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.6640625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-not_Latn": { + "num_samples": 256, + "number_of_characters": 86791, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.58984375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 34, + 
"average_sentence2_length": 228.4375, + "max_sentence2_length": 807, + "unique_sentence2": 256 + }, + "not_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 86791, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 228.4375, + "max_sentence1_length": 807, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.58984375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-nou_Latn": { + "num_samples": 256, + "number_of_characters": 83058, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.13671875, + "max_sentence1_length": 228, + "unique_sentence1": 256, + "min_sentence2_length": 3, + "average_sentence2_length": 214.30859375, + "max_sentence2_length": 811, + "unique_sentence2": 256 + }, + "nou_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83058, + "unique_pairs": 256, + "min_sentence1_length": 3, + "average_sentence1_length": 214.30859375, + "max_sentence1_length": 811, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.13671875, + "max_sentence2_length": 228, + "unique_sentence2": 256 + }, + "eng_Latn-npi_Deva": { + "num_samples": 256, + "number_of_characters": 59596, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 120.40234375, + "max_sentence2_length": 313, + "unique_sentence2": 256 + }, + "npi_Deva-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59596, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 120.40234375, + "max_sentence1_length": 313, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-npl_Latn": { + "num_samples": 256, + 
"number_of_characters": 66338, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 146.73828125, + "max_sentence2_length": 337, + "unique_sentence2": 256 + }, + "npl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66338, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 146.73828125, + "max_sentence1_length": 337, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-nsn_Latn": { + "num_samples": 256, + "number_of_characters": 77648, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.8046875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 192.5078125, + "max_sentence2_length": 683, + "unique_sentence2": 256 + }, + "nsn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77648, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 192.5078125, + "max_sentence1_length": 683, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8046875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-nss_Latn": { + "num_samples": 256, + "number_of_characters": 71538, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 112.18359375, + "max_sentence1_length": 245, + "unique_sentence1": 255, + "min_sentence2_length": 44, + "average_sentence2_length": 167.26171875, + "max_sentence2_length": 449, + "unique_sentence2": 256 + }, + "nss_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71538, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 167.26171875, + 
"max_sentence1_length": 449, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 112.18359375, + "max_sentence2_length": 245, + "unique_sentence2": 255 + }, + "eng_Latn-ntj_Latn": { + "num_samples": 256, + "number_of_characters": 94468, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.59765625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 56, + "average_sentence2_length": 258.41796875, + "max_sentence2_length": 857, + "unique_sentence2": 256 + }, + "ntj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 94468, + "unique_pairs": 256, + "min_sentence1_length": 56, + "average_sentence1_length": 258.41796875, + "max_sentence1_length": 857, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.59765625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ntp_Latn": { + "num_samples": 256, + "number_of_characters": 89472, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.97265625, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 239.52734375, + "max_sentence2_length": 1474, + "unique_sentence2": 256 + }, + "ntp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89472, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 239.52734375, + "max_sentence1_length": 1474, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.97265625, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-ntu_Latn": { + "num_samples": 256, + "number_of_characters": 91144, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.15625, + "max_sentence1_length": 273, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 241.875, + 
"max_sentence2_length": 1550, + "unique_sentence2": 256 + }, + "ntu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 91144, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 241.875, + "max_sentence1_length": 1550, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.15625, + "max_sentence2_length": 273, + "unique_sentence2": 256 + }, + "eng_Latn-nuy_Latn": { + "num_samples": 256, + "number_of_characters": 109057, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 112.17578125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 86, + "average_sentence2_length": 313.828125, + "max_sentence2_length": 1006, + "unique_sentence2": 256 + }, + "nuy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 109057, + "unique_pairs": 256, + "min_sentence1_length": 86, + "average_sentence1_length": 313.828125, + "max_sentence1_length": 1006, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 112.17578125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-nvm_Latn": { + "num_samples": 256, + "number_of_characters": 94027, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.984375, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 257.30859375, + "max_sentence2_length": 979, + "unique_sentence2": 256 + }, + "nvm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 94027, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 257.30859375, + "max_sentence1_length": 979, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.984375, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "eng_Latn-nwi_Latn": { + "num_samples": 256, + "number_of_characters": 84946, + 
"unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.6796875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 219.140625, + "max_sentence2_length": 738, + "unique_sentence2": 256 + }, + "nwi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84946, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 219.140625, + "max_sentence1_length": 738, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.6796875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-nya_Latn": { + "num_samples": 256, + "number_of_characters": 61367, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 127.3203125, + "max_sentence2_length": 328, + "unique_sentence2": 256 + }, + "nya_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61367, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 127.3203125, + "max_sentence1_length": 328, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-nys_Latn": { + "num_samples": 122, + "number_of_characters": 42051, + "unique_pairs": 122, + "min_sentence1_length": 37, + "average_sentence1_length": 114.35245901639344, + "max_sentence1_length": 268, + "unique_sentence1": 121, + "min_sentence2_length": 50, + "average_sentence2_length": 230.327868852459, + "max_sentence2_length": 1366, + "unique_sentence2": 122 + }, + "nys_Latn-eng_Latn": { + "num_samples": 122, + "number_of_characters": 42051, + "unique_pairs": 122, + "min_sentence1_length": 50, + "average_sentence1_length": 230.327868852459, + "max_sentence1_length": 1366, + 
"unique_sentence1": 122, + "min_sentence2_length": 37, + "average_sentence2_length": 114.35245901639344, + "max_sentence2_length": 268, + "unique_sentence2": 121 + }, + "eng_Latn-nyu_Latn": { + "num_samples": 256, + "number_of_characters": 60494, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 110.2890625, + "max_sentence1_length": 257, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 126.015625, + "max_sentence2_length": 288, + "unique_sentence2": 256 + }, + "nyu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60494, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 126.015625, + "max_sentence1_length": 288, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 110.2890625, + "max_sentence2_length": 257, + "unique_sentence2": 256 + }, + "eng_Latn-obo_Latn": { + "num_samples": 256, + "number_of_characters": 79773, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 115.52734375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 196.0859375, + "max_sentence2_length": 668, + "unique_sentence2": 256 + }, + "obo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79773, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 196.0859375, + "max_sentence1_length": 668, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 115.52734375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-okv_Latn": { + "num_samples": 256, + "number_of_characters": 82175, + "unique_pairs": 255, + "min_sentence1_length": 35, + "average_sentence1_length": 146.515625, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 50, + "average_sentence2_length": 174.48046875, + "max_sentence2_length": 807, + 
"unique_sentence2": 255 + }, + "okv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82175, + "unique_pairs": 255, + "min_sentence1_length": 50, + "average_sentence1_length": 174.48046875, + "max_sentence1_length": 807, + "unique_sentence1": 255, + "min_sentence2_length": 35, + "average_sentence2_length": 146.515625, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-omw_Latn": { + "num_samples": 256, + "number_of_characters": 83123, + "unique_pairs": 252, + "min_sentence1_length": 37, + "average_sentence1_length": 122.4375, + "max_sentence1_length": 290, + "unique_sentence1": 250, + "min_sentence2_length": 58, + "average_sentence2_length": 202.26171875, + "max_sentence2_length": 460, + "unique_sentence2": 252 + }, + "omw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83123, + "unique_pairs": 252, + "min_sentence1_length": 58, + "average_sentence1_length": 202.26171875, + "max_sentence1_length": 460, + "unique_sentence1": 252, + "min_sentence2_length": 37, + "average_sentence2_length": 122.4375, + "max_sentence2_length": 290, + "unique_sentence2": 250 + }, + "eng_Latn-ong_Latn": { + "num_samples": 256, + "number_of_characters": 86322, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.62890625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 222.56640625, + "max_sentence2_length": 864, + "unique_sentence2": 256 + }, + "ong_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 86322, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 222.56640625, + "max_sentence1_length": 864, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.62890625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ons_Latn": { + "num_samples": 256, + "number_of_characters": 77426, + "unique_pairs": 256, + 
"min_sentence1_length": 24, + "average_sentence1_length": 113.984375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 188.4609375, + "max_sentence2_length": 580, + "unique_sentence2": 256 + }, + "ons_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77426, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 188.4609375, + "max_sentence1_length": 580, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.984375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ood_Latn": { + "num_samples": 256, + "number_of_characters": 79363, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.00390625, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 199.0078125, + "max_sentence2_length": 631, + "unique_sentence2": 256 + }, + "ood_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79363, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 199.0078125, + "max_sentence1_length": 631, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.00390625, + "max_sentence2_length": 239, + "unique_sentence2": 256 + }, + "eng_Latn-opm_Latn": { + "num_samples": 256, + "number_of_characters": 104595, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.70703125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 59, + "average_sentence2_length": 298.8671875, + "max_sentence2_length": 1026, + "unique_sentence2": 256 + }, + "opm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 104595, + "unique_pairs": 256, + "min_sentence1_length": 59, + "average_sentence1_length": 298.8671875, + "max_sentence1_length": 1026, + "unique_sentence1": 256, + 
"min_sentence2_length": 24, + "average_sentence2_length": 109.70703125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ory_Orya": { + "num_samples": 256, + "number_of_characters": 60460, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.76171875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 120.41015625, + "max_sentence2_length": 279, + "unique_sentence2": 256 + }, + "ory_Orya-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60460, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 120.41015625, + "max_sentence1_length": 279, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.76171875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ote_Latn": { + "num_samples": 256, + "number_of_characters": 72346, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.55078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 167.05078125, + "max_sentence2_length": 424, + "unique_sentence2": 256 + }, + "ote_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72346, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 167.05078125, + "max_sentence1_length": 424, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.55078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-otm_Latn": { + "num_samples": 256, + "number_of_characters": 71293, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.12890625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 164.359375, + "max_sentence2_length": 500, + "unique_sentence2": 256 + }, + 
"otm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71293, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 164.359375, + "max_sentence1_length": 500, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.12890625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-otn_Latn": { + "num_samples": 256, + "number_of_characters": 80639, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.01953125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 199.9765625, + "max_sentence2_length": 741, + "unique_sentence2": 256 + }, + "otn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80639, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 199.9765625, + "max_sentence1_length": 741, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.01953125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-otq_Latn": { + "num_samples": 256, + "number_of_characters": 62865, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 130.1953125, + "max_sentence2_length": 328, + "unique_sentence2": 256 + }, + "otq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62865, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 130.1953125, + "max_sentence1_length": 328, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ots_Latn": { + "num_samples": 256, + "number_of_characters": 97141, + "unique_pairs": 256, + "min_sentence1_length": 24, + 
"average_sentence1_length": 113.46875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 265.98828125, + "max_sentence2_length": 818, + "unique_sentence2": 256 + }, + "ots_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97141, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 265.98828125, + "max_sentence1_length": 818, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.46875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-pab_Latn": { + "num_samples": 256, + "number_of_characters": 70981, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 164.875, + "max_sentence2_length": 422, + "unique_sentence2": 256 + }, + "pab_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70981, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 164.875, + "max_sentence1_length": 422, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-pad_Latn": { + "num_samples": 256, + "number_of_characters": 108382, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 110.1796875, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 313.1875, + "max_sentence2_length": 1173, + "unique_sentence2": 256 + }, + "pad_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 108382, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 313.1875, + "max_sentence1_length": 1173, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 
110.1796875, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-pah_Latn": { + "num_samples": 256, + "number_of_characters": 103694, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.34375, + "max_sentence1_length": 216, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 293.7109375, + "max_sentence2_length": 1340, + "unique_sentence2": 256 + }, + "pah_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 103694, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 293.7109375, + "max_sentence1_length": 1340, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.34375, + "max_sentence2_length": 216, + "unique_sentence2": 256 + }, + "eng_Latn-pan_Guru": { + "num_samples": 256, + "number_of_characters": 56864, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 18, + "average_sentence2_length": 109.73046875, + "max_sentence2_length": 287, + "unique_sentence2": 256 + }, + "pan_Guru-eng_Latn": { + "num_samples": 256, + "number_of_characters": 56864, + "unique_pairs": 256, + "min_sentence1_length": 18, + "average_sentence1_length": 109.73046875, + "max_sentence1_length": 287, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-pao_Latn": { + "num_samples": 256, + "number_of_characters": 98184, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 116.69140625, + "max_sentence1_length": 254, + "unique_sentence1": 254, + "min_sentence2_length": 27, + "average_sentence2_length": 266.83984375, + "max_sentence2_length": 1493, + "unique_sentence2": 256 + }, + "pao_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 
98184, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 266.83984375, + "max_sentence1_length": 1493, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 116.69140625, + "max_sentence2_length": 254, + "unique_sentence2": 254 + }, + "eng_Latn-pes_Arab": { + "num_samples": 256, + "number_of_characters": 51444, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.51953125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 17, + "average_sentence2_length": 87.43359375, + "max_sentence2_length": 190, + "unique_sentence2": 256 + }, + "pes_Arab-eng_Latn": { + "num_samples": 256, + "number_of_characters": 51444, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 87.43359375, + "max_sentence1_length": 190, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.51953125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-pib_Latn": { + "num_samples": 256, + "number_of_characters": 62991, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.3046875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 130.75390625, + "max_sentence2_length": 409, + "unique_sentence2": 256 + }, + "pib_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62991, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 130.75390625, + "max_sentence1_length": 409, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.3046875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-pio_Latn": { + "num_samples": 256, + "number_of_characters": 102925, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.78515625, + "max_sentence1_length": 227, + 
"unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 292.265625, + "max_sentence2_length": 997, + "unique_sentence2": 256 + }, + "pio_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 102925, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 292.265625, + "max_sentence1_length": 997, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.78515625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-pir_Latn": { + "num_samples": 256, + "number_of_characters": 98670, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.71484375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 273.71484375, + "max_sentence2_length": 1050, + "unique_sentence2": 256 + }, + "pir_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 98670, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 273.71484375, + "max_sentence1_length": 1050, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 111.71484375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-piu_Latn": { + "num_samples": 256, + "number_of_characters": 138919, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.66015625, + "max_sentence1_length": 233, + "unique_sentence1": 256, + "min_sentence2_length": 62, + "average_sentence2_length": 426.9921875, + "max_sentence2_length": 1444, + "unique_sentence2": 256 + }, + "piu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 138919, + "unique_pairs": 256, + "min_sentence1_length": 62, + "average_sentence1_length": 426.9921875, + "max_sentence1_length": 1444, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.66015625, + "max_sentence2_length": 233, + 
"unique_sentence2": 256 + }, + "eng_Latn-pjt_Latn": { + "num_samples": 256, + "number_of_characters": 131729, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.1953125, + "max_sentence1_length": 235, + "unique_sentence1": 256, + "min_sentence2_length": 50, + "average_sentence2_length": 404.37109375, + "max_sentence2_length": 1358, + "unique_sentence2": 256 + }, + "pjt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 131729, + "unique_pairs": 256, + "min_sentence1_length": 50, + "average_sentence1_length": 404.37109375, + "max_sentence1_length": 1358, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.1953125, + "max_sentence2_length": 235, + "unique_sentence2": 256 + }, + "eng_Latn-pls_Latn": { + "num_samples": 256, + "number_of_characters": 78006, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.71484375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 190.99609375, + "max_sentence2_length": 497, + "unique_sentence2": 256 + }, + "pls_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78006, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 190.99609375, + "max_sentence1_length": 497, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.71484375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-plu_Latn": { + "num_samples": 256, + "number_of_characters": 82925, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.98828125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 212.9375, + "max_sentence2_length": 1292, + "unique_sentence2": 256 + }, + "plu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82925, + "unique_pairs": 256, + 
"min_sentence1_length": 44, + "average_sentence1_length": 212.9375, + "max_sentence1_length": 1292, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.98828125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-pma_Latn": { + "num_samples": 256, + "number_of_characters": 77418, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.41015625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 188.00390625, + "max_sentence2_length": 549, + "unique_sentence2": 256 + }, + "pma_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77418, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 188.00390625, + "max_sentence1_length": 549, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.41015625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-poe_Latn": { + "num_samples": 256, + "number_of_characters": 72901, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.53515625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 171.234375, + "max_sentence2_length": 435, + "unique_sentence2": 256 + }, + "poe_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72901, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 171.234375, + "max_sentence1_length": 435, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.53515625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-poh_Latn": { + "num_samples": 256, + "number_of_characters": 119889, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + 
"min_sentence2_length": 113, + "average_sentence2_length": 352.9453125, + "max_sentence2_length": 889, + "unique_sentence2": 256 + }, + "poh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 119889, + "unique_pairs": 256, + "min_sentence1_length": 113, + "average_sentence1_length": 352.9453125, + "max_sentence1_length": 889, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-poi_Latn": { + "num_samples": 256, + "number_of_characters": 72960, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.55078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 169.44921875, + "max_sentence2_length": 390, + "unique_sentence2": 256 + }, + "poi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72960, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 169.44921875, + "max_sentence1_length": 390, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.55078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-pol_Latn": { + "num_samples": 256, + "number_of_characters": 71618, + "unique_pairs": 252, + "min_sentence1_length": 35, + "average_sentence1_length": 146.67578125, + "max_sentence1_length": 341, + "unique_sentence1": 249, + "min_sentence2_length": 33, + "average_sentence2_length": 133.08203125, + "max_sentence2_length": 307, + "unique_sentence2": 252 + }, + "pol_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71618, + "unique_pairs": 252, + "min_sentence1_length": 33, + "average_sentence1_length": 133.08203125, + "max_sentence1_length": 307, + "unique_sentence1": 252, + "min_sentence2_length": 35, + "average_sentence2_length": 146.67578125, + "max_sentence2_length": 341, + "unique_sentence2": 249 + }, + 
"eng_Latn-pon_Latn": { + "num_samples": 256, + "number_of_characters": 75669, + "unique_pairs": 254, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 37, + "average_sentence2_length": 148.0234375, + "max_sentence2_length": 401, + "unique_sentence2": 254 + }, + "pon_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75669, + "unique_pairs": 254, + "min_sentence1_length": 37, + "average_sentence1_length": 148.0234375, + "max_sentence1_length": 401, + "unique_sentence1": 254, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-por_Latn": { + "num_samples": 256, + "number_of_characters": 78999, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 53, + "average_sentence2_length": 161.03125, + "max_sentence2_length": 398, + "unique_sentence2": 255 + }, + "por_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78999, + "unique_pairs": 255, + "min_sentence1_length": 53, + "average_sentence1_length": 161.03125, + "max_sentence1_length": 398, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-poy_Latn": { + "num_samples": 256, + "number_of_characters": 65285, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 142.625, + "max_sentence2_length": 360, + "unique_sentence2": 256 + }, + "poy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65285, + "unique_pairs": 256, + "min_sentence1_length": 25, + 
"average_sentence1_length": 142.625, + "max_sentence1_length": 360, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ppo_Latn": { + "num_samples": 256, + "number_of_characters": 98328, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 108.71875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 59, + "average_sentence2_length": 275.375, + "max_sentence2_length": 1155, + "unique_sentence2": 256 + }, + "ppo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 98328, + "unique_pairs": 256, + "min_sentence1_length": 59, + "average_sentence1_length": 275.375, + "max_sentence1_length": 1155, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 108.71875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-prf_Latn": { + "num_samples": 256, + "number_of_characters": 65871, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.51953125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 141.7890625, + "max_sentence2_length": 318, + "unique_sentence2": 256 + }, + "prf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65871, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 141.7890625, + "max_sentence1_length": 318, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.51953125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-pri_Latn": { + "num_samples": 256, + "number_of_characters": 88880, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.42578125, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 
234.76171875, + "max_sentence2_length": 691, + "unique_sentence2": 256 + }, + "pri_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88880, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 234.76171875, + "max_sentence1_length": 691, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.42578125, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "eng_Latn-ptp_Latn": { + "num_samples": 256, + "number_of_characters": 76415, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.2421875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 184.25390625, + "max_sentence2_length": 592, + "unique_sentence2": 255 + }, + "ptp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76415, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 184.25390625, + "max_sentence1_length": 592, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 114.2421875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ptu_Latn": { + "num_samples": 256, + "number_of_characters": 76023, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.14453125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 186.8203125, + "max_sentence2_length": 566, + "unique_sentence2": 256 + }, + "ptu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76023, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 186.8203125, + "max_sentence1_length": 566, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.14453125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-pwg_Latn": { + "num_samples": 256, + "number_of_characters": 
71655, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.4921875, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 168.41015625, + "max_sentence2_length": 546, + "unique_sentence2": 256 + }, + "pwg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71655, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 168.41015625, + "max_sentence1_length": 546, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.4921875, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-qub_Latn": { + "num_samples": 256, + "number_of_characters": 71383, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 116.26953125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 162.5703125, + "max_sentence2_length": 540, + "unique_sentence2": 256 + }, + "qub_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71383, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 162.5703125, + "max_sentence1_length": 540, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 116.26953125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-quc_Latn": { + "num_samples": 256, + "number_of_characters": 93370, + "unique_pairs": 253, + "min_sentence1_length": 56, + "average_sentence1_length": 147.15234375, + "max_sentence1_length": 341, + "unique_sentence1": 253, + "min_sentence2_length": 61, + "average_sentence2_length": 217.57421875, + "max_sentence2_length": 504, + "unique_sentence2": 253 + }, + "quc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93370, + "unique_pairs": 253, + "min_sentence1_length": 61, + "average_sentence1_length": 217.57421875, + "max_sentence1_length": 504, + 
"unique_sentence1": 253, + "min_sentence2_length": 56, + "average_sentence2_length": 147.15234375, + "max_sentence2_length": 341, + "unique_sentence2": 253 + }, + "eng_Latn-quf_Latn": { + "num_samples": 256, + "number_of_characters": 88893, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 110.26953125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 61, + "average_sentence2_length": 236.96875, + "max_sentence2_length": 657, + "unique_sentence2": 256 + }, + "quf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88893, + "unique_pairs": 256, + "min_sentence1_length": 61, + "average_sentence1_length": 236.96875, + "max_sentence1_length": 657, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 110.26953125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-quh_Latn": { + "num_samples": 256, + "number_of_characters": 71214, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.81640625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 163.36328125, + "max_sentence2_length": 600, + "unique_sentence2": 256 + }, + "quh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71214, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 163.36328125, + "max_sentence1_length": 600, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.81640625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-qul_Latn": { + "num_samples": 256, + "number_of_characters": 63056, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.6328125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 130.6796875, + "max_sentence2_length": 286, + "unique_sentence2": 
256 + }, + "qul_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63056, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 130.6796875, + "max_sentence1_length": 286, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.6328125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-qup_Latn": { + "num_samples": 256, + "number_of_characters": 101796, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.0, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 286.640625, + "max_sentence2_length": 955, + "unique_sentence2": 256 + }, + "qup_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 101796, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 286.640625, + "max_sentence1_length": 955, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.0, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-qvc_Latn": { + "num_samples": 256, + "number_of_characters": 87158, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.984375, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 54, + "average_sentence2_length": 229.4765625, + "max_sentence2_length": 807, + "unique_sentence2": 256 + }, + "qvc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87158, + "unique_pairs": 256, + "min_sentence1_length": 54, + "average_sentence1_length": 229.4765625, + "max_sentence1_length": 807, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.984375, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-qve_Latn": { + "num_samples": 256, + "number_of_characters": 84491, + "unique_pairs": 256, + "min_sentence1_length": 24, + 
"average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 63, + "average_sentence2_length": 217.6484375, + "max_sentence2_length": 630, + "unique_sentence2": 256 + }, + "qve_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84491, + "unique_pairs": 256, + "min_sentence1_length": 63, + "average_sentence1_length": 217.6484375, + "max_sentence1_length": 630, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-qvh_Latn": { + "num_samples": 256, + "number_of_characters": 88177, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.80078125, + "max_sentence1_length": 271, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 230.640625, + "max_sentence2_length": 759, + "unique_sentence2": 256 + }, + "qvh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88177, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 230.640625, + "max_sentence1_length": 759, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.80078125, + "max_sentence2_length": 271, + "unique_sentence2": 256 + }, + "eng_Latn-qvm_Latn": { + "num_samples": 256, + "number_of_characters": 90244, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.80078125, + "max_sentence1_length": 271, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 238.71484375, + "max_sentence2_length": 762, + "unique_sentence2": 256 + }, + "qvm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90244, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 238.71484375, + "max_sentence1_length": 762, + "unique_sentence1": 256, + "min_sentence2_length": 24, + 
"average_sentence2_length": 113.80078125, + "max_sentence2_length": 271, + "unique_sentence2": 256 + }, + "eng_Latn-qvn_Latn": { + "num_samples": 256, + "number_of_characters": 74578, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.42578125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 175.89453125, + "max_sentence2_length": 523, + "unique_sentence2": 256 + }, + "qvn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74578, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 175.89453125, + "max_sentence1_length": 523, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.42578125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-qvs_Latn": { + "num_samples": 256, + "number_of_characters": 80924, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.2265625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 47, + "average_sentence2_length": 201.8828125, + "max_sentence2_length": 637, + "unique_sentence2": 256 + }, + "qvs_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80924, + "unique_pairs": 256, + "min_sentence1_length": 47, + "average_sentence1_length": 201.8828125, + "max_sentence1_length": 637, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.2265625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-qvw_Latn": { + "num_samples": 256, + "number_of_characters": 73228, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.88671875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 174.16015625, + "max_sentence2_length": 539, + "unique_sentence2": 256 + }, + "qvw_Latn-eng_Latn": { + "num_samples": 
256, + "number_of_characters": 73228, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 174.16015625, + "max_sentence1_length": 539, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.88671875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-qvz_Latn": { + "num_samples": 256, + "number_of_characters": 81286, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.54296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 202.98046875, + "max_sentence2_length": 625, + "unique_sentence2": 256 + }, + "qvz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81286, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 202.98046875, + "max_sentence1_length": 625, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.54296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-qwh_Latn": { + "num_samples": 256, + "number_of_characters": 72925, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.5078125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 173.35546875, + "max_sentence2_length": 490, + "unique_sentence2": 256 + }, + "qwh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72925, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 173.35546875, + "max_sentence1_length": 490, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.5078125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-qxh_Latn": { + "num_samples": 256, + "number_of_characters": 76253, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.95703125, + 
"max_sentence1_length": 227, + "unique_sentence1": 255, + "min_sentence2_length": 31, + "average_sentence2_length": 188.90625, + "max_sentence2_length": 569, + "unique_sentence2": 256 + }, + "qxh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76253, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 188.90625, + "max_sentence1_length": 569, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.95703125, + "max_sentence2_length": 227, + "unique_sentence2": 255 + }, + "eng_Latn-qxn_Latn": { + "num_samples": 256, + "number_of_characters": 79403, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.30078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 48, + "average_sentence2_length": 196.8671875, + "max_sentence2_length": 528, + "unique_sentence2": 256 + }, + "qxn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79403, + "unique_pairs": 256, + "min_sentence1_length": 48, + "average_sentence1_length": 196.8671875, + "max_sentence1_length": 528, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.30078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-qxo_Latn": { + "num_samples": 256, + "number_of_characters": 91522, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.046875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 246.4609375, + "max_sentence2_length": 910, + "unique_sentence2": 256 + }, + "qxo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 91522, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 246.4609375, + "max_sentence1_length": 910, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.046875, + "max_sentence2_length": 
227, + "unique_sentence2": 256 + }, + "eng_Latn-rai_Latn": { + "num_samples": 256, + "number_of_characters": 74435, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.71875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 177.04296875, + "max_sentence2_length": 445, + "unique_sentence2": 256 + }, + "rai_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74435, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 177.04296875, + "max_sentence1_length": 445, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.71875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-reg_Latn": { + "num_samples": 256, + "number_of_characters": 62819, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.75390625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 134.6328125, + "max_sentence2_length": 342, + "unique_sentence2": 256 + }, + "reg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62819, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 134.6328125, + "max_sentence1_length": 342, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.75390625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-rgu_Latn": { + "num_samples": 256, + "number_of_characters": 97367, + "unique_pairs": 255, + "min_sentence1_length": 24, + "average_sentence1_length": 114.55859375, + "max_sentence1_length": 251, + "unique_sentence1": 255, + "min_sentence2_length": 38, + "average_sentence2_length": 265.78125, + "max_sentence2_length": 1233, + "unique_sentence2": 255 + }, + "rgu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97367, + "unique_pairs": 255, + 
"min_sentence1_length": 38, + "average_sentence1_length": 265.78125, + "max_sentence1_length": 1233, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 114.55859375, + "max_sentence2_length": 251, + "unique_sentence2": 255 + }, + "eng_Latn-rkb_Latn": { + "num_samples": 256, + "number_of_characters": 106118, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 109.2421875, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 305.28125, + "max_sentence2_length": 1129, + "unique_sentence2": 256 + }, + "rkb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 106118, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 305.28125, + "max_sentence1_length": 1129, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 109.2421875, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "eng_Latn-rmc_Latn": { + "num_samples": 256, + "number_of_characters": 61817, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.87109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 126.6015625, + "max_sentence2_length": 288, + "unique_sentence2": 256 + }, + "rmc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61817, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 126.6015625, + "max_sentence1_length": 288, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.87109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-rmy_Latn": { + "num_samples": 256, + "number_of_characters": 53188, + "unique_pairs": 252, + "min_sentence1_length": 26, + "average_sentence1_length": 100.94140625, + "max_sentence1_length": 218, + "unique_sentence1": 245, + 
"min_sentence2_length": 26, + "average_sentence2_length": 106.82421875, + "max_sentence2_length": 247, + "unique_sentence2": 252 + }, + "rmy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 53188, + "unique_pairs": 252, + "min_sentence1_length": 26, + "average_sentence1_length": 106.82421875, + "max_sentence1_length": 247, + "unique_sentence1": 252, + "min_sentence2_length": 26, + "average_sentence2_length": 100.94140625, + "max_sentence2_length": 218, + "unique_sentence2": 245 + }, + "eng_Latn-ron_Latn": { + "num_samples": 256, + "number_of_characters": 74468, + "unique_pairs": 254, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 54, + "average_sentence2_length": 143.33203125, + "max_sentence2_length": 339, + "unique_sentence2": 254 + }, + "ron_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74468, + "unique_pairs": 254, + "min_sentence1_length": 54, + "average_sentence1_length": 143.33203125, + "max_sentence1_length": 339, + "unique_sentence1": 254, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-roo_Latn": { + "num_samples": 256, + "number_of_characters": 101889, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.71484375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 285.2890625, + "max_sentence2_length": 1183, + "unique_sentence2": 256 + }, + "roo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 101889, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 285.2890625, + "max_sentence1_length": 1183, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.71484375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + 
"eng_Latn-rop_Latn": { + "num_samples": 256, + "number_of_characters": 97353, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.8125, + "max_sentence1_length": 231, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 269.47265625, + "max_sentence2_length": 844, + "unique_sentence2": 256 + }, + "rop_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97353, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 269.47265625, + "max_sentence1_length": 844, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.8125, + "max_sentence2_length": 231, + "unique_sentence2": 256 + }, + "eng_Latn-row_Latn": { + "num_samples": 256, + "number_of_characters": 95222, + "unique_pairs": 255, + "min_sentence1_length": 24, + "average_sentence1_length": 114.76953125, + "max_sentence1_length": 251, + "unique_sentence1": 255, + "min_sentence2_length": 33, + "average_sentence2_length": 257.19140625, + "max_sentence2_length": 1241, + "unique_sentence2": 255 + }, + "row_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 95222, + "unique_pairs": 255, + "min_sentence1_length": 33, + "average_sentence1_length": 257.19140625, + "max_sentence1_length": 1241, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 114.76953125, + "max_sentence2_length": 251, + "unique_sentence2": 255 + }, + "eng_Latn-rro_Latn": { + "num_samples": 256, + "number_of_characters": 68107, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 150.671875, + "max_sentence2_length": 352, + "unique_sentence2": 256 + }, + "rro_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68107, + "unique_pairs": 256, + "min_sentence1_length": 35, + 
"average_sentence1_length": 150.671875, + "max_sentence1_length": 352, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ruf_Latn": { + "num_samples": 256, + "number_of_characters": 62207, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 130.6015625, + "max_sentence2_length": 316, + "unique_sentence2": 256 + }, + "ruf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62207, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 130.6015625, + "max_sentence1_length": 316, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-rug_Latn": { + "num_samples": 256, + "number_of_characters": 74146, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.87109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 174.76171875, + "max_sentence2_length": 480, + "unique_sentence2": 256 + }, + "rug_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74146, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 174.76171875, + "max_sentence1_length": 480, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.87109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-rus_Cyrl": { + "num_samples": 256, + "number_of_characters": 52947, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 17, + 
"average_sentence2_length": 92.38671875, + "max_sentence2_length": 233, + "unique_sentence2": 256 + }, + "rus_Cyrl-eng_Latn": { + "num_samples": 256, + "number_of_characters": 52947, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 92.38671875, + "max_sentence1_length": 233, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-rwo_Latn": { + "num_samples": 256, + "number_of_characters": 115690, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 145.0625, + "max_sentence1_length": 341, + "unique_sentence1": 255, + "min_sentence2_length": 85, + "average_sentence2_length": 306.8515625, + "max_sentence2_length": 871, + "unique_sentence2": 256 + }, + "rwo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 115690, + "unique_pairs": 256, + "min_sentence1_length": 85, + "average_sentence1_length": 306.8515625, + "max_sentence1_length": 871, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 145.0625, + "max_sentence2_length": 341, + "unique_sentence2": 255 + }, + "eng_Latn-sab_Latn": { + "num_samples": 256, + "number_of_characters": 143658, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.7109375, + "max_sentence1_length": 246, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 446.453125, + "max_sentence2_length": 1781, + "unique_sentence2": 256 + }, + "sab_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 143658, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 446.453125, + "max_sentence1_length": 1781, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.7109375, + "max_sentence2_length": 246, + "unique_sentence2": 256 + }, + "eng_Latn-san_Latn": { + "num_samples": 256, + 
"number_of_characters": 77941, + "unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 2, + "average_sentence2_length": 159.125, + "max_sentence2_length": 429, + "unique_sentence2": 254 + }, + "san_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77941, + "unique_pairs": 254, + "min_sentence1_length": 2, + "average_sentence1_length": 159.125, + "max_sentence1_length": 429, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-sbe_Latn": { + "num_samples": 256, + "number_of_characters": 74051, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.0546875, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 179.20703125, + "max_sentence2_length": 666, + "unique_sentence2": 256 + }, + "sbe_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74051, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 179.20703125, + "max_sentence1_length": 666, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.0546875, + "max_sentence2_length": 239, + "unique_sentence2": 256 + }, + "eng_Latn-sbk_Latn": { + "num_samples": 256, + "number_of_characters": 58375, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.32421875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 17, + "average_sentence2_length": 112.703125, + "max_sentence2_length": 250, + "unique_sentence2": 256 + }, + "sbk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 58375, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 112.703125, + "max_sentence1_length": 250, + 
"unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.32421875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-sbs_Latn": { + "num_samples": 256, + "number_of_characters": 58485, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.984375, + "max_sentence1_length": 237, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 117.47265625, + "max_sentence2_length": 434, + "unique_sentence2": 256 + }, + "sbs_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 58485, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 117.47265625, + "max_sentence1_length": 434, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.984375, + "max_sentence2_length": 237, + "unique_sentence2": 256 + }, + "eng_Latn-seh_Latn": { + "num_samples": 256, + "number_of_characters": 61069, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 109.953125, + "max_sentence1_length": 257, + "unique_sentence1": 254, + "min_sentence2_length": 28, + "average_sentence2_length": 128.59765625, + "max_sentence2_length": 298, + "unique_sentence2": 256 + }, + "seh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61069, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 128.59765625, + "max_sentence1_length": 298, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 109.953125, + "max_sentence2_length": 257, + "unique_sentence2": 254 + }, + "eng_Latn-sey_Latn": { + "num_samples": 256, + "number_of_characters": 72594, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.765625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 168.8046875, + "max_sentence2_length": 483, + "unique_sentence2": 256 
+ }, + "sey_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72594, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 168.8046875, + "max_sentence1_length": 483, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.765625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-sgb_Latn": { + "num_samples": 256, + "number_of_characters": 72880, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.53515625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 173.15234375, + "max_sentence2_length": 574, + "unique_sentence2": 256 + }, + "sgb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72880, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 173.15234375, + "max_sentence1_length": 574, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.53515625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-sgz_Latn": { + "num_samples": 256, + "number_of_characters": 90808, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.1171875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 245.6015625, + "max_sentence2_length": 964, + "unique_sentence2": 256 + }, + "sgz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90808, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 245.6015625, + "max_sentence1_length": 964, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.1171875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-shj_Latn": { + "num_samples": 91, + "number_of_characters": 20259, + "unique_pairs": 91, + "min_sentence1_length": 37, + 
"average_sentence1_length": 108.34065934065934, + "max_sentence1_length": 245, + "unique_sentence1": 91, + "min_sentence2_length": 25, + "average_sentence2_length": 114.28571428571429, + "max_sentence2_length": 254, + "unique_sentence2": 91 + }, + "shj_Latn-eng_Latn": { + "num_samples": 91, + "number_of_characters": 20259, + "unique_pairs": 91, + "min_sentence1_length": 25, + "average_sentence1_length": 114.28571428571429, + "max_sentence1_length": 254, + "unique_sentence1": 91, + "min_sentence2_length": 37, + "average_sentence2_length": 108.34065934065934, + "max_sentence2_length": 245, + "unique_sentence2": 91 + }, + "eng_Latn-shp_Latn": { + "num_samples": 256, + "number_of_characters": 67295, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 150.4765625, + "max_sentence2_length": 322, + "unique_sentence2": 256 + }, + "shp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67295, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 150.4765625, + "max_sentence1_length": 322, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-sim_Latn": { + "num_samples": 256, + "number_of_characters": 92400, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.4453125, + "max_sentence1_length": 238, + "unique_sentence1": 256, + "min_sentence2_length": 47, + "average_sentence2_length": 246.4921875, + "max_sentence2_length": 975, + "unique_sentence2": 256 + }, + "sim_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92400, + "unique_pairs": 256, + "min_sentence1_length": 47, + "average_sentence1_length": 246.4921875, + "max_sentence1_length": 975, + "unique_sentence1": 256, + "min_sentence2_length": 31, + 
"average_sentence2_length": 114.4453125, + "max_sentence2_length": 238, + "unique_sentence2": 256 + }, + "eng_Latn-sja_Latn": { + "num_samples": 256, + "number_of_characters": 84080, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.34765625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 213.08984375, + "max_sentence2_length": 513, + "unique_sentence2": 256 + }, + "sja_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84080, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 213.08984375, + "max_sentence1_length": 513, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.34765625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-sll_Latn": { + "num_samples": 256, + "number_of_characters": 84656, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.078125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 220.609375, + "max_sentence2_length": 1080, + "unique_sentence2": 256 + }, + "sll_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84656, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 220.609375, + "max_sentence1_length": 1080, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.078125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-smk_Latn": { + "num_samples": 256, + "number_of_characters": 81502, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.5078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 203.859375, + "max_sentence2_length": 567, + "unique_sentence2": 256 + }, + "smk_Latn-eng_Latn": { + "num_samples": 256, + 
"number_of_characters": 81502, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 203.859375, + "max_sentence1_length": 567, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.5078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-snc_Latn": { + "num_samples": 256, + "number_of_characters": 75296, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.4921875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 179.6328125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "snc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75296, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 179.6328125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.4921875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-snn_Latn": { + "num_samples": 256, + "number_of_characters": 93479, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.234375, + "max_sentence1_length": 233, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 254.91796875, + "max_sentence2_length": 841, + "unique_sentence2": 256 + }, + "snn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93479, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 254.91796875, + "max_sentence1_length": 841, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.234375, + "max_sentence2_length": 233, + "unique_sentence2": 256 + }, + "eng_Latn-snp_Latn": { + "num_samples": 256, + "number_of_characters": 93493, + "unique_pairs": 256, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 
341, + "unique_sentence1": 254, + "min_sentence2_length": 49, + "average_sentence2_length": 219.875, + "max_sentence2_length": 619, + "unique_sentence2": 256 + }, + "snp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93493, + "unique_pairs": 256, + "min_sentence1_length": 49, + "average_sentence1_length": 219.875, + "max_sentence1_length": 619, + "unique_sentence1": 256, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-snx_Latn": { + "num_samples": 140, + "number_of_characters": 33814, + "unique_pairs": 140, + "min_sentence1_length": 39, + "average_sentence1_length": 108.87857142857143, + "max_sentence1_length": 257, + "unique_sentence1": 139, + "min_sentence2_length": 42, + "average_sentence2_length": 132.65, + "max_sentence2_length": 313, + "unique_sentence2": 140 + }, + "snx_Latn-eng_Latn": { + "num_samples": 140, + "number_of_characters": 33814, + "unique_pairs": 140, + "min_sentence1_length": 42, + "average_sentence1_length": 132.65, + "max_sentence1_length": 313, + "unique_sentence1": 140, + "min_sentence2_length": 39, + "average_sentence2_length": 108.87857142857143, + "max_sentence2_length": 257, + "unique_sentence2": 139 + }, + "eng_Latn-sny_Latn": { + "num_samples": 256, + "number_of_characters": 109633, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.15625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 317.09765625, + "max_sentence2_length": 1158, + "unique_sentence2": 256 + }, + "sny_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 109633, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 317.09765625, + "max_sentence1_length": 1158, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.15625, + "max_sentence2_length": 227, + 
"unique_sentence2": 256 + }, + "eng_Latn-som_Latn": { + "num_samples": 256, + "number_of_characters": 61570, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 18, + "average_sentence2_length": 125.13671875, + "max_sentence2_length": 311, + "unique_sentence2": 256 + }, + "som_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61570, + "unique_pairs": 256, + "min_sentence1_length": 18, + "average_sentence1_length": 125.13671875, + "max_sentence1_length": 311, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-soq_Latn": { + "num_samples": 256, + "number_of_characters": 83230, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.28515625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 215.83203125, + "max_sentence2_length": 727, + "unique_sentence2": 256 + }, + "soq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83230, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 215.83203125, + "max_sentence1_length": 727, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.28515625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-soy_Latn": { + "num_samples": 256, + "number_of_characters": 64435, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.36328125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 136.3359375, + "max_sentence2_length": 317, + "unique_sentence2": 256 + }, + "soy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64435, + "unique_pairs": 256, + 
"min_sentence1_length": 31, + "average_sentence1_length": 136.3359375, + "max_sentence1_length": 317, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.36328125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-spa_Latn": { + "num_samples": 256, + "number_of_characters": 75375, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 50, + "average_sentence2_length": 146.875, + "max_sentence2_length": 371, + "unique_sentence2": 255 + }, + "spa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75375, + "unique_pairs": 255, + "min_sentence1_length": 50, + "average_sentence1_length": 146.875, + "max_sentence1_length": 371, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-spl_Latn": { + "num_samples": 256, + "number_of_characters": 131575, + "unique_pairs": 254, + "min_sentence1_length": 25, + "average_sentence1_length": 112.36328125, + "max_sentence1_length": 265, + "unique_sentence1": 254, + "min_sentence2_length": 47, + "average_sentence2_length": 401.6015625, + "max_sentence2_length": 1260, + "unique_sentence2": 254 + }, + "spl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 131575, + "unique_pairs": 254, + "min_sentence1_length": 47, + "average_sentence1_length": 401.6015625, + "max_sentence1_length": 1260, + "unique_sentence1": 254, + "min_sentence2_length": 25, + "average_sentence2_length": 112.36328125, + "max_sentence2_length": 265, + "unique_sentence2": 254 + }, + "eng_Latn-spm_Latn": { + "num_samples": 256, + "number_of_characters": 85488, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 114.765625, + "max_sentence1_length": 257, + "unique_sentence1": 256, + "min_sentence2_length": 
41, + "average_sentence2_length": 219.171875, + "max_sentence2_length": 596, + "unique_sentence2": 256 + }, + "spm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85488, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 219.171875, + "max_sentence1_length": 596, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 114.765625, + "max_sentence2_length": 257, + "unique_sentence2": 256 + }, + "eng_Latn-spp_Latn": { + "num_samples": 256, + "number_of_characters": 71174, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.95703125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 167.06640625, + "max_sentence2_length": 601, + "unique_sentence2": 256 + }, + "spp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71174, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 167.06640625, + "max_sentence1_length": 601, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.95703125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-sps_Latn": { + "num_samples": 256, + "number_of_characters": 91698, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.609375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 242.5859375, + "max_sentence2_length": 789, + "unique_sentence2": 256 + }, + "sps_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 91698, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 242.5859375, + "max_sentence1_length": 789, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.609375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-spy_Latn": { + "num_samples": 256, 
+ "number_of_characters": 66196, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4140625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 145.1640625, + "max_sentence2_length": 398, + "unique_sentence2": 256 + }, + "spy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66196, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 145.1640625, + "max_sentence1_length": 398, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4140625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-sri_Latn": { + "num_samples": 256, + "number_of_characters": 81672, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.62109375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 208.41015625, + "max_sentence2_length": 543, + "unique_sentence2": 255 + }, + "sri_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81672, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 208.41015625, + "max_sentence1_length": 543, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 110.62109375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-srm_Latn": { + "num_samples": 256, + "number_of_characters": 92372, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.73046875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 250.09765625, + "max_sentence2_length": 997, + "unique_sentence2": 256 + }, + "srm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92372, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 250.09765625, + 
"max_sentence1_length": 997, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.73046875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-srn_Latn": { + "num_samples": 256, + "number_of_characters": 63158, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.71484375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 131.99609375, + "max_sentence2_length": 315, + "unique_sentence2": 256 + }, + "srn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63158, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 131.99609375, + "max_sentence1_length": 315, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.71484375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-srp_Latn": { + "num_samples": 256, + "number_of_characters": 52022, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.67578125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 89.53515625, + "max_sentence2_length": 220, + "unique_sentence2": 256 + }, + "srp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 52022, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 89.53515625, + "max_sentence1_length": 220, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.67578125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-srq_Latn": { + "num_samples": 256, + "number_of_characters": 71590, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 165.5703125, + 
"max_sentence2_length": 532, + "unique_sentence2": 255 + }, + "srq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71590, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 165.5703125, + "max_sentence1_length": 532, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 114.078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ssd_Latn": { + "num_samples": 256, + "number_of_characters": 72465, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.75, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 174.31640625, + "max_sentence2_length": 502, + "unique_sentence2": 256 + }, + "ssd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72465, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 174.31640625, + "max_sentence1_length": 502, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.75, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ssg_Latn": { + "num_samples": 256, + "number_of_characters": 71281, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.9296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 164.51171875, + "max_sentence2_length": 461, + "unique_sentence2": 256 + }, + "ssg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71281, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 164.51171875, + "max_sentence1_length": 461, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.9296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ssx_Latn": { + "num_samples": 256, + "number_of_characters": 84322, + "unique_pairs": 
256, + "min_sentence1_length": 24, + "average_sentence1_length": 116.21875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 213.1640625, + "max_sentence2_length": 493, + "unique_sentence2": 256 + }, + "ssx_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84322, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 213.1640625, + "max_sentence1_length": 493, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 116.21875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-stp_Latn": { + "num_samples": 256, + "number_of_characters": 99562, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.640625, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 279.2734375, + "max_sentence2_length": 743, + "unique_sentence2": 256 + }, + "stp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99562, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 279.2734375, + "max_sentence1_length": 743, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.640625, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "eng_Latn-sua_Latn": { + "num_samples": 256, + "number_of_characters": 136992, + "unique_pairs": 255, + "min_sentence1_length": 25, + "average_sentence1_length": 109.33984375, + "max_sentence1_length": 227, + "unique_sentence1": 255, + "min_sentence2_length": 53, + "average_sentence2_length": 425.78515625, + "max_sentence2_length": 1832, + "unique_sentence2": 255 + }, + "sua_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 136992, + "unique_pairs": 255, + "min_sentence1_length": 53, + "average_sentence1_length": 425.78515625, + "max_sentence1_length": 1832, + "unique_sentence1": 255, + 
"min_sentence2_length": 25, + "average_sentence2_length": 109.33984375, + "max_sentence2_length": 227, + "unique_sentence2": 255 + }, + "eng_Latn-sue_Latn": { + "num_samples": 256, + "number_of_characters": 73084, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.70703125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 170.77734375, + "max_sentence2_length": 777, + "unique_sentence2": 256 + }, + "sue_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73084, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 170.77734375, + "max_sentence1_length": 777, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.70703125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-sus_Arab": { + "num_samples": 256, + "number_of_characters": 75002, + "unique_pairs": 256, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 27, + "average_sentence2_length": 147.64453125, + "max_sentence2_length": 435, + "unique_sentence2": 256 + }, + "sus_Arab-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75002, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 147.64453125, + "max_sentence1_length": 435, + "unique_sentence1": 256, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-suz_Latn": { + "num_samples": 256, + "number_of_characters": 67319, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 150.5703125, + "max_sentence2_length": 456, + "unique_sentence2": 256 + }, + 
"suz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67319, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 150.5703125, + "max_sentence1_length": 456, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-swe_Latn": { + "num_samples": 256, + "number_of_characters": 99056, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 274.54296875, + "max_sentence2_length": 1575, + "unique_sentence2": 255 + }, + "swe_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99056, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 274.54296875, + "max_sentence1_length": 1575, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-swh_Latn": { + "num_samples": 256, + "number_of_characters": 76934, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 43, + "average_sentence2_length": 152.96484375, + "max_sentence2_length": 378, + "unique_sentence2": 255 + }, + "swh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76934, + "unique_pairs": 255, + "min_sentence1_length": 43, + "average_sentence1_length": 152.96484375, + "max_sentence1_length": 378, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-swp_Latn": { + "num_samples": 256, + "number_of_characters": 61397, + "unique_pairs": 256, + "min_sentence1_length": 24, + 
"average_sentence1_length": 110.3125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 129.51953125, + "max_sentence2_length": 378, + "unique_sentence2": 256 + }, + "swp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61397, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 129.51953125, + "max_sentence1_length": 378, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.3125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-sxb_Latn": { + "num_samples": 256, + "number_of_characters": 67824, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.4453125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 154.4921875, + "max_sentence2_length": 393, + "unique_sentence2": 256 + }, + "sxb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67824, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 154.4921875, + "max_sentence1_length": 393, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.4453125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tac_Latn": { + "num_samples": 256, + "number_of_characters": 85792, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.203125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 224.921875, + "max_sentence2_length": 673, + "unique_sentence2": 256 + }, + "tac_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85792, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 224.921875, + "max_sentence1_length": 673, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 
110.203125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-taj_Deva": { + "num_samples": 256, + "number_of_characters": 67747, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.34375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 151.29296875, + "max_sentence2_length": 392, + "unique_sentence2": 256 + }, + "taj_Deva-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67747, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 151.29296875, + "max_sentence1_length": 392, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.34375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tam_Taml": { + "num_samples": 256, + "number_of_characters": 64233, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 135.61328125, + "max_sentence2_length": 304, + "unique_sentence2": 256 + }, + "tam_Taml-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64233, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 135.61328125, + "max_sentence1_length": 304, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tav_Latn": { + "num_samples": 256, + "number_of_characters": 112479, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.55859375, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 326.8125, + "max_sentence2_length": 1637, + "unique_sentence2": 256 + }, + "tav_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 112479, + 
"unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 326.8125, + "max_sentence1_length": 1637, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.55859375, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-taw_Latn": { + "num_samples": 256, + "number_of_characters": 86679, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.49609375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 227.09375, + "max_sentence2_length": 647, + "unique_sentence2": 256 + }, + "taw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 86679, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 227.09375, + "max_sentence1_length": 647, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 111.49609375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tbc_Latn": { + "num_samples": 256, + "number_of_characters": 78946, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 113.28515625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 195.09765625, + "max_sentence2_length": 552, + "unique_sentence2": 256 + }, + "tbc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78946, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 195.09765625, + "max_sentence1_length": 552, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 113.28515625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tbf_Latn": { + "num_samples": 256, + "number_of_characters": 71971, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.50390625, + "max_sentence1_length": 227, + "unique_sentence1": 256, 
+ "min_sentence2_length": 35, + "average_sentence2_length": 170.6328125, + "max_sentence2_length": 527, + "unique_sentence2": 256 + }, + "tbf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71971, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 170.6328125, + "max_sentence1_length": 527, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.50390625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tbg_Latn": { + "num_samples": 256, + "number_of_characters": 72269, + "unique_pairs": 252, + "min_sentence1_length": 29, + "average_sentence1_length": 105.53515625, + "max_sentence1_length": 217, + "unique_sentence1": 243, + "min_sentence2_length": 18, + "average_sentence2_length": 176.765625, + "max_sentence2_length": 557, + "unique_sentence2": 252 + }, + "tbg_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72269, + "unique_pairs": 252, + "min_sentence1_length": 18, + "average_sentence1_length": 176.765625, + "max_sentence1_length": 557, + "unique_sentence1": 252, + "min_sentence2_length": 29, + "average_sentence2_length": 105.53515625, + "max_sentence2_length": 217, + "unique_sentence2": 243 + }, + "eng_Latn-tbo_Latn": { + "num_samples": 256, + "number_of_characters": 66856, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.03125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 147.125, + "max_sentence2_length": 495, + "unique_sentence2": 256 + }, + "tbo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66856, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 147.125, + "max_sentence1_length": 495, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.03125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tbz_Latn": { + 
"num_samples": 256, + "number_of_characters": 55797, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 107.83203125, + "max_sentence2_length": 275, + "unique_sentence2": 256 + }, + "tbz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 55797, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 107.83203125, + "max_sentence1_length": 275, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tca_Latn": { + "num_samples": 256, + "number_of_characters": 99018, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.3046875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 274.484375, + "max_sentence2_length": 1003, + "unique_sentence2": 256 + }, + "tca_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99018, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 274.484375, + "max_sentence1_length": 1003, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.3046875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tcs_Latn": { + "num_samples": 256, + "number_of_characters": 73303, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.69921875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 175.640625, + "max_sentence2_length": 559, + "unique_sentence2": 256 + }, + "tcs_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73303, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 175.640625, + 
"max_sentence1_length": 559, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.69921875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tcz_Latn": { + "num_samples": 256, + "number_of_characters": 64893, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 141.09375, + "max_sentence2_length": 363, + "unique_sentence2": 256 + }, + "tcz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64893, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 141.09375, + "max_sentence1_length": 363, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-tdt_Latn": { + "num_samples": 256, + "number_of_characters": 72662, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 115.015625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 168.8203125, + "max_sentence2_length": 617, + "unique_sentence2": 256 + }, + "tdt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72662, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 168.8203125, + "max_sentence1_length": 617, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 115.015625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tee_Latn": { + "num_samples": 256, + "number_of_characters": 88472, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.37890625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 231.21484375, + "max_sentence2_length": 
533, + "unique_sentence2": 256 + }, + "tee_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88472, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 231.21484375, + "max_sentence1_length": 533, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.37890625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tel_Telu": { + "num_samples": 256, + "number_of_characters": 58963, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.12109375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 120.203125, + "max_sentence2_length": 369, + "unique_sentence2": 256 + }, + "tel_Telu-eng_Latn": { + "num_samples": 256, + "number_of_characters": 58963, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 120.203125, + "max_sentence1_length": 369, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.12109375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ter_Latn": { + "num_samples": 256, + "number_of_characters": 80501, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 54, + "average_sentence2_length": 202.0625, + "max_sentence2_length": 599, + "unique_sentence2": 256 + }, + "ter_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80501, + "unique_pairs": 256, + "min_sentence1_length": 54, + "average_sentence1_length": 202.0625, + "max_sentence1_length": 599, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-tet_Latn": { + "num_samples": 256, + "number_of_characters": 88705, + "unique_pairs": 256, + 
"min_sentence1_length": 31, + "average_sentence1_length": 113.78125, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 232.72265625, + "max_sentence2_length": 1185, + "unique_sentence2": 256 + }, + "tet_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 88705, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 232.72265625, + "max_sentence1_length": 1185, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 113.78125, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "eng_Latn-tew_Latn": { + "num_samples": 256, + "number_of_characters": 82320, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.140625, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 210.421875, + "max_sentence2_length": 585, + "unique_sentence2": 256 + }, + "tew_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82320, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 210.421875, + "max_sentence1_length": 585, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.140625, + "max_sentence2_length": 239, + "unique_sentence2": 256 + }, + "eng_Latn-tfr_Latn": { + "num_samples": 256, + "number_of_characters": 85996, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.41796875, + "max_sentence1_length": 235, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 224.50390625, + "max_sentence2_length": 717, + "unique_sentence2": 256 + }, + "tfr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85996, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 224.50390625, + "max_sentence1_length": 717, + "unique_sentence1": 256, + "min_sentence2_length": 
24, + "average_sentence2_length": 111.41796875, + "max_sentence2_length": 235, + "unique_sentence2": 256 + }, + "eng_Latn-tgk_Cyrl": { + "num_samples": 256, + "number_of_characters": 57798, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 104.37109375, + "max_sentence1_length": 245, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 121.40234375, + "max_sentence2_length": 360, + "unique_sentence2": 256 + }, + "tgk_Cyrl-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57798, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 121.40234375, + "max_sentence1_length": 360, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 104.37109375, + "max_sentence2_length": 245, + "unique_sentence2": 256 + }, + "eng_Latn-tgl_Latn": { + "num_samples": 256, + "number_of_characters": 66077, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 145.71875, + "max_sentence2_length": 357, + "unique_sentence2": 256 + }, + "tgl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66077, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 145.71875, + "max_sentence1_length": 357, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-tgo_Latn": { + "num_samples": 256, + "number_of_characters": 73526, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.2109375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 177.0, + "max_sentence2_length": 472, + "unique_sentence2": 256 + }, + "tgo_Latn-eng_Latn": { + "num_samples": 256, + 
"number_of_characters": 73526, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 177.0, + "max_sentence1_length": 472, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.2109375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tgp_Latn": { + "num_samples": 256, + "number_of_characters": 68797, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.50390625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 157.234375, + "max_sentence2_length": 609, + "unique_sentence2": 256 + }, + "tgp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 68797, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 157.234375, + "max_sentence1_length": 609, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.50390625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tha_Thai": { + "num_samples": 256, + "number_of_characters": 60554, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 16, + "average_sentence2_length": 123.1015625, + "max_sentence2_length": 312, + "unique_sentence2": 256 + }, + "tha_Thai-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60554, + "unique_pairs": 256, + "min_sentence1_length": 16, + "average_sentence1_length": 123.1015625, + "max_sentence1_length": 312, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tif_Latn": { + "num_samples": 256, + "number_of_characters": 132436, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.62890625, + "max_sentence1_length": 230, + 
"unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 404.69921875, + "max_sentence2_length": 1636, + "unique_sentence2": 256 + }, + "tif_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 132436, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 404.69921875, + "max_sentence1_length": 1636, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.62890625, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-tim_Latn": { + "num_samples": 256, + "number_of_characters": 79600, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.66796875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 202.26953125, + "max_sentence2_length": 868, + "unique_sentence2": 256 + }, + "tim_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79600, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 202.26953125, + "max_sentence1_length": 868, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.66796875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tiw_Latn": { + "num_samples": 215, + "number_of_characters": 173656, + "unique_pairs": 215, + "min_sentence1_length": 40, + "average_sentence1_length": 103.73023255813953, + "max_sentence1_length": 257, + "unique_sentence1": 214, + "min_sentence2_length": 95, + "average_sentence2_length": 703.9720930232559, + "max_sentence2_length": 2787, + "unique_sentence2": 215 + }, + "tiw_Latn-eng_Latn": { + "num_samples": 215, + "number_of_characters": 173656, + "unique_pairs": 215, + "min_sentence1_length": 95, + "average_sentence1_length": 703.9720930232559, + "max_sentence1_length": 2787, + "unique_sentence1": 215, + "min_sentence2_length": 40, + "average_sentence2_length": 103.73023255813953, + 
"max_sentence2_length": 257, + "unique_sentence2": 214 + }, + "eng_Latn-tiy_Latn": { + "num_samples": 256, + "number_of_characters": 73610, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.0859375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 173.453125, + "max_sentence2_length": 679, + "unique_sentence2": 256 + }, + "tiy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73610, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 173.453125, + "max_sentence1_length": 679, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.0859375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tke_Latn": { + "num_samples": 92, + "number_of_characters": 20046, + "unique_pairs": 92, + "min_sentence1_length": 37, + "average_sentence1_length": 107.67391304347827, + "max_sentence1_length": 245, + "unique_sentence1": 92, + "min_sentence2_length": 31, + "average_sentence2_length": 110.21739130434783, + "max_sentence2_length": 212, + "unique_sentence2": 92 + }, + "tke_Latn-eng_Latn": { + "num_samples": 92, + "number_of_characters": 20046, + "unique_pairs": 92, + "min_sentence1_length": 31, + "average_sentence1_length": 110.21739130434783, + "max_sentence1_length": 212, + "unique_sentence1": 92, + "min_sentence2_length": 37, + "average_sentence2_length": 107.67391304347827, + "max_sentence2_length": 245, + "unique_sentence2": 92 + }, + "eng_Latn-tku_Latn": { + "num_samples": 256, + "number_of_characters": 74169, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.55078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 174.171875, + "max_sentence2_length": 415, + "unique_sentence2": 256 + }, + "tku_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 
74169, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 174.171875, + "max_sentence1_length": 415, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.55078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tlf_Latn": { + "num_samples": 256, + "number_of_characters": 126553, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.07421875, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 383.2734375, + "max_sentence2_length": 1527, + "unique_sentence2": 256 + }, + "tlf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 126553, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 383.2734375, + "max_sentence1_length": 1527, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.07421875, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-tmd_Latn": { + "num_samples": 256, + "number_of_characters": 83917, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.16015625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 218.640625, + "max_sentence2_length": 702, + "unique_sentence2": 256 + }, + "tmd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 83917, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 218.640625, + "max_sentence1_length": 702, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.16015625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tna_Latn": { + "num_samples": 256, + "number_of_characters": 81365, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 109.80859375, + "max_sentence1_length": 227, + 
"unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 208.0234375, + "max_sentence2_length": 741, + "unique_sentence2": 256 + }, + "tna_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81365, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 208.0234375, + "max_sentence1_length": 741, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 109.80859375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tnc_Latn": { + "num_samples": 256, + "number_of_characters": 106641, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.6015625, + "max_sentence1_length": 263, + "unique_sentence1": 254, + "min_sentence2_length": 46, + "average_sentence2_length": 302.96484375, + "max_sentence2_length": 899, + "unique_sentence2": 256 + }, + "tnc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 106641, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 302.96484375, + "max_sentence1_length": 899, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.6015625, + "max_sentence2_length": 263, + "unique_sentence2": 254 + }, + "eng_Latn-tnk_Latn": { + "num_samples": 256, + "number_of_characters": 78022, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.58203125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 195.19140625, + "max_sentence2_length": 547, + "unique_sentence2": 256 + }, + "tnk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78022, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 195.19140625, + "max_sentence1_length": 547, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.58203125, + "max_sentence2_length": 227, + 
"unique_sentence2": 256 + }, + "eng_Latn-tnn_Latn": { + "num_samples": 256, + "number_of_characters": 81309, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.359375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 203.25390625, + "max_sentence2_length": 1039, + "unique_sentence2": 256 + }, + "tnn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81309, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 203.25390625, + "max_sentence1_length": 1039, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.359375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tnp_Latn": { + "num_samples": 256, + "number_of_characters": 84121, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.2734375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 214.32421875, + "max_sentence2_length": 588, + "unique_sentence2": 256 + }, + "tnp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84121, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 214.32421875, + "max_sentence1_length": 588, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.2734375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-toc_Latn": { + "num_samples": 256, + "number_of_characters": 101728, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.421875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 54, + "average_sentence2_length": 287.953125, + "max_sentence2_length": 808, + "unique_sentence2": 256 + }, + "toc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 101728, + "unique_pairs": 256, + 
"min_sentence1_length": 54, + "average_sentence1_length": 287.953125, + "max_sentence1_length": 808, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.421875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-tod_Latn": { + "num_samples": 256, + "number_of_characters": 63476, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 123.76171875, + "max_sentence1_length": 306, + "unique_sentence1": 254, + "min_sentence2_length": 17, + "average_sentence2_length": 124.19140625, + "max_sentence2_length": 293, + "unique_sentence2": 256 + }, + "tod_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63476, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 124.19140625, + "max_sentence1_length": 293, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 123.76171875, + "max_sentence2_length": 306, + "unique_sentence2": 254 + }, + "eng_Latn-tof_Latn": { + "num_samples": 256, + "number_of_characters": 80483, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.203125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 199.18359375, + "max_sentence2_length": 546, + "unique_sentence2": 256 + }, + "tof_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80483, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 199.18359375, + "max_sentence1_length": 546, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.203125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-toj_Latn": { + "num_samples": 256, + "number_of_characters": 90667, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 108.55078125, + "max_sentence1_length": 227, + "unique_sentence1": 255, + 
"min_sentence2_length": 32, + "average_sentence2_length": 245.6171875, + "max_sentence2_length": 816, + "unique_sentence2": 256 + }, + "toj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90667, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 245.6171875, + "max_sentence1_length": 816, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 108.55078125, + "max_sentence2_length": 227, + "unique_sentence2": 255 + }, + "eng_Latn-ton_Latn": { + "num_samples": 256, + "number_of_characters": 63460, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 134.453125, + "max_sentence2_length": 302, + "unique_sentence2": 256 + }, + "ton_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63460, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 134.453125, + "max_sentence1_length": 302, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-too_Latn": { + "num_samples": 256, + "number_of_characters": 79783, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.55078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 196.1015625, + "max_sentence2_length": 458, + "unique_sentence2": 256 + }, + "too_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79783, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 196.1015625, + "max_sentence1_length": 458, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.55078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-top_Latn": { + 
"num_samples": 256, + "number_of_characters": 75881, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.54296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 182.8671875, + "max_sentence2_length": 628, + "unique_sentence2": 256 + }, + "top_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75881, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 182.8671875, + "max_sentence1_length": 628, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.54296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tos_Latn": { + "num_samples": 256, + "number_of_characters": 102824, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.44140625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 60, + "average_sentence2_length": 288.21484375, + "max_sentence2_length": 845, + "unique_sentence2": 256 + }, + "tos_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 102824, + "unique_pairs": 256, + "min_sentence1_length": 60, + "average_sentence1_length": 288.21484375, + "max_sentence1_length": 845, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.44140625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tpa_Latn": { + "num_samples": 141, + "number_of_characters": 36809, + "unique_pairs": 141, + "min_sentence1_length": 39, + "average_sentence1_length": 110.51063829787235, + "max_sentence1_length": 257, + "unique_sentence1": 140, + "min_sentence2_length": 47, + "average_sentence2_length": 150.54609929078015, + "max_sentence2_length": 540, + "unique_sentence2": 141 + }, + "tpa_Latn-eng_Latn": { + "num_samples": 141, + "number_of_characters": 36809, + "unique_pairs": 141, + "min_sentence1_length": 47, + 
"average_sentence1_length": 150.54609929078015, + "max_sentence1_length": 540, + "unique_sentence1": 141, + "min_sentence2_length": 39, + "average_sentence2_length": 110.51063829787235, + "max_sentence2_length": 257, + "unique_sentence2": 140 + }, + "eng_Latn-tpi_Latn": { + "num_samples": 256, + "number_of_characters": 97954, + "unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 138.0859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 46, + "average_sentence2_length": 244.546875, + "max_sentence2_length": 2947, + "unique_sentence2": 254 + }, + "tpi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97954, + "unique_pairs": 254, + "min_sentence1_length": 46, + "average_sentence1_length": 244.546875, + "max_sentence1_length": 2947, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 138.0859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-tpt_Latn": { + "num_samples": 256, + "number_of_characters": 71465, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 163.7890625, + "max_sentence2_length": 441, + "unique_sentence2": 256 + }, + "tpt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71465, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 163.7890625, + "max_sentence1_length": 441, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tpz_Latn": { + "num_samples": 256, + "number_of_characters": 85269, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.09765625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + 
"average_sentence2_length": 222.984375, + "max_sentence2_length": 780, + "unique_sentence2": 256 + }, + "tpz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85269, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 222.984375, + "max_sentence1_length": 780, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.09765625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-trc_Latn": { + "num_samples": 256, + "number_of_characters": 97710, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.23046875, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 46, + "average_sentence2_length": 270.44921875, + "max_sentence2_length": 859, + "unique_sentence2": 256 + }, + "trc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 97710, + "unique_pairs": 256, + "min_sentence1_length": 46, + "average_sentence1_length": 270.44921875, + "max_sentence1_length": 859, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.23046875, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-tsw_Latn": { + "num_samples": 256, + "number_of_characters": 75638, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.4765625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 184.984375, + "max_sentence2_length": 1078, + "unique_sentence2": 256 + }, + "tsw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75638, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 184.984375, + "max_sentence1_length": 1078, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.4765625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ttc_Latn": { + "num_samples": 256, + 
"number_of_characters": 72942, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 172.53515625, + "max_sentence2_length": 410, + "unique_sentence2": 256 + }, + "ttc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72942, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 172.53515625, + "max_sentence1_length": 410, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-tte_Latn": { + "num_samples": 256, + "number_of_characters": 87707, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.7734375, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 233.83203125, + "max_sentence2_length": 973, + "unique_sentence2": 256 + }, + "tte_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87707, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 233.83203125, + "max_sentence1_length": 973, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.7734375, + "max_sentence2_length": 239, + "unique_sentence2": 256 + }, + "eng_Latn-tuc_Latn": { + "num_samples": 256, + "number_of_characters": 94183, + "unique_pairs": 254, + "min_sentence1_length": 1, + "average_sentence1_length": 146.65625, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 32, + "average_sentence2_length": 221.24609375, + "max_sentence2_length": 533, + "unique_sentence2": 254 + }, + "tuc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 94183, + "unique_pairs": 254, + "min_sentence1_length": 32, + "average_sentence1_length": 221.24609375, + 
"max_sentence1_length": 533, + "unique_sentence1": 254, + "min_sentence2_length": 1, + "average_sentence2_length": 146.65625, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-tue_Latn": { + "num_samples": 256, + "number_of_characters": 74850, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.07421875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 179.30859375, + "max_sentence2_length": 980, + "unique_sentence2": 256 + }, + "tue_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74850, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 179.30859375, + "max_sentence1_length": 980, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.07421875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tuf_Latn": { + "num_samples": 256, + "number_of_characters": 82733, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.47265625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 208.703125, + "max_sentence2_length": 628, + "unique_sentence2": 256 + }, + "tuf_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82733, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 208.703125, + "max_sentence1_length": 628, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.47265625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-tuo_Latn": { + "num_samples": 256, + "number_of_characters": 85317, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.75390625, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 221.515625, + 
"max_sentence2_length": 945, + "unique_sentence2": 256 + }, + "tuo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85317, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 221.515625, + "max_sentence1_length": 945, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.75390625, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-tur_Latn": { + "num_samples": 256, + "number_of_characters": 67809, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.5703125, + "max_sentence1_length": 225, + "unique_sentence1": 256, + "min_sentence2_length": 18, + "average_sentence2_length": 156.30859375, + "max_sentence2_length": 448, + "unique_sentence2": 256 + }, + "tur_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67809, + "unique_pairs": 256, + "min_sentence1_length": 18, + "average_sentence1_length": 156.30859375, + "max_sentence1_length": 448, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 108.5703125, + "max_sentence2_length": 225, + "unique_sentence2": 256 + }, + "eng_Latn-tvk_Latn": { + "num_samples": 256, + "number_of_characters": 79093, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.1484375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 193.80859375, + "max_sentence2_length": 500, + "unique_sentence2": 256 + }, + "tvk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79093, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 193.80859375, + "max_sentence1_length": 500, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.1484375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-twi_Latn": { + "num_samples": 256, + "number_of_characters": 56547, + 
"unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 108.4921875, + "max_sentence2_length": 240, + "unique_sentence2": 255 + }, + "twi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 56547, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 108.4921875, + "max_sentence1_length": 240, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-txq_Latn": { + "num_samples": 256, + "number_of_characters": 104537, + "unique_pairs": 255, + "min_sentence1_length": 24, + "average_sentence1_length": 114.76953125, + "max_sentence1_length": 251, + "unique_sentence1": 255, + "min_sentence2_length": 37, + "average_sentence2_length": 293.578125, + "max_sentence2_length": 1375, + "unique_sentence2": 255 + }, + "txq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 104537, + "unique_pairs": 255, + "min_sentence1_length": 37, + "average_sentence1_length": 293.578125, + "max_sentence1_length": 1375, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 114.76953125, + "max_sentence2_length": 251, + "unique_sentence2": 255 + }, + "eng_Latn-txu_Latn": { + "num_samples": 256, + "number_of_characters": 121364, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.9375, + "max_sentence1_length": 256, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 362.140625, + "max_sentence2_length": 1373, + "unique_sentence2": 256 + }, + "txu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 121364, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 362.140625, + "max_sentence1_length": 1373, + "unique_sentence1": 
256, + "min_sentence2_length": 31, + "average_sentence2_length": 111.9375, + "max_sentence2_length": 256, + "unique_sentence2": 256 + }, + "eng_Latn-tzj_Latn": { + "num_samples": 256, + "number_of_characters": 100500, + "unique_pairs": 254, + "min_sentence1_length": 35, + "average_sentence1_length": 146.40625, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 58, + "average_sentence2_length": 246.171875, + "max_sentence2_length": 687, + "unique_sentence2": 254 + }, + "tzj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 100500, + "unique_pairs": 254, + "min_sentence1_length": 58, + "average_sentence1_length": 246.171875, + "max_sentence1_length": 687, + "unique_sentence1": 254, + "min_sentence2_length": 35, + "average_sentence2_length": 146.40625, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-tzo_Latn": { + "num_samples": 256, + "number_of_characters": 95185, + "unique_pairs": 254, + "min_sentence1_length": 35, + "average_sentence1_length": 148.25390625, + "max_sentence1_length": 341, + "unique_sentence1": 247, + "min_sentence2_length": 54, + "average_sentence2_length": 223.5625, + "max_sentence2_length": 965, + "unique_sentence2": 254 + }, + "tzo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 95185, + "unique_pairs": 254, + "min_sentence1_length": 54, + "average_sentence1_length": 223.5625, + "max_sentence1_length": 965, + "unique_sentence1": 254, + "min_sentence2_length": 35, + "average_sentence2_length": 148.25390625, + "max_sentence2_length": 341, + "unique_sentence2": 247 + }, + "eng_Latn-ubr_Latn": { + "num_samples": 256, + "number_of_characters": 73958, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.6875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 179.2109375, + "max_sentence2_length": 462, + "unique_sentence2": 256 + }, + "ubr_Latn-eng_Latn": { + 
"num_samples": 256, + "number_of_characters": 73958, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 179.2109375, + "max_sentence1_length": 462, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.6875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ubu_Latn": { + "num_samples": 256, + "number_of_characters": 105880, + "unique_pairs": 255, + "min_sentence1_length": 1, + "average_sentence1_length": 146.6953125, + "max_sentence1_length": 341, + "unique_sentence1": 253, + "min_sentence2_length": 47, + "average_sentence2_length": 266.8984375, + "max_sentence2_length": 858, + "unique_sentence2": 255 + }, + "ubu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 105880, + "unique_pairs": 255, + "min_sentence1_length": 47, + "average_sentence1_length": 266.8984375, + "max_sentence1_length": 858, + "unique_sentence1": 255, + "min_sentence2_length": 1, + "average_sentence2_length": 146.6953125, + "max_sentence2_length": 341, + "unique_sentence2": 253 + }, + "eng_Latn-udu_Latn": { + "num_samples": 256, + "number_of_characters": 74189, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 174.4296875, + "max_sentence2_length": 525, + "unique_sentence2": 256 + }, + "udu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74189, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 174.4296875, + "max_sentence1_length": 525, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-uig_Latn": { + "num_samples": 256, + "number_of_characters": 78111, + "unique_pairs": 254, + "min_sentence1_length": 56, + "average_sentence1_length": 147.046875, + 
"max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 49, + "average_sentence2_length": 158.07421875, + "max_sentence2_length": 374, + "unique_sentence2": 254 + }, + "uig_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78111, + "unique_pairs": 254, + "min_sentence1_length": 49, + "average_sentence1_length": 158.07421875, + "max_sentence1_length": 374, + "unique_sentence1": 254, + "min_sentence2_length": 56, + "average_sentence2_length": 147.046875, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-ukr_Cyrl": { + "num_samples": 256, + "number_of_characters": 50558, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.78125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 17, + "average_sentence2_length": 87.7109375, + "max_sentence2_length": 234, + "unique_sentence2": 256 + }, + "ukr_Cyrl-eng_Latn": { + "num_samples": 256, + "number_of_characters": 50558, + "unique_pairs": 256, + "min_sentence1_length": 17, + "average_sentence1_length": 87.7109375, + "max_sentence1_length": 234, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.78125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-uli_Latn": { + "num_samples": 256, + "number_of_characters": 69143, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 113.765625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 156.32421875, + "max_sentence2_length": 429, + "unique_sentence2": 256 + }, + "uli_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 69143, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 156.32421875, + "max_sentence1_length": 429, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 113.765625, + "max_sentence2_length": 827, 
+ "unique_sentence2": 256 + }, + "eng_Latn-ulk_Latn": { + "num_samples": 191, + "number_of_characters": 49236, + "unique_pairs": 190, + "min_sentence1_length": 39, + "average_sentence1_length": 110.41361256544502, + "max_sentence1_length": 244, + "unique_sentence1": 190, + "min_sentence2_length": 41, + "average_sentence2_length": 147.36649214659687, + "max_sentence2_length": 858, + "unique_sentence2": 190 + }, + "ulk_Latn-eng_Latn": { + "num_samples": 191, + "number_of_characters": 49236, + "unique_pairs": 190, + "min_sentence1_length": 41, + "average_sentence1_length": 147.36649214659687, + "max_sentence1_length": 858, + "unique_sentence1": 190, + "min_sentence2_length": 39, + "average_sentence2_length": 110.41361256544502, + "max_sentence2_length": 244, + "unique_sentence2": 190 + }, + "eng_Latn-upv_Latn": { + "num_samples": 256, + "number_of_characters": 75129, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.0546875, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 181.41796875, + "max_sentence2_length": 584, + "unique_sentence2": 256 + }, + "upv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75129, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 181.41796875, + "max_sentence1_length": 584, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.0546875, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-ura_Latn": { + "num_samples": 256, + "number_of_characters": 87834, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.66015625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 231.44140625, + "max_sentence2_length": 990, + "unique_sentence2": 256 + }, + "ura_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 87834, + 
"unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 231.44140625, + "max_sentence1_length": 990, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.66015625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-urb_Latn": { + "num_samples": 256, + "number_of_characters": 104840, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.109375, + "max_sentence1_length": 227, + "unique_sentence1": 255, + "min_sentence2_length": 31, + "average_sentence2_length": 299.421875, + "max_sentence2_length": 1080, + "unique_sentence2": 256 + }, + "urb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 104840, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 299.421875, + "max_sentence1_length": 1080, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.109375, + "max_sentence2_length": 227, + "unique_sentence2": 255 + }, + "eng_Latn-urd_Arab": { + "num_samples": 256, + "number_of_characters": 77743, + "unique_pairs": 254, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 35, + "average_sentence2_length": 156.125, + "max_sentence2_length": 420, + "unique_sentence2": 254 + }, + "urd_Arab-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77743, + "unique_pairs": 254, + "min_sentence1_length": 35, + "average_sentence1_length": 156.125, + "max_sentence1_length": 420, + "unique_sentence1": 254, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-uri_Latn": { + "num_samples": 252, + "number_of_characters": 81749, + "unique_pairs": 252, + "min_sentence1_length": 38, + "average_sentence1_length": 112.38888888888889, + "max_sentence1_length": 245, + "unique_sentence1": 252, 
+ "min_sentence2_length": 60, + "average_sentence2_length": 212.01190476190476, + "max_sentence2_length": 618, + "unique_sentence2": 252 + }, + "uri_Latn-eng_Latn": { + "num_samples": 252, + "number_of_characters": 81749, + "unique_pairs": 252, + "min_sentence1_length": 60, + "average_sentence1_length": 212.01190476190476, + "max_sentence1_length": 618, + "unique_sentence1": 252, + "min_sentence2_length": 38, + "average_sentence2_length": 112.38888888888889, + "max_sentence2_length": 245, + "unique_sentence2": 252 + }, + "eng_Latn-urt_Latn": { + "num_samples": 256, + "number_of_characters": 81805, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.5703125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 203.98046875, + "max_sentence2_length": 882, + "unique_sentence2": 256 + }, + "urt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81805, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 203.98046875, + "max_sentence1_length": 882, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.5703125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-urw_Latn": { + "num_samples": 90, + "number_of_characters": 26383, + "unique_pairs": 90, + "min_sentence1_length": 37, + "average_sentence1_length": 112.38888888888889, + "max_sentence1_length": 245, + "unique_sentence1": 90, + "min_sentence2_length": 47, + "average_sentence2_length": 180.75555555555556, + "max_sentence2_length": 597, + "unique_sentence2": 90 + }, + "urw_Latn-eng_Latn": { + "num_samples": 90, + "number_of_characters": 26383, + "unique_pairs": 90, + "min_sentence1_length": 47, + "average_sentence1_length": 180.75555555555556, + "max_sentence1_length": 597, + "unique_sentence1": 90, + "min_sentence2_length": 37, + "average_sentence2_length": 112.38888888888889, + "max_sentence2_length": 245, + 
"unique_sentence2": 90 + }, + "eng_Latn-usa_Latn": { + "num_samples": 256, + "number_of_characters": 79899, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.59375, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 8, + "average_sentence2_length": 200.51171875, + "max_sentence2_length": 896, + "unique_sentence2": 256 + }, + "usa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79899, + "unique_pairs": 256, + "min_sentence1_length": 8, + "average_sentence1_length": 200.51171875, + "max_sentence1_length": 896, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.59375, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "eng_Latn-usp_Latn": { + "num_samples": 256, + "number_of_characters": 78288, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 190.515625, + "max_sentence2_length": 480, + "unique_sentence2": 256 + }, + "usp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78288, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 190.515625, + "max_sentence1_length": 480, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-uvh_Latn": { + "num_samples": 256, + "number_of_characters": 123347, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 109.5390625, + "max_sentence1_length": 271, + "unique_sentence1": 256, + "min_sentence2_length": 54, + "average_sentence2_length": 372.28515625, + "max_sentence2_length": 1232, + "unique_sentence2": 256 + }, + "uvh_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 123347, + "unique_pairs": 256, + "min_sentence1_length": 
54, + "average_sentence1_length": 372.28515625, + "max_sentence1_length": 1232, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 109.5390625, + "max_sentence2_length": 271, + "unique_sentence2": 256 + }, + "eng_Latn-uvl_Latn": { + "num_samples": 256, + "number_of_characters": 77885, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.4609375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 192.77734375, + "max_sentence2_length": 702, + "unique_sentence2": 256 + }, + "uvl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77885, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 192.77734375, + "max_sentence1_length": 702, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.4609375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-vid_Latn": { + "num_samples": 256, + "number_of_characters": 64273, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 38, + "average_sentence2_length": 138.671875, + "max_sentence2_length": 305, + "unique_sentence2": 256 + }, + "vid_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64273, + "unique_pairs": 256, + "min_sentence1_length": 38, + "average_sentence1_length": 138.671875, + "max_sentence1_length": 305, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-vie_Latn": { + "num_samples": 256, + "number_of_characters": 70490, + "unique_pairs": 255, + "min_sentence1_length": 56, + "average_sentence1_length": 147.55859375, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 32, + 
"average_sentence2_length": 127.79296875, + "max_sentence2_length": 355, + "unique_sentence2": 255 + }, + "vie_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70490, + "unique_pairs": 255, + "min_sentence1_length": 32, + "average_sentence1_length": 127.79296875, + "max_sentence1_length": 355, + "unique_sentence1": 255, + "min_sentence2_length": 56, + "average_sentence2_length": 147.55859375, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-viv_Latn": { + "num_samples": 256, + "number_of_characters": 110076, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.82421875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 50, + "average_sentence2_length": 319.16015625, + "max_sentence2_length": 1077, + "unique_sentence2": 255 + }, + "viv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 110076, + "unique_pairs": 256, + "min_sentence1_length": 50, + "average_sentence1_length": 319.16015625, + "max_sentence1_length": 1077, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 110.82421875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-vmy_Latn": { + "num_samples": 256, + "number_of_characters": 67043, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 149.4921875, + "max_sentence2_length": 337, + "unique_sentence2": 256 + }, + "vmy_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67043, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 149.4921875, + "max_sentence1_length": 337, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-waj_Latn": { + 
"num_samples": 256, + "number_of_characters": 92630, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 248.9609375, + "max_sentence2_length": 1148, + "unique_sentence2": 256 + }, + "waj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92630, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 248.9609375, + "max_sentence1_length": 1148, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-wal_Ethi": { + "num_samples": 256, + "number_of_characters": 64728, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 106.609375, + "max_sentence1_length": 207, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 146.234375, + "max_sentence2_length": 451, + "unique_sentence2": 256 + }, + "wal_Ethi-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64728, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 146.234375, + "max_sentence1_length": 451, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 106.609375, + "max_sentence2_length": 207, + "unique_sentence2": 256 + }, + "eng_Latn-wap_Latn": { + "num_samples": 256, + "number_of_characters": 84895, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.37890625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 41, + "average_sentence2_length": 218.2421875, + "max_sentence2_length": 566, + "unique_sentence2": 256 + }, + "wap_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84895, + "unique_pairs": 256, + "min_sentence1_length": 41, + "average_sentence1_length": 218.2421875, + 
"max_sentence1_length": 566, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.37890625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-wat_Latn": { + "num_samples": 256, + "number_of_characters": 73445, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 114.1875, + "max_sentence1_length": 257, + "unique_sentence1": 255, + "min_sentence2_length": 50, + "average_sentence2_length": 172.70703125, + "max_sentence2_length": 748, + "unique_sentence2": 256 + }, + "wat_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73445, + "unique_pairs": 256, + "min_sentence1_length": 50, + "average_sentence1_length": 172.70703125, + "max_sentence1_length": 748, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 114.1875, + "max_sentence2_length": 257, + "unique_sentence2": 255 + }, + "eng_Latn-wbi_Latn": { + "num_samples": 256, + "number_of_characters": 62104, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.48046875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 4, + "average_sentence2_length": 132.11328125, + "max_sentence2_length": 359, + "unique_sentence2": 256 + }, + "wbi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62104, + "unique_pairs": 256, + "min_sentence1_length": 4, + "average_sentence1_length": 132.11328125, + "max_sentence1_length": 359, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.48046875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-wbp_Latn": { + "num_samples": 256, + "number_of_characters": 146443, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.17578125, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 99, + "average_sentence2_length": 460.8671875, + 
"max_sentence2_length": 1904, + "unique_sentence2": 256 + }, + "wbp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 146443, + "unique_pairs": 256, + "min_sentence1_length": 99, + "average_sentence1_length": 460.8671875, + "max_sentence1_length": 1904, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.17578125, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-wed_Latn": { + "num_samples": 256, + "number_of_characters": 57633, + "unique_pairs": 242, + "min_sentence1_length": 28, + "average_sentence1_length": 111.1953125, + "max_sentence1_length": 215, + "unique_sentence1": 235, + "min_sentence2_length": 30, + "average_sentence2_length": 113.93359375, + "max_sentence2_length": 298, + "unique_sentence2": 242 + }, + "wed_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57633, + "unique_pairs": 242, + "min_sentence1_length": 30, + "average_sentence1_length": 113.93359375, + "max_sentence1_length": 298, + "unique_sentence1": 242, + "min_sentence2_length": 28, + "average_sentence2_length": 111.1953125, + "max_sentence2_length": 215, + "unique_sentence2": 235 + }, + "eng_Latn-wer_Latn": { + "num_samples": 256, + "number_of_characters": 71653, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.5859375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 19, + "average_sentence2_length": 165.30859375, + "max_sentence2_length": 590, + "unique_sentence2": 256 + }, + "wer_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71653, + "unique_pairs": 256, + "min_sentence1_length": 19, + "average_sentence1_length": 165.30859375, + "max_sentence1_length": 590, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.5859375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-wim_Latn": { + "num_samples": 256, + "number_of_characters": 117973, + 
"unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.02734375, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 64, + "average_sentence2_length": 350.8046875, + "max_sentence2_length": 1563, + "unique_sentence2": 256 + }, + "wim_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 117973, + "unique_pairs": 256, + "min_sentence1_length": 64, + "average_sentence1_length": 350.8046875, + "max_sentence1_length": 1563, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.02734375, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "eng_Latn-wiu_Latn": { + "num_samples": 256, + "number_of_characters": 80347, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.5078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 55, + "average_sentence2_length": 200.34765625, + "max_sentence2_length": 590, + "unique_sentence2": 256 + }, + "wiu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80347, + "unique_pairs": 256, + "min_sentence1_length": 55, + "average_sentence1_length": 200.34765625, + "max_sentence1_length": 590, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.5078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-wiv_Latn": { + "num_samples": 256, + "number_of_characters": 78716, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.328125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 44, + "average_sentence2_length": 192.15625, + "max_sentence2_length": 518, + "unique_sentence2": 256 + }, + "wiv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 78716, + "unique_pairs": 256, + "min_sentence1_length": 44, + "average_sentence1_length": 192.15625, + "max_sentence1_length": 518, + "unique_sentence1": 256, 
+ "min_sentence2_length": 24, + "average_sentence2_length": 115.328125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-wmt_Latn": { + "num_samples": 256, + "number_of_characters": 232952, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 116.83203125, + "max_sentence1_length": 288, + "unique_sentence1": 256, + "min_sentence2_length": 60, + "average_sentence2_length": 793.13671875, + "max_sentence2_length": 4949, + "unique_sentence2": 256 + }, + "wmt_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 232952, + "unique_pairs": 256, + "min_sentence1_length": 60, + "average_sentence1_length": 793.13671875, + "max_sentence1_length": 4949, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 116.83203125, + "max_sentence2_length": 288, + "unique_sentence2": 256 + }, + "eng_Latn-wmw_Latn": { + "num_samples": 256, + "number_of_characters": 60867, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 125.3671875, + "max_sentence2_length": 276, + "unique_sentence2": 256 + }, + "wmw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60867, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 125.3671875, + "max_sentence1_length": 276, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-wnc_Latn": { + "num_samples": 256, + "number_of_characters": 108764, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.4375, + "max_sentence1_length": 251, + "unique_sentence1": 256, + "min_sentence2_length": 58, + "average_sentence2_length": 314.421875, + "max_sentence2_length": 1344, + "unique_sentence2": 256 + }, + 
"wnc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 108764, + "unique_pairs": 256, + "min_sentence1_length": 58, + "average_sentence1_length": 314.421875, + "max_sentence1_length": 1344, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.4375, + "max_sentence2_length": 251, + "unique_sentence2": 256 + }, + "eng_Latn-wnu_Latn": { + "num_samples": 256, + "number_of_characters": 74454, + "unique_pairs": 256, + "min_sentence1_length": 21, + "average_sentence1_length": 110.328125, + "max_sentence1_length": 271, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 180.5078125, + "max_sentence2_length": 904, + "unique_sentence2": 256 + }, + "wnu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74454, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 180.5078125, + "max_sentence1_length": 904, + "unique_sentence1": 256, + "min_sentence2_length": 21, + "average_sentence2_length": 110.328125, + "max_sentence2_length": 271, + "unique_sentence2": 256 + }, + "eng_Latn-wol_Latn": { + "num_samples": 256, + "number_of_characters": 70892, + "unique_pairs": 255, + "min_sentence1_length": 1, + "average_sentence1_length": 145.33203125, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 31, + "average_sentence2_length": 131.58984375, + "max_sentence2_length": 322, + "unique_sentence2": 255 + }, + "wol_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70892, + "unique_pairs": 255, + "min_sentence1_length": 31, + "average_sentence1_length": 131.58984375, + "max_sentence1_length": 322, + "unique_sentence1": 255, + "min_sentence2_length": 1, + "average_sentence2_length": 145.33203125, + "max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-wos_Latn": { + "num_samples": 256, + "number_of_characters": 93613, + "unique_pairs": 256, + "min_sentence1_length": 24, + 
"average_sentence1_length": 109.16796875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 23, + "average_sentence2_length": 256.5078125, + "max_sentence2_length": 878, + "unique_sentence2": 256 + }, + "wos_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93613, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 256.5078125, + "max_sentence1_length": 878, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.16796875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-wrk_Latn": { + "num_samples": 256, + "number_of_characters": 210334, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 109.18359375, + "max_sentence1_length": 256, + "unique_sentence1": 256, + "min_sentence2_length": 85, + "average_sentence2_length": 712.43359375, + "max_sentence2_length": 2886, + "unique_sentence2": 256 + }, + "wrk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 210334, + "unique_pairs": 256, + "min_sentence1_length": 85, + "average_sentence1_length": 712.43359375, + "max_sentence1_length": 2886, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 109.18359375, + "max_sentence2_length": 256, + "unique_sentence2": 256 + }, + "eng_Latn-wro_Latn": { + "num_samples": 256, + "number_of_characters": 57263, + "unique_pairs": 255, + "min_sentence1_length": 37, + "average_sentence1_length": 110.109375, + "max_sentence1_length": 257, + "unique_sentence1": 254, + "min_sentence2_length": 28, + "average_sentence2_length": 113.57421875, + "max_sentence2_length": 323, + "unique_sentence2": 255 + }, + "wro_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 57263, + "unique_pairs": 255, + "min_sentence1_length": 28, + "average_sentence1_length": 113.57421875, + "max_sentence1_length": 323, + "unique_sentence1": 255, + "min_sentence2_length": 37, + 
"average_sentence2_length": 110.109375, + "max_sentence2_length": 257, + "unique_sentence2": 254 + }, + "eng_Latn-wrs_Latn": { + "num_samples": 256, + "number_of_characters": 84290, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.2578125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 216.0, + "max_sentence2_length": 761, + "unique_sentence2": 256 + }, + "wrs_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84290, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 216.0, + "max_sentence1_length": 761, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.2578125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-wsk_Latn": { + "num_samples": 256, + "number_of_characters": 73376, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.75, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 45, + "average_sentence2_length": 172.875, + "max_sentence2_length": 362, + "unique_sentence2": 256 + }, + "wsk_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73376, + "unique_pairs": 256, + "min_sentence1_length": 45, + "average_sentence1_length": 172.875, + "max_sentence1_length": 362, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.75, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-wuv_Latn": { + "num_samples": 256, + "number_of_characters": 62654, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4140625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 131.328125, + "max_sentence2_length": 284, + "unique_sentence2": 256 + }, + "wuv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 62654, + 
"unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 131.328125, + "max_sentence1_length": 284, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4140625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-xav_Latn": { + "num_samples": 256, + "number_of_characters": 138695, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.640625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 65, + "average_sentence2_length": 432.13671875, + "max_sentence2_length": 1467, + "unique_sentence2": 256 + }, + "xav_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 138695, + "unique_pairs": 256, + "min_sentence1_length": 65, + "average_sentence1_length": 432.13671875, + "max_sentence1_length": 1467, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.640625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-xbi_Latn": { + "num_samples": 256, + "number_of_characters": 82764, + "unique_pairs": 248, + "min_sentence1_length": 22, + "average_sentence1_length": 108.2265625, + "max_sentence1_length": 232, + "unique_sentence1": 242, + "min_sentence2_length": 32, + "average_sentence2_length": 215.0703125, + "max_sentence2_length": 2517, + "unique_sentence2": 248 + }, + "xbi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 82764, + "unique_pairs": 248, + "min_sentence1_length": 32, + "average_sentence1_length": 215.0703125, + "max_sentence1_length": 2517, + "unique_sentence1": 248, + "min_sentence2_length": 22, + "average_sentence2_length": 108.2265625, + "max_sentence2_length": 232, + "unique_sentence2": 242 + }, + "eng_Latn-xed_Latn": { + "num_samples": 256, + "number_of_characters": 67959, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.484375, + "max_sentence1_length": 376, + "unique_sentence1": 
256, + "min_sentence2_length": 30, + "average_sentence2_length": 152.98046875, + "max_sentence2_length": 361, + "unique_sentence2": 256 + }, + "xed_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67959, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 152.98046875, + "max_sentence1_length": 361, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.484375, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-xla_Latn": { + "num_samples": 256, + "number_of_characters": 90647, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.15234375, + "max_sentence1_length": 259, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 242.9375, + "max_sentence2_length": 775, + "unique_sentence2": 255 + }, + "xla_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90647, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 242.9375, + "max_sentence1_length": 775, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 111.15234375, + "max_sentence2_length": 259, + "unique_sentence2": 256 + }, + "eng_Latn-xnn_Latn": { + "num_samples": 256, + "number_of_characters": 89247, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.05078125, + "max_sentence1_length": 239, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 239.5703125, + "max_sentence2_length": 944, + "unique_sentence2": 256 + }, + "xnn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89247, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 239.5703125, + "max_sentence1_length": 944, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.05078125, + "max_sentence2_length": 239, + "unique_sentence2": 256 + }, + 
"eng_Latn-xon_Latn": { + "num_samples": 256, + "number_of_characters": 65070, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 112.515625, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 141.6640625, + "max_sentence2_length": 776, + "unique_sentence2": 256 + }, + "xon_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65070, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 141.6640625, + "max_sentence1_length": 776, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 112.515625, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-xsi_Latn": { + "num_samples": 256, + "number_of_characters": 91644, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.8046875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 39, + "average_sentence2_length": 245.1796875, + "max_sentence2_length": 735, + "unique_sentence2": 255 + }, + "xsi_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 91644, + "unique_pairs": 256, + "min_sentence1_length": 39, + "average_sentence1_length": 245.1796875, + "max_sentence1_length": 735, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 112.8046875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-xtd_Latn": { + "num_samples": 256, + "number_of_characters": 79968, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.34375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 48, + "average_sentence2_length": 201.03125, + "max_sentence2_length": 707, + "unique_sentence2": 256 + }, + "xtd_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79968, + "unique_pairs": 256, + "min_sentence1_length": 48, + "average_sentence1_length": 
201.03125, + "max_sentence1_length": 707, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.34375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-xtm_Latn": { + "num_samples": 256, + "number_of_characters": 79657, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.54296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 197.6171875, + "max_sentence2_length": 544, + "unique_sentence2": 256 + }, + "xtm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79657, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 197.6171875, + "max_sentence1_length": 544, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.54296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-yaa_Latn": { + "num_samples": 256, + "number_of_characters": 101940, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 114.7109375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 283.4921875, + "max_sentence2_length": 1230, + "unique_sentence2": 256 + }, + "yaa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 101940, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 283.4921875, + "max_sentence1_length": 1230, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 114.7109375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-yad_Latn": { + "num_samples": 256, + "number_of_characters": 93232, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.2421875, + "max_sentence1_length": 233, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 249.9453125, + 
"max_sentence2_length": 650, + "unique_sentence2": 256 + }, + "yad_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 93232, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 249.9453125, + "max_sentence1_length": 650, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.2421875, + "max_sentence2_length": 233, + "unique_sentence2": 256 + }, + "eng_Latn-yal_Latn": { + "num_samples": 256, + "number_of_characters": 60223, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 122.8515625, + "max_sentence2_length": 308, + "unique_sentence2": 256 + }, + "yal_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60223, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 122.8515625, + "max_sentence1_length": 308, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-yap_Latn": { + "num_samples": 256, + "number_of_characters": 74267, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.4921875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 176.61328125, + "max_sentence2_length": 491, + "unique_sentence2": 256 + }, + "yap_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74267, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 176.61328125, + "max_sentence1_length": 491, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.4921875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-yaq_Latn": { + "num_samples": 256, + "number_of_characters": 84467, + 
"unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 109.8359375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 60, + "average_sentence2_length": 220.11328125, + "max_sentence2_length": 476, + "unique_sentence2": 256 + }, + "yaq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 84467, + "unique_pairs": 256, + "min_sentence1_length": 60, + "average_sentence1_length": 220.11328125, + "max_sentence1_length": 476, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 109.8359375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-yby_Latn": { + "num_samples": 256, + "number_of_characters": 79832, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.140625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 49, + "average_sentence2_length": 201.703125, + "max_sentence2_length": 611, + "unique_sentence2": 256 + }, + "yby_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79832, + "unique_pairs": 256, + "min_sentence1_length": 49, + "average_sentence1_length": 201.703125, + "max_sentence1_length": 611, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.140625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-ycn_Latn": { + "num_samples": 256, + "number_of_characters": 99397, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.47265625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 67, + "average_sentence2_length": 272.796875, + "max_sentence2_length": 793, + "unique_sentence2": 256 + }, + "ycn_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 99397, + "unique_pairs": 256, + "min_sentence1_length": 67, + "average_sentence1_length": 272.796875, + "max_sentence1_length": 793, + "unique_sentence1": 256, + 
"min_sentence2_length": 24, + "average_sentence2_length": 115.47265625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-yka_Latn": { + "num_samples": 256, + "number_of_characters": 75747, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 112.5, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 33, + "average_sentence2_length": 183.38671875, + "max_sentence2_length": 494, + "unique_sentence2": 256 + }, + "yka_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75747, + "unique_pairs": 256, + "min_sentence1_length": 33, + "average_sentence1_length": 183.38671875, + "max_sentence1_length": 494, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 112.5, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-yle_Latn": { + "num_samples": 256, + "number_of_characters": 81166, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 111.36328125, + "max_sentence1_length": 233, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 205.69140625, + "max_sentence2_length": 587, + "unique_sentence2": 254 + }, + "yle_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 81166, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 205.69140625, + "max_sentence1_length": 587, + "unique_sentence1": 254, + "min_sentence2_length": 31, + "average_sentence2_length": 111.36328125, + "max_sentence2_length": 233, + "unique_sentence2": 256 + }, + "eng_Latn-yml_Latn": { + "num_samples": 256, + "number_of_characters": 104539, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 112.3203125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 296.03515625, + "max_sentence2_length": 1111, + "unique_sentence2": 256 + }, + "yml_Latn-eng_Latn": 
{ + "num_samples": 256, + "number_of_characters": 104539, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 296.03515625, + "max_sentence1_length": 1111, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 112.3203125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-yon_Latn": { + "num_samples": 256, + "number_of_characters": 76088, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.66015625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 183.55859375, + "max_sentence2_length": 597, + "unique_sentence2": 256 + }, + "yon_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76088, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 183.55859375, + "max_sentence1_length": 597, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.66015625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-yor_Latn": { + "num_samples": 256, + "number_of_characters": 58962, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.421875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 114.8984375, + "max_sentence2_length": 287, + "unique_sentence2": 256 + }, + "yor_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 58962, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 114.8984375, + "max_sentence1_length": 287, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.421875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-yrb_Latn": { + "num_samples": 256, + "number_of_characters": 89411, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 
115.19921875, + "max_sentence1_length": 232, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 234.0625, + "max_sentence2_length": 765, + "unique_sentence2": 256 + }, + "yrb_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89411, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 234.0625, + "max_sentence1_length": 765, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.19921875, + "max_sentence2_length": 232, + "unique_sentence2": 256 + }, + "eng_Latn-yre_Latn": { + "num_samples": 256, + "number_of_characters": 73678, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.09375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 172.7109375, + "max_sentence2_length": 461, + "unique_sentence2": 256 + }, + "yre_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73678, + "unique_pairs": 256, + "min_sentence1_length": 43, + "average_sentence1_length": 172.7109375, + "max_sentence1_length": 461, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.09375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-yss_Latn": { + "num_samples": 256, + "number_of_characters": 92183, + "unique_pairs": 256, + "min_sentence1_length": 1, + "average_sentence1_length": 145.40625, + "max_sentence1_length": 341, + "unique_sentence1": 254, + "min_sentence2_length": 23, + "average_sentence2_length": 214.68359375, + "max_sentence2_length": 524, + "unique_sentence2": 256 + }, + "yss_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 92183, + "unique_pairs": 256, + "min_sentence1_length": 23, + "average_sentence1_length": 214.68359375, + "max_sentence1_length": 524, + "unique_sentence1": 256, + "min_sentence2_length": 1, + "average_sentence2_length": 145.40625, + 
"max_sentence2_length": 341, + "unique_sentence2": 254 + }, + "eng_Latn-yuj_Latn": { + "num_samples": 256, + "number_of_characters": 86283, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.8125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 48, + "average_sentence2_length": 221.23046875, + "max_sentence2_length": 747, + "unique_sentence2": 256 + }, + "yuj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 86283, + "unique_pairs": 256, + "min_sentence1_length": 48, + "average_sentence1_length": 221.23046875, + "max_sentence1_length": 747, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.8125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-yut_Latn": { + "num_samples": 256, + "number_of_characters": 73439, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.35546875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 172.515625, + "max_sentence2_length": 549, + "unique_sentence2": 256 + }, + "yut_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73439, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 172.515625, + "max_sentence1_length": 549, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.35546875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-yuw_Latn": { + "num_samples": 256, + "number_of_characters": 85227, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.0703125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 43, + "average_sentence2_length": 221.84765625, + "max_sentence2_length": 809, + "unique_sentence2": 256 + }, + "yuw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 85227, + "unique_pairs": 
256, + "min_sentence1_length": 43, + "average_sentence1_length": 221.84765625, + "max_sentence1_length": 809, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.0703125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-yva_Latn": { + "num_samples": 256, + "number_of_characters": 90577, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.35546875, + "max_sentence1_length": 230, + "unique_sentence1": 256, + "min_sentence2_length": 37, + "average_sentence2_length": 241.4609375, + "max_sentence2_length": 1386, + "unique_sentence2": 256 + }, + "yva_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 90577, + "unique_pairs": 256, + "min_sentence1_length": 37, + "average_sentence1_length": 241.4609375, + "max_sentence1_length": 1386, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.35546875, + "max_sentence2_length": 230, + "unique_sentence2": 256 + }, + "eng_Latn-zaa_Latn": { + "num_samples": 256, + "number_of_characters": 91414, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.95703125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 58, + "average_sentence2_length": 243.12890625, + "max_sentence2_length": 823, + "unique_sentence2": 256 + }, + "zaa_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 91414, + "unique_pairs": 256, + "min_sentence1_length": 58, + "average_sentence1_length": 243.12890625, + "max_sentence1_length": 823, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.95703125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zab_Latn": { + "num_samples": 256, + "number_of_characters": 65162, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.93359375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + 
"min_sentence2_length": 26, + "average_sentence2_length": 140.60546875, + "max_sentence2_length": 323, + "unique_sentence2": 256 + }, + "zab_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 65162, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 140.60546875, + "max_sentence1_length": 323, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.93359375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zac_Latn": { + "num_samples": 256, + "number_of_characters": 73790, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.5078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 173.734375, + "max_sentence2_length": 379, + "unique_sentence2": 256 + }, + "zac_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 73790, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 173.734375, + "max_sentence1_length": 379, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.5078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zad_Latn": { + "num_samples": 256, + "number_of_characters": 66619, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.41796875, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 148.8125, + "max_sentence2_length": 371, + "unique_sentence2": 256 + }, + "zad_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66619, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 148.8125, + "max_sentence1_length": 371, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 111.41796875, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-zai_Latn": { 
+ "num_samples": 256, + "number_of_characters": 63323, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.6328125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 136.72265625, + "max_sentence2_length": 399, + "unique_sentence2": 256 + }, + "zai_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63323, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 136.72265625, + "max_sentence1_length": 399, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.6328125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-zaj_Latn": { + "num_samples": 256, + "number_of_characters": 63029, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 111.21484375, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 134.9921875, + "max_sentence2_length": 340, + "unique_sentence2": 255 + }, + "zaj_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63029, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 134.9921875, + "max_sentence1_length": 340, + "unique_sentence1": 255, + "min_sentence2_length": 24, + "average_sentence2_length": 111.21484375, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-zam_Latn": { + "num_samples": 256, + "number_of_characters": 102139, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.8046875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 285.17578125, + "max_sentence2_length": 829, + "unique_sentence2": 256 + }, + "zam_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 102139, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 
285.17578125, + "max_sentence1_length": 829, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.8046875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zao_Latn": { + "num_samples": 256, + "number_of_characters": 67404, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.84375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 149.453125, + "max_sentence2_length": 447, + "unique_sentence2": 256 + }, + "zao_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67404, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 149.453125, + "max_sentence1_length": 447, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.84375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zap_Latn": { + "num_samples": 256, + "number_of_characters": 64597, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.22265625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 139.109375, + "max_sentence2_length": 358, + "unique_sentence2": 256 + }, + "zap_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 64597, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 139.109375, + "max_sentence1_length": 358, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.22265625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zar_Latn": { + "num_samples": 256, + "number_of_characters": 74092, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.078125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 175.34375, + 
"max_sentence2_length": 531, + "unique_sentence2": 256 + }, + "zar_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74092, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 175.34375, + "max_sentence1_length": 531, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zas_Latn": { + "num_samples": 256, + "number_of_characters": 66604, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.93359375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 146.23828125, + "max_sentence2_length": 329, + "unique_sentence2": 256 + }, + "zas_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 66604, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 146.23828125, + "max_sentence1_length": 329, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.93359375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zat_Latn": { + "num_samples": 256, + "number_of_characters": 71335, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.67578125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 164.9765625, + "max_sentence2_length": 418, + "unique_sentence2": 256 + }, + "zat_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 71335, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 164.9765625, + "max_sentence1_length": 418, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.67578125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zav_Latn": { + "num_samples": 256, + "number_of_characters": 89857, + 
"unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.37890625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 28, + "average_sentence2_length": 240.625, + "max_sentence2_length": 749, + "unique_sentence2": 256 + }, + "zav_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 89857, + "unique_pairs": 256, + "min_sentence1_length": 28, + "average_sentence1_length": 240.625, + "max_sentence1_length": 749, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.37890625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-zaw_Latn": { + "num_samples": 256, + "number_of_characters": 63829, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.54296875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 31, + "average_sentence2_length": 135.7890625, + "max_sentence2_length": 332, + "unique_sentence2": 256 + }, + "zaw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63829, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 135.7890625, + "max_sentence1_length": 332, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.54296875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zca_Latn": { + "num_samples": 256, + "number_of_characters": 72050, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.73828125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 167.70703125, + "max_sentence2_length": 440, + "unique_sentence2": 256 + }, + "zca_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72050, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 167.70703125, + "max_sentence1_length": 440, + "unique_sentence1": 256, + 
"min_sentence2_length": 24, + "average_sentence2_length": 113.73828125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zga_Latn": { + "num_samples": 256, + "number_of_characters": 61687, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 116.0625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 22, + "average_sentence2_length": 124.90234375, + "max_sentence2_length": 319, + "unique_sentence2": 256 + }, + "zga_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 61687, + "unique_pairs": 256, + "min_sentence1_length": 22, + "average_sentence1_length": 124.90234375, + "max_sentence1_length": 319, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 116.0625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zia_Latn": { + "num_samples": 256, + "number_of_characters": 76390, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.75390625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 29, + "average_sentence2_length": 184.64453125, + "max_sentence2_length": 826, + "unique_sentence2": 256 + }, + "zia_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76390, + "unique_pairs": 256, + "min_sentence1_length": 29, + "average_sentence1_length": 184.64453125, + "max_sentence1_length": 826, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.75390625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ziw_Latn": { + "num_samples": 256, + "number_of_characters": 60739, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 112.39453125, + "max_sentence1_length": 376, + "unique_sentence1": 256, + "min_sentence2_length": 19, + "average_sentence2_length": 124.8671875, + "max_sentence2_length": 349, + "unique_sentence2": 256 + }, + 
"ziw_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 60739, + "unique_pairs": 256, + "min_sentence1_length": 19, + "average_sentence1_length": 124.8671875, + "max_sentence1_length": 349, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 112.39453125, + "max_sentence2_length": 376, + "unique_sentence2": 256 + }, + "eng_Latn-zlm_Latn": { + "num_samples": 256, + "number_of_characters": 63096, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 25, + "average_sentence2_length": 131.09765625, + "max_sentence2_length": 302, + "unique_sentence2": 256 + }, + "zlm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 63096, + "unique_pairs": 256, + "min_sentence1_length": 25, + "average_sentence1_length": 131.09765625, + "max_sentence1_length": 302, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zos_Latn": { + "num_samples": 256, + "number_of_characters": 75447, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.48828125, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 30, + "average_sentence2_length": 179.2265625, + "max_sentence2_length": 429, + "unique_sentence2": 256 + }, + "zos_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75447, + "unique_pairs": 256, + "min_sentence1_length": 30, + "average_sentence1_length": 179.2265625, + "max_sentence1_length": 429, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.48828125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zpc_Latn": { + "num_samples": 256, + "number_of_characters": 76140, + "unique_pairs": 256, + "min_sentence1_length": 24, + 
"average_sentence1_length": 115.0390625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 182.3828125, + "max_sentence2_length": 515, + "unique_sentence2": 256 + }, + "zpc_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 76140, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 182.3828125, + "max_sentence1_length": 515, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.0390625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zpl_Latn": { + "num_samples": 256, + "number_of_characters": 72610, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.3046875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 34, + "average_sentence2_length": 170.328125, + "max_sentence2_length": 409, + "unique_sentence2": 256 + }, + "zpl_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 72610, + "unique_pairs": 256, + "min_sentence1_length": 34, + "average_sentence1_length": 170.328125, + "max_sentence1_length": 409, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.3046875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zpm_Latn": { + "num_samples": 256, + "number_of_characters": 77080, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 108.8125, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 192.28125, + "max_sentence2_length": 615, + "unique_sentence2": 256 + }, + "zpm_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 77080, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 192.28125, + "max_sentence1_length": 615, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 
108.8125, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-zpo_Latn": { + "num_samples": 256, + "number_of_characters": 67831, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.3984375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 27, + "average_sentence2_length": 149.56640625, + "max_sentence2_length": 398, + "unique_sentence2": 256 + }, + "zpo_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67831, + "unique_pairs": 256, + "min_sentence1_length": 27, + "average_sentence1_length": 149.56640625, + "max_sentence1_length": 398, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.3984375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zpq_Latn": { + "num_samples": 256, + "number_of_characters": 79484, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 110.1640625, + "max_sentence1_length": 227, + "unique_sentence1": 256, + "min_sentence2_length": 40, + "average_sentence2_length": 200.3203125, + "max_sentence2_length": 664, + "unique_sentence2": 256 + }, + "zpq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 79484, + "unique_pairs": 256, + "min_sentence1_length": 40, + "average_sentence1_length": 200.3203125, + "max_sentence1_length": 664, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 110.1640625, + "max_sentence2_length": 227, + "unique_sentence2": 256 + }, + "eng_Latn-zpu_Latn": { + "num_samples": 256, + "number_of_characters": 71982, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.37109375, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 35, + "average_sentence2_length": 165.80859375, + "max_sentence2_length": 409, + "unique_sentence2": 256 + }, + "zpu_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 
71982, + "unique_pairs": 256, + "min_sentence1_length": 35, + "average_sentence1_length": 165.80859375, + "max_sentence1_length": 409, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.37109375, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zpv_Latn": { + "num_samples": 256, + "number_of_characters": 80759, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.69140625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 42, + "average_sentence2_length": 201.7734375, + "max_sentence2_length": 522, + "unique_sentence2": 256 + }, + "zpv_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 80759, + "unique_pairs": 256, + "min_sentence1_length": 42, + "average_sentence1_length": 201.7734375, + "max_sentence1_length": 522, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.69140625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zpz_Latn": { + "num_samples": 256, + "number_of_characters": 75618, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.1015625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 36, + "average_sentence2_length": 180.28125, + "max_sentence2_length": 506, + "unique_sentence2": 256 + }, + "zpz_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 75618, + "unique_pairs": 256, + "min_sentence1_length": 36, + "average_sentence1_length": 180.28125, + "max_sentence1_length": 506, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.1015625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zsr_Latn": { + "num_samples": 256, + "number_of_characters": 70549, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 114.078125, + "max_sentence1_length": 827, + "unique_sentence1": 
256, + "min_sentence2_length": 31, + "average_sentence2_length": 161.50390625, + "max_sentence2_length": 488, + "unique_sentence2": 256 + }, + "zsr_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 70549, + "unique_pairs": 256, + "min_sentence1_length": 31, + "average_sentence1_length": 161.50390625, + "max_sentence1_length": 488, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 114.078125, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-ztq_Latn": { + "num_samples": 256, + "number_of_characters": 59614, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 115.171875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 19, + "average_sentence2_length": 117.6953125, + "max_sentence2_length": 505, + "unique_sentence2": 256 + }, + "ztq_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 59614, + "unique_pairs": 256, + "min_sentence1_length": 19, + "average_sentence1_length": 117.6953125, + "max_sentence1_length": 505, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 115.171875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + "eng_Latn-zty_Latn": { + "num_samples": 256, + "number_of_characters": 74180, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.1171875, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 26, + "average_sentence2_length": 176.6484375, + "max_sentence2_length": 528, + "unique_sentence2": 256 + }, + "zty_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 74180, + "unique_pairs": 256, + "min_sentence1_length": 26, + "average_sentence1_length": 176.6484375, + "max_sentence1_length": 528, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.1171875, + "max_sentence2_length": 827, + "unique_sentence2": 256 + }, + 
"eng_Latn-zyp_Latn": { + "num_samples": 256, + "number_of_characters": 67721, + "unique_pairs": 256, + "min_sentence1_length": 24, + "average_sentence1_length": 113.59765625, + "max_sentence1_length": 827, + "unique_sentence1": 256, + "min_sentence2_length": 32, + "average_sentence2_length": 150.9375, + "max_sentence2_length": 342, + "unique_sentence2": 256 + }, + "zyp_Latn-eng_Latn": { + "num_samples": 256, + "number_of_characters": 67721, + "unique_pairs": 256, + "min_sentence1_length": 32, + "average_sentence1_length": 150.9375, + "max_sentence1_length": 342, + "unique_sentence1": 256, + "min_sentence2_length": 24, + "average_sentence2_length": 113.59765625, + "max_sentence2_length": 827, + "unique_sentence2": 256 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SwissJudgementClassification.json b/mteb/descriptive_stats/Classification/SwissJudgementClassification.json new file mode 100644 index 0000000000..4c9b8fe948 --- /dev/null +++ b/mteb/descriptive_stats/Classification/SwissJudgementClassification.json @@ -0,0 +1,150 @@ +{ + "test": { + "num_samples": 4908, + "number_of_characters": 17205689, + "number_texts_intersect_with_train": 1, + "min_text_length": 16, + "average_text_length": 3505.6416055419722, + "max_text_length": 38479, + "unique_text": 4633, + "unique_labels": 2, + "labels": { + "0": { + "count": 3973 + }, + "1": { + "count": 935 + } + }, + "hf_subset_descriptive_stats": { + "de": { + "num_samples": 2048, + "number_of_characters": 5142662, + "number_texts_intersect_with_train": 1, + "min_text_length": 16, + "average_text_length": 2511.0654296875, + "max_text_length": 27356, + "unique_text": 1989, + "unique_labels": 2, + "labels": { + "0": { + "count": 1645 + }, + "1": { + "count": 403 + } + } + }, + "fr": { + "num_samples": 2048, + "number_of_characters": 9648394, + "number_texts_intersect_with_train": 0, + "min_text_length": 355, + "average_text_length": 4711.1298828125, + "max_text_length": 38479, + 
"unique_text": 1920, + "unique_labels": 2, + "labels": { + "0": { + "count": 1668 + }, + "1": { + "count": 380 + } + } + }, + "it": { + "num_samples": 812, + "number_of_characters": 2414633, + "number_texts_intersect_with_train": 0, + "min_text_length": 212, + "average_text_length": 2973.685960591133, + "max_text_length": 20029, + "unique_text": 724, + "unique_labels": 2, + "labels": { + "0": { + "count": 660 + }, + "1": { + "count": 152 + } + } + } + } + }, + "train": { + "num_samples": 59709, + "number_of_characters": 203219455, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 3403.497881391415, + "max_text_length": 77488, + "unique_text": 53525, + "unique_labels": 2, + "labels": { + "0": { + "count": 45516 + }, + "1": { + "count": 14193 + } + }, + "hf_subset_descriptive_stats": { + "de": { + "num_samples": 35458, + "number_of_characters": 100575495, + "number_texts_intersect_with_train": null, + "min_text_length": 13, + "average_text_length": 2836.4683569293247, + "max_text_length": 35279, + "unique_text": 32044, + "unique_labels": 2, + "labels": { + "0": { + "count": 27087 + }, + "1": { + "count": 8371 + } + } + }, + "fr": { + "num_samples": 21179, + "number_of_characters": 92104087, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 4348.840219084943, + "max_text_length": 77488, + "unique_text": 18637, + "unique_labels": 2, + "labels": { + "0": { + "count": 15982 + }, + "1": { + "count": 5197 + } + } + }, + "it": { + "num_samples": 3072, + "number_of_characters": 10539873, + "number_texts_intersect_with_train": null, + "min_text_length": 7, + "average_text_length": 3430.9482421875, + "max_text_length": 16196, + "unique_text": 2844, + "unique_labels": 2, + "labels": { + "0": { + "count": 2447 + }, + "1": { + "count": 625 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/MindSmallReranking.json 
b/mteb/descriptive_stats/Reranking/MindSmallReranking.json new file mode 100644 index 0000000000..a71e415fa6 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/MindSmallReranking.json @@ -0,0 +1,30 @@ +{ + "test": { + "number_of_characters": 162620316, + "num_samples": 2367791, + "num_queries": 2362514, + "num_documents": 5277, + "num_relevant_docs": 97006943, + "min_document_length": 11, + "average_document_length": 30751.748341860904, + "max_document_length": 251, + "unique_documents": 5277, + "min_query_length": 11, + "average_query_length": 0.14532823932471933, + "max_query_length": 176, + "unique_queries": 2362514, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 1.8289660928993436, + "max_relevant_docs_per_query": 295, + "unique_relevant_docs": 5277, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 41.06168556038187, + "max_top_ranked_per_query": 295 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/VoyageMMarcoReranking.json b/mteb/descriptive_stats/Reranking/VoyageMMarcoReranking.json new file mode 100644 index 0000000000..51b405b439 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/VoyageMMarcoReranking.json @@ -0,0 +1,30 @@ +{ + "test": { + "number_of_characters": 8824820, + "num_samples": 55423, + "num_queries": 2048, + "num_documents": 53375, + "num_relevant_docs": 53375, + "min_document_length": 3, + "average_document_length": 0.6108852459016394, + "max_document_length": 73, + "unique_documents": 53375, + "min_query_length": 19, + "average_query_length": 4293.0732421875, + "max_query_length": 1192, + "unique_queries": 2048, + "none_queries": 0, + "min_relevant_docs_per_query": 26, + "average_relevant_docs_per_query": 1.06201171875, + "max_relevant_docs_per_query": 29, + "unique_relevant_docs": 
53375, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 26, + "average_top_ranked_per_query": 26.06201171875, + "max_top_ranked_per_query": 29 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/WebLINXCandidatesReranking.json b/mteb/descriptive_stats/Reranking/WebLINXCandidatesReranking.json new file mode 100644 index 0000000000..e31afea818 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/WebLINXCandidatesReranking.json @@ -0,0 +1,170 @@ +{ + "validation": { + "number_of_characters": 102848781, + "num_samples": 317809, + "num_queries": 1301, + "num_documents": 316508, + "num_relevant_docs": 316508, + "min_document_length": 142, + "average_document_length": 6.772091068788151, + "max_document_length": 9356, + "unique_documents": 316508, + "min_query_length": 152, + "average_query_length": 77406.11837048424, + "max_query_length": 1605, + "unique_queries": 1301, + "none_queries": 0, + "min_relevant_docs_per_query": 21, + "average_relevant_docs_per_query": 1.01076095311299, + "max_relevant_docs_per_query": 945, + "unique_relevant_docs": 316508, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 21, + "average_top_ranked_per_query": 243.2805534204458, + "max_top_ranked_per_query": 945 + }, + "test_iid": { + "number_of_characters": 131631330, + "num_samples": 407410, + "num_queries": 1438, + "num_documents": 405972, + "num_relevant_docs": 405972, + "min_document_length": 173, + "average_document_length": 6.101763175785522, + "max_document_length": 10467, + "unique_documents": 405972, + "min_query_length": 153, + "average_query_length": 89815.14951321279, + "max_query_length": 1471, + "unique_queries": 1438, + "none_queries": 0, + 
"min_relevant_docs_per_query": 15, + "average_relevant_docs_per_query": 1.0528511821974966, + "max_relevant_docs_per_query": 1149, + "unique_relevant_docs": 405972, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 15, + "average_top_ranked_per_query": 282.317107093185, + "max_top_ranked_per_query": 1149 + }, + "test_cat": { + "number_of_characters": 402615943, + "num_samples": 1261751, + "num_queries": 3560, + "num_documents": 1258191, + "num_relevant_docs": 1258191, + "min_document_length": 161, + "average_document_length": 6.082371436451222, + "max_document_length": 8502, + "unique_documents": 1258191, + "min_query_length": 156, + "average_query_length": 110944.70730337079, + "max_query_length": 1590, + "unique_queries": 3560, + "none_queries": 0, + "min_relevant_docs_per_query": 14, + "average_relevant_docs_per_query": 1.0016853932584269, + "max_relevant_docs_per_query": 1245, + "unique_relevant_docs": 1258191, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 14, + "average_top_ranked_per_query": 353.4244382022472, + "max_top_ranked_per_query": 1245 + }, + "test_geo": { + "number_of_characters": 371063547, + "num_samples": 1155697, + "num_queries": 4916, + "num_documents": 1150781, + "num_relevant_docs": 1150781, + "min_document_length": 146, + "average_document_length": 7.444432085687894, + "max_document_length": 19082, + "unique_documents": 1150781, + "min_query_length": 154, + "average_query_length": 73738.12774613507, + "max_query_length": 1289, + "unique_queries": 4916, + "none_queries": 0, + "min_relevant_docs_per_query": 3, + "average_relevant_docs_per_query": 1.0024410089503661, + "max_relevant_docs_per_query": 1274, + "unique_relevant_docs": 1150781, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 3, + "average_top_ranked_per_query": 234.08889340927584, + "max_top_ranked_per_query": 1274 + }, + "test_vis": { + "number_of_characters": 534911902, + "num_samples": 1612156, + "num_queries": 5298, + "num_documents": 1606858, + "num_relevant_docs": 1606858, + "min_document_length": 176, + "average_document_length": 5.7279492027298, + "max_document_length": 28468, + "unique_documents": 1606858, + "min_query_length": 154, + "average_query_length": 99227.61438278596, + "max_query_length": 1796, + "unique_queries": 5298, + "none_queries": 0, + "min_relevant_docs_per_query": 11, + "average_relevant_docs_per_query": 1.0152887882219706, + "max_relevant_docs_per_query": 1819, + "unique_relevant_docs": 1606858, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": 11, + "average_top_ranked_per_query": 303.2952057380143, + "max_top_ranked_per_query": 1819 + }, + "test_web": { + "number_of_characters": 277932894, + "num_samples": 837319, + "num_queries": 3144, + "num_documents": 834175, + "num_relevant_docs": 834175, + "min_document_length": 146, + "average_document_length": 6.902769802499476, + "max_document_length": 15329, + "unique_documents": 834175, + "min_query_length": 157, + "average_query_length": 86569.58524173028, + "max_query_length": 1542, + "unique_queries": 3144, + "none_queries": 0, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 1.0588422391857506, + "max_relevant_docs_per_query": 1064, + "unique_relevant_docs": 834175, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 265.3228371501272, + "max_top_ranked_per_query": 1064 + } +} \ No newline at end of file diff --git a/tests/test_TaskMetadata.py b/tests/test_TaskMetadata.py index 40acaca430..2289dd7676 100644 --- a/tests/test_TaskMetadata.py +++ b/tests/test_TaskMetadata.py @@ -535,13 +535,8 @@ def test_empty_descriptive_stat_in_new_datasets(task: AbsTask): "BrightRetrieval", "NeuCLIR2022Retrieval", "NeuCLIR2023Retrieval", - "BibleNLPBitextMining", "FloresBitextMining", "FilipinoHateSpeechClassification", - "SwissJudgementClassification", - "MindSmallReranking", - "WebLINXCandidatesReranking", - "VoyageMMarcoReranking", ] if task.metadata.name.startswith("Mock"): From 99247b29e6459db1363cdc1575a5582b67c0366d Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Sat, 23 Nov 2024 17:46:26 +0500 Subject: [PATCH 14/40] fix: Fix `BrightRetrieval` calculate stats (#1484) * fix bright loader * lint * fix comment --- mteb/abstasks/AbsTaskRetrieval.py | 16 +- .../Retrieval/BrightRetrieval.json | 626 ++++++++++++++++++ tests/test_TaskMetadata.py | 1 - 3 files changed, 641 insertions(+), 2 deletions(-) create mode 100644 mteb/descriptive_stats/Retrieval/BrightRetrieval.json diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index bc86928a53..e7d886beb7 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -379,6 +379,13 @@ def _calculate_metrics_from_split( top_ranked = None instructions = None if hf_subset and hf_subset in self.queries: + # BrightRetrieval has different splits for different subsets of the corpus. 
+ if ( + self.corpus.get(hf_subset, None) is None + or self.corpus[hf_subset].get(split, None) is None + ): + return {} + queries = self.queries[hf_subset][split] corpus = self.corpus[hf_subset][split] relevant_docs = self.relevant_docs[hf_subset][split] @@ -393,6 +400,12 @@ def _calculate_metrics_from_split( instructions = {} top_ranked = {} for hf_subset in self.metadata.eval_langs: + # BrightRetrieval has different splits for different subsets of the corpus. + if ( + self.corpus.get(hf_subset, None) is None + or self.corpus[hf_subset].get(split, None) is None + ): + continue queries.update(process_docs(self.queries, hf_subset, split)) corpus.update(process_docs(self.corpus, hf_subset, split)) relevant_docs.update( @@ -506,7 +519,8 @@ def calculate_length( queries_lens.append(len(query)) else: queries_lens.extend([len(turn) for turn in query]) - + if corpus is None: + return None, queries_lens for doc in corpus.values(): if isinstance(doc, dict): doc_lens.append(len(doc["text"])) diff --git a/mteb/descriptive_stats/Retrieval/BrightRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightRetrieval.json new file mode 100644 index 0000000000..c07260fc99 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/BrightRetrieval.json @@ -0,0 +1,626 @@ +{ + "standard": { + "number_of_characters": 1068198065, + "num_samples": 1334550, + "num_queries": 1384, + "num_documents": 1333166, + "num_relevant_docs": 8424, + "min_document_length": 12, + "average_document_length": 0.8236686204118617, + "max_document_length": 19341, + "unique_documents": 1333166, + "min_query_length": 1, + "average_query_length": 771025.9956647399, + "max_query_length": 233623, + "unique_queries": 1384, + "none_queries": 0, + "min_relevant_docs_per_query": 0, + "average_relevant_docs_per_query": 6.086705202312139, + "max_relevant_docs_per_query": 85, + "unique_relevant_docs": 5220, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "biology": { + "number_of_characters": 18960905, + "num_samples": 57462, + "num_queries": 103, + "num_documents": 57359, + "num_relevant_docs": 374, + "min_document_length": 89, + "average_document_length": 0.9392248818842728, + "max_document_length": 2195, + "unique_documents": 57359, + "min_query_length": 1, + "average_query_length": 183563.41747572814, + "max_query_length": 31131, + "unique_queries": 103, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.6310679611650487, + "max_relevant_docs_per_query": 19, + "unique_relevant_docs": 374, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "earth_science": { + "number_of_characters": 41046773, + "num_samples": 121365, + "num_queries": 116, + "num_documents": 121249, + "num_relevant_docs": 609, + "min_document_length": 83, + "average_document_length": 0.4561687106697787, + "max_document_length": 1565, + "unique_documents": 121249, + "min_query_length": 2, + "average_query_length": 353374.6810344828, + "max_query_length": 233623, + "unique_queries": 116, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 5.25, + "max_relevant_docs_per_query": 23, + "unique_relevant_docs": 609, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "economics": { + 
"number_of_characters": 19885867, + "num_samples": 50323, + "num_queries": 103, + "num_documents": 50220, + "num_relevant_docs": 823, + "min_document_length": 164, + "average_document_length": 1.5169653524492235, + "max_document_length": 2223, + "unique_documents": 50220, + "min_query_length": 3, + "average_query_length": 192327.03883495147, + "max_query_length": 39672, + "unique_queries": 103, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 7.990291262135922, + "max_relevant_docs_per_query": 85, + "unique_relevant_docs": 823, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "psychology": { + "number_of_characters": 20378352, + "num_samples": 52936, + "num_queries": 101, + "num_documents": 52835, + "num_relevant_docs": 742, + "min_document_length": 166, + "average_document_length": 1.3251253903662346, + "max_document_length": 2334, + "unique_documents": 52835, + "min_query_length": 3, + "average_query_length": 201072.66336633664, + "max_query_length": 226941, + "unique_queries": 101, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 7.346534653465347, + "max_relevant_docs_per_query": 59, + "unique_relevant_docs": 738, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "robotics": { + "number_of_characters": 18256389, + "num_samples": 62062, + "num_queries": 101, + "num_documents": 61961, + "num_relevant_docs": 553, + "min_document_length": 165, + "average_document_length": 3.5527993415212795, + "max_document_length": 
19341, + "unique_documents": 61961, + "min_query_length": 3, + "average_query_length": 178576.77227722772, + "max_query_length": 28640, + "unique_queries": 101, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 5.475247524752476, + "max_relevant_docs_per_query": 36, + "unique_relevant_docs": 553, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "stackoverflow": { + "number_of_characters": 183787099, + "num_samples": 107198, + "num_queries": 117, + "num_documents": 107081, + "num_relevant_docs": 819, + "min_document_length": 185, + "average_document_length": 1.4127529627104716, + "max_document_length": 12432, + "unique_documents": 107081, + "min_query_length": 1, + "average_query_length": 1569536.923076923, + "max_query_length": 4000, + "unique_queries": 117, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 7.0, + "max_relevant_docs_per_query": 59, + "unique_relevant_docs": 816, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "sustainable_living": { + "number_of_characters": 21000744, + "num_samples": 60900, + "num_queries": 108, + "num_documents": 60792, + "num_relevant_docs": 604, + "min_document_length": 158, + "average_document_length": 1.213103697854981, + "max_document_length": 2843, + "unique_documents": 60792, + "min_query_length": 1, + "average_query_length": 193768.49074074073, + "max_query_length": 158299, + "unique_queries": 108, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 5.592592592592593, + "max_relevant_docs_per_query": 59, + "unique_relevant_docs": 604, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "pony": { + "number_of_characters": 2094456, + "num_samples": 8006, + "num_queries": 112, + "num_documents": 7894, + "num_relevant_docs": 2519, + "min_document_length": 182, + "average_document_length": 5.518748416518875, + "max_document_length": 946, + "unique_documents": 7894, + "min_query_length": 8, + "average_query_length": 18311.526785714286, + "max_query_length": 2583, + "unique_queries": 112, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 22.491071428571427, + "max_relevant_docs_per_query": 32, + "unique_relevant_docs": 47, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "leetcode": { + "number_of_characters": 438348000, + "num_samples": 414074, + "num_queries": 142, + "num_documents": 413932, + "num_relevant_docs": 262, + "min_document_length": 422, + "average_document_length": 0.5006160432148276, + "max_document_length": 3964, + "unique_documents": 413932, + "min_query_length": 75, + "average_query_length": 3085498.443661972, + "max_query_length": 103665, + "unique_queries": 142, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.8450704225352113, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 216, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "aops": { + "number_of_characters": 141769723, + "num_samples": 188113, + "num_queries": 111, + "num_documents": 188002, + "num_relevant_docs": 524, + "min_document_length": 85, + "average_document_length": 0.18875863022733802, + "max_document_length": 1167, + "unique_documents": 188002, + "min_query_length": 58, + "average_query_length": 1276885.009009009, + "max_query_length": 7334, + "unique_queries": 111, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 4.7207207207207205, + "max_relevant_docs_per_query": 8, + "unique_relevant_docs": 111, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "theoremqa_theorems": { + "number_of_characters": 20852144, + "num_samples": 23904, + "num_queries": 65, + "num_documents": 23839, + "num_relevant_docs": 126, + "min_document_length": 13, + "average_document_length": 1.1702672091950166, + "max_document_length": 1255, + "unique_documents": 23839, + "min_query_length": 74, + "average_query_length": 320373.0153846154, + "max_query_length": 19106, + "unique_queries": 65, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9384615384615385, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 95, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "theoremqa_questions": { + "number_of_characters": 
141817613, + "num_samples": 188207, + "num_queries": 205, + "num_documents": 188002, + "num_relevant_docs": 469, + "min_document_length": 12, + "average_document_length": 0.4434899628727354, + "max_document_length": 1255, + "unique_documents": 188002, + "min_query_length": 58, + "average_query_length": 691386.5170731707, + "max_query_length": 7334, + "unique_queries": 205, + "none_queries": 0, + "min_relevant_docs_per_query": 0, + "average_relevant_docs_per_query": 2.2878048780487803, + "max_relevant_docs_per_query": 7, + "unique_relevant_docs": 234, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "long": { + "number_of_characters": 327567114, + "num_samples": 6511, + "num_queries": 861, + "num_documents": 5650, + "num_relevant_docs": 1679, + "min_document_length": 83, + "average_document_length": 131.69982300884956, + "max_document_length": 19341, + "unique_documents": 5650, + "min_query_length": 25, + "average_query_length": 379585.3774680604, + "max_query_length": 9182740, + "unique_queries": 861, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.9500580720092915, + "max_relevant_docs_per_query": 12, + "unique_relevant_docs": 920, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "biology": { + "number_of_characters": 19398768, + "num_samples": 627, + "num_queries": 103, + "num_documents": 524, + "num_relevant_docs": 134, + "min_document_length": 89, + "average_document_length": 102.81106870229007, + 
"max_document_length": 2195, + "unique_documents": 524, + "min_query_length": 142, + "average_query_length": 187814.5145631068, + "max_query_length": 1324203, + "unique_queries": 103, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.3009708737864079, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 134, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "earth_science": { + "number_of_characters": 41705609, + "num_samples": 717, + "num_queries": 116, + "num_documents": 601, + "num_relevant_docs": 187, + "min_document_length": 83, + "average_document_length": 92.02995008319468, + "max_document_length": 1565, + "unique_documents": 601, + "min_query_length": 33, + "average_query_length": 359054.3017241379, + "max_query_length": 2627263, + "unique_queries": 116, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.6120689655172413, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 187, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "economics": { + "number_of_characters": 19994187, + "num_samples": 619, + "num_queries": 103, + "num_documents": 516, + "num_relevant_docs": 109, + "min_document_length": 164, + "average_document_length": 147.63953488372093, + "max_document_length": 2223, + "unique_documents": 516, + "min_query_length": 45, + "average_query_length": 193378.68932038834, + "max_query_length": 429509, + "unique_queries": 103, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.058252427184466, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 109, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "psychology": { + "number_of_characters": 20490305, + "num_samples": 613, + "num_queries": 101, + "num_documents": 512, + "num_relevant_docs": 116, + "min_document_length": 166, + "average_document_length": 136.744140625, + "max_document_length": 2334, + "unique_documents": 512, + "min_query_length": 25, + "average_query_length": 202181.10891089108, + "max_query_length": 669577, + "unique_queries": 101, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1485148514851484, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 113, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "robotics": { + "number_of_characters": 18387998, + "num_samples": 609, + "num_queries": 101, + "num_documents": 508, + "num_relevant_docs": 106, + "min_document_length": 165, + "average_document_length": 433.3366141732283, + "max_document_length": 19341, + "unique_documents": 508, + "min_query_length": 120, + "average_query_length": 179879.8316831683, + "max_query_length": 3589950, + "unique_queries": 101, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0495049504950495, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 106, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": 
null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "stackoverflow": { + "number_of_characters": 184328188, + "num_samples": 1975, + "num_queries": 117, + "num_documents": 1858, + "num_relevant_docs": 129, + "min_document_length": 185, + "average_document_length": 81.42034445640473, + "max_document_length": 12432, + "unique_documents": 1858, + "min_query_length": 43, + "average_query_length": 1574161.6153846155, + "max_query_length": 9182740, + "unique_queries": 117, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1025641025641026, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 125, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "sustainable_living": { + "number_of_characters": 21155433, + "num_samples": 662, + "num_queries": 108, + "num_documents": 554, + "num_relevant_docs": 129, + "min_document_length": 158, + "average_document_length": 133.1173285198556, + "max_document_length": 2843, + "unique_documents": 554, + "min_query_length": 32, + "average_query_length": 195200.7962962963, + "max_query_length": 5732347, + "unique_queries": 108, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.1944444444444444, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 129, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "pony": { + "number_of_characters": 2106626, + "num_samples": 689, + 
"num_queries": 112, + "num_documents": 577, + "num_relevant_docs": 769, + "min_document_length": 182, + "average_document_length": 75.50259965337955, + "max_document_length": 946, + "unique_documents": 577, + "min_query_length": 54, + "average_query_length": 18420.1875, + "max_query_length": 108909, + "unique_queries": 112, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 6.866071428571429, + "max_relevant_docs_per_query": 12, + "unique_relevant_docs": 17, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "leetcode": {}, + "aops": {}, + "theoremqa_theorems": {}, + "theoremqa_questions": {} + } + } +} \ No newline at end of file diff --git a/tests/test_TaskMetadata.py b/tests/test_TaskMetadata.py index 2289dd7676..c935bb6590 100644 --- a/tests/test_TaskMetadata.py +++ b/tests/test_TaskMetadata.py @@ -532,7 +532,6 @@ def test_empty_descriptive_stat_in_new_datasets(task: AbsTask): # THIS IS ONLY INTENDED FOR HISTORIC DATASETS exceptions = [ "MSMARCOv2", - "BrightRetrieval", "NeuCLIR2022Retrieval", "NeuCLIR2023Retrieval", "FloresBitextMining", From 022d3557e6543804234bbab03e9612d36199d504 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Wed, 27 Nov 2024 20:16:02 +0500 Subject: [PATCH 15/40] Merge main v2 (#1504) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Count unique texts, data leaks in calculate metrics (#1438) * add more stat * add more stat * update statistics * fix: update task metadata to allow for null (#1448) * Update tasks table * 1.19.5 Automatically generated by python-semantic-release * Fix: Made data parsing in the leaderboard figure more robust (#1450) Bugfixes with data parsing in main figure * Fixed task loading (#1451) * 
Fixed task result loading from disk * Fixed task result loading from disk * fix: publish (#1452) * 1.19.6 Automatically generated by python-semantic-release * fix: Fix load external results with `None` mteb_version (#1453) * fix * lint * 1.19.7 Automatically generated by python-semantic-release * WIP: Polishing up leaderboard UI (#1461) * fix: Removed column wrapping on the table, so that it remains readable * Added disclaimer to figure * fix: Added links to task info table, switched out license with metric * fix: loading pre 1.11.0 (#1460) * small fix * fix: fix * 1.19.8 Automatically generated by python-semantic-release * fix: swap touche2020 to maintain compatibility (#1469) swap touche2020 for parity * 1.19.9 Automatically generated by python-semantic-release * docs: Add sum per language for task counts (#1468) * add sum per lang * add sort by sum option * make lint * fix: pinned datasets to <3.0.0 (#1470) * 1.19.10 Automatically generated by python-semantic-release * feat: add CUREv1 retrieval dataset (#1459) * feat: add CUREv1 dataset --------- Co-authored-by: nadshe Co-authored-by: olivierr42 Co-authored-by: Daniel Buades Marcos * feat: add missing domains to medical tasks * feat: modify benchmark tasks * chore: benchmark naming --------- Co-authored-by: nadshe Co-authored-by: olivierr42 * Update tasks table * 1.20.0 Automatically generated by python-semantic-release * fix: check if `model` attr of model exists (#1499) * check if model attr of model exists * lint * Fix retrieval evaluator * 1.20.1 Automatically generated by python-semantic-release * add cure statistics --------- Co-authored-by: Kenneth Enevoldsen Co-authored-by: github-actions[bot] Co-authored-by: github-actions Co-authored-by: Márton Kardos Co-authored-by: Isaac Chung Co-authored-by: Napuh <55241721+Napuh@users.noreply.github.com> Co-authored-by: Daniel Buades Marcos Co-authored-by: nadshe Co-authored-by: olivierr42 --- docs/create_tasks_table.py | 19 +- mteb/__init__.py | 2 + 
mteb/benchmarks/benchmarks.py | 28 +- mteb/descriptive_stats/Retrieval/CUREv1.json | 1256 +++++++++++++++++ mteb/leaderboard/app.py | 15 +- mteb/leaderboard/figures.py | 8 +- mteb/leaderboard/table.py | 2 +- mteb/load_results/task_results.py | 10 +- mteb/models/sentence_transformer_wrapper.py | 5 +- mteb/tasks/Reranking/zho/CMTEBReranking.py | 2 +- mteb/tasks/Retrieval/__init__.py | 1 + mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py | 2 +- mteb/tasks/Retrieval/eng/SciFactRetrieval.py | 2 +- .../tasks/Retrieval/eng/TRECCOVIDRetrieval.py | 2 +- .../Retrieval/multilingual/CUREv1Retrieval.py | 151 ++ .../tasks/Retrieval/pol/SciFactPLRetrieval.py | 2 +- .../Retrieval/pol/TRECCOVIDPLRetrieval.py | 2 +- mteb/tasks/Retrieval/zho/CMTEBRetrieval.py | 2 +- pyproject.toml | 2 +- 19 files changed, 1485 insertions(+), 28 deletions(-) create mode 100644 mteb/descriptive_stats/Retrieval/CUREv1.json create mode 100644 mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py diff --git a/docs/create_tasks_table.py b/docs/create_tasks_table.py index a6111523a9..13e9830276 100644 --- a/docs/create_tasks_table.py +++ b/docs/create_tasks_table.py @@ -68,7 +68,7 @@ def create_tasks_table(tasks: list[mteb.AbsTask]) -> str: return table -def create_task_lang_table(tasks: list[mteb.AbsTask]) -> str: +def create_task_lang_table(tasks: list[mteb.AbsTask], sort_by_sum=False) -> str: table_dict = {} ## Group by language. If it is a multilingual dataset, 1 is added to all languages present. 
for task in tasks: @@ -82,22 +82,27 @@ def create_task_lang_table(tasks: list[mteb.AbsTask]) -> str: ## Wrangle for polars pl_table_dict = [] for lang, d in table_dict.items(): - d.update({"lang": lang}) + d.update({"0-lang": lang}) # for sorting columns pl_table_dict.append(d) - df = pl.DataFrame(pl_table_dict).sort(by="lang") + df = pl.DataFrame(pl_table_dict).sort(by="0-lang") + df = df.with_columns(sum=pl.sum_horizontal(get_args(TASK_TYPE))) + df = df.select(sorted(df.columns)) + if sort_by_sum: + df = df.sort(by="sum", descending=True) + total = df.sum() task_names_md = " | ".join(sorted(get_args(TASK_TYPE))) - horizontal_line_md = "---|---" * len(sorted(get_args(TASK_TYPE))) + horizontal_line_md = "---|---" * (len(sorted(get_args(TASK_TYPE))) + 1) table = f""" -| Language | {task_names_md} | +| Language | {task_names_md} | Sum | |{horizontal_line_md}| """ for row in df.iter_rows(): - table += f"| {row[-1]} " - for num in row[:-1]: + table += f"| {row[0]} " + for num in row[1:]: table += f"| {num} " table += "|\n" diff --git a/mteb/__init__.py b/mteb/__init__.py index 1ef561a5f1..6de017b1f1 100644 --- a/mteb/__init__.py +++ b/mteb/__init__.py @@ -6,6 +6,7 @@ MTEB_ENG_CLASSIC, MTEB_MAIN_RU, MTEB_RETRIEVAL_LAW, + MTEB_RETRIEVAL_MEDICAL, MTEB_RETRIEVAL_WITH_INSTRUCTIONS, CoIR, ) @@ -24,6 +25,7 @@ "MTEB_ENG_CLASSIC", "MTEB_MAIN_RU", "MTEB_RETRIEVAL_LAW", + "MTEB_RETRIEVAL_MEDICAL", "MTEB_RETRIEVAL_WITH_INSTRUCTIONS", "CoIR", "TASKS_REGISTRY", diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 743a5bde12..9aaefda3cb 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -106,7 +106,7 @@ def load_results( "StackExchangeClustering.v2", "StackExchangeClusteringP2P.v2", "TRECCOVID", - "Touche2020", + "Touche2020Retrieval.v3", "ToxicConversationsClassification", "TweetSentimentExtractionClassification", "TwentyNewsgroupsClustering.v2", @@ -186,7 +186,7 @@ def load_results( "StackOverflowDupQuestions", "SummEval", 
"TRECCOVID", - "Touche2020Retrieval.v3", + "Touche2020", "ToxicConversationsClassification", "TweetSentimentExtractionClassification", "TwentyNewsgroupsClustering", @@ -308,6 +308,29 @@ def load_results( citation=None, ) +MTEB_RETRIEVAL_MEDICAL = Benchmark( + name="MTEB(Medical)", + tasks=get_tasks( + tasks=[ + "CUREv1", + "NFCorpus", + "TRECCOVID", + "TRECCOVID-PL", + "SciFact", + "SciFact-PL", + "MedicalQARetrieval", + "PublicHealthQA", + "MedrxivClusteringP2P.v2", + "MedrxivClusteringS2S.v2", + "CmedqaRetrieval", + "CMedQAv2-reranking", + ], + ), + description="A curated set of MTEB tasks designed to evaluate systems in the context of medical information retrieval.", + reference="", + citation=None, +) + MTEB_MINERS_BITEXT_MINING = Benchmark( name="MINERSBitextMining", tasks=get_tasks( @@ -702,6 +725,7 @@ def load_results( "SpartQA", "TempReasonL1", "TRECCOVID", + "CUREv1", "WinoGrande", "BelebeleRetrieval", "MLQARetrieval", diff --git a/mteb/descriptive_stats/Retrieval/CUREv1.json b/mteb/descriptive_stats/Retrieval/CUREv1.json new file mode 100644 index 0000000000..682b3752fb --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/CUREv1.json @@ -0,0 +1,1256 @@ +{ + "all": { + "number_of_characters": 376986167, + "num_samples": 739800, + "num_queries": 6000, + "num_documents": 733800, + "num_relevant_docs": 242148, + "min_document_length": 13, + "average_document_length": 0.7376887435268465, + "max_document_length": 357, + "unique_documents": 733800, + "min_query_length": 39, + "average_query_length": 62740.8085, + "max_query_length": 10344, + "unique_queries": 6000, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 40.358, + "max_relevant_docs_per_query": 1364, + "unique_relevant_docs": 124581, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "en": { + "number_of_characters": 125639484, + "num_samples": 246600, + "num_queries": 2000, + "num_documents": 244600, + "num_relevant_docs": 80716, + "min_document_length": 13, + "average_document_length": 0.645408830744072, + "max_document_length": 232, + "unique_documents": 244600, + "min_query_length": 39, + "average_query_length": 62740.8085, + "max_query_length": 10344, + "unique_queries": 2000, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 40.358, + "max_relevant_docs_per_query": 1364, + "unique_relevant_docs": 41527, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 125664632, + "num_samples": 246600, + "num_queries": 2000, + "num_documents": 244600, + "num_relevant_docs": 80716, + "min_document_length": 16, + "average_document_length": 0.748221586263287, + "max_document_length": 288, + "unique_documents": 244600, + "min_query_length": 39, + "average_query_length": 62740.8085, + "max_query_length": 10344, + "unique_queries": 2000, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 40.358, + "max_relevant_docs_per_query": 1364, + "unique_relevant_docs": 41527, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 125682051, + "num_samples": 246600, + "num_queries": 2000, + "num_documents": 244600, + "num_relevant_docs": 80716, + 
"min_document_length": 20, + "average_document_length": 0.8194358135731807, + "max_document_length": 357, + "unique_documents": 244600, + "min_query_length": 39, + "average_query_length": 62740.8085, + "max_query_length": 10344, + "unique_queries": 2000, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 40.358, + "max_relevant_docs_per_query": 1364, + "unique_relevant_docs": 41527, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "dentistry_and_oral_health": { + "number_of_characters": 42547753, + "num_samples": 88656, + "num_queries": 600, + "num_documents": 88056, + "num_relevant_docs": 23898, + "min_document_length": 21, + "average_document_length": 0.5983351503588625, + "max_document_length": 187, + "unique_documents": 88056, + "min_query_length": 39, + "average_query_length": 70825.11, + "max_query_length": 4539, + "unique_queries": 600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 39.83, + "max_relevant_docs_per_query": 269, + "unique_relevant_docs": 12189, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "en": { + "number_of_characters": 14180200, + "num_samples": 29552, + "num_queries": 200, + "num_documents": 29352, + "num_relevant_docs": 7966, + "min_document_length": 21, + "average_document_length": 0.5171027527936768, + "max_document_length": 147, + "unique_documents": 29352, + "min_query_length": 39, + "average_query_length": 70825.11, + "max_query_length": 4539, 
+ "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 39.83, + "max_relevant_docs_per_query": 269, + "unique_relevant_docs": 4063, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 14183105, + "num_samples": 29552, + "num_queries": 200, + "num_documents": 29352, + "num_relevant_docs": 7966, + "min_document_length": 27, + "average_document_length": 0.6160738620877624, + "max_document_length": 160, + "unique_documents": 29352, + "min_query_length": 39, + "average_query_length": 70825.11, + "max_query_length": 4539, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 39.83, + "max_relevant_docs_per_query": 269, + "unique_relevant_docs": 4063, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 14184448, + "num_samples": 29552, + "num_queries": 200, + "num_documents": 29352, + "num_relevant_docs": 7966, + "min_document_length": 31, + "average_document_length": 0.6618288361951485, + "max_document_length": 187, + "unique_documents": 29352, + "min_query_length": 39, + "average_query_length": 70825.11, + "max_query_length": 4539, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 39.83, + "max_relevant_docs_per_query": 269, + "unique_relevant_docs": 4063, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "dermatology": { + "number_of_characters": 22943198, + "num_samples": 47661, + "num_queries": 600, + "num_documents": 47061, + "num_relevant_docs": 8076, + "min_document_length": 16, + "average_document_length": 0.8371687809438814, + "max_document_length": 127, + "unique_documents": 47061, + "min_query_length": 52, + "average_query_length": 38173.0, + "max_query_length": 5440, + "unique_queries": 600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 13.46, + "max_relevant_docs_per_query": 111, + "unique_relevant_docs": 3270, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "en": { + "number_of_characters": 7646329, + "num_samples": 15887, + "num_queries": 200, + "num_documents": 15687, + "num_relevant_docs": 2692, + "min_document_length": 16, + "average_document_length": 0.7476891693759163, + "max_document_length": 106, + "unique_documents": 15687, + "min_query_length": 52, + "average_query_length": 38173.0, + "max_query_length": 5440, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 13.46, + "max_relevant_docs_per_query": 111, + "unique_relevant_docs": 1090, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 7647866, + "num_samples": 15887, + 
"num_queries": 200, + "num_documents": 15687, + "num_relevant_docs": 2692, + "min_document_length": 20, + "average_document_length": 0.8456683878370626, + "max_document_length": 126, + "unique_documents": 15687, + "min_query_length": 52, + "average_query_length": 38173.0, + "max_query_length": 5440, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 13.46, + "max_relevant_docs_per_query": 111, + "unique_relevant_docs": 1090, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 7649003, + "num_samples": 15887, + "num_queries": 200, + "num_documents": 15687, + "num_relevant_docs": 2692, + "min_document_length": 25, + "average_document_length": 0.9181487856186651, + "max_document_length": 127, + "unique_documents": 15687, + "min_query_length": 52, + "average_query_length": 38173.0, + "max_query_length": 5440, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 13.46, + "max_relevant_docs_per_query": 111, + "unique_relevant_docs": 1090, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "gastroenterology": { + "number_of_characters": 33714725, + "num_samples": 69804, + "num_queries": 600, + "num_documents": 69204, + "num_relevant_docs": 31995, + "min_document_length": 26, + "average_document_length": 0.7955031501069303, + "max_document_length": 216, + "unique_documents": 69204, + "min_query_length": 51, + "average_query_length": 56099.455, + 
"max_query_length": 5027, + "unique_queries": 600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 53.325, + "max_relevant_docs_per_query": 892, + "unique_relevant_docs": 15657, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "en": { + "number_of_characters": 11236098, + "num_samples": 23268, + "num_queries": 200, + "num_documents": 23068, + "num_relevant_docs": 10665, + "min_document_length": 26, + "average_document_length": 0.7025749956649905, + "max_document_length": 174, + "unique_documents": 23068, + "min_query_length": 51, + "average_query_length": 56099.455, + "max_query_length": 5027, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 53.325, + "max_relevant_docs_per_query": 892, + "unique_relevant_docs": 5219, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 11238409, + "num_samples": 23268, + "num_queries": 200, + "num_documents": 23068, + "num_relevant_docs": 10665, + "min_document_length": 26, + "average_document_length": 0.8027570660655453, + "max_document_length": 214, + "unique_documents": 23068, + "min_query_length": 51, + "average_query_length": 56099.455, + "max_query_length": 5027, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 53.325, + "max_relevant_docs_per_query": 892, + "unique_relevant_docs": 5219, + "num_instructions": null, + 
"min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 11240218, + "num_samples": 23268, + "num_queries": 200, + "num_documents": 23068, + "num_relevant_docs": 10665, + "min_document_length": 31, + "average_document_length": 0.8811773885902549, + "max_document_length": 216, + "unique_documents": 23068, + "min_query_length": 51, + "average_query_length": 56099.455, + "max_query_length": 5027, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 53.325, + "max_relevant_docs_per_query": 892, + "unique_relevant_docs": 5219, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "genetics": { + "number_of_characters": 46802844, + "num_samples": 81588, + "num_queries": 600, + "num_documents": 80988, + "num_relevant_docs": 40815, + "min_document_length": 16, + "average_document_length": 0.6085716402429989, + "max_document_length": 227, + "unique_documents": 80988, + "min_query_length": 44, + "average_query_length": 77922.595, + "max_query_length": 6394, + "unique_queries": 600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 68.025, + "max_relevant_docs_per_query": 1070, + "unique_relevant_docs": 20163, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + 
"hf_subset_descriptive_stats": { + "en": { + "number_of_characters": 15598969, + "num_samples": 27196, + "num_queries": 200, + "num_documents": 26996, + "num_relevant_docs": 13605, + "min_document_length": 16, + "average_document_length": 0.535264483627204, + "max_document_length": 178, + "unique_documents": 26996, + "min_query_length": 44, + "average_query_length": 77922.595, + "max_query_length": 6394, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 68.025, + "max_relevant_docs_per_query": 1070, + "unique_relevant_docs": 6721, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 15601118, + "num_samples": 27196, + "num_queries": 200, + "num_documents": 26996, + "num_relevant_docs": 13605, + "min_document_length": 18, + "average_document_length": 0.6148688694621426, + "max_document_length": 205, + "unique_documents": 26996, + "min_query_length": 44, + "average_query_length": 77922.595, + "max_query_length": 6394, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 68.025, + "max_relevant_docs_per_query": 1070, + "unique_relevant_docs": 6721, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 15602757, + "num_samples": 27196, + "num_queries": 200, + "num_documents": 26996, + "num_relevant_docs": 13605, + "min_document_length": 25, + "average_document_length": 0.6755815676396503, + "max_document_length": 227, + 
"unique_documents": 26996, + "min_query_length": 44, + "average_query_length": 77922.595, + "max_query_length": 6394, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 68.025, + "max_relevant_docs_per_query": 1070, + "unique_relevant_docs": 6721, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "neuroscience_and_neurology": { + "number_of_characters": 47565061, + "num_samples": 91764, + "num_queries": 600, + "num_documents": 91164, + "num_relevant_docs": 25227, + "min_document_length": 28, + "average_document_length": 0.6213417577113773, + "max_document_length": 357, + "unique_documents": 91164, + "min_query_length": 45, + "average_query_length": 79180.695, + "max_query_length": 6394, + "unique_queries": 600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 42.045, + "max_relevant_docs_per_query": 251, + "unique_relevant_docs": 15252, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "en": { + "number_of_characters": 15852189, + "num_samples": 30588, + "num_queries": 200, + "num_documents": 30388, + "num_relevant_docs": 8409, + "min_document_length": 28, + "average_document_length": 0.528169014084507, + "max_document_length": 196, + "unique_documents": 30388, + "min_query_length": 45, + "average_query_length": 79180.695, + "max_query_length": 6394, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 42.045, + "max_relevant_docs_per_query": 251, + "unique_relevant_docs": 5084, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 15854555, + "num_samples": 30588, + "num_queries": 200, + "num_documents": 30388, + "num_relevant_docs": 8409, + "min_document_length": 33, + "average_document_length": 0.6060286955377122, + "max_document_length": 223, + "unique_documents": 30388, + "min_query_length": 45, + "average_query_length": 79180.695, + "max_query_length": 6394, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 42.045, + "max_relevant_docs_per_query": 251, + "unique_relevant_docs": 5084, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 15858317, + "num_samples": 30588, + "num_queries": 200, + "num_documents": 30388, + "num_relevant_docs": 8409, + "min_document_length": 38, + "average_document_length": 0.7298275635119126, + "max_document_length": 357, + "unique_documents": 30388, + "min_query_length": 45, + "average_query_length": 79180.695, + "max_query_length": 6394, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 42.045, + "max_relevant_docs_per_query": 251, + "unique_relevant_docs": 5084, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "orthopedic_surgery": { + "number_of_characters": 37106615, + "num_samples": 75330, + "num_queries": 600, + "num_documents": 74730, + "num_relevant_docs": 9006, + "min_document_length": 20, + "average_document_length": 0.7896293322628128, + "max_document_length": 239, + "unique_documents": 74730, + "min_query_length": 52, + "average_query_length": 61746.01, + "max_query_length": 10344, + "unique_queries": 600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 15.01, + "max_relevant_docs_per_query": 82, + "unique_relevant_docs": 5085, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "en": { + "number_of_characters": 12366489, + "num_samples": 25110, + "num_queries": 200, + "num_documents": 24910, + "num_relevant_docs": 3002, + "min_document_length": 25, + "average_document_length": 0.6939783219590526, + "max_document_length": 219, + "unique_documents": 24910, + "min_query_length": 52, + "average_query_length": 61746.01, + "max_query_length": 10344, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 15.01, + "max_relevant_docs_per_query": 82, + "unique_relevant_docs": 1695, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 12369347, + "num_samples": 25110, + "num_queries": 200, + "num_documents": 24910, + 
"num_relevant_docs": 3002, + "min_document_length": 21, + "average_document_length": 0.8087113608992372, + "max_document_length": 228, + "unique_documents": 24910, + "min_query_length": 52, + "average_query_length": 61746.01, + "max_query_length": 10344, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 15.01, + "max_relevant_docs_per_query": 82, + "unique_relevant_docs": 1695, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 12370779, + "num_samples": 25110, + "num_queries": 200, + "num_documents": 24910, + "num_relevant_docs": 3002, + "min_document_length": 20, + "average_document_length": 0.8661983139301486, + "max_document_length": 239, + "unique_documents": 24910, + "min_query_length": 52, + "average_query_length": 61746.01, + "max_query_length": 10344, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 15.01, + "max_relevant_docs_per_query": 82, + "unique_relevant_docs": 1695, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "otorhinolaryngology": { + "number_of_characters": 33469218, + "num_samples": 73176, + "num_queries": 600, + "num_documents": 72576, + "num_relevant_docs": 22026, + "min_document_length": 13, + "average_document_length": 0.7396660052910053, + "max_document_length": 214, + "unique_documents": 72576, + "min_query_length": 44, + "average_query_length": 55692.56, + "max_query_length": 3594, + 
"unique_queries": 600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 36.71, + "max_relevant_docs_per_query": 254, + "unique_relevant_docs": 8856, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "en": { + "number_of_characters": 11154337, + "num_samples": 24392, + "num_queries": 200, + "num_documents": 24192, + "num_relevant_docs": 7342, + "min_document_length": 13, + "average_document_length": 0.6541418650793651, + "max_document_length": 189, + "unique_documents": 24192, + "min_query_length": 44, + "average_query_length": 55692.56, + "max_query_length": 3594, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 36.71, + "max_relevant_docs_per_query": 254, + "unique_relevant_docs": 2952, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 11156678, + "num_samples": 24392, + "num_queries": 200, + "num_documents": 24192, + "num_relevant_docs": 7342, + "min_document_length": 16, + "average_document_length": 0.7509093915343915, + "max_document_length": 198, + "unique_documents": 24192, + "min_query_length": 44, + "average_query_length": 55692.56, + "max_query_length": 3594, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 36.71, + "max_relevant_docs_per_query": 254, + "unique_relevant_docs": 2952, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 11158203, + "num_samples": 24392, + "num_queries": 200, + "num_documents": 24192, + "num_relevant_docs": 7342, + "min_document_length": 20, + "average_document_length": 0.8139467592592593, + "max_document_length": 214, + "unique_documents": 24192, + "min_query_length": 44, + "average_query_length": 55692.56, + "max_query_length": 3594, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 36.71, + "max_relevant_docs_per_query": 254, + "unique_relevant_docs": 2952, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "plastic_surgery": { + "number_of_characters": 36780121, + "num_samples": 80415, + "num_queries": 600, + "num_documents": 79815, + "num_relevant_docs": 15255, + "min_document_length": 22, + "average_document_length": 0.7145774603771221, + "max_document_length": 245, + "unique_documents": 79815, + "min_query_length": 44, + "average_query_length": 61205.145, + "max_query_length": 4996, + "unique_queries": 600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 25.425, + "max_relevant_docs_per_query": 197, + "unique_relevant_docs": 9324, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "en": { + 
"number_of_characters": 12257549, + "num_samples": 26805, + "num_queries": 200, + "num_documents": 26605, + "num_relevant_docs": 5085, + "min_document_length": 22, + "average_document_length": 0.6209359143018229, + "max_document_length": 177, + "unique_documents": 26605, + "min_query_length": 44, + "average_query_length": 61205.145, + "max_query_length": 4996, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 25.425, + "max_relevant_docs_per_query": 197, + "unique_relevant_docs": 3108, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 12260329, + "num_samples": 26805, + "num_queries": 200, + "num_documents": 26605, + "num_relevant_docs": 5085, + "min_document_length": 25, + "average_document_length": 0.7254275512121782, + "max_document_length": 225, + "unique_documents": 26605, + "min_query_length": 44, + "average_query_length": 61205.145, + "max_query_length": 4996, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 25.425, + "max_relevant_docs_per_query": 197, + "unique_relevant_docs": 3108, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 12262243, + "num_samples": 26805, + "num_queries": 200, + "num_documents": 26605, + "num_relevant_docs": 5085, + "min_document_length": 29, + "average_document_length": 0.7973689156173651, + "max_document_length": 245, + "unique_documents": 26605, + "min_query_length": 
44, + "average_query_length": 61205.145, + "max_query_length": 4996, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 25.425, + "max_relevant_docs_per_query": 197, + "unique_relevant_docs": 3108, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "psychiatry_and_psychology": { + "number_of_characters": 59876950, + "num_samples": 107868, + "num_queries": 600, + "num_documents": 107268, + "num_relevant_docs": 41259, + "min_document_length": 29, + "average_document_length": 0.5822892195249282, + "max_document_length": 248, + "unique_documents": 107268, + "min_query_length": 44, + "average_query_length": 99690.815, + "max_query_length": 5370, + "unique_queries": 600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 68.765, + "max_relevant_docs_per_query": 1070, + "unique_relevant_docs": 23991, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "en": { + "number_of_characters": 19956676, + "num_samples": 35956, + "num_queries": 200, + "num_documents": 35756, + "num_relevant_docs": 13753, + "min_document_length": 29, + "average_document_length": 0.5177592571876048, + "max_document_length": 226, + "unique_documents": 35756, + "min_query_length": 44, + "average_query_length": 99690.815, + "max_query_length": 5370, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 68.765, + 
"max_relevant_docs_per_query": 1070, + "unique_relevant_docs": 7997, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 19959277, + "num_samples": 35956, + "num_queries": 200, + "num_documents": 35756, + "num_relevant_docs": 13753, + "min_document_length": 34, + "average_document_length": 0.5905022933214006, + "max_document_length": 248, + "unique_documents": 35756, + "min_query_length": 44, + "average_query_length": 99690.815, + "max_query_length": 5370, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 68.765, + "max_relevant_docs_per_query": 1070, + "unique_relevant_docs": 7997, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 19960997, + "num_samples": 35956, + "num_queries": 200, + "num_documents": 35756, + "num_relevant_docs": 13753, + "min_document_length": 35, + "average_document_length": 0.6386061080657792, + "max_document_length": 248, + "unique_documents": 35756, + "min_query_length": 44, + "average_query_length": 99690.815, + "max_query_length": 5370, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 68.765, + "max_relevant_docs_per_query": 1070, + "unique_relevant_docs": 7997, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + }, + "pulmonology": { + "number_of_characters": 47108443, + "num_samples": 97551, + "num_queries": 600, + "num_documents": 96951, + "num_relevant_docs": 24591, + "min_document_length": 25, + "average_document_length": 0.5782508689956782, + "max_document_length": 289, + "unique_documents": 96951, + "min_query_length": 39, + "average_query_length": 78420.635, + "max_query_length": 4772, + "unique_queries": 600, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 40.985, + "max_relevant_docs_per_query": 1364, + "unique_relevant_docs": 13683, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "en": { + "number_of_characters": 15700235, + "num_samples": 32517, + "num_queries": 200, + "num_documents": 32317, + "num_relevant_docs": 8197, + "min_document_length": 25, + "average_document_length": 0.4984373549525018, + "max_document_length": 232, + "unique_documents": 32317, + "min_query_length": 39, + "average_query_length": 78420.635, + "max_query_length": 4772, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 40.985, + "max_relevant_docs_per_query": 1364, + "unique_relevant_docs": 4561, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "es": { + "number_of_characters": 15703535, + "num_samples": 32517, + "num_queries": 200, + "num_documents": 32317, + "num_relevant_docs": 8197, + 
"min_document_length": 29, + "average_document_length": 0.6005507936999103, + "max_document_length": 288, + "unique_documents": 32317, + "min_query_length": 39, + "average_query_length": 78420.635, + "max_query_length": 4772, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 40.985, + "max_relevant_docs_per_query": 1364, + "unique_relevant_docs": 4561, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "fr": { + "number_of_characters": 15704673, + "num_samples": 32517, + "num_queries": 200, + "num_documents": 32317, + "num_relevant_docs": 8197, + "min_document_length": 29, + "average_document_length": 0.6357644583346227, + "max_document_length": 289, + "unique_documents": 32317, + "min_query_length": 39, + "average_query_length": 78420.635, + "max_query_length": 4772, + "unique_queries": 200, + "none_queries": 0, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 40.985, + "max_relevant_docs_per_query": 1364, + "unique_relevant_docs": 4561, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index 9b89d5dd4c..8a5eb961c1 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -60,21 +60,25 @@ def format_list(props: list[str]): return ", ".join(props) -def update_task_info(task_names: str) -> str: +def update_task_info(task_names: str) -> gr.DataFrame: tasks = mteb.get_tasks(tasks=task_names) - df = 
tasks.to_dataframe() + df = tasks.to_dataframe( + properties=["name", "type", "languages", "domains", "reference", "main_score"] + ) df["languages"] = df["languages"].map(format_list) df["domains"] = df["domains"].map(format_list) + df["name"] = "[" + df["name"] + "](" + df["reference"] + ")" df = df.rename( columns={ "name": "Task Name", "type": "Task Type", "languages": "Languages", "domains": "Domains", - "license": "License", + "main_score": "Metric", } ) - return df + df = df.drop(columns="reference") + return gr.DataFrame(df, datatype=["markdown"] + ["str"] * (len(df.columns) - 1)) all_results = load_results().filter_models() @@ -215,6 +219,9 @@ def update_task_info(task_names: str) -> str: citation = gr.Markdown(update_citation, inputs=[benchmark_select]) with gr.Column(): plot = gr.Plot(performance_size_plot, inputs=[summary_table]) + gr.Markdown( + "*We only display models that have been run on all tasks in the benchmark*" + ) with gr.Tab("Summary"): summary_table.render() with gr.Tab("Performance per task"): diff --git a/mteb/leaderboard/figures.py b/mteb/leaderboard/figures.py index 7a354f7c82..373bcd00c6 100644 --- a/mteb/leaderboard/figures.py +++ b/mteb/leaderboard/figures.py @@ -14,6 +14,10 @@ def parse_n_params(text: str) -> int: def parse_model_name(name: str) -> str: + if name is None: + return "" + if "]" not in name: + return name name, _ = name.split("]") return name[1:] @@ -38,8 +42,8 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: df["Number of Parameters"] = df["Number of Parameters"].map(parse_n_params) df["Model"] = df["Model"].map(parse_model_name) df["model_text"] = df["Model"].where(df["Model"].isin(models_to_annotate), "") - df["Embedding Dimensions"] = df["Embedding Dimensions"].map(int) - df["Max Tokens"] = df["Max Tokens"].map(int) + df["Embedding Dimensions"] = df["Embedding Dimensions"].map(parse_float) + df["Max Tokens"] = df["Max Tokens"].map(parse_float) df["Log(Tokens)"] = np.log10(df["Max Tokens"]) df["Mean 
(Task)"] = df["Mean (Task)"].map(parse_float) df = df.dropna(subset=["Mean (Task)", "Number of Parameters"]) diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py index d9b830d236..c965a7f682 100644 --- a/mteb/leaderboard/table.py +++ b/mteb/leaderboard/table.py @@ -200,7 +200,7 @@ def scores_to_tables( joint_table_style, # column_widths=column_widths, datatype=column_types, - wrap=True, + # wrap=True, ), gr.DataFrame(per_task_style), ) diff --git a/mteb/load_results/task_results.py b/mteb/load_results/task_results.py index 202ed9b5f5..8f587fd72b 100644 --- a/mteb/load_results/task_results.py +++ b/mteb/load_results/task_results.py @@ -296,10 +296,12 @@ def from_disk(cls, path: Path, load_historic_data: bool = True) -> TaskResult: pre_1_11_load = ( ( "mteb_version" in data + and data["mteb_version"] is not None and Version(data["mteb_version"]) < Version("1.11.0") ) or "mteb_version" not in data ) # assume it is before 1.11.0 if the version is not present + try: obj = cls.model_validate(data) except Exception as e: @@ -310,9 +312,11 @@ def from_disk(cls, path: Path, load_historic_data: bool = True) -> TaskResult: ) obj = cls._convert_from_before_v1_11_0(data) - pre_v_12_48 = "mteb_version" in data and Version( - data["mteb_version"] - ) < Version("1.12.48") + pre_v_12_48 = ( + "mteb_version" in data + and data["mteb_version"] is not None + and Version(data["mteb_version"]) < Version("1.12.48") + ) if pre_v_12_48: cls._fix_pair_classification_scores(obj) diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index 5cc824fa82..13d39e4031 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -53,6 +53,9 @@ def __init__( self.model.prompts = model_prompts self.model_prompts = self.validate_task_to_prompt_name(model_prompts) + if isinstance(self.model, CrossEncoder): + self.predict = self._predict + def encode( self, sentences: Sequence[str], @@ -106,7 
+109,7 @@ def encode( embeddings = embeddings.cpu().detach().float().numpy() return embeddings - def predict( + def _predict( self, sentences: Sequence[str], **kwargs: Any, diff --git a/mteb/tasks/Reranking/zho/CMTEBReranking.py b/mteb/tasks/Reranking/zho/CMTEBReranking.py index ee830f7e16..d6ff57a2a9 100644 --- a/mteb/tasks/Reranking/zho/CMTEBReranking.py +++ b/mteb/tasks/Reranking/zho/CMTEBReranking.py @@ -128,7 +128,7 @@ class CMedQAv2(AbsTaskReranking): main_score="map_at_1000", date=None, form=None, - domains=None, + domains=["Medical", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index f8a47b08a9..ca41d4354f 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -105,6 +105,7 @@ from .multilingual.BelebeleRetrieval import * from .multilingual.CrossLingualSemanticDiscriminationWMT19 import * from .multilingual.CrossLingualSemanticDiscriminationWMT21 import * +from .multilingual.CUREv1Retrieval import * from .multilingual.IndicQARetrieval import * from .multilingual.MintakaRetrieval import * from .multilingual.MIRACLRetrieval import * diff --git a/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py b/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py index 7c40b6707b..31f4eb60b1 100644 --- a/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py @@ -21,7 +21,7 @@ class NFCorpus(AbsTaskRetrieval): eval_langs=["eng-Latn"], main_score="ndcg_at_10", date=None, - domains=None, + domains=["Medical", "Academic", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Retrieval/eng/SciFactRetrieval.py b/mteb/tasks/Retrieval/eng/SciFactRetrieval.py index 05e9a6e541..1dc47d8b66 100644 --- a/mteb/tasks/Retrieval/eng/SciFactRetrieval.py +++ b/mteb/tasks/Retrieval/eng/SciFactRetrieval.py @@ -21,7 +21,7 @@ class SciFact(AbsTaskRetrieval): eval_langs=["eng-Latn"], 
main_score="ndcg_at_10", date=None, - domains=None, + domains=["Academic", "Medical", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py b/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py index 6c7b7f01d1..00c96c0d04 100644 --- a/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py @@ -21,7 +21,7 @@ class TRECCOVID(AbsTaskRetrieval): eval_langs=["eng-Latn"], main_score="ndcg_at_10", date=None, - domains=None, + domains=["Medical", "Academic", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py b/mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py new file mode 100644 index 0000000000..6e97786a77 --- /dev/null +++ b/mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +from enum import Enum + +from datasets import DatasetDict, load_dataset + +from mteb.abstasks.TaskMetadata import TaskMetadata + +from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from ....abstasks.MultilingualTask import MultilingualTask + +_LANGUAGES = { + "en": ["eng-Latn", "eng-Latn"], + "es": ["spa-Latn", "eng-Latn"], + "fr": ["fra-Latn", "eng-Latn"], +} + + +class CUREv1Splits(str, Enum): + all = "All" + dentistry_and_oral_health = "Dentistry and Oral Health" + dermatology = "Dermatology" + gastroenterology = "Gastroenterology" + genetics = "Genetics" + neuroscience_and_neurology = "Neuroscience and Neurology" + orthopedic_surgery = "Orthopedic Surgery" + otorhinolaryngology = "Otorhinolaryngology" + plastic_surgery = "Plastic Surgery" + psychiatry_and_psychology = "Psychiatry and Psychology" + pulmonology = "Pulmonology" + + @classmethod + def names(cls) -> list[str]: + return sorted(cls._member_names_) + + +class CUREv1Retrieval(MultilingualTask, AbsTaskRetrieval): + metadata = TaskMetadata( + dataset={ + "path": 
"clinia/CUREv1", + "revision": "3bcf51c91e04d04a8a3329dfbe988b964c5cbe83", + }, + name="CUREv1", + description="Collection of query-passage pairs curated by medical professionals, across 10 disciplines and 3 cross-lingual settings.", + type="Retrieval", + modalities=["text"], + category="s2p", + reference="https://huggingface.co/datasets/clinia/CUREv1", + eval_splits=CUREv1Splits.names(), + eval_langs=_LANGUAGES, + main_score="ndcg_at_10", + date=("2024-01-01", "2024-10-31"), + domains=["Medical", "Academic", "Written"], + task_subtypes=[], + license="cc-by-nc-4.0", + annotations_creators="expert-annotated", + dialect=[], + sample_creation="created", + bibtex_citation="", + prompt={ + "query": "Given a question by a medical professional, retrieve relevant passages that best answer the question", + }, + ) + + def _load_corpus(self, split: str, cache_dir: str | None = None): + ds = load_dataset( + path=self.metadata_dict["dataset"]["path"], + revision=self.metadata_dict["dataset"]["revision"], + name="corpus", + split=split, + cache_dir=cache_dir, + ) + + corpus = { + doc["_id"]: {"title": doc["title"], "text": doc["text"]} for doc in ds + } + + return corpus + + def _load_qrels(self, split: str, cache_dir: str | None = None): + ds = load_dataset( + path=self.metadata_dict["dataset"]["path"], + revision=self.metadata_dict["dataset"]["revision"], + name="qrels", + split=split, + cache_dir=cache_dir, + ) + + qrels = {} + + for qrel in ds: + query_id = qrel["query-id"] + doc_id = qrel["corpus-id"] + score = int(qrel["score"]) + if query_id not in qrels: + qrels[query_id] = {} + qrels[query_id][doc_id] = score + + return qrels + + def _load_queries(self, split: str, language: str, cache_dir: str | None = None): + ds = load_dataset( + path=self.metadata_dict["dataset"]["path"], + revision=self.metadata_dict["dataset"]["revision"], + name=f"queries-{language}", + split=split, + cache_dir=cache_dir, + ) + + queries = {query["_id"]: query["text"] for query in ds} + + return 
queries + + def load_data(self, **kwargs): + if self.data_loaded: + return + + eval_splits = kwargs.get("eval_splits", self.metadata.eval_splits) + languages = kwargs.get("eval_langs", self.metadata.eval_langs) + cache_dir = kwargs.get("cache_dir", None) + + # Iterate over splits and languages + corpus = { + language: {split: None for split in eval_splits} for language in languages + } + queries = { + language: {split: None for split in eval_splits} for language in languages + } + relevant_docs = { + language: {split: None for split in eval_splits} for language in languages + } + for split in eval_splits: + # Since this is a cross-lingual dataset, the corpus and the relevant documents do not depend on the language + split_corpus = self._load_corpus(split=split, cache_dir=cache_dir) + split_qrels = self._load_qrels(split=split, cache_dir=cache_dir) + + # Queries depend on the language + for language in languages: + corpus[language][split] = split_corpus + relevant_docs[language][split] = split_qrels + + queries[language][split] = self._load_queries( + split=split, language=language, cache_dir=cache_dir + ) + + # Convert into DatasetDict + self.corpus = DatasetDict(corpus) + self.queries = DatasetDict(queries) + self.relevant_docs = DatasetDict(relevant_docs) + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py b/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py index 2588b1c288..92d61b42bd 100644 --- a/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py @@ -22,7 +22,7 @@ class SciFactPL(AbsTaskRetrieval): eval_langs=["pol-Latn"], main_score="ndcg_at_10", date=None, - domains=None, + domains=["Academic", "Medical", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py b/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py index 4ba6a9ac00..f9f331191a 100644 --- a/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py 
+++ b/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py @@ -25,7 +25,7 @@ class TRECCOVIDPL(AbsTaskRetrieval): "2019-12-01", "2022-12-31", ), # approximate date of covid pandemic start and end (best guess) - domains=["Academic", "Non-fiction", "Written"], + domains=["Academic", "Medical", "Non-fiction", "Written"], task_subtypes=["Article retrieval"], license="not specified", annotations_creators="derived", diff --git a/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py b/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py index 08674ec8c8..ad26652ccd 100644 --- a/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py +++ b/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py @@ -236,7 +236,7 @@ class CmedqaRetrieval(AbsTaskRetrieval): eval_langs=["cmn-Hans"], main_score="ndcg_at_10", date=None, - domains=None, + domains=["Medical", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/pyproject.toml b/pyproject.toml index c5bd396536..1ce9e09356 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.19.5" +version = "1.19.4" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 6383950aac730f615d8f98521ba8bdd23c61ed50 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Thu, 28 Nov 2024 03:30:43 +0500 Subject: [PATCH 16/40] Fix: retrieval stats (#1496) * fix bright loader * lint * fix comment * fix stats * fix retrieval stats * update stats * add rest of the stat * move bach code * fix docs * lint --- mteb/abstasks/AbsTaskRetrieval.py | 58 +- mteb/abstasks/dataloaders.py | 2 +- .../BitextMining/FloresBitextMining.json | 538373 +++++++++++++++ .../Core17InstructionRetrieval.json | 24 +- .../News21InstructionRetrieval.json | 24 +- .../Robust04InstructionRetrieval.json | 24 +- .../Core17InstructionRetrieval.json | 30 - .../Reranking/AlloprofReranking.json | 18 +- .../Reranking/AskUbuntuDupQuestions.json | 14 +- .../Reranking/CMedQAv1-reranking.json | 18 +- 
.../Reranking/CMedQAv2-reranking.json | 14 +- .../Reranking/ESCIReranking.json | 60 +- .../Reranking/InstructIR.json | 19 +- .../Reranking/MIRACLReranking.json | 353 +- .../Reranking/MMarcoReranking.json | 18 +- .../Reranking/MindSmallReranking.json | 15 +- mteb/descriptive_stats/Reranking/NevIR.json | 25 +- .../Reranking/RuBQReranking.json | 18 +- .../Reranking/SciDocsRR.json | 14 +- .../Reranking/StackOverflowDupQuestions.json | 18 +- .../Reranking/SyntecReranking.json | 18 +- .../Reranking/T2Reranking.json | 18 +- .../Reranking/VoyageMMarcoReranking.json | 19 +- .../Reranking/WebLINXCandidatesReranking.json | 114 +- .../WikipediaRerankingMultilingual.json | 306 +- .../Retrieval/AILACasedocs.json | 19 +- .../Retrieval/AILAStatutes.json | 19 +- .../Retrieval/ARCChallenge.json | 19 +- .../Retrieval/AlloprofRetrieval.json | 19 +- .../descriptive_stats/Retrieval/AlphaNLI.json | 19 +- .../Retrieval/AppsRetrieval.json | 29 +- .../Retrieval/ArguAna-PL.json | 19 +- mteb/descriptive_stats/Retrieval/ArguAna.json | 19 +- .../Retrieval/AutoRAGRetrieval.json | 19 +- .../Retrieval/BSARDRetrieval.json | 19 +- .../Retrieval/BelebeleRetrieval.json | 10931 +- .../Retrieval/BrightRetrieval.json | 418 +- .../Retrieval/COIRCodeSearchNetRetrieval.json | 201 +- .../CQADupstackAndroidRetrieval.json | 19 +- .../CQADupstackEnglishRetrieval.json | 19 +- .../Retrieval/CQADupstackGamingRetrieval.json | 19 +- .../Retrieval/CQADupstackGisRetrieval.json | 19 +- .../CQADupstackMathematicaRetrieval.json | 19 +- .../CQADupstackPhysicsRetrieval.json | 19 +- .../CQADupstackProgrammersRetrieval.json | 19 +- .../Retrieval/CQADupstackStatsRetrieval.json | 19 +- .../Retrieval/CQADupstackTexRetrieval.json | 19 +- .../Retrieval/CQADupstackUnixRetrieval.json | 19 +- .../CQADupstackWebmastersRetrieval.json | 19 +- .../CQADupstackWordpressRetrieval.json | 19 +- .../Retrieval/ClimateFEVER.json | 19 +- .../Retrieval/ClimateFEVERHardNegatives.json | 19 +- .../Retrieval/CmedqaRetrieval.json | 19 +- 
.../Retrieval/CodeEditSearchRetrieval.json | 404 +- .../Retrieval/CodeFeedbackMT.json | 29 +- .../Retrieval/CodeFeedbackST.json | 29 +- .../Retrieval/CodeSearchNetCCRetrieval.json | 201 +- .../Retrieval/CodeSearchNetRetrieval.json | 201 +- .../Retrieval/CodeTransOceanContest.json | 25 +- .../Retrieval/CodeTransOceanDL.json | 29 +- mteb/descriptive_stats/Retrieval/CosQA.json | 29 +- .../Retrieval/CovidRetrieval.json | 19 +- ...ossLingualSemanticDiscriminationWMT19.json | 57 +- ...ossLingualSemanticDiscriminationWMT21.json | 53 +- .../Retrieval/DBPedia-PL.json | 19 +- .../Retrieval/DBPedia-PLHardNegatives.json | 19 +- mteb/descriptive_stats/Retrieval/DBPedia.json | 38 +- .../Retrieval/DBPediaHardNegatives.json | 19 +- .../Retrieval/DanFeverRetrieval.json | 19 +- .../Retrieval/DuRetrieval.json | 19 +- .../Retrieval/EcomRetrieval.json | 19 +- mteb/descriptive_stats/Retrieval/EstQA.json | 19 +- mteb/descriptive_stats/Retrieval/FEVER.json | 19 +- .../Retrieval/FEVERHardNegatives.json | 19 +- .../Retrieval/FQuADRetrieval.json | 38 +- .../Retrieval/FaithDial.json | 19 +- .../Retrieval/FeedbackQARetrieval.json | 19 +- mteb/descriptive_stats/Retrieval/FiQA-PL.json | 19 +- .../descriptive_stats/Retrieval/FiQA2018.json | 57 +- .../Retrieval/GeorgianFAQRetrieval.json | 19 +- .../descriptive_stats/Retrieval/GerDaLIR.json | 19 +- .../Retrieval/GerDaLIRSmall.json | 19 +- .../Retrieval/GermanDPR.json | 19 +- .../Retrieval/GermanGovServiceRetrieval.json | 19 +- .../Retrieval/GermanQuAD-Retrieval.json | 19 +- .../Retrieval/GreekCivicsQA.json | 19 +- .../Retrieval/HagridRetrieval.json | 19 +- .../Retrieval/HellaSwag.json | 19 +- .../Retrieval/HotpotQA-PL.json | 19 +- .../Retrieval/HotpotQA-PLHardNegatives.json | 19 +- .../descriptive_stats/Retrieval/HotpotQA.json | 57 +- .../Retrieval/HotpotQAHardNegatives.json | 19 +- .../HunSum2AbstractiveRetrieval.json | 19 +- .../Retrieval/IndicQARetrieval.json | 216 +- .../Retrieval/JaGovFaqsRetrieval.json | 19 +- 
.../Retrieval/JaQuADRetrieval.json | 19 +- .../Retrieval/JaqketRetrieval.json | 29 +- .../Retrieval/Ko-StrategyQA.json | 19 +- .../Retrieval/LEMBNarrativeQARetrieval.json | 19 +- .../Retrieval/LEMBNeedleRetrieval.json | 152 +- .../Retrieval/LEMBPasskeyRetrieval.json | 152 +- .../Retrieval/LEMBQMSumRetrieval.json | 19 +- .../Retrieval/LEMBSummScreenFDRetrieval.json | 19 +- .../Retrieval/LEMBWikimQARetrieval.json | 19 +- .../descriptive_stats/Retrieval/LeCaRDv2.json | 19 +- .../LegalBenchConsumerContractsQA.json | 19 +- .../LegalBenchCorporateLobbying.json | 19 +- .../Retrieval/LegalQuAD.json | 19 +- .../Retrieval/LegalSummarization.json | 19 +- .../Retrieval/LitSearchRetrieval.json | 19 +- .../Retrieval/MIRACLRetrieval.json | 361 +- .../MIRACLRetrievalHardNegatives.json | 361 +- .../Retrieval/MLQARetrieval.json | 1900 +- .../Retrieval/MLQuestions.json | 38 +- .../Retrieval/MMarcoRetrieval.json | 19 +- .../Retrieval/MSMARCO-PL.json | 19 +- .../Retrieval/MSMARCO-PLHardNegatives.json | 19 +- mteb/descriptive_stats/Retrieval/MSMARCO.json | 57 +- .../Retrieval/MSMARCOHardNegatives.json | 19 +- .../Retrieval/MSMARCOv2.json | 89 + .../Retrieval/MedicalQARetrieval.json | 19 +- .../Retrieval/MedicalRetrieval.json | 19 +- .../Retrieval/MintakaRetrieval.json | 171 +- .../Retrieval/MrTidyRetrieval.json | 228 +- .../Retrieval/MultiLongDocRetrieval.json | 532 +- .../Retrieval/NFCorpus-PL.json | 19 +- .../descriptive_stats/Retrieval/NFCorpus.json | 31 +- .../NLPJournalAbsIntroRetrieval.json | 19 +- .../NLPJournalTitleAbsRetrieval.json | 19 +- .../NLPJournalTitleIntroRetrieval.json | 19 +- mteb/descriptive_stats/Retrieval/NQ-PL.json | 19 +- .../Retrieval/NQ-PLHardNegatives.json | 19 +- mteb/descriptive_stats/Retrieval/NQ.json | 19 +- .../Retrieval/NQHardNegatives.json | 19 +- .../Retrieval/NarrativeQARetrieval.json | 19 +- .../Retrieval/NeuCLIR2022Retrieval.json | 120 + .../NeuCLIR2022RetrievalHardNegatives.json | 76 +- .../Retrieval/NeuCLIR2023Retrieval.json | 120 + 
.../NeuCLIR2023RetrievalHardNegatives.json | 76 +- .../Retrieval/NorQuadRetrieval.json | 19 +- mteb/descriptive_stats/Retrieval/PIQA.json | 19 +- .../Retrieval/PublicHealthQA.json | 166 +- mteb/descriptive_stats/Retrieval/Quail.json | 19 +- .../descriptive_stats/Retrieval/Quora-PL.json | 34 +- .../Retrieval/Quora-PLHardNegatives.json | 15 +- .../Retrieval/QuoraRetrieval.json | 34 +- .../QuoraRetrievalHardNegatives.json | 15 +- .../descriptive_stats/Retrieval/RARbCode.json | 19 +- .../descriptive_stats/Retrieval/RARbMath.json | 19 +- .../Retrieval/RiaNewsRetrieval.json | 19 +- .../RiaNewsRetrievalHardNegatives.json | 19 +- .../Retrieval/RuBQRetrieval.json | 19 +- .../Retrieval/SCIDOCS-PL.json | 19 +- mteb/descriptive_stats/Retrieval/SCIDOCS.json | 19 +- mteb/descriptive_stats/Retrieval/SIQA.json | 19 +- .../Retrieval/SKQuadRetrieval.json | 19 +- .../Retrieval/SNLRetrieval.json | 19 +- .../Retrieval/SadeemQuestionRetrieval.json | 19 +- .../Retrieval/SciFact-PL.json | 19 +- mteb/descriptive_stats/Retrieval/SciFact.json | 38 +- .../Retrieval/SlovakSumRetrieval.json | 19 +- .../Retrieval/SpanishPassageRetrievalS2P.json | 19 +- .../Retrieval/SpanishPassageRetrievalS2S.json | 19 +- mteb/descriptive_stats/Retrieval/SpartQA.json | 19 +- .../Retrieval/StackOverflowQA.json | 29 +- .../StatcanDialogueDatasetRetrieval.json | 114 +- .../Retrieval/SweFaqRetrieval.json | 19 +- .../Retrieval/SwednRetrieval.json | 19 +- .../Retrieval/SyntecRetrieval.json | 19 +- .../Retrieval/SyntheticText2SQL.json | 29 +- .../Retrieval/T2Retrieval.json | 19 +- .../Retrieval/TRECCOVID-PL.json | 19 +- .../Retrieval/TRECCOVID.json | 19 +- .../Retrieval/TV2Nordretrieval.json | 19 +- .../Retrieval/TempReasonL1.json | 19 +- .../Retrieval/TempReasonL2Context.json | 19 +- .../Retrieval/TempReasonL2Fact.json | 19 +- .../Retrieval/TempReasonL2Pure.json | 19 +- .../Retrieval/TempReasonL3Context.json | 19 +- .../Retrieval/TempReasonL3Fact.json | 19 +- .../Retrieval/TempReasonL3Pure.json | 19 +- 
.../descriptive_stats/Retrieval/TopiOCQA.json | 17 +- .../Retrieval/TopiOCQAHardNegatives.json | 19 +- .../Retrieval/Touche2020.json | 20 - .../Retrieval/Touche2020Retrieval.v3.json | 31 +- .../Retrieval/TurHistQuadRetrieval.json | 19 +- .../Retrieval/TwitterHjerneRetrieval.json | 19 +- .../Retrieval/VideoRetrieval.json | 19 +- .../Retrieval/VieQuADRetrieval.json | 19 +- .../WikipediaRetrievalMultilingual.json | 323 +- .../Retrieval/WinoGrande.json | 19 +- mteb/descriptive_stats/Retrieval/XMarket.json | 72 +- .../Retrieval/XPQARetrieval.json | 687 +- .../Retrieval/XQuADRetrieval.json | 247 +- .../Retrieval/mFollowIR.json | 76 +- .../Retrieval/mFollowIRCrossLingual.json | 76 +- ...lowIRCrossLingualInstructionRetrieval.json | 116 - .../mFollowIRInstructionRetrieval.json | 116 - tests/test_TaskMetadata.py | 5 +- tests/test_benchmark/mock_tasks.py | 315 +- 200 files changed, 553170 insertions(+), 8777 deletions(-) create mode 100644 mteb/descriptive_stats/BitextMining/FloresBitextMining.json delete mode 100644 mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/MSMARCOv2.json create mode 100644 mteb/descriptive_stats/Retrieval/NeuCLIR2022Retrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NeuCLIR2023Retrieval.json delete mode 100644 mteb/descriptive_stats/Retrieval/Touche2020.json delete mode 100644 mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json delete mode 100644 mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index e7d886beb7..573a0efd12 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -24,21 +24,22 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: Number of queries and documents - num_queries: number of queries in the dataset - num_documents: Number of documents - 
number_of_characters: Total number of symbols in the dataset + num_relevant_docs: Number of relevant documents + num_documents: Number of documents min_document_length: Minimum length of documents average_document_length: Average length of documents max_document_length: Maximum length of documents unique_documents: Number of unique documents + num_queries: number of queries in the dataset min_query_length: Minimum length of queries average_query_length: Average length of queries max_query_length: Maximum length of queries unique_queries: Number of unique queries none_queries: Number of none queries + number_of_characters: Total number of symbols in the dataset min_relevant_docs_per_query: Minimum number of relevant documents per query average_relevant_docs_per_query: Average number of relevant documents per query max_relevant_docs_per_query: Maximum number of relevant documents per query @@ -50,28 +51,29 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): max_instruction_length: Maximum length of instructions unique_instructions: Number of unique instructions + num_top_ranked: Number of top ranked documents min_top_ranked_per_query: Minimum number of top ranked documents per query average_top_ranked_per_query: Average number of top ranked documents per query max_top_ranked_per_query: Maximum number of relevant documents per query """ num_samples: int - num_queries: int - num_documents: int - num_relevant_docs: int number_of_characters: int + num_documents: int min_document_length: int average_document_length: float max_document_length: int unique_documents: int + num_queries: int min_query_length: int average_query_length: float max_query_length: int unique_queries: int none_queries: int + num_relevant_docs: int min_relevant_docs_per_query: int average_relevant_docs_per_query: float max_relevant_docs_per_query: float @@ -85,6 +87,7 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): unique_instructions: int | None # this is for datasets that 
do reranking + num_top_ranked: int | None min_top_ranked_per_query: int | None average_top_ranked_per_query: float | None max_top_ranked_per_query: int | None @@ -418,7 +421,7 @@ def _calculate_metrics_from_split( if self.top_ranked is not None: top_ranked.update(process_docs(self.top_ranked, hf_subset, split)) else: - if "default" in self.queries: + if "default" in self.queries and split != "default": return self._calculate_metrics_from_split( split=split, hf_subset="default" ) @@ -430,9 +433,10 @@ def _calculate_metrics_from_split( if self.top_ranked is not None: top_ranked = self.top_ranked[split] - query_len, doc_len = calculate_length(queries, corpus) - num_documents = len(corpus) - num_queries = len(queries) + query_len = calculate_queries_length(queries) + doc_len = calculate_corpus_length(corpus) + num_documents = len(doc_len) if corpus is not None else 0 + num_queries = len(query_len) num_relevant_docs = sum(len(relevant_docs[qid]) for qid in relevant_docs) none_queries = sum(q is None or len(q) == 0 for q in queries.values()) @@ -440,8 +444,6 @@ def _calculate_metrics_from_split( qrels_lengths = [ len(relevant_docs[qid]) for qid in relevant_docs if qid in queries ] - num_qrels = sum(qrels_lengths) - qrels_per_doc = num_qrels / len(relevant_docs) if num_queries else 0 unique_qrels = len({doc for qid in relevant_docs for doc in relevant_docs[qid]}) # number of qrels that are not 0 num_qrels_non_zero = sum( @@ -468,49 +470,54 @@ def _calculate_metrics_from_split( if self.top_ranked is not None and num_queries: top_ranked_per_query = [len(docs) for docs in top_ranked.values()] + num_top_ranked = len(top_ranked_per_query) min_top_ranked_per_query = min(top_ranked_per_query) average_top_ranked_per_query = sum(top_ranked_per_query) / num_queries max_top_ranked_per_query = max(top_ranked_per_query) else: + num_top_ranked = None min_top_ranked_per_query = None average_top_ranked_per_query = None max_top_ranked_per_query = None return 
RetrievalDescriptiveStatistics( - number_of_characters=sum(query_len) + sum(doc_len), num_samples=num_documents + num_queries, - num_queries=num_queries, + number_of_characters=sum(query_len) + sum(doc_len), + # documents num_documents=num_documents, - num_relevant_docs=num_relevant_docs, min_document_length=min(doc_len), average_document_length=sum(doc_len) / num_documents, max_document_length=max(doc_len), unique_documents=len(set(corpus)), + # queries + num_queries=num_queries, min_query_length=min(query_len), average_query_length=sum(query_len) / num_queries, max_query_length=max(query_len), unique_queries=len(set(queries)), none_queries=none_queries, + # relevant docs + num_relevant_docs=num_relevant_docs, min_relevant_docs_per_query=min(qrels_lengths), average_relevant_docs_per_query=qrels_per_doc, max_relevant_docs_per_query=max(qrels_lengths), unique_relevant_docs=unique_qrels, + # instructions num_instructions=num_instructions, min_instruction_length=min_instruction_length, average_instruction_length=average_instruction_length, max_instruction_length=max_instruction_length, unique_instructions=unique_instructions, + # top ranked + num_top_ranked=num_top_ranked, min_top_ranked_per_query=min_top_ranked_per_query, average_top_ranked_per_query=average_top_ranked_per_query, max_top_ranked_per_query=max_top_ranked_per_query, ) -def calculate_length( - queries: dict[str, str], corpus: dict[str, str] -) -> tuple[list[int], list[int]]: +def calculate_queries_length(queries: dict[str, str]) -> list[int] | None: queries_lens = [] - doc_lens = [] for query in queries.values(): if query is None or len(query) == 0: continue @@ -519,15 +526,22 @@ def calculate_length( queries_lens.append(len(query)) else: queries_lens.extend([len(turn) for turn in query]) + return queries_lens + + +def calculate_corpus_length( + corpus: dict[str, str | dict[str, str]], +) -> list[int] | None: + doc_lens = [] if corpus is None: - return None, queries_lens + return None for doc in 
corpus.values(): if isinstance(doc, dict): - doc_lens.append(len(doc["text"])) + doc_lens.append(len(doc["text"]) + len(doc.get("title", ""))) else: doc_lens.append(len(doc)) - return doc_lens, queries_lens + return doc_lens def process_docs( diff --git a/mteb/abstasks/dataloaders.py b/mteb/abstasks/dataloaders.py index a8c165007e..ba5d180ca5 100644 --- a/mteb/abstasks/dataloaders.py +++ b/mteb/abstasks/dataloaders.py @@ -231,7 +231,7 @@ def _load_queries(self, config: str | None = None): self.queries = queries_ds def _load_qrels(self, split: str, config: str | None = None): - config = f"{config}-qrels" if config is not None else None + config = f"{config}-qrels" if config is not None else "default" if self.hf_repo: qrels_ds = load_dataset( self.hf_repo_qrels, diff --git a/mteb/descriptive_stats/BitextMining/FloresBitextMining.json b/mteb/descriptive_stats/BitextMining/FloresBitextMining.json new file mode 100644 index 0000000000..95faf2ee4b --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/FloresBitextMining.json @@ -0,0 +1,538373 @@ +{ + "devtest": { + "num_samples": 41908944, + "number_of_characters": 11221665014, + "unique_pairs": 41545149, + "min_sentence1_length": 10, + "average_sentence1_length": 133.88150527009222, + "max_sentence1_length": 597, + "unique_sentence1": 205519, + "min_sentence2_length": 10, + "average_sentence2_length": 133.88150527009222, + "max_sentence2_length": 597, + "unique_sentence2": 205519, + "hf_subset_descriptive_stats": { + "ace_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 238008, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ace_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 274979, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ace_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 242679, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ace_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 264833, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ace_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 238316, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ace_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 250775, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ace_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 270353, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ace_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 247382, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ace_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 253554, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ace_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 257191, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ace_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 269160, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ace_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 238554, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 
1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ace_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 265052, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ace_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 241029, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ace_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 234076, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ace_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 268618, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ace_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 240219, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + 
"average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ace_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 225194, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ace_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259612, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ace_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 243587, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ace_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 240348, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ace_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 257483, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ace_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 260957, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ace_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 235448, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ace_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 264968, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ace_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 229581, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ace_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 226734, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, 
+ "ace_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 272372, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ace_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 242913, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ace_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 250250, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ace_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247153, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ace_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 244672, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ace_Arab-plt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 272617, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ace_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 242401, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ace_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 252098, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ace_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 223698, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ace_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 240663, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ace_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 241080, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ace_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 257577, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ace_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 257383, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ace_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 238020, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ace_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 251104, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ace_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 260347, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ace_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246067, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ace_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 251311, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ace_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 240232, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ace_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 251443, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ace_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 246181, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ace_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 241993, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ace_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 252784, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ace_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 255039, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ace_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 248467, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ace_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 246368, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ace_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 220667, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ace_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 233851, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ace_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 239069, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ace_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 271301, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ace_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 227621, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + 
"max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ace_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249448, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ace_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 232264, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ace_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 243794, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ace_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 241540, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ace_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 244271, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + 
}, + "ace_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 249803, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ace_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 245895, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ace_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 253939, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ace_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 250593, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ace_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 257909, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ace_Arab-quy_Latn": { + "num_samples": 1012, + 
"number_of_characters": 252648, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ace_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 249725, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ace_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 260944, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ace_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 258685, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ace_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 259772, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ace_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 266798, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ace_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 241404, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ace_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 261515, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ace_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 248403, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ace_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 260192, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ace_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 248992, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ace_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 244858, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ace_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 198753, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ace_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 243808, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ace_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 251749, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ace_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 267759, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ace_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 177586, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ace_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 236278, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ace_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 258886, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ace_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 265754, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ace_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 250216, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ace_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 219602, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ace_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 250996, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ace_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 246881, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ace_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 248549, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ace_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 241681, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ace_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 258148, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ace_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253717, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ace_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 236038, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ace_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 276367, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ace_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 227928, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, 
+ "ace_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249445, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ace_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 269259, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ace_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 168564, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ace_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 257615, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ace_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 274790, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ace_Arab-sag_Latn": { + "num_samples": 1012, + 
"number_of_characters": 254242, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ace_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 235525, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ace_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 238794, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ace_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 265379, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ace_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 257402, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ace_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 256140, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ace_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 242175, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ace_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 249307, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ace_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 259128, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ace_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 239878, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ace_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 244903, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ace_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 250744, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ace_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 228049, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ace_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 270306, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ace_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 233783, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ace_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 281018, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ace_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 253818, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ace_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 245064, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ace_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 251412, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ace_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 244091, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ace_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 253279, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ace_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 225184, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ace_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 238878, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ace_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 268039, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ace_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 239977, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ace_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 241950, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + 
"max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ace_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 243759, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ace_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 250192, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ace_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257492, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ace_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 238465, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ace_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 225391, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + 
"ace_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 250882, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ace_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 274218, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ace_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 249661, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ace_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 253287, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ace_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 238524, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ace_Arab-shn_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 299905, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ace_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 277790, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ace_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 151640, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ace_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 237557, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ace_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 240206, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ace_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 264102, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ace_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 237726, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ace_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 241469, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ace_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 262730, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ace_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 242701, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ace_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 237730, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ace_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 154869, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ace_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 247984, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ace_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 245659, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ace_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 257764, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ace_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 238022, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ace_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 259166, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ace_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 253961, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ace_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 243331, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ace_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 202321, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ace_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 152316, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + 
"average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ace_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 239792, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ace_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 252878, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ace_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 244436, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ace_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 255551, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ace_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 254619, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + 
"max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ace_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 258793, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ace_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 242476, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ace_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 280632, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ace_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 257822, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ace_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 250238, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, 
+ "ace_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 246704, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ace_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 238049, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ace_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247025, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ace_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 246699, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ace_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 262272, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ace_Arab-smo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 264345, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ace_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 274656, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ace_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 259983, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ace_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 228682, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ace_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 265430, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ace_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 232901, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ace_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 254578, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ace_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 249167, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ace_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 246460, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ace_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 259224, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ace_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 269799, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ace_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 255360, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ace_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 223410, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ace_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 251688, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ace_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 242421, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ace_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 257876, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ace_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 241520, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ace_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 231209, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ace_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 251883, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ace_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 244843, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ace_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 244476, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ace_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 213675, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ace_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 248688, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ace_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 245778, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ace_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 245046, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ace_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 263283, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + 
"max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ace_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 284166, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 110.29743083003953, + "max_sentence1_length": 295, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bam_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 238008, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bam_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 289745, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bam_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 257445, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bam_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 279599, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, 
+ "bam_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 253082, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bam_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 265541, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bam_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 285119, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bam_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 262148, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bam_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 268320, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bam_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 
271957, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bam_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 283926, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bam_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 253320, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "bam_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 279818, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bam_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 255795, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bam_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 248842, + "unique_pairs": 1012, + "min_sentence1_length": 
42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bam_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 283384, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bam_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 254985, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bam_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 239960, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bam_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274378, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bam_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 258353, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + 
"max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bam_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 255114, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bam_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 272249, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bam_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 275723, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bam_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 250214, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "bam_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 279734, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + 
"min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bam_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 244347, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "bam_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 241500, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bam_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 287138, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "bam_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 257679, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bam_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 265016, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 
136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bam_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261919, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bam_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 259438, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bam_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 287383, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bam_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257167, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bam_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 266864, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + 
"unique_sentence2": 1011 + }, + "bam_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 238464, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bam_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 255429, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bam_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 255846, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bam_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 272343, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bam_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 272149, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bam_Latn-min_Arab": { + 
"num_samples": 1012, + "number_of_characters": 252786, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bam_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 265870, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bam_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 275113, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bam_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260833, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bam_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 266077, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bam_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 254998, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bam_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 266209, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bam_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 260947, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bam_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 256759, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bam_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 267550, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bam_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 269805, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bam_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 263233, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bam_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 261134, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bam_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 235433, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bam_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 248617, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "bam_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 253835, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + 
"max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bam_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 286067, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bam_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 242387, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "bam_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264214, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "bam_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 247030, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "bam_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 258560, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + 
"min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bam_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 256306, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bam_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 259037, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bam_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 264569, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bam_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 260661, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bam_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 268705, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bam_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 265359, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bam_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 272675, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bam_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 267414, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bam_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 264491, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bam_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 275710, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + 
"unique_sentence2": 1012 + }, + "bam_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273451, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bam_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 274538, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "bam_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 281564, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bam_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 256170, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bam_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 276281, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bam_Latn-mni_Beng": { + 
"num_samples": 1012, + "number_of_characters": 263169, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bam_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 274958, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bam_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 263758, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bam_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 259624, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bam_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 213519, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bam_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 258574, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bam_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 266515, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bam_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 282525, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "bam_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 192352, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "bam_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 251044, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bam_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 273652, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bam_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 280520, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bam_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 264982, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bam_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 234368, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bam_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 265762, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bam_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 261647, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + 
"max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "bam_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 263315, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bam_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 256447, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bam_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 272914, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bam_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268483, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bam_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 250804, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + 
"min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bam_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 291133, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bam_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 242694, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bam_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264211, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bam_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 284025, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bam_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 183330, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 17, + "average_sentence2_length": 
56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bam_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 272381, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bam_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 289556, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bam_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 269008, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bam_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 250291, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bam_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 253560, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + 
"unique_sentence2": 1012 + }, + "bam_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 280145, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bam_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 272168, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bam_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 270906, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bam_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 256941, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "bam_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 264073, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bam_Latn-nld_Latn": { + 
"num_samples": 1012, + "number_of_characters": 273894, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bam_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 254644, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bam_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259669, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bam_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 265510, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bam_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 242815, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bam_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 285072, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bam_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 248549, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "bam_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 295784, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "bam_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 268584, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bam_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 259830, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bam_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 266178, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bam_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 258857, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bam_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 268045, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bam_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 239950, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bam_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 253644, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bam_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 282805, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + 
"max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bam_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 254743, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "bam_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 256716, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bam_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 258525, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "bam_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 264958, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bam_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272258, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + 
"min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "bam_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 253231, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bam_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 240157, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "bam_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 265648, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bam_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 288984, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bam_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 264427, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 
136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bam_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 268053, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bam_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 253290, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bam_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 314671, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bam_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 292556, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bam_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 166406, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + 
"unique_sentence2": 1012 + }, + "bam_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 252323, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bam_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 254972, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bam_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 278868, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bam_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 252492, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "bam_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 256235, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bam_Latn-nso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 277496, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bam_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 257467, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bam_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 252496, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bam_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 169635, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bam_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 262750, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bam_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 260425, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bam_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 272530, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bam_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 252788, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "bam_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 273932, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "bam_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 268727, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bam_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 258097, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bam_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 217087, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bam_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 167082, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bam_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 254558, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bam_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 267644, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bam_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 259202, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + 
"max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bam_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 270317, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bam_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 269385, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bam_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 273559, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bam_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 257242, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bam_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 295398, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + 
"min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "bam_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 272588, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bam_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 265004, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "bam_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 261470, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bam_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 252815, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bam_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261791, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bam_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 261465, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bam_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 277038, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bam_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 279111, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bam_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 289422, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bam_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 274749, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + 
"unique_sentence2": 1012 + }, + "bam_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 243448, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bam_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 280196, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bam_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 247667, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bam_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 269344, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "bam_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 263933, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bam_Latn-ory_Orya": { + 
"num_samples": 1012, + "number_of_characters": 261226, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bam_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 273990, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bam_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 284565, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bam_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 270126, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bam_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 238176, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bam_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 266454, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bam_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 257187, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bam_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 272642, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bam_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 256286, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "bam_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 245975, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bam_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 266649, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bam_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259609, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "bam_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 259242, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bam_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 228441, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bam_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263454, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bam_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 260544, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + 
"max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bam_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 259812, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bam_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 278049, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bam_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 298932, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 124.88833992094861, + "max_sentence1_length": 372, + "unique_sentence1": 1009, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 274979, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 289745, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "dzo_Tibt-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 294416, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "dzo_Tibt-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 316570, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "dzo_Tibt-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 290053, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "dzo_Tibt-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 302512, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dzo_Tibt-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 322090, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 299119, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 305291, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 308928, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 320897, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "dzo_Tibt-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 290291, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 
1011 + }, + "dzo_Tibt-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 316789, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "dzo_Tibt-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 292766, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dzo_Tibt-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 285813, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "dzo_Tibt-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 320355, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dzo_Tibt-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 291956, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "dzo_Tibt-acm_Arab": { + "num_samples": 1012, + 
"number_of_characters": 276931, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 311349, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dzo_Tibt-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 295324, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dzo_Tibt-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 292085, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 309220, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "dzo_Tibt-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 312694, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "dzo_Tibt-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 287185, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "dzo_Tibt-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 316705, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 281318, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "dzo_Tibt-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 278471, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 324109, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 
161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "dzo_Tibt-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 294650, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dzo_Tibt-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 301987, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298890, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dzo_Tibt-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 296409, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dzo_Tibt-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 324354, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "dzo_Tibt-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294138, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dzo_Tibt-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 303835, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "dzo_Tibt-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 275435, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 292400, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "dzo_Tibt-est_Latn": { + "num_samples": 1012, + "number_of_characters": 292817, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "dzo_Tibt-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 309314, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 309120, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "dzo_Tibt-min_Arab": { + "num_samples": 1012, + "number_of_characters": 289757, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "dzo_Tibt-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 302841, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 312084, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297804, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dzo_Tibt-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 303048, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 291969, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dzo_Tibt-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 303180, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 297918, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "dzo_Tibt-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 293730, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dzo_Tibt-min_Latn": { + "num_samples": 1012, + "number_of_characters": 304521, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "dzo_Tibt-por_Latn": { + "num_samples": 1012, + "number_of_characters": 306776, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dzo_Tibt-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 300204, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "dzo_Tibt-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 298105, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ajp_Arab": { + "num_samples": 1012, + 
"number_of_characters": 272404, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 285588, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 290806, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 323038, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "dzo_Tibt-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 279358, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "dzo_Tibt-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301185, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "dzo_Tibt-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 284001, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "dzo_Tibt-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 295531, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "dzo_Tibt-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 293277, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "dzo_Tibt-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 296008, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 301540, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 
161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dzo_Tibt-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 297632, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 305676, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dzo_Tibt-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 302330, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "dzo_Tibt-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 309646, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "dzo_Tibt-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 304385, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "dzo_Tibt-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 301462, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "dzo_Tibt-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 312681, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dzo_Tibt-als_Latn": { + "num_samples": 1012, + "number_of_characters": 310422, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 311509, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "dzo_Tibt-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 318535, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + 
"average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "dzo_Tibt-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 293141, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 313252, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "dzo_Tibt-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 300140, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 311929, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dzo_Tibt-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 300729, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + 
"max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "dzo_Tibt-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 296595, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dzo_Tibt-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 250490, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 295545, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "dzo_Tibt-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 303486, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 319496, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, 
+ "dzo_Tibt-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 229323, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "dzo_Tibt-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 288015, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "dzo_Tibt-run_Latn": { + "num_samples": 1012, + "number_of_characters": 310623, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "dzo_Tibt-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 317491, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "dzo_Tibt-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 301953, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "dzo_Tibt-apc_Arab": { + "num_samples": 1012, + 
"number_of_characters": 271339, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 302733, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dzo_Tibt-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 298618, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "dzo_Tibt-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 300286, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dzo_Tibt-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 293418, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dzo_Tibt-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 309885, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "dzo_Tibt-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 305454, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dzo_Tibt-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 287775, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "dzo_Tibt-war_Latn": { + "num_samples": 1012, + "number_of_characters": 328104, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "dzo_Tibt-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 279665, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301182, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 
161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dzo_Tibt-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 320996, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "dzo_Tibt-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 220301, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "dzo_Tibt-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 309352, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dzo_Tibt-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 326527, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dzo_Tibt-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 305979, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "dzo_Tibt-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 287262, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "dzo_Tibt-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 290531, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "dzo_Tibt-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 317116, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "dzo_Tibt-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 309139, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dzo_Tibt-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 307877, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 293912, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "dzo_Tibt-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 301044, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "dzo_Tibt-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 310865, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dzo_Tibt-san_Deva": { + "num_samples": 1012, + "number_of_characters": 291615, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "dzo_Tibt-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296640, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + 
"max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "dzo_Tibt-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 302481, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 279786, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 322043, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "dzo_Tibt-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 285520, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 332755, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + 
}, + "dzo_Tibt-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 305555, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "dzo_Tibt-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 296801, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dzo_Tibt-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 303149, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 295828, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 305016, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ary_Arab": { + "num_samples": 1012, + 
"number_of_characters": 276921, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "dzo_Tibt-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 290615, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dzo_Tibt-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 319776, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 291714, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "dzo_Tibt-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 293687, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "dzo_Tibt-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 295496, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "dzo_Tibt-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 301929, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 309229, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "dzo_Tibt-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 290202, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dzo_Tibt-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 277128, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "dzo_Tibt-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 302619, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 
161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "dzo_Tibt-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 325955, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "dzo_Tibt-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 301398, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dzo_Tibt-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 305024, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "dzo_Tibt-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 290261, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dzo_Tibt-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 351642, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + 
"unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 329527, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "dzo_Tibt-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 203377, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "dzo_Tibt-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 289294, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 291943, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dzo_Tibt-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 315839, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 289463, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 293206, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "dzo_Tibt-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 314467, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "dzo_Tibt-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 294438, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 289467, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + 
"max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "dzo_Tibt-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 206606, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 299721, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dzo_Tibt-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 297396, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "dzo_Tibt-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 309501, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 289759, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + 
"dzo_Tibt-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 310903, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "dzo_Tibt-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 305698, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "dzo_Tibt-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 295068, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 254058, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "dzo_Tibt-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 204053, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "dzo_Tibt-awa_Deva": { + "num_samples": 1012, + 
"number_of_characters": 291529, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "dzo_Tibt-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 304615, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "dzo_Tibt-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 296173, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "dzo_Tibt-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 307288, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dzo_Tibt-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 306356, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "dzo_Tibt-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 310530, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "dzo_Tibt-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 294213, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 332369, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "dzo_Tibt-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 309559, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 301975, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "dzo_Tibt-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 298441, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 
161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dzo_Tibt-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 289786, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298762, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dzo_Tibt-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 298436, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "dzo_Tibt-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 314009, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "dzo_Tibt-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 316082, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + 
"unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "dzo_Tibt-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 326393, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "dzo_Tibt-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 311720, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "dzo_Tibt-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 280419, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "dzo_Tibt-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 317167, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "dzo_Tibt-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 284638, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "dzo_Tibt-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 306315, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "dzo_Tibt-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 300904, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "dzo_Tibt-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 298197, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "dzo_Tibt-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 310961, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 321536, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + 
"max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dzo_Tibt-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 307097, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "dzo_Tibt-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 275147, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "dzo_Tibt-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 303425, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "dzo_Tibt-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 294158, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "dzo_Tibt-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 309613, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, 
+ "dzo_Tibt-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 293257, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "dzo_Tibt-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 282946, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 303620, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dzo_Tibt-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296580, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "dzo_Tibt-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 296213, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "dzo_Tibt-heb_Hebr": { + "num_samples": 1012, + 
"number_of_characters": 265412, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "dzo_Tibt-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300425, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dzo_Tibt-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 297515, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "dzo_Tibt-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 296783, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "dzo_Tibt-som_Latn": { + "num_samples": 1012, + "number_of_characters": 315020, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "dzo_Tibt-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 335903, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 161.42094861660078, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "hin_Deva-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 242679, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "hin_Deva-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 257445, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "hin_Deva-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 294416, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hin_Deva-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 284270, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "hin_Deva-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 257753, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "hin_Deva-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 270212, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hin_Deva-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 289790, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "hin_Deva-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 266819, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "hin_Deva-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 272991, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hin_Deva-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 276628, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 
1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hin_Deva-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 288597, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "hin_Deva-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 257991, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "hin_Deva-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 284489, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "hin_Deva-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 260466, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hin_Deva-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 253513, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hin_Deva-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 288055, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hin_Deva-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 259656, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "hin_Deva-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 244631, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "hin_Deva-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279049, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hin_Deva-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 263024, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hin_Deva-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 259785, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hin_Deva-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 276920, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hin_Deva-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 280394, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hin_Deva-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 254885, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "hin_Deva-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 284405, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"hin_Deva-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 249018, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "hin_Deva-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 246171, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "hin_Deva-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 291809, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "hin_Deva-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 262350, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hin_Deva-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 269687, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "hin_Deva-kir_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 266590, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hin_Deva-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 264109, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hin_Deva-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 292054, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "hin_Deva-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261838, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hin_Deva-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 271535, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "hin_Deva-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 243135, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "hin_Deva-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 260100, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hin_Deva-est_Latn": { + "num_samples": 1012, + "number_of_characters": 260517, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "hin_Deva-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 277014, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hin_Deva-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 276820, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "hin_Deva-min_Arab": { + "num_samples": 1012, + "number_of_characters": 257457, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hin_Deva-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 270541, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hin_Deva-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 279784, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hin_Deva-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265504, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hin_Deva-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 270748, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hin_Deva-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 259669, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hin_Deva-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 270880, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hin_Deva-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 265618, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hin_Deva-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 261430, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hin_Deva-min_Latn": { + "num_samples": 1012, + "number_of_characters": 272221, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hin_Deva-por_Latn": { + "num_samples": 1012, + "number_of_characters": 274476, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hin_Deva-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 267904, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hin_Deva-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 265805, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hin_Deva-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 240104, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "hin_Deva-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 253288, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "hin_Deva-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 258506, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hin_Deva-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 290738, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "hin_Deva-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 247058, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "hin_Deva-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268885, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "hin_Deva-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 251701, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "hin_Deva-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 263231, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + 
}, + "hin_Deva-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 260977, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hin_Deva-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 263708, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hin_Deva-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 269240, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hin_Deva-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 265332, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hin_Deva-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 273376, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hin_Deva-knc_Latn": { + "num_samples": 1012, + 
"number_of_characters": 270030, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hin_Deva-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 277346, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "hin_Deva-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 272085, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hin_Deva-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 269162, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "hin_Deva-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 280381, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hin_Deva-als_Latn": { + "num_samples": 1012, + "number_of_characters": 278122, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hin_Deva-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 279209, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "hin_Deva-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 286235, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "hin_Deva-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 260841, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hin_Deva-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 280952, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hin_Deva-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 267840, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hin_Deva-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 279629, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hin_Deva-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 268429, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "hin_Deva-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 264295, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hin_Deva-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 218190, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "hin_Deva-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 263245, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hin_Deva-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 271186, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hin_Deva-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 287196, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "hin_Deva-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 197023, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "hin_Deva-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 255715, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "hin_Deva-run_Latn": { + "num_samples": 1012, + "number_of_characters": 278323, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "hin_Deva-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 285191, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "hin_Deva-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 269653, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "hin_Deva-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 239039, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "hin_Deva-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 270433, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hin_Deva-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 266318, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + 
"max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "hin_Deva-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 267986, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hin_Deva-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 261118, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hin_Deva-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 277585, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hin_Deva-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273154, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hin_Deva-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 255475, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, 
+ "hin_Deva-war_Latn": { + "num_samples": 1012, + "number_of_characters": 295804, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hin_Deva-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 247365, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hin_Deva-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268882, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hin_Deva-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 288696, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "hin_Deva-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 188001, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "hin_Deva-lij_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277052, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hin_Deva-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 294227, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hin_Deva-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 273679, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "hin_Deva-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 254962, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hin_Deva-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 258231, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hin_Deva-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 284816, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "hin_Deva-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 276839, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hin_Deva-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 275577, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hin_Deva-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 261612, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "hin_Deva-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 268744, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "hin_Deva-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 278565, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hin_Deva-san_Deva": { + "num_samples": 1012, + "number_of_characters": 259315, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "hin_Deva-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264340, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hin_Deva-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 270181, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "hin_Deva-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 247486, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hin_Deva-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 289743, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hin_Deva-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 253220, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "hin_Deva-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 300455, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "hin_Deva-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 273255, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hin_Deva-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 264501, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hin_Deva-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 270849, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hin_Deva-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 263528, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hin_Deva-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 272716, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hin_Deva-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 244621, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "hin_Deva-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 258315, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hin_Deva-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 287476, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + 
"max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "hin_Deva-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 259414, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "hin_Deva-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 261387, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "hin_Deva-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 263196, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "hin_Deva-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 269629, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hin_Deva-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276929, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, 
+ "hin_Deva-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 257902, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hin_Deva-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 244828, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "hin_Deva-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 270319, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hin_Deva-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 293655, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "hin_Deva-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 269098, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hin_Deva-lmo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 272724, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hin_Deva-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 257961, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hin_Deva-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 319342, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "hin_Deva-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 297227, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hin_Deva-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 171077, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "hin_Deva-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 256994, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hin_Deva-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 259643, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hin_Deva-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 283539, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "hin_Deva-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 257163, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "hin_Deva-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 260906, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hin_Deva-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 282167, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "hin_Deva-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 262138, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hin_Deva-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 257167, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hin_Deva-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 174306, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "hin_Deva-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 267421, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hin_Deva-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 265096, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + 
"unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hin_Deva-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 277201, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hin_Deva-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 257459, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "hin_Deva-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 278603, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "hin_Deva-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 273398, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "hin_Deva-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 262768, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hin_Deva-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 221758, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "hin_Deva-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 171753, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "hin_Deva-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 259229, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hin_Deva-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 272315, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hin_Deva-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 263873, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + 
"max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "hin_Deva-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 274988, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hin_Deva-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 274056, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "hin_Deva-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 278230, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hin_Deva-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 261913, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hin_Deva-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 300069, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + 
}, + "hin_Deva-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 277259, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hin_Deva-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 269675, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "hin_Deva-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 266141, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hin_Deva-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 257486, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hin_Deva-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266462, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hin_Deva-lug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 266136, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "hin_Deva-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 281709, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hin_Deva-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 283782, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "hin_Deva-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 294093, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hin_Deva-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 279420, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "hin_Deva-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 248119, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "hin_Deva-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 284867, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hin_Deva-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 252338, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "hin_Deva-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 274015, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "hin_Deva-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 268604, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hin_Deva-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 265897, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hin_Deva-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 278661, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "hin_Deva-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 289236, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hin_Deva-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 274797, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "hin_Deva-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 242847, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "hin_Deva-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 271125, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "hin_Deva-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 261858, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hin_Deva-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 277313, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hin_Deva-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 260957, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "hin_Deva-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 250646, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "hin_Deva-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 271320, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + 
"average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hin_Deva-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264280, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "hin_Deva-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 263913, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "hin_Deva-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 233112, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "hin_Deva-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268125, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hin_Deva-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 265215, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hin_Deva-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 264483, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hin_Deva-som_Latn": { + "num_samples": 1012, + "number_of_characters": 282720, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "hin_Deva-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 303603, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.50395256916997, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "khm_Khmr-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 264833, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "khm_Khmr-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 279599, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, 
+ "khm_Khmr-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 316570, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "khm_Khmr-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 284270, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "khm_Khmr-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 279907, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "khm_Khmr-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 292366, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "khm_Khmr-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 311944, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "khm_Khmr-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 288973, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "khm_Khmr-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 295145, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "khm_Khmr-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 298782, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "khm_Khmr-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 310751, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "khm_Khmr-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 280145, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "khm_Khmr-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 306643, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "khm_Khmr-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 282620, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "khm_Khmr-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 275667, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "khm_Khmr-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 310209, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "khm_Khmr-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 281810, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "khm_Khmr-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 266785, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "khm_Khmr-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301203, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "khm_Khmr-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 285178, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "khm_Khmr-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 281939, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "khm_Khmr-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 299074, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "khm_Khmr-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 302548, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "khm_Khmr-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 277039, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "khm_Khmr-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 306559, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "khm_Khmr-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 271172, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "khm_Khmr-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 268325, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "khm_Khmr-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 313963, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "khm_Khmr-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 284504, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "khm_Khmr-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 291841, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "khm_Khmr-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288744, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "khm_Khmr-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 286263, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "khm_Khmr-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 314208, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + 
"max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "khm_Khmr-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283992, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "khm_Khmr-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 293689, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "khm_Khmr-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 265289, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "khm_Khmr-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 282254, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "khm_Khmr-est_Latn": { + "num_samples": 1012, + "number_of_characters": 282671, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + 
}, + "khm_Khmr-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 299168, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "khm_Khmr-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 298974, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "khm_Khmr-min_Arab": { + "num_samples": 1012, + "number_of_characters": 279611, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "khm_Khmr-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 292695, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "khm_Khmr-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 301938, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "khm_Khmr-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 287658, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "khm_Khmr-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 292902, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "khm_Khmr-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 281823, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "khm_Khmr-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 293034, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "khm_Khmr-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 287772, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "khm_Khmr-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 283584, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "khm_Khmr-min_Latn": { + "num_samples": 1012, + "number_of_characters": 294375, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "khm_Khmr-por_Latn": { + "num_samples": 1012, + "number_of_characters": 296630, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "khm_Khmr-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 290058, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "khm_Khmr-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 287959, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "khm_Khmr-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 262258, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "khm_Khmr-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 275442, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "khm_Khmr-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 280660, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "khm_Khmr-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 312892, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "khm_Khmr-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 269212, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "khm_Khmr-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291039, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "khm_Khmr-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 273855, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "khm_Khmr-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 285385, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "khm_Khmr-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 283131, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "khm_Khmr-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 285862, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "khm_Khmr-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 291394, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "khm_Khmr-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 287486, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "khm_Khmr-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 295530, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "khm_Khmr-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 292184, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "khm_Khmr-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 299500, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "khm_Khmr-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 294239, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + 
"max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "khm_Khmr-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 291316, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "khm_Khmr-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 302535, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "khm_Khmr-als_Latn": { + "num_samples": 1012, + "number_of_characters": 300276, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "khm_Khmr-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 301363, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "khm_Khmr-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 308389, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, 
+ "khm_Khmr-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 282995, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "khm_Khmr-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 303106, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "khm_Khmr-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 289994, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "khm_Khmr-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 301783, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "khm_Khmr-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 290583, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "khm_Khmr-vec_Latn": { + "num_samples": 1012, + 
"number_of_characters": 286449, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "khm_Khmr-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 240344, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "khm_Khmr-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 285399, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "khm_Khmr-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 293340, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "khm_Khmr-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 309350, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "khm_Khmr-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 219177, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "khm_Khmr-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 277869, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "khm_Khmr-run_Latn": { + "num_samples": 1012, + "number_of_characters": 300477, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "khm_Khmr-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 307345, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "khm_Khmr-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 291807, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "khm_Khmr-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 261193, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "khm_Khmr-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 292587, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "khm_Khmr-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 288472, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "khm_Khmr-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 290140, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "khm_Khmr-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 283272, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "khm_Khmr-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 299739, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "khm_Khmr-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295308, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "khm_Khmr-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 277629, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "khm_Khmr-war_Latn": { + "num_samples": 1012, + "number_of_characters": 317958, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "khm_Khmr-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 269519, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "khm_Khmr-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291036, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "khm_Khmr-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 310850, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "khm_Khmr-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 210155, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "khm_Khmr-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 299206, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "khm_Khmr-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 316381, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "khm_Khmr-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 295833, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + 
"max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "khm_Khmr-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 277116, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "khm_Khmr-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 280385, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "khm_Khmr-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 306970, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "khm_Khmr-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 298993, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "khm_Khmr-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 297731, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + 
}, + "khm_Khmr-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 283766, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "khm_Khmr-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 290898, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "khm_Khmr-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 300719, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "khm_Khmr-san_Deva": { + "num_samples": 1012, + "number_of_characters": 281469, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "khm_Khmr-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286494, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "khm_Khmr-xho_Latn": { + "num_samples": 1012, + 
"number_of_characters": 292335, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "khm_Khmr-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 269640, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "khm_Khmr-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 311897, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "khm_Khmr-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 275374, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "khm_Khmr-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 322609, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "khm_Khmr-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 295409, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "khm_Khmr-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 286655, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "khm_Khmr-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 293003, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "khm_Khmr-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 285682, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "khm_Khmr-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 294870, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "khm_Khmr-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 266775, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "khm_Khmr-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 280469, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "khm_Khmr-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 309630, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "khm_Khmr-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 281568, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "khm_Khmr-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 283541, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "khm_Khmr-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 285350, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "khm_Khmr-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 291783, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "khm_Khmr-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299083, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "khm_Khmr-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 280056, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "khm_Khmr-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 266982, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "khm_Khmr-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 292473, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "khm_Khmr-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 315809, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "khm_Khmr-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 291252, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "khm_Khmr-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 294878, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "khm_Khmr-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 280115, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "khm_Khmr-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 341496, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + 
"max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "khm_Khmr-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 319381, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "khm_Khmr-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 193231, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "khm_Khmr-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 279148, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "khm_Khmr-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 281797, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "khm_Khmr-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 305693, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, 
+ "khm_Khmr-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 279317, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "khm_Khmr-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 283060, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "khm_Khmr-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 304321, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "khm_Khmr-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 284292, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "khm_Khmr-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 279321, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "khm_Khmr-zho_Hans": { + "num_samples": 1012, + 
"number_of_characters": 196460, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "khm_Khmr-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 289575, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "khm_Khmr-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 287250, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "khm_Khmr-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 299355, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "khm_Khmr-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 279613, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "khm_Khmr-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 300757, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "khm_Khmr-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 295552, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "khm_Khmr-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 284922, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "khm_Khmr-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 243912, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "khm_Khmr-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 193907, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "khm_Khmr-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 281383, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "khm_Khmr-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 294469, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "khm_Khmr-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 286027, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "khm_Khmr-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 297142, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "khm_Khmr-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 296210, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "khm_Khmr-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 300384, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "khm_Khmr-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 284067, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "khm_Khmr-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 322223, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "khm_Khmr-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 299413, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "khm_Khmr-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 291829, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "khm_Khmr-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 288295, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "khm_Khmr-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 279640, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "khm_Khmr-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288616, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "khm_Khmr-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 288290, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "khm_Khmr-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 303863, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "khm_Khmr-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 305936, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + 
"max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "khm_Khmr-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 316247, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "khm_Khmr-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 301574, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "khm_Khmr-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 270273, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "khm_Khmr-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 307021, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "khm_Khmr-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 274492, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + 
}, + "khm_Khmr-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 296169, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "khm_Khmr-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 290758, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "khm_Khmr-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 288051, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "khm_Khmr-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 300815, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "khm_Khmr-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 311390, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "khm_Khmr-azj_Latn": { + "num_samples": 1012, + 
"number_of_characters": 296951, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "khm_Khmr-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 265001, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "khm_Khmr-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 293279, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "khm_Khmr-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 284012, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "khm_Khmr-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 299467, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "khm_Khmr-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 283111, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "khm_Khmr-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 272800, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "khm_Khmr-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 293474, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "khm_Khmr-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286434, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "khm_Khmr-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 286067, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "khm_Khmr-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 255266, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "khm_Khmr-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290279, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "khm_Khmr-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 287369, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "khm_Khmr-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 286637, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "khm_Khmr-som_Latn": { + "num_samples": 1012, + "number_of_characters": 304874, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "khm_Khmr-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 325757, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 151.39525691699606, + "max_sentence1_length": 507, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "mag_Deva-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 238316, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "mag_Deva-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 253082, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "mag_Deva-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 290053, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mag_Deva-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 257753, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "mag_Deva-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 279907, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + 
"average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "mag_Deva-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 265849, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mag_Deva-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 285427, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "mag_Deva-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 262456, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "mag_Deva-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 268628, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mag_Deva-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 272265, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 
394, + "unique_sentence2": 1012 + }, + "mag_Deva-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 284234, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "mag_Deva-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 253628, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "mag_Deva-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 280126, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "mag_Deva-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 256103, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mag_Deva-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 249150, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mag_Deva-spa_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 283692, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mag_Deva-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 255293, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "mag_Deva-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 240268, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "mag_Deva-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274686, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mag_Deva-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 258661, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mag_Deva-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 255422, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mag_Deva-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 272557, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mag_Deva-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 276031, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mag_Deva-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 250522, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "mag_Deva-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 280042, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mag_Deva-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 244655, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "mag_Deva-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 241808, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "mag_Deva-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 287446, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "mag_Deva-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 257987, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mag_Deva-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 265324, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "mag_Deva-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262227, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + 
"max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mag_Deva-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 259746, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mag_Deva-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 287691, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "mag_Deva-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257475, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mag_Deva-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 267172, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "mag_Deva-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 238772, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + 
"min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "mag_Deva-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 255737, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mag_Deva-est_Latn": { + "num_samples": 1012, + "number_of_characters": 256154, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "mag_Deva-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 272651, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mag_Deva-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 272457, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "mag_Deva-min_Arab": { + "num_samples": 1012, + "number_of_characters": 253094, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 
124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mag_Deva-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 266178, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mag_Deva-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 275421, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mag_Deva-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261141, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mag_Deva-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 266385, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mag_Deva-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 255306, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + 
"unique_sentence2": 1012 + }, + "mag_Deva-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 266517, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mag_Deva-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 261255, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mag_Deva-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 257067, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mag_Deva-min_Latn": { + "num_samples": 1012, + "number_of_characters": 267858, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mag_Deva-por_Latn": { + "num_samples": 1012, + "number_of_characters": 270113, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mag_Deva-sun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 263541, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mag_Deva-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 261442, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mag_Deva-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 235741, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "mag_Deva-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 248925, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "mag_Deva-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 254143, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mag_Deva-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 286375, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "mag_Deva-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 242695, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "mag_Deva-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264522, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "mag_Deva-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 247338, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "mag_Deva-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 258868, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mag_Deva-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 256614, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mag_Deva-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 259345, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mag_Deva-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 264877, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mag_Deva-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 260969, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mag_Deva-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 269013, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mag_Deva-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 265667, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + 
"max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mag_Deva-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 272983, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "mag_Deva-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 267722, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mag_Deva-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 264799, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "mag_Deva-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 276018, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mag_Deva-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273759, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + 
"min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mag_Deva-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 274846, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "mag_Deva-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 281872, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "mag_Deva-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 256478, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mag_Deva-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 276589, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mag_Deva-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 263477, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mag_Deva-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 275266, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mag_Deva-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 264066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "mag_Deva-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 259932, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mag_Deva-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 213827, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "mag_Deva-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 258882, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "mag_Deva-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 266823, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mag_Deva-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 282833, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "mag_Deva-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 192660, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "mag_Deva-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 251352, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "mag_Deva-run_Latn": { + "num_samples": 1012, + "number_of_characters": 273960, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "mag_Deva-tam_Taml": { + 
"num_samples": 1012, + "number_of_characters": 280828, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "mag_Deva-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 265290, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "mag_Deva-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 234676, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "mag_Deva-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 266070, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mag_Deva-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 261955, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "mag_Deva-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 263623, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mag_Deva-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 256755, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mag_Deva-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 273222, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mag_Deva-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268791, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mag_Deva-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 251112, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mag_Deva-war_Latn": { + "num_samples": 1012, + "number_of_characters": 291441, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mag_Deva-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 243002, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mag_Deva-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264519, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mag_Deva-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 284333, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "mag_Deva-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 183638, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "mag_Deva-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 272689, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + 
"max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mag_Deva-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 289864, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mag_Deva-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 269316, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "mag_Deva-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 250599, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mag_Deva-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 253868, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mag_Deva-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 280453, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "mag_Deva-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 272476, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mag_Deva-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 271214, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mag_Deva-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 257249, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "mag_Deva-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 264381, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "mag_Deva-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 274202, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mag_Deva-san_Deva": { + "num_samples": 1012, + "number_of_characters": 254952, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "mag_Deva-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259977, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mag_Deva-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 265818, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "mag_Deva-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 243123, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mag_Deva-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 285380, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + 
"unique_sentence2": 1012 + }, + "mag_Deva-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 248857, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "mag_Deva-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 296092, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "mag_Deva-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 268892, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mag_Deva-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 260138, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mag_Deva-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 266486, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mag_Deva-tel_Telu": { + 
"num_samples": 1012, + "number_of_characters": 259165, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mag_Deva-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 268353, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mag_Deva-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 240258, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "mag_Deva-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 253952, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mag_Deva-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 283113, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "mag_Deva-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 255051, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "mag_Deva-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 257024, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "mag_Deva-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 258833, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "mag_Deva-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 265266, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mag_Deva-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272566, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "mag_Deva-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 253539, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mag_Deva-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 240465, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "mag_Deva-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 265956, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mag_Deva-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 289292, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "mag_Deva-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 264735, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mag_Deva-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 268361, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + 
"max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mag_Deva-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 253598, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mag_Deva-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 314979, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "mag_Deva-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 292864, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mag_Deva-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 166714, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "mag_Deva-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 252631, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mag_Deva-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 255280, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mag_Deva-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 279176, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "mag_Deva-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 252800, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "mag_Deva-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 256543, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mag_Deva-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 277804, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "mag_Deva-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 257775, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mag_Deva-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 252804, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mag_Deva-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 169943, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "mag_Deva-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 263058, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mag_Deva-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 260733, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + 
"unique_sentence2": 1012 + }, + "mag_Deva-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 272838, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mag_Deva-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 253096, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "mag_Deva-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 274240, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "mag_Deva-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 269035, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "mag_Deva-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 258405, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mag_Deva-tir_Ethi": { + 
"num_samples": 1012, + "number_of_characters": 217395, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "mag_Deva-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 167390, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "mag_Deva-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 254866, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mag_Deva-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 267952, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mag_Deva-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 259510, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "mag_Deva-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 270625, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mag_Deva-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 269693, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "mag_Deva-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 273867, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mag_Deva-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 257550, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mag_Deva-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 295706, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "mag_Deva-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 272896, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mag_Deva-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 265312, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "mag_Deva-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 261778, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mag_Deva-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 253123, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mag_Deva-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262099, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mag_Deva-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 261773, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + 
"max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "mag_Deva-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 277346, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mag_Deva-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 279419, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "mag_Deva-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 289730, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mag_Deva-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 275057, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "mag_Deva-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 243756, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + 
"min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "mag_Deva-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 280504, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mag_Deva-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 247975, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "mag_Deva-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 269652, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "mag_Deva-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 264241, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mag_Deva-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 261534, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mag_Deva-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 274298, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "mag_Deva-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 284873, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mag_Deva-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 270434, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "mag_Deva-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 238484, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "mag_Deva-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 266762, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + 
"unique_sentence2": 1012 + }, + "mag_Deva-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 257495, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mag_Deva-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 272950, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mag_Deva-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 256594, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "mag_Deva-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 246283, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "mag_Deva-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 266957, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mag_Deva-bak_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 259917, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "mag_Deva-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 259550, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "mag_Deva-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 228749, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "mag_Deva-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263762, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mag_Deva-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 260852, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mag_Deva-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 260120, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mag_Deva-som_Latn": { + "num_samples": 1012, + "number_of_characters": 278357, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "mag_Deva-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 299240, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 125.19268774703558, + "max_sentence1_length": 343, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "pap_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 250775, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "pap_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 265541, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "pap_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 302512, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pap_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 270212, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "pap_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 292366, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "pap_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 265849, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "pap_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 297886, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "pap_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 274915, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "pap_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 281087, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pap_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 284724, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pap_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 296693, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "pap_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 266087, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "pap_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 292585, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "pap_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 268562, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pap_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 261609, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pap_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 296151, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pap_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 267752, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "pap_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 252727, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "pap_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287145, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pap_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 271120, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pap_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 267881, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pap_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 285016, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pap_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 288490, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "pap_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 262981, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "pap_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 292501, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pap_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 257114, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "pap_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 254267, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "pap_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 299905, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "pap_Latn-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 270446, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pap_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 277783, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "pap_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274686, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pap_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 272205, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pap_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 300150, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "pap_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269934, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pap_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 279631, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "pap_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 251231, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "pap_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 268196, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pap_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 268613, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "pap_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 285110, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pap_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 284916, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "pap_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 265553, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pap_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 278637, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pap_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 287880, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pap_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273600, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pap_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 278844, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pap_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 267765, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pap_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 278976, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "pap_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 273714, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pap_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 269526, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pap_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 280317, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pap_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 282572, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pap_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 276000, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pap_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 273901, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pap_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 248200, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "pap_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 261384, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "pap_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 266602, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pap_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 298834, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "pap_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 255154, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "pap_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276981, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + 
"unique_sentence2": 1012 + }, + "pap_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 259797, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "pap_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 271327, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pap_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 269073, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pap_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 271804, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pap_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 277336, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pap_Latn-fao_Latn": { + 
"num_samples": 1012, + "number_of_characters": 273428, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pap_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 281472, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pap_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 278126, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pap_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 285442, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "pap_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 280181, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pap_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 277258, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "pap_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 288477, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pap_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 286218, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "pap_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 287305, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "pap_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 294331, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "pap_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 268937, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pap_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 289048, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pap_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 275936, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pap_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 287725, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pap_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 276525, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "pap_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 272391, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pap_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 226286, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "pap_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 271341, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "pap_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 279282, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pap_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 295292, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "pap_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 205119, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "pap_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 263811, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "pap_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 286419, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "pap_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 293287, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "pap_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 277749, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "pap_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 247135, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "pap_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 278529, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pap_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 274414, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "pap_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 276082, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pap_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 269214, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pap_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 285681, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "pap_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281250, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pap_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 263571, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pap_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 303900, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pap_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 255461, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pap_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276978, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pap_Latn-fra_Latn": { + 
"num_samples": 1012, + "number_of_characters": 296792, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "pap_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 196097, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "pap_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 285148, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pap_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 302323, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pap_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 281775, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "pap_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 263058, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pap_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 266327, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pap_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 292912, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "pap_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 284935, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pap_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 283673, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "pap_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 269708, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "pap_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 276840, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "pap_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 286661, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pap_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 267411, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "pap_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272436, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pap_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 278277, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "pap_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 255582, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pap_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 297839, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pap_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 261316, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "pap_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 308551, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "pap_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 281351, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pap_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 272597, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pap_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 278945, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pap_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 271624, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pap_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 280812, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pap_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 252717, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "pap_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 266411, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pap_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 295572, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "pap_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 267510, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "pap_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 269483, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "pap_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 271292, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + 
"unique_sentence2": 1012 + }, + "pap_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 277725, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pap_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285025, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "pap_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 265998, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pap_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 252924, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "pap_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 278415, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pap_Latn-gla_Latn": { + 
"num_samples": 1012, + "number_of_characters": 301751, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "pap_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 277194, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pap_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 280820, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pap_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 266057, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pap_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 327438, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "pap_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 305323, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pap_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 179173, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "pap_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 265090, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pap_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 267739, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pap_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 291635, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "pap_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 265259, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "pap_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 269002, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pap_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 290263, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "pap_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 270234, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pap_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 265263, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pap_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 182402, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "pap_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 275517, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pap_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 273192, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pap_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 285297, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pap_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 265555, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "pap_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 286699, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "pap_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 281494, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "pap_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 270864, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pap_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 229854, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "pap_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 179849, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "pap_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 267325, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pap_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 280411, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pap_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 271969, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "pap_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 283084, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pap_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 282152, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "pap_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 286326, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "pap_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 270009, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pap_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 308165, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "pap_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 285355, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pap_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 277771, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "pap_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 274237, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pap_Latn-guj_Gujr": { + 
"num_samples": 1012, + "number_of_characters": 265582, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pap_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274558, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pap_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 274232, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "pap_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 289805, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pap_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 291878, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "pap_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 302189, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pap_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 287516, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "pap_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 256215, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "pap_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 292963, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pap_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 260434, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "pap_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 282111, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "pap_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 276700, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pap_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 273993, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pap_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 286757, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "pap_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 297332, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pap_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 282893, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "pap_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 250943, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "pap_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 279221, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "pap_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 269954, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pap_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 285409, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pap_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 269053, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "pap_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 258742, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "pap_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 279416, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pap_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272376, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "pap_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 272009, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "pap_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 241208, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "pap_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276221, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pap_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 273311, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pap_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 272579, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pap_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 290816, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "pap_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 311699, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 137.50395256916997, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + 
"unique_sentence2": 1012 + }, + "sot_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 270353, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "sot_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 285119, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "sot_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 322090, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sot_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 289790, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "sot_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 311944, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "sot_Latn-mag_Deva": { + 
"num_samples": 1012, + "number_of_characters": 285427, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "sot_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 297886, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sot_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 294493, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "sot_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 300665, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sot_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 304302, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sot_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 316271, + "unique_pairs": 1012, 
+ "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "sot_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 285665, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "sot_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 312163, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "sot_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 288140, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sot_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 281187, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sot_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 315729, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 
156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sot_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 287330, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "sot_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 272305, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "sot_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306723, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sot_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 290698, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sot_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 287459, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sot_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 304594, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sot_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 308068, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sot_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 282559, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "sot_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 312079, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "sot_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 276692, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + 
"average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "sot_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 273845, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "sot_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 319483, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "sot_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 290024, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sot_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 297361, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "sot_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294264, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + 
"max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sot_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 291783, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sot_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 319728, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "sot_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289512, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sot_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 299209, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "sot_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 270809, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + 
"sot_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 287774, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sot_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 288191, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "sot_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 304688, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sot_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 304494, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "sot_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 285131, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sot_Latn-pol_Latn": { + "num_samples": 1012, + 
"number_of_characters": 298215, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sot_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 307458, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sot_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293178, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sot_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 298422, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sot_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 287343, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sot_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 298554, + "unique_pairs": 1012, + 
"min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "sot_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 293292, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sot_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 289104, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sot_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 299895, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sot_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 302150, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sot_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 295578, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 
156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sot_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 293479, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sot_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 267778, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "sot_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 280962, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "sot_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 286180, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sot_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 318412, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "sot_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 274732, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "sot_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296559, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "sot_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 279375, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "sot_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 290905, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sot_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 288651, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sot_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 291382, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sot_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 296914, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sot_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 293006, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sot_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 301050, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sot_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 297704, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + 
"max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sot_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 305020, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "sot_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 299759, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sot_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 296836, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "sot_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 308055, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sot_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 305796, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + 
"sot_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 306883, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "sot_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 313909, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "sot_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 288515, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sot_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 308626, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sot_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 295514, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sot_Latn-ron_Latn": { + "num_samples": 1012, + 
"number_of_characters": 307303, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sot_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 296103, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "sot_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 291969, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sot_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 245864, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "sot_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 290919, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sot_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 298860, + "unique_pairs": 1012, + 
"min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sot_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 314870, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "sot_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 224697, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "sot_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 283389, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "sot_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 305997, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "sot_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 312865, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 
156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "sot_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 297327, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "sot_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 266713, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "sot_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 298107, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sot_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 293992, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "sot_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 295660, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sot_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 288792, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sot_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 305259, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sot_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300828, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sot_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 283149, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sot_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 323478, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + 
"average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sot_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 275039, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sot_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296556, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sot_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 316370, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "sot_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 215675, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "sot_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 304726, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + 
"max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sot_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 321901, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sot_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 301353, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "sot_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 282636, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sot_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 285905, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sot_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 312490, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + 
"sot_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 304513, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sot_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 303251, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sot_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 289286, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "sot_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 296418, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "sot_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 306239, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sot_Latn-san_Deva": { + "num_samples": 1012, + 
"number_of_characters": 286989, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "sot_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292014, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sot_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 297855, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "sot_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 275160, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sot_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 317417, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sot_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 280894, + "unique_pairs": 1012, + 
"min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "sot_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 328129, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "sot_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 300929, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sot_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 292175, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sot_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 298523, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sot_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 291202, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 
156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sot_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 300390, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sot_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 272295, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "sot_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 285989, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sot_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 315150, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "sot_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 287088, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "sot_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 289061, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "sot_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 290870, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "sot_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 297303, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sot_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304603, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "sot_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 285576, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + 
"average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sot_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 272502, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "sot_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 297993, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sot_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 321329, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "sot_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 296772, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sot_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 300398, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + 
"max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sot_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 285635, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sot_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 347016, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "sot_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 324901, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sot_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 198751, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "sot_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 284668, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + 
"sot_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 287317, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sot_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 311213, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "sot_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 284837, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "sot_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 288580, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sot_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 309841, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "sot_Latn-sin_Sinh": { + "num_samples": 1012, + 
"number_of_characters": 289812, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sot_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 284841, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sot_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 201980, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "sot_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 295095, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sot_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 292770, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sot_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 304875, + "unique_pairs": 1012, + 
"min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sot_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 285133, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "sot_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 306277, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "sot_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 301072, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "sot_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 290442, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sot_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 249432, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 
156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "sot_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 199427, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "sot_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 286903, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sot_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 299989, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sot_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 291547, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "sot_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 302662, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sot_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 301730, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "sot_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 305904, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "sot_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 289587, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sot_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 327743, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "sot_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 304933, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sot_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 297349, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "sot_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 293815, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sot_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 285160, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sot_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294136, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sot_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 293810, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + 
"max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "sot_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 309383, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sot_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 311456, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "sot_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 321767, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sot_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 307094, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "sot_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 275793, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + 
"sot_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 312541, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sot_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 280012, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "sot_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 301689, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "sot_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 296278, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sot_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 293571, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sot_Latn-sna_Latn": { + "num_samples": 1012, + 
"number_of_characters": 306335, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "sot_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 316910, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sot_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 302471, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "sot_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 270521, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "sot_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 298799, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "sot_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 289532, + "unique_pairs": 1012, + 
"min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sot_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 304987, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "sot_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 288631, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "sot_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 278320, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "sot_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 298994, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sot_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291954, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 
156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "sot_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 291587, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "sot_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 260786, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "sot_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295799, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sot_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 292889, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sot_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 292157, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sot_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 310394, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "sot_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 331277, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 156.8498023715415, + "max_sentence1_length": 461, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tur_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 247382, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tur_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 262148, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tur_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 299119, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + 
"average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tur_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 266819, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tur_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 288973, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tur_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 262456, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tur_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 274915, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tur_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 294493, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + 
"max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tur_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 277694, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tur_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 281331, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tur_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 293300, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tur_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 262694, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tur_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 289192, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + 
"tur_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 265169, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tur_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 258216, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tur_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 292758, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tur_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 264359, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tur_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 249334, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tur_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 283752, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tur_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 267727, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tur_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 264488, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tur_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 281623, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tur_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 285097, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tur_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 259588, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tur_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 289108, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tur_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 253721, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tur_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 250874, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tur_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 296512, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tur_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 267053, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tur_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 274390, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tur_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271293, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tur_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 268812, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tur_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 296757, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tur_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266541, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tur_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 276238, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tur_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 247838, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tur_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 264803, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tur_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 265220, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tur_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 281717, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tur_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 281523, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tur_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 262160, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tur_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 275244, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tur_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 284487, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tur_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270207, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tur_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 275451, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tur_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 264372, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tur_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 275583, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tur_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 270321, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tur_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 266133, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + 
"tur_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 276924, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tur_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 279179, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tur_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 272607, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tur_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 270508, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tur_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 244807, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tur_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 
257991, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tur_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 263209, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tur_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 295441, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tur_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 251761, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tur_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273588, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tur_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 256404, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tur_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 267934, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tur_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 265680, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tur_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 268411, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tur_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 273943, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tur_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 270035, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + 
"max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tur_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 278079, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tur_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 274733, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tur_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 282049, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tur_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 276788, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tur_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 273865, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tur_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 285084, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tur_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 282825, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tur_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 283912, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tur_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 290938, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tur_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 265544, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tur_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 285655, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tur_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 272543, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tur_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 284332, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tur_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 273132, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tur_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 268998, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 
1012 + }, + "tur_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 222893, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tur_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 267948, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tur_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 275889, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tur_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 291899, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tur_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 201726, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tur_Latn-mos_Latn": { + "num_samples": 1012, + 
"number_of_characters": 260418, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tur_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 283026, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tur_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 289894, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tur_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 274356, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tur_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 243742, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tur_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 275136, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tur_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 271021, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tur_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 272689, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tur_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 265821, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tur_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 282288, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tur_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277857, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tur_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 260178, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tur_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 300507, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tur_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 252068, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tur_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273585, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tur_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 293399, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 
1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tur_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 192704, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tur_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 281755, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tur_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 298930, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tur_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 278382, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tur_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 259665, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tur_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 262934, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tur_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 289519, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tur_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 281542, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tur_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 280280, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tur_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 266315, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + 
"unique_sentence2": 1012 + }, + "tur_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 273447, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tur_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 283268, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tur_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 264018, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tur_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269043, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tur_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 274884, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tur_Latn-ars_Arab": { + 
"num_samples": 1012, + "number_of_characters": 252189, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tur_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 294446, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tur_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 257923, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tur_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 305158, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tur_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 277958, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tur_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 269204, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tur_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 275552, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tur_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 268231, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tur_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 277419, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tur_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 249324, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tur_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 263018, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tur_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 292179, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tur_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 264117, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tur_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 266090, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tur_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 267899, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tur_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 274332, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + 
"max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tur_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281632, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tur_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 262605, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tur_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 249531, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tur_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 275022, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tur_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 298358, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tur_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 273801, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tur_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 277427, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tur_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 262664, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tur_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 324045, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tur_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 301930, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 
164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tur_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 175780, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tur_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 261697, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tur_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 264346, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tur_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 288242, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tur_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 261866, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + 
"unique_sentence2": 1012 + }, + "tur_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 265609, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tur_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 286870, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tur_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 266841, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tur_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 261870, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tur_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 179009, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tur_Latn-ast_Latn": { + "num_samples": 
1012, + "number_of_characters": 272124, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tur_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 269799, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tur_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 281904, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tur_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 262162, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tur_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 283306, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tur_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 278101, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tur_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 267471, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tur_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 226461, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tur_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 176456, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tur_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 263932, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tur_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 277018, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tur_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 268576, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tur_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 279691, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tur_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 278759, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tur_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 282933, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tur_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 266616, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 
1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tur_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 304772, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tur_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 281962, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tur_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 274378, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tur_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 270844, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tur_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 262189, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tur_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271165, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tur_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 270839, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tur_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 286412, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tur_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 288485, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tur_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 298796, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + 
"unique_sentence2": 1012 + }, + "tur_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 284123, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tur_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 252822, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tur_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 289570, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tur_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 257041, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tur_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 278718, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tur_Latn-luo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 273307, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tur_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 270600, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tur_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 283364, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tur_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 293939, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tur_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 279500, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tur_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 247550, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tur_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 275828, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tur_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 266561, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tur_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 282016, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tur_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 265660, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tur_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 255349, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tur_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 276023, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tur_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268983, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tur_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 268616, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tur_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 237815, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tur_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272828, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 
386, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tur_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 269918, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tur_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 269186, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tur_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 287423, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tur_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 308306, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 134.151185770751, + "max_sentence1_length": 386, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ace_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 253554, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ace_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 268320, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ace_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 305291, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ace_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 272991, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ace_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 295145, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ace_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 268628, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + 
"ace_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 281087, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ace_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 300665, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ace_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 277694, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ace_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 287503, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ace_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 299472, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ace_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 268866, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ace_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 295364, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ace_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 271341, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ace_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 264388, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ace_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 298930, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ace_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 270531, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 
1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ace_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 255506, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ace_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289924, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ace_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 273899, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ace_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 270660, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ace_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 287795, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + 
"unique_sentence2": 1012 + }, + "ace_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 291269, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ace_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 265760, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ace_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 295280, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ace_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 259893, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ace_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 257046, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ace_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 302684, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ace_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 273225, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ace_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 280562, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ace_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277465, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ace_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 274984, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ace_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 302929, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 
355, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ace_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272713, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ace_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 282410, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ace_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 254010, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ace_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 270975, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ace_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 271392, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + 
"max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ace_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 287889, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ace_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 287695, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ace_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 268332, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ace_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 281416, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ace_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 290659, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ace_Latn-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 276379, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ace_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 281623, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ace_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 270544, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ace_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 281755, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ace_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 276493, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ace_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 272305, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ace_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 283096, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ace_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 285351, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ace_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 278779, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ace_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 276680, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ace_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 250979, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + 
"average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ace_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 264163, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ace_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 269381, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ace_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 301613, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ace_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 257933, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ace_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279760, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + 
"ace_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 262576, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ace_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 274106, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ace_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 271852, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ace_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 274583, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ace_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 280115, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ace_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 276207, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ace_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 284251, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ace_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 280905, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ace_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 288221, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ace_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 282960, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ace_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 280037, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 
1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ace_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 291256, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ace_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 288997, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ace_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 290084, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ace_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 297110, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ace_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 271716, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + 
"unique_sentence2": 1012 + }, + "ace_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 291827, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ace_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 278715, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ace_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 290504, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ace_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 279304, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ace_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 275170, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ace_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 229065, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ace_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 274120, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ace_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 282061, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ace_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 298071, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ace_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 207898, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ace_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 266590, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ace_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 289198, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ace_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 296066, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ace_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 280528, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ace_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 249914, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ace_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 281308, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ace_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 277193, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ace_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 278861, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ace_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 271993, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ace_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 288460, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ace_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284029, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ace_Latn-taq_Latn": { + "num_samples": 1012, + 
"number_of_characters": 266350, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ace_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 306679, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ace_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 258240, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ace_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279757, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ace_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 299571, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ace_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 198876, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ace_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 287927, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ace_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 305102, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ace_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 284554, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ace_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 265837, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ace_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 269106, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ace_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 295691, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ace_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 287714, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ace_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 286452, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ace_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 272487, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ace_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 279619, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + 
"ace_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 289440, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ace_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 270190, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ace_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275215, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ace_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 281056, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ace_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 258361, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ace_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 300618, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ace_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 264095, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ace_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 311330, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ace_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 284130, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ace_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 275376, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ace_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 281724, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 
1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ace_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 274403, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ace_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 283591, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ace_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 255496, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ace_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 269190, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ace_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 298351, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + 
"unique_sentence2": 1012 + }, + "ace_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 270289, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ace_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 272262, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ace_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 274071, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ace_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 280504, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ace_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287804, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ace_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 268777, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ace_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 255703, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ace_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 281194, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ace_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 304530, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ace_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 279973, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ace_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 283599, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + 
"unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ace_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 268836, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ace_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 330217, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ace_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 308102, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ace_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 181952, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ace_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 267869, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + 
"max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ace_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 270518, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ace_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 294414, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ace_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 268038, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ace_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 271781, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ace_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 293042, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ace_Latn-sin_Sinh": { + "num_samples": 1012, + 
"number_of_characters": 273013, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ace_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 268042, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ace_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 185181, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ace_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 278296, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ace_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 275971, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ace_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 288076, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ace_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 268334, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ace_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 289478, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ace_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 284273, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ace_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 273643, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ace_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 232633, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ace_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 182628, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ace_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 270104, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ace_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 283190, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ace_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 274748, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ace_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 285863, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + 
"ace_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 284931, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ace_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 289105, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ace_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 272788, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ace_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 310944, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ace_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 288134, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ace_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 280550, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ace_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 277016, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ace_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 268361, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ace_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277337, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ace_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 277011, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ace_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 292584, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 
1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ace_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 294657, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ace_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 304968, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ace_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 290295, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ace_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 258994, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ace_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 295742, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + 
"unique_sentence2": 1012 + }, + "ace_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 263213, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ace_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 284890, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ace_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 279479, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ace_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 276772, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ace_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 289536, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ace_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 300111, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ace_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 285672, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ace_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 253722, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ace_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 282000, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ace_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 272733, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ace_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 288188, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, 
+ "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ace_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 271832, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ace_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 261521, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ace_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 282195, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ace_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275155, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ace_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 274788, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + 
"max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ace_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 243987, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ace_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279000, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ace_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 276090, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ace_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 275358, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ace_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 293595, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ace_Latn-tum_Latn": { + "num_samples": 1012, + 
"number_of_characters": 314478, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.25, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ban_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257191, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ban_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 271957, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ban_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 308928, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ban_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 276628, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ban_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 298782, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ban_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 272265, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ban_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 284724, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ban_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 304302, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ban_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 281331, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ban_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 287503, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ban_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303109, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ban_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 272503, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ban_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299001, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ban_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 274978, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ban_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268025, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ban_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 302567, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ban_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274168, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ban_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 259143, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ban_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293561, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ban_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 277536, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ban_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 274297, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ban_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 291432, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ban_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 294906, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ban_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 269397, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ban_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 298917, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + 
"max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ban_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 263530, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ban_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 260683, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ban_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 306321, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ban_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 276862, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ban_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 284199, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + 
}, + "ban_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281102, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ban_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 278621, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ban_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 306566, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ban_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276350, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ban_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286047, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ban_Latn-aeb_Arab": { + "num_samples": 1012, + 
"number_of_characters": 257647, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ban_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 274612, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ban_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275029, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ban_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 291526, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ban_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 291332, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ban_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 271969, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ban_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285053, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ban_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 294296, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ban_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280016, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ban_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 285260, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ban_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274181, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ban_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 285392, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ban_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280130, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ban_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 275942, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ban_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 286733, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ban_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 288988, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ban_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 282416, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ban_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 280317, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ban_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 254616, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ban_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 267800, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ban_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273018, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ban_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 305250, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ban_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 261570, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ban_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283397, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ban_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266213, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ban_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 277743, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + 
"max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ban_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 275489, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ban_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278220, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ban_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 283752, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ban_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 279844, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ban_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 287888, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, 
+ "ban_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 284542, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ban_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 291858, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ban_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 286597, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ban_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 283674, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ban_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 294893, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ban_Latn-als_Latn": { + "num_samples": 1012, + 
"number_of_characters": 292634, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ban_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 293721, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ban_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 300747, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ban_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 275353, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ban_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 295464, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ban_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 282352, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ban_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 294141, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ban_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 282941, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ban_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 278807, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ban_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 232702, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ban_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 277757, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ban_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 285698, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ban_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 301708, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ban_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 211535, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ban_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270227, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ban_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 292835, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ban_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 299703, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ban_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284165, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ban_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 253551, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ban_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 284945, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ban_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 280830, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ban_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 282498, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ban_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 275630, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ban_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 292097, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ban_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287666, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ban_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 269987, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ban_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 310316, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ban_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 261877, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ban_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283394, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ban_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 303208, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ban_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 202513, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + 
"ban_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 291564, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ban_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 308739, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ban_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288191, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ban_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 269474, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ban_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 272743, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ban_Latn-arb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 299328, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ban_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 291351, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ban_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290089, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ban_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276124, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ban_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283256, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ban_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293077, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ban_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 273827, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ban_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278852, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ban_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 284693, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ban_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 261998, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ban_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304255, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ban_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 267732, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ban_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 314967, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ban_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 287767, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ban_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279013, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ban_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 285361, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ban_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278040, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ban_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287228, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ban_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 259133, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ban_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 272827, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ban_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 301988, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ban_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 273926, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ban_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 275899, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ban_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 277708, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ban_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284141, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ban_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291441, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + 
"max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ban_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 272414, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ban_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 259340, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ban_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 284831, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ban_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308167, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ban_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 283610, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + 
"ban_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 287236, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ban_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 272473, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ban_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 333854, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ban_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 311739, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ban_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 185589, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ban_Latn-asm_Beng": { + "num_samples": 1012, + 
"number_of_characters": 271506, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ban_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274155, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ban_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298051, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ban_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 271675, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ban_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 275418, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ban_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 296679, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ban_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 276650, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ban_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 271679, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ban_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 188818, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ban_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 281933, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ban_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 279608, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ban_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 291713, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ban_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 271971, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ban_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293115, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ban_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 287910, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ban_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277280, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ban_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236270, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ban_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186265, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ban_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 273741, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ban_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 286827, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ban_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 278385, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ban_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 289500, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ban_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 288568, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ban_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 292742, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ban_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 276425, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ban_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 314581, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + 
"max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ban_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 291771, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ban_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284187, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ban_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 280653, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ban_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 271998, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ban_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280974, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, 
+ "ban_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 280648, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ban_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 296221, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ban_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 298294, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ban_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 308605, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ban_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 293932, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ban_Latn-azb_Arab": { + "num_samples": 1012, + 
"number_of_characters": 262631, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ban_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 299379, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ban_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 266850, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ban_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 288527, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ban_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283116, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ban_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 280409, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ban_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 293173, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ban_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 303748, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ban_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 289309, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ban_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 257359, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ban_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 285637, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ban_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 276370, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ban_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 291825, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ban_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 275469, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ban_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265158, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ban_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 285832, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ban_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278792, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ban_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 278425, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ban_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 247624, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ban_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282637, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ban_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 279727, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ban_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 278995, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ban_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297232, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ban_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 318115, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 143.84387351778656, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ell_Grek-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 269160, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ell_Grek-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 283926, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + 
"max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ell_Grek-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 320897, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ell_Grek-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 288597, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ell_Grek-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 310751, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ell_Grek-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 284234, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ell_Grek-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 296693, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + 
}, + "ell_Grek-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 316271, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ell_Grek-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 293300, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ell_Grek-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 299472, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ell_Grek-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 303109, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ell_Grek-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 284472, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ell_Grek-kik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 310970, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ell_Grek-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 286947, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ell_Grek-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 279994, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ell_Grek-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 314536, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ell_Grek-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 286137, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ell_Grek-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 271112, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ell_Grek-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 305530, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ell_Grek-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 289505, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ell_Grek-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 286266, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ell_Grek-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 303401, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ell_Grek-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 306875, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ell_Grek-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 281366, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ell_Grek-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 310886, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ell_Grek-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 275499, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ell_Grek-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 272652, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ell_Grek-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 318290, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ell_Grek-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 288831, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ell_Grek-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 296168, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ell_Grek-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293071, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ell_Grek-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 290590, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ell_Grek-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 318535, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + 
"average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ell_Grek-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288319, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ell_Grek-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 298016, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ell_Grek-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 269616, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ell_Grek-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 286581, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ell_Grek-est_Latn": { + "num_samples": 1012, + "number_of_characters": 286998, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + 
"max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ell_Grek-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 303495, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ell_Grek-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 303301, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ell_Grek-min_Arab": { + "num_samples": 1012, + "number_of_characters": 283938, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ell_Grek-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 297022, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ell_Grek-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 306265, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, 
+ "ell_Grek-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291985, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ell_Grek-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 297229, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ell_Grek-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 286150, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ell_Grek-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 297361, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ell_Grek-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 292099, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ell_Grek-kmr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 287911, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ell_Grek-min_Latn": { + "num_samples": 1012, + "number_of_characters": 298702, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ell_Grek-por_Latn": { + "num_samples": 1012, + "number_of_characters": 300957, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ell_Grek-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 294385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ell_Grek-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 292286, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ell_Grek-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 266585, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ell_Grek-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 279769, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ell_Grek-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 284987, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ell_Grek-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 317219, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ell_Grek-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 273539, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ell_Grek-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295366, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ell_Grek-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 278182, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ell_Grek-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 289712, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ell_Grek-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 287458, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ell_Grek-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 290189, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ell_Grek-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 295721, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + 
"unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ell_Grek-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 291813, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ell_Grek-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 299857, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ell_Grek-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 296511, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ell_Grek-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 303827, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ell_Grek-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 298566, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ell_Grek-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 295643, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ell_Grek-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 306862, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ell_Grek-als_Latn": { + "num_samples": 1012, + "number_of_characters": 304603, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ell_Grek-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 305690, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ell_Grek-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 312716, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + 
"max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ell_Grek-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 287322, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ell_Grek-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 307433, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ell_Grek-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 294321, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ell_Grek-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 306110, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ell_Grek-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 294910, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + 
"ell_Grek-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 290776, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ell_Grek-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 244671, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ell_Grek-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 289726, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ell_Grek-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 297667, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ell_Grek-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 313677, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ell_Grek-kor_Hang": { + "num_samples": 1012, + 
"number_of_characters": 223504, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ell_Grek-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 282196, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ell_Grek-run_Latn": { + "num_samples": 1012, + "number_of_characters": 304804, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ell_Grek-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 311672, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ell_Grek-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 296134, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ell_Grek-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 265520, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ell_Grek-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 296914, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ell_Grek-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 292799, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ell_Grek-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 294467, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ell_Grek-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 287599, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ell_Grek-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 304066, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ell_Grek-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299635, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ell_Grek-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 281956, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ell_Grek-war_Latn": { + "num_samples": 1012, + "number_of_characters": 322285, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ell_Grek-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 273846, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ell_Grek-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295363, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ell_Grek-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 315177, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ell_Grek-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 214482, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ell_Grek-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 303533, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ell_Grek-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 320708, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ell_Grek-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 300160, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ell_Grek-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 281443, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ell_Grek-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 284712, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ell_Grek-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 311297, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ell_Grek-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 303320, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ell_Grek-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 302058, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + 
"max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ell_Grek-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 288093, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ell_Grek-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 295225, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ell_Grek-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 305046, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ell_Grek-san_Deva": { + "num_samples": 1012, + "number_of_characters": 285796, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ell_Grek-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290821, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, 
+ "ell_Grek-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 296662, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ell_Grek-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 273967, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ell_Grek-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 316224, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ell_Grek-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 279701, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ell_Grek-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 326936, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ell_Grek-lin_Latn": { + "num_samples": 1012, + 
"number_of_characters": 299736, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ell_Grek-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 290982, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ell_Grek-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 297330, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ell_Grek-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 290009, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ell_Grek-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 299197, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ell_Grek-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 271102, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ell_Grek-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 284796, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ell_Grek-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 313957, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ell_Grek-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 285895, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ell_Grek-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 287868, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ell_Grek-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 289677, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ell_Grek-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 296110, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ell_Grek-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 303410, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ell_Grek-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 284383, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ell_Grek-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 271309, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ell_Grek-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 296800, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ell_Grek-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 320136, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ell_Grek-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 295579, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ell_Grek-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 299205, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ell_Grek-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 284442, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ell_Grek-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 345823, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + 
"average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ell_Grek-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 323708, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ell_Grek-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 197558, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ell_Grek-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 283475, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ell_Grek-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 286124, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ell_Grek-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 310020, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + 
"max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ell_Grek-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 283644, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ell_Grek-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 287387, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ell_Grek-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 308648, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ell_Grek-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 288619, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ell_Grek-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 283648, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + 
"ell_Grek-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 200787, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ell_Grek-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 293902, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ell_Grek-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 291577, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ell_Grek-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 303682, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ell_Grek-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 283940, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ell_Grek-ltz_Latn": { + "num_samples": 1012, + 
"number_of_characters": 305084, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ell_Grek-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 299879, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ell_Grek-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 289249, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ell_Grek-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 248239, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ell_Grek-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 198234, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ell_Grek-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 285710, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ell_Grek-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 298796, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ell_Grek-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 290354, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ell_Grek-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 301469, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ell_Grek-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 300537, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ell_Grek-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 304711, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ell_Grek-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 288394, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ell_Grek-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 326550, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ell_Grek-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 303740, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ell_Grek-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 296156, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ell_Grek-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 292622, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ell_Grek-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 283967, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ell_Grek-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292943, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ell_Grek-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 292617, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ell_Grek-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 308190, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ell_Grek-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 310263, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + 
"average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ell_Grek-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 320574, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ell_Grek-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 305901, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ell_Grek-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 274600, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ell_Grek-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 311348, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ell_Grek-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 278819, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + 
"max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ell_Grek-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 300496, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ell_Grek-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 295085, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ell_Grek-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 292378, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ell_Grek-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 305142, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ell_Grek-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 315717, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, 
+ "ell_Grek-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 301278, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ell_Grek-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 269328, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ell_Grek-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 297606, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ell_Grek-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 288339, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ell_Grek-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 303794, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ell_Grek-pag_Latn": { + "num_samples": 1012, + 
"number_of_characters": 287438, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ell_Grek-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 277127, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ell_Grek-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 297801, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ell_Grek-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290761, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ell_Grek-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 290394, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ell_Grek-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 259593, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ell_Grek-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294606, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ell_Grek-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 291696, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ell_Grek-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 290964, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ell_Grek-som_Latn": { + "num_samples": 1012, + "number_of_characters": 309201, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ell_Grek-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 330084, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
155.67094861660078, + "max_sentence1_length": 464, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "hne_Deva-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 238554, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "hne_Deva-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 253320, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "hne_Deva-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 290291, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hne_Deva-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 257991, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "hne_Deva-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 280145, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + 
"unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "hne_Deva-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 253628, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "hne_Deva-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 266087, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hne_Deva-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 285665, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "hne_Deva-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 262694, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "hne_Deva-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 268866, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + 
"average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hne_Deva-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 272503, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hne_Deva-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 284472, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "hne_Deva-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 280364, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "hne_Deva-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 256341, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hne_Deva-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 249388, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + 
"max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hne_Deva-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 283930, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hne_Deva-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 255531, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "hne_Deva-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 240506, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "hne_Deva-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274924, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hne_Deva-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 258899, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, 
+ "hne_Deva-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 255660, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hne_Deva-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 272795, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hne_Deva-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 276269, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hne_Deva-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 250760, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "hne_Deva-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 280280, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hne_Deva-tzm_Tfng": { + "num_samples": 1012, + 
"number_of_characters": 244893, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "hne_Deva-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 242046, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "hne_Deva-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 287684, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "hne_Deva-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 258225, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hne_Deva-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 265562, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "hne_Deva-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262465, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hne_Deva-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 259984, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hne_Deva-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 287929, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "hne_Deva-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257713, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hne_Deva-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 267410, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "hne_Deva-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 239010, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "hne_Deva-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 255975, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hne_Deva-est_Latn": { + "num_samples": 1012, + "number_of_characters": 256392, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "hne_Deva-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 272889, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hne_Deva-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 272695, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "hne_Deva-min_Arab": { + "num_samples": 1012, + "number_of_characters": 253332, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + 
"unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hne_Deva-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 266416, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hne_Deva-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 275659, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hne_Deva-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261379, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hne_Deva-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 266623, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hne_Deva-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 255544, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hne_Deva-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 266755, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hne_Deva-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 261493, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hne_Deva-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 257305, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hne_Deva-min_Latn": { + "num_samples": 1012, + "number_of_characters": 268096, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hne_Deva-por_Latn": { + "num_samples": 1012, + "number_of_characters": 270351, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + 
"max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hne_Deva-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 263779, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hne_Deva-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 261680, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hne_Deva-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 235979, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "hne_Deva-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 249163, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "hne_Deva-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 254381, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "hne_Deva-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 286613, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "hne_Deva-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 242933, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "hne_Deva-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264760, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "hne_Deva-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 247576, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "hne_Deva-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 259106, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hne_Deva-urd_Arab": { + "num_samples": 1012, + 
"number_of_characters": 256852, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hne_Deva-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 259583, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hne_Deva-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 265115, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hne_Deva-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 261207, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hne_Deva-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 269251, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hne_Deva-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 265905, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hne_Deva-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 273221, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "hne_Deva-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 267960, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hne_Deva-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 265037, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "hne_Deva-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 276256, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hne_Deva-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273997, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hne_Deva-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 275084, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "hne_Deva-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 282110, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "hne_Deva-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 256716, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hne_Deva-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 276827, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hne_Deva-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 263715, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + 
"unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hne_Deva-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 275504, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hne_Deva-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 264304, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "hne_Deva-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 260170, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hne_Deva-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 214065, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "hne_Deva-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 259120, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + 
"average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hne_Deva-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 267061, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hne_Deva-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 283071, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "hne_Deva-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 192898, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "hne_Deva-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 251590, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "hne_Deva-run_Latn": { + "num_samples": 1012, + "number_of_characters": 274198, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + 
"max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "hne_Deva-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 281066, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "hne_Deva-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 265528, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "hne_Deva-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 234914, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "hne_Deva-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 266308, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hne_Deva-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 262193, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, 
+ "hne_Deva-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 263861, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hne_Deva-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 256993, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hne_Deva-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 273460, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hne_Deva-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269029, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hne_Deva-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 251350, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hne_Deva-war_Latn": { + "num_samples": 1012, + 
"number_of_characters": 291679, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hne_Deva-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 243240, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hne_Deva-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264757, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hne_Deva-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 284571, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "hne_Deva-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 183876, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "hne_Deva-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 272927, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hne_Deva-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 290102, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hne_Deva-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 269554, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "hne_Deva-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 250837, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hne_Deva-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 254106, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hne_Deva-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 280691, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "hne_Deva-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 272714, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hne_Deva-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 271452, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hne_Deva-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 257487, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "hne_Deva-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 264619, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "hne_Deva-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 274440, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + 
"unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hne_Deva-san_Deva": { + "num_samples": 1012, + "number_of_characters": 255190, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "hne_Deva-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260215, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hne_Deva-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 266056, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "hne_Deva-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 243361, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hne_Deva-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 285618, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + 
"average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hne_Deva-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 249095, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "hne_Deva-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 296330, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "hne_Deva-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 269130, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hne_Deva-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 260376, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hne_Deva-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 266724, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + 
"max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hne_Deva-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 259403, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hne_Deva-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 268591, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hne_Deva-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 240496, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "hne_Deva-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 254190, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hne_Deva-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 283351, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, 
+ "hne_Deva-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 255289, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "hne_Deva-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 257262, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "hne_Deva-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 259071, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "hne_Deva-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 265504, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hne_Deva-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272804, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "hne_Deva-yor_Latn": { + "num_samples": 1012, + 
"number_of_characters": 253777, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hne_Deva-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 240703, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "hne_Deva-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 266194, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hne_Deva-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 289530, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "hne_Deva-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 264973, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hne_Deva-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 268599, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hne_Deva-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 253836, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hne_Deva-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 315217, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "hne_Deva-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 293102, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hne_Deva-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 166952, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "hne_Deva-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 252869, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hne_Deva-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 255518, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hne_Deva-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 279414, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "hne_Deva-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 253038, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "hne_Deva-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 256781, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hne_Deva-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 278042, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + 
"unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "hne_Deva-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 258013, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hne_Deva-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 253042, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hne_Deva-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 170181, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "hne_Deva-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 263296, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hne_Deva-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 260971, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + 
"average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hne_Deva-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 273076, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hne_Deva-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 253334, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "hne_Deva-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 274478, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "hne_Deva-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 269273, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "hne_Deva-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 258643, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + 
"max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hne_Deva-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 217633, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "hne_Deva-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 167628, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "hne_Deva-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 255104, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hne_Deva-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 268190, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hne_Deva-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 259748, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + 
"hne_Deva-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 270863, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hne_Deva-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 269931, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "hne_Deva-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 274105, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hne_Deva-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 257788, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hne_Deva-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 295944, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "hne_Deva-zsm_Latn": { + "num_samples": 1012, + 
"number_of_characters": 273134, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hne_Deva-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 265550, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "hne_Deva-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 262016, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hne_Deva-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 253361, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hne_Deva-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262337, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hne_Deva-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 262011, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "hne_Deva-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 277584, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hne_Deva-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 279657, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "hne_Deva-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 289968, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hne_Deva-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 275295, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "hne_Deva-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 243994, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "hne_Deva-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 280742, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hne_Deva-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 248213, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "hne_Deva-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 269890, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "hne_Deva-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 264479, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hne_Deva-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 261772, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + 
"unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hne_Deva-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 274536, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "hne_Deva-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 285111, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hne_Deva-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 270672, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "hne_Deva-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 238722, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "hne_Deva-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 267000, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + 
"average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "hne_Deva-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 257733, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hne_Deva-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 273188, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hne_Deva-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 256832, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "hne_Deva-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 246521, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "hne_Deva-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 267195, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hne_Deva-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260155, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "hne_Deva-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 259788, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "hne_Deva-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 228987, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "hne_Deva-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264000, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hne_Deva-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 261090, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, 
+ "hne_Deva-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 260358, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hne_Deva-som_Latn": { + "num_samples": 1012, + "number_of_characters": 278595, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "hne_Deva-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 299478, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.42786561264822, + "max_sentence1_length": 326, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kik_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 265052, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kik_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 279818, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kik_Latn-dzo_Tibt": { + "num_samples": 1012, + 
"number_of_characters": 316789, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kik_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 284489, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kik_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 306643, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kik_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 280126, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kik_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 292585, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kik_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 312163, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kik_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 289192, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kik_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 295364, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kik_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 299001, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kik_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 310970, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kik_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 280364, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kik_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 282839, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kik_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 275886, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kik_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 310428, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kik_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 282029, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kik_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 267004, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + 
"unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kik_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301422, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kik_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 285397, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kik_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 282158, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kik_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 299293, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kik_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 302767, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + 
"average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kik_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 277258, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kik_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 306778, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kik_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 271391, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kik_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 268544, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kik_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 314182, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + 
"max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kik_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 284723, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kik_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 292060, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kik_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288963, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kik_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 286482, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kik_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 314427, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + 
}, + "kik_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284211, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kik_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 293908, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kik_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 265508, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kik_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 282473, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kik_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 282890, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kik_Latn-hye_Armn": { + "num_samples": 1012, + 
"number_of_characters": 299387, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kik_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 299193, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kik_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 279830, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kik_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 292914, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kik_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 302157, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kik_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287877, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kik_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 293121, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kik_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 282042, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kik_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 293253, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kik_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 287991, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kik_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 283803, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kik_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 294594, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kik_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 296849, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kik_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 290277, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kik_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 288178, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kik_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 262477, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + 
"unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kik_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 275661, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kik_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 280879, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kik_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 313111, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kik_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 269431, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kik_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291258, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + 
"average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kik_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 274074, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kik_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 285604, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kik_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 283350, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kik_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 286081, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kik_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 291613, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kik_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 287705, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kik_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 295749, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kik_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 292403, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kik_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 299719, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kik_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 294458, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "kik_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 291535, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kik_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 302754, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kik_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 300495, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kik_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 301582, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kik_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 308608, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kik_Latn-isl_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283214, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kik_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 303325, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kik_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 290213, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kik_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 302002, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kik_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 290802, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kik_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 286668, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kik_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 240563, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kik_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 285618, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kik_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 293559, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kik_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 309569, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kik_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 219396, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kik_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 278088, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kik_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 300696, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kik_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 307564, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kik_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 292026, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kik_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 261412, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + 
"unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kik_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 292806, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kik_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 288691, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kik_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 290359, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kik_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 283491, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kik_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 299958, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + 
"average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kik_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295527, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kik_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 277848, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kik_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 318177, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kik_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 269738, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kik_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291255, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kik_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 311069, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kik_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 210374, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kik_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 299425, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kik_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 316600, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kik_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 296052, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + 
"kik_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 277335, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kik_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 280604, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kik_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 307189, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kik_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 299212, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kik_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 297950, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kik_Latn-kab_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283985, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kik_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 291117, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kik_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 300938, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kik_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 281688, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kik_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286713, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kik_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 292554, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kik_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 269859, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kik_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 312116, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kik_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 275593, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kik_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 322828, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kik_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 295628, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kik_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 286874, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kik_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 293222, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kik_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 285901, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kik_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 295089, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kik_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 266994, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + 
"unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kik_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 280688, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kik_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 309849, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kik_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 281787, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kik_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 283760, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kik_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 285569, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + 
"average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kik_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 292002, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kik_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299302, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kik_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 280275, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kik_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 267201, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kik_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 292692, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + 
"max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kik_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 316028, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kik_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 291471, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kik_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 295097, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kik_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 280334, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kik_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 341715, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, 
+ "kik_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 319600, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kik_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 193450, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kik_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 279367, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kik_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 282016, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kik_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 305912, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kik_Latn-kas_Arab": { + "num_samples": 1012, + 
"number_of_characters": 279536, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kik_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 283279, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kik_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 304540, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kik_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 284511, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kik_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 279540, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kik_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 196679, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kik_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 289794, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kik_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 287469, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kik_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 299574, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kik_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 279832, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kik_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 300976, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kik_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 295771, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kik_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 285141, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kik_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 244131, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kik_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 194126, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kik_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 281602, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + 
"unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kik_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 294688, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kik_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 286246, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kik_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 297361, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kik_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 296429, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kik_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 300603, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + 
"average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kik_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 284286, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kik_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 322442, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kik_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 299632, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kik_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 292048, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kik_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 288514, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kik_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 279859, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kik_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288835, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kik_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 288509, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kik_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 304082, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kik_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 306155, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + 
}, + "kik_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 316466, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kik_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 301793, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kik_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 270492, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kik_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 307240, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kik_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 274711, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kik_Latn-kbp_Latn": { + "num_samples": 1012, + 
"number_of_characters": 296388, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kik_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 290977, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kik_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 288270, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kik_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 301034, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kik_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 311609, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kik_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 297170, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kik_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 265220, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kik_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 293498, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kik_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 284231, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kik_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 299686, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kik_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 283330, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kik_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 273019, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kik_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 293693, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kik_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286653, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kik_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 286286, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kik_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 255485, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + 
"unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kik_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290498, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kik_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 287588, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kik_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 286856, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kik_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 305093, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kik_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 325976, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 151.61166007905138, + "max_sentence1_length": 515, + "unique_sentence1": 1009, + "min_sentence2_length": 47, + 
"average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "mai_Deva-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 241029, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "mai_Deva-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 255795, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "mai_Deva-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 292766, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mai_Deva-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 260466, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "mai_Deva-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 282620, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + 
"max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "mai_Deva-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 256103, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "mai_Deva-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 268562, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mai_Deva-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 288140, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "mai_Deva-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 265169, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "mai_Deva-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 271341, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + 
"mai_Deva-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 274978, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mai_Deva-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 286947, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "mai_Deva-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 256341, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "mai_Deva-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 282839, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "mai_Deva-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 251863, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mai_Deva-spa_Latn": { + "num_samples": 1012, + 
"number_of_characters": 286405, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mai_Deva-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 258006, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "mai_Deva-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 242981, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "mai_Deva-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277399, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mai_Deva-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 261374, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mai_Deva-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 258135, + "unique_pairs": 1012, + 
"min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mai_Deva-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 275270, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mai_Deva-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 278744, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mai_Deva-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 253235, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "mai_Deva-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 282755, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mai_Deva-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 247368, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 
127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "mai_Deva-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 244521, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "mai_Deva-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 290159, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "mai_Deva-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 260700, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mai_Deva-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 268037, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "mai_Deva-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264940, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mai_Deva-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 262459, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mai_Deva-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 290404, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "mai_Deva-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260188, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mai_Deva-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 269885, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "mai_Deva-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 241485, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + 
"average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "mai_Deva-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 258450, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mai_Deva-est_Latn": { + "num_samples": 1012, + "number_of_characters": 258867, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "mai_Deva-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 275364, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mai_Deva-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 275170, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "mai_Deva-min_Arab": { + "num_samples": 1012, + "number_of_characters": 255807, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + 
"max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mai_Deva-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 268891, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mai_Deva-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 278134, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mai_Deva-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263854, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mai_Deva-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 269098, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mai_Deva-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 258019, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + 
}, + "mai_Deva-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 269230, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mai_Deva-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 263968, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mai_Deva-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 259780, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mai_Deva-min_Latn": { + "num_samples": 1012, + "number_of_characters": 270571, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mai_Deva-por_Latn": { + "num_samples": 1012, + "number_of_characters": 272826, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mai_Deva-sun_Latn": { + "num_samples": 1012, + 
"number_of_characters": 266254, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mai_Deva-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 264155, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mai_Deva-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 238454, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "mai_Deva-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 251638, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "mai_Deva-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 256856, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mai_Deva-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 289088, + "unique_pairs": 1012, + 
"min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "mai_Deva-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 245408, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "mai_Deva-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267235, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "mai_Deva-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 250051, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "mai_Deva-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 261581, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mai_Deva-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 259327, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 
127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mai_Deva-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 262058, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mai_Deva-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 267590, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mai_Deva-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 263682, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mai_Deva-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 271726, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mai_Deva-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 268380, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mai_Deva-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 275696, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "mai_Deva-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 270435, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mai_Deva-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 267512, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "mai_Deva-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 278731, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mai_Deva-als_Latn": { + "num_samples": 1012, + "number_of_characters": 276472, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + 
"average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mai_Deva-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 277559, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "mai_Deva-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 284585, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "mai_Deva-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 259191, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mai_Deva-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 279302, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mai_Deva-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 266190, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + 
"max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mai_Deva-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 277979, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mai_Deva-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 266779, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "mai_Deva-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 262645, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mai_Deva-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 216540, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "mai_Deva-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 261595, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + 
"mai_Deva-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 269536, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mai_Deva-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 285546, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "mai_Deva-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 195373, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "mai_Deva-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 254065, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "mai_Deva-run_Latn": { + "num_samples": 1012, + "number_of_characters": 276673, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "mai_Deva-tam_Taml": { + "num_samples": 1012, + 
"number_of_characters": 283541, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "mai_Deva-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 268003, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "mai_Deva-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 237389, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "mai_Deva-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 268783, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mai_Deva-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 264668, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "mai_Deva-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 266336, + "unique_pairs": 1012, + 
"min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mai_Deva-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 259468, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mai_Deva-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 275935, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mai_Deva-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271504, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mai_Deva-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 253825, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mai_Deva-war_Latn": { + "num_samples": 1012, + "number_of_characters": 294154, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 
127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mai_Deva-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 245715, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mai_Deva-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267232, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mai_Deva-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 287046, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "mai_Deva-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 186351, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "mai_Deva-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 275402, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mai_Deva-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 292577, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mai_Deva-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 272029, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "mai_Deva-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 253312, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mai_Deva-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 256581, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mai_Deva-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 283166, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "mai_Deva-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 275189, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mai_Deva-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 273927, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mai_Deva-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 259962, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "mai_Deva-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 267094, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "mai_Deva-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 276915, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + 
"max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mai_Deva-san_Deva": { + "num_samples": 1012, + "number_of_characters": 257665, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "mai_Deva-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262690, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mai_Deva-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 268531, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "mai_Deva-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 245836, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mai_Deva-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 288093, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + 
}, + "mai_Deva-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 251570, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "mai_Deva-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 298805, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "mai_Deva-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 271605, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mai_Deva-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 262851, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mai_Deva-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 269199, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mai_Deva-tel_Telu": { + "num_samples": 1012, + 
"number_of_characters": 261878, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mai_Deva-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 271066, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mai_Deva-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 242971, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "mai_Deva-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 256665, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mai_Deva-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 285826, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "mai_Deva-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 257764, + "unique_pairs": 1012, + 
"min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "mai_Deva-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 259737, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "mai_Deva-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 261546, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "mai_Deva-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 267979, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mai_Deva-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275279, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "mai_Deva-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 256252, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 
127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mai_Deva-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 243178, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "mai_Deva-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 268669, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mai_Deva-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 292005, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "mai_Deva-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 267448, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mai_Deva-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 271074, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mai_Deva-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 256311, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mai_Deva-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 317692, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "mai_Deva-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 295577, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mai_Deva-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 169427, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "mai_Deva-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 255344, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mai_Deva-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 257993, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mai_Deva-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 281889, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "mai_Deva-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 255513, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "mai_Deva-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 259256, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mai_Deva-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 280517, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + 
"max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "mai_Deva-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 260488, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mai_Deva-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 255517, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mai_Deva-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 172656, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "mai_Deva-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 265771, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mai_Deva-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 263446, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, 
+ "mai_Deva-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 275551, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mai_Deva-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 255809, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "mai_Deva-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 276953, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "mai_Deva-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 271748, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "mai_Deva-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 261118, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mai_Deva-tir_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 220108, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "mai_Deva-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 170103, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "mai_Deva-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 257579, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mai_Deva-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 270665, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mai_Deva-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 262223, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "mai_Deva-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 273338, + "unique_pairs": 1012, + 
"min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mai_Deva-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 272406, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "mai_Deva-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 276580, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mai_Deva-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 260263, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mai_Deva-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 298419, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "mai_Deva-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 275609, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 
127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mai_Deva-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 268025, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "mai_Deva-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 264491, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mai_Deva-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 255836, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mai_Deva-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264812, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mai_Deva-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 264486, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "mai_Deva-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 280059, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mai_Deva-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 282132, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "mai_Deva-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 292443, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mai_Deva-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 277770, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "mai_Deva-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 246469, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "mai_Deva-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 283217, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mai_Deva-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 250688, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "mai_Deva-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 272365, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "mai_Deva-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 266954, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mai_Deva-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 264247, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + 
"max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mai_Deva-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 277011, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "mai_Deva-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 287586, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mai_Deva-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 273147, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "mai_Deva-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 241197, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "mai_Deva-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 269475, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + 
}, + "mai_Deva-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 260208, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mai_Deva-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 275663, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mai_Deva-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 259307, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "mai_Deva-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 248996, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "mai_Deva-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 269670, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mai_Deva-bak_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 262630, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "mai_Deva-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 262263, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "mai_Deva-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 231462, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "mai_Deva-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266475, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mai_Deva-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 263565, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mai_Deva-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 262833, + "unique_pairs": 1012, + 
"min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mai_Deva-som_Latn": { + "num_samples": 1012, + "number_of_characters": 281070, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "mai_Deva-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 301953, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "pbt_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 234076, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "pbt_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 248842, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "pbt_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 285813, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pbt_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 253513, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "pbt_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 275667, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "pbt_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 249150, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "pbt_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 261609, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pbt_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 281187, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "pbt_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 258216, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "pbt_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 264388, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pbt_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 268025, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pbt_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 279994, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "pbt_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 249388, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "pbt_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 275886, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "pbt_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 251863, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pbt_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 279452, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pbt_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 251053, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "pbt_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 236028, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + 
"max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "pbt_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270446, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pbt_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 254421, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pbt_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 251182, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pbt_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 268317, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pbt_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 271791, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + 
"pbt_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 246282, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "pbt_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 275802, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pbt_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 240415, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "pbt_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 237568, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "pbt_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 283206, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "pbt_Arab-epo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 253747, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pbt_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 261084, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "pbt_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257987, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pbt_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 255506, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pbt_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 283451, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "pbt_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253235, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pbt_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 262932, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "pbt_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 234532, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "pbt_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 251497, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pbt_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 251914, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "pbt_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 268411, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pbt_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 268217, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "pbt_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 248854, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pbt_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 261938, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pbt_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 271181, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pbt_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256901, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pbt_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 262145, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pbt_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 251066, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pbt_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 262277, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "pbt_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 257015, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pbt_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 252827, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pbt_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 263618, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pbt_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 265873, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pbt_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 259301, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pbt_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 257202, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pbt_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 231501, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + 
"max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "pbt_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 244685, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "pbt_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 249903, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pbt_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 282135, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "pbt_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 238455, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "pbt_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260282, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + 
}, + "pbt_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 243098, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "pbt_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 254628, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pbt_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 252374, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pbt_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 255105, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pbt_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 260637, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pbt_Arab-fao_Latn": { + "num_samples": 1012, + 
"number_of_characters": 256729, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pbt_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 264773, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pbt_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 261427, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pbt_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 268743, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "pbt_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 263482, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pbt_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 260559, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "pbt_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 271778, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pbt_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 269519, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "pbt_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 270606, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "pbt_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 277632, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "pbt_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 252238, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pbt_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 272349, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pbt_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 259237, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pbt_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 271026, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pbt_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 259826, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "pbt_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 255692, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pbt_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 209587, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "pbt_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 254642, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "pbt_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 262583, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pbt_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 278593, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "pbt_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 188420, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + 
"average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "pbt_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 247112, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "pbt_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 269720, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "pbt_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 276588, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "pbt_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 261050, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "pbt_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 230436, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + 
"max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "pbt_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 261830, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pbt_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 257715, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "pbt_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 259383, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pbt_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 252515, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pbt_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 268982, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, 
+ "pbt_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264551, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pbt_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 246872, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pbt_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 287201, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pbt_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 238762, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pbt_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260279, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pbt_Arab-fra_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280093, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "pbt_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 179398, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "pbt_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 268449, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pbt_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 285624, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pbt_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 265076, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "pbt_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 246359, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pbt_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 249628, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pbt_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 276213, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "pbt_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 268236, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pbt_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 266974, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "pbt_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 253009, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "pbt_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 260141, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "pbt_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 269962, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pbt_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 250712, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "pbt_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255737, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pbt_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 261578, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "pbt_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 238883, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pbt_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 281140, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pbt_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 244617, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "pbt_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 291852, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "pbt_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 264652, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pbt_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 255898, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pbt_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 262246, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pbt_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 254925, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pbt_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 264113, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pbt_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 236018, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + 
"max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "pbt_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 249712, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pbt_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 278873, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "pbt_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 250811, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "pbt_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 252784, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "pbt_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 254593, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + 
}, + "pbt_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 261026, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pbt_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268326, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "pbt_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 249299, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pbt_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 236225, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "pbt_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 261716, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pbt_Arab-gla_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285052, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "pbt_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 260495, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pbt_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 264121, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pbt_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 249358, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pbt_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 310739, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "pbt_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 288624, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pbt_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 162474, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "pbt_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 248391, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pbt_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 251040, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pbt_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 274936, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "pbt_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 248560, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "pbt_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 252303, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pbt_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 273564, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "pbt_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 253535, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pbt_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 248564, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pbt_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 165703, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "pbt_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 258818, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pbt_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 256493, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pbt_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 268598, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pbt_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 248856, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "pbt_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 270000, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "pbt_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 264795, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "pbt_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 254165, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pbt_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 213155, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "pbt_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 163150, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "pbt_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 250626, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + 
"max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pbt_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 263712, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pbt_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 255270, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "pbt_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 266385, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pbt_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 265453, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "pbt_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 269627, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, 
+ "pbt_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 253310, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pbt_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 291466, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "pbt_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 268656, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pbt_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 261072, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "pbt_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 257538, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pbt_Arab-guj_Gujr": { + "num_samples": 1012, + 
"number_of_characters": 248883, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pbt_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257859, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pbt_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 257533, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "pbt_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 273106, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pbt_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 275179, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "pbt_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 285490, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pbt_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 270817, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "pbt_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 239516, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "pbt_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 276264, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pbt_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 243735, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "pbt_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 265412, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "pbt_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 260001, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pbt_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 257294, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pbt_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 270058, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "pbt_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 280633, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pbt_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 266194, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "pbt_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 234244, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "pbt_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 262522, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "pbt_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 253255, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pbt_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 268710, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pbt_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 252354, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + 
"average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "pbt_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 242043, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "pbt_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 262717, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pbt_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255677, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "pbt_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 255310, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "pbt_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 224509, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + 
"max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "pbt_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259522, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pbt_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 256612, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pbt_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 255880, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pbt_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 274117, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "pbt_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 295000, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00296442687747, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, 
+ "spa_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 268618, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "spa_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 283384, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "spa_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 320355, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "spa_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 288055, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "spa_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 310209, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "spa_Latn-mag_Deva": { + "num_samples": 1012, + 
"number_of_characters": 283692, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "spa_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 296151, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "spa_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 315729, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "spa_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 292758, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "spa_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 298930, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "spa_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 302567, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "spa_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 314536, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "spa_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 283930, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "spa_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 310428, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "spa_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 286405, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "spa_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 279452, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "spa_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 285595, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "spa_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 270570, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "spa_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304988, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "spa_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 288963, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "spa_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 285724, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "spa_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 302859, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "spa_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 306333, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "spa_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 280824, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "spa_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 310344, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "spa_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 274957, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + 
"average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "spa_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 272110, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "spa_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 317748, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "spa_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 288289, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "spa_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 295626, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "spa_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292529, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + 
"max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "spa_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 290048, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "spa_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 317993, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "spa_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287777, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "spa_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 297474, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "spa_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 269074, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + 
}, + "spa_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 286039, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "spa_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 286456, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "spa_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 302953, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "spa_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 302759, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "spa_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 283396, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "spa_Latn-pol_Latn": { + "num_samples": 1012, + 
"number_of_characters": 296480, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "spa_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 305723, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "spa_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291443, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "spa_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 296687, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "spa_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 285608, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "spa_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 296819, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "spa_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 291557, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "spa_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 287369, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "spa_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 298160, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "spa_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 300415, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "spa_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 293843, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "spa_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 291744, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "spa_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 266043, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "spa_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 279227, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "spa_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 284445, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "spa_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 316677, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "spa_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 272997, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "spa_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294824, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "spa_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 277640, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "spa_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 289170, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "spa_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 286916, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "spa_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 289647, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "spa_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 295179, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "spa_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 291271, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "spa_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 299315, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "spa_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 295969, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + 
"max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "spa_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 303285, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "spa_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 298024, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "spa_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 295101, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "spa_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 306320, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "spa_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 304061, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, 
+ "spa_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 305148, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "spa_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 312174, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "spa_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 286780, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "spa_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 306891, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "spa_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 293779, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "spa_Latn-ron_Latn": { + "num_samples": 1012, + 
"number_of_characters": 305568, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "spa_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 294368, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "spa_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 290234, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "spa_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 244129, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "spa_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 289184, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "spa_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 297125, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "spa_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 313135, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "spa_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 222962, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "spa_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 281654, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "spa_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 304262, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "spa_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 311130, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "spa_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 295592, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "spa_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 264978, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "spa_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 296372, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "spa_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 292257, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "spa_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 293925, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "spa_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 287057, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "spa_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 303524, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "spa_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299093, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "spa_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 281414, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "spa_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 321743, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + 
"average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "spa_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 273304, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "spa_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294821, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "spa_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 314635, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "spa_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 213940, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "spa_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 302991, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + 
"max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "spa_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 320166, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "spa_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 299618, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "spa_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 280901, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "spa_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 284170, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "spa_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 310755, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, 
+ "spa_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 302778, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "spa_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 301516, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "spa_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 287551, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "spa_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 294683, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "spa_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 304504, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "spa_Latn-san_Deva": { + "num_samples": 1012, + 
"number_of_characters": 285254, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "spa_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290279, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "spa_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 296120, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "spa_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 273425, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "spa_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 315682, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "spa_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 279159, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "spa_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 326394, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "spa_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 299194, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "spa_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 290440, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "spa_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 296788, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "spa_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 289467, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "spa_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 298655, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "spa_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 270560, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "spa_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 284254, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "spa_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 313415, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "spa_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 285353, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "spa_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 287326, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "spa_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 289135, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "spa_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 295568, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "spa_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302868, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "spa_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 283841, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + 
"average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "spa_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 270767, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "spa_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 296258, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "spa_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 319594, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "spa_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 295037, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "spa_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 298663, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + 
"max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "spa_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 283900, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "spa_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 345281, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "spa_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 323166, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "spa_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 197016, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "spa_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 282933, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, 
+ "spa_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 285582, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "spa_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 309478, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "spa_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 283102, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "spa_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 286845, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "spa_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 308106, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "spa_Latn-sin_Sinh": { + "num_samples": 1012, + 
"number_of_characters": 288077, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "spa_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 283106, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "spa_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 200245, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "spa_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 293360, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "spa_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 291035, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "spa_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 303140, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "spa_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 283398, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "spa_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 304542, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "spa_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 299337, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "spa_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 288707, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "spa_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 247697, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "spa_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 197692, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "spa_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 285168, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "spa_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 298254, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "spa_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 289812, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "spa_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 300927, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "spa_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 299995, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "spa_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 304169, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "spa_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 287852, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "spa_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 326008, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "spa_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 303198, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "spa_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 295614, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "spa_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 292080, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "spa_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 283425, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "spa_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292401, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "spa_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 292075, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + 
"max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "spa_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 307648, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "spa_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 309721, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "spa_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 320032, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "spa_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 305359, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "spa_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 274058, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + 
}, + "spa_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 310806, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "spa_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 278277, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "spa_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 299954, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "spa_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 294543, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "spa_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 291836, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "spa_Latn-sna_Latn": { + "num_samples": 1012, + 
"number_of_characters": 304600, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "spa_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 315175, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "spa_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 300736, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "spa_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 268786, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "spa_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 297064, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "spa_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 287797, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "spa_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 303252, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "spa_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 286896, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "spa_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 276585, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "spa_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 297259, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "spa_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290219, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "spa_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 289852, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "spa_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 259051, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "spa_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294064, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "spa_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 291154, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "spa_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 290422, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "spa_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 308659, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "spa_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 329542, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 155.13537549407116, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "twi_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 240219, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "twi_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 254985, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "twi_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 291956, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + 
"average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "twi_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 259656, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "twi_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 281810, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "twi_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 255293, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "twi_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 267752, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "twi_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 287330, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + 
"max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "twi_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 264359, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "twi_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 270531, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "twi_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 274168, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "twi_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 286137, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "twi_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 255531, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + 
"twi_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 282029, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "twi_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 258006, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "twi_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 251053, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "twi_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 285595, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "twi_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 242171, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "twi_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 276589, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "twi_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 260564, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "twi_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 257325, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "twi_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 274460, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "twi_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 277934, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "twi_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 252425, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "twi_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 281945, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "twi_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 246558, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "twi_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 243711, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "twi_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 289349, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "twi_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 259890, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "twi_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 267227, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "twi_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264130, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "twi_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 261649, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "twi_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 289594, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "twi_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259378, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "twi_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 269075, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "twi_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 240675, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "twi_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 257640, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "twi_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 258057, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "twi_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 274554, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "twi_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 274360, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "twi_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 254997, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "twi_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 268081, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "twi_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 277324, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "twi_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263044, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "twi_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 268288, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "twi_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 257209, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "twi_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 268420, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "twi_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 263158, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "twi_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 258970, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + 
}, + "twi_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 269761, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "twi_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 272016, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "twi_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 265444, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "twi_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 263345, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "twi_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 237644, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "twi_Latn-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 250828, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "twi_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 256046, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "twi_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 288278, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "twi_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 244598, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "twi_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266425, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "twi_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 249241, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "twi_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 260771, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "twi_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 258517, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "twi_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 261248, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "twi_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 266780, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "twi_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 262872, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "twi_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 270916, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "twi_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 267570, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "twi_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 274886, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "twi_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 269625, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "twi_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 266702, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "twi_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 277921, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "twi_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 275662, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "twi_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 276749, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "twi_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 283775, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "twi_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 258381, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "twi_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 278492, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "twi_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 265380, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "twi_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 277169, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "twi_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 265969, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "twi_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 261835, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "twi_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 215730, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "twi_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 260785, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "twi_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 268726, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "twi_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 284736, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "twi_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 194563, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"twi_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 253255, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "twi_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 275863, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "twi_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 282731, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "twi_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 267193, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "twi_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 236579, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "twi_Latn-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 267973, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "twi_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 263858, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "twi_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 265526, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "twi_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 258658, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "twi_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 275125, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "twi_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270694, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "twi_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 253015, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "twi_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 293344, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "twi_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 244905, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "twi_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266422, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "twi_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 286236, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "twi_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 185541, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "twi_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 274592, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "twi_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 291767, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "twi_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 271219, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "twi_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 252502, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "twi_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 255771, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "twi_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 282356, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "twi_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 274379, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "twi_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 273117, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "twi_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 259152, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "twi_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 266284, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "twi_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 276105, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "twi_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 256855, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "twi_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261880, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "twi_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 267721, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "twi_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 245026, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "twi_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 287283, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "twi_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 250760, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "twi_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 297995, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "twi_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 270795, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, 
+ "twi_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 262041, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "twi_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 268389, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "twi_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 261068, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "twi_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 270256, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "twi_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 242161, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "twi_Latn-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 255855, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "twi_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 285016, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "twi_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 256954, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "twi_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 258927, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "twi_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 260736, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "twi_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 267169, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "twi_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274469, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "twi_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 255442, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "twi_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 242368, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "twi_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 267859, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "twi_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 291195, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "twi_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 266638, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "twi_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 270264, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "twi_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 255501, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "twi_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 316882, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "twi_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 294767, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + 
"unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "twi_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 168617, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "twi_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 254534, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "twi_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 257183, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "twi_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 281079, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "twi_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 254703, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "twi_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 258446, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "twi_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 279707, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "twi_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 259678, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "twi_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 254707, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "twi_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 171846, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + 
"max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "twi_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 264961, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "twi_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 262636, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "twi_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 274741, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "twi_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 254999, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "twi_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 276143, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, 
+ "twi_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 270938, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "twi_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 260308, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "twi_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 219298, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "twi_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 169293, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "twi_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 256769, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "twi_Latn-cym_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269855, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "twi_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 261413, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "twi_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 272528, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "twi_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 271596, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "twi_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 275770, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "twi_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 259453, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "twi_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 297609, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "twi_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 274799, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "twi_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 267215, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "twi_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 263681, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "twi_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 255026, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "twi_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264002, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "twi_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 263676, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "twi_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 279249, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "twi_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 281322, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "twi_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 291633, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + 
"unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "twi_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 276960, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "twi_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 245659, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "twi_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 282407, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "twi_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 249878, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "twi_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 271555, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "twi_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 266144, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "twi_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 263437, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "twi_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 276201, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "twi_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 286776, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "twi_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 272337, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + 
"max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "twi_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 240387, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "twi_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 268665, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "twi_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 259398, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "twi_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 274853, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "twi_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 258497, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, 
+ "twi_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 248186, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "twi_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 268860, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "twi_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261820, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "twi_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 261453, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "twi_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 230652, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "twi_Latn-khk_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 265665, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "twi_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 262755, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "twi_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 262023, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "twi_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 280260, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "twi_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 301143, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 127.07312252964427, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "acm_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 225194, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "acm_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 239960, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "acm_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 276931, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "acm_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 244631, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "acm_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 266785, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "acm_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 240268, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "acm_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 252727, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "acm_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 272305, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "acm_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 249334, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "acm_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 255506, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "acm_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 259143, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 
1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "acm_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 271112, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "acm_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 240506, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "acm_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 267004, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "acm_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 242981, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "acm_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 236028, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "acm_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 270570, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "acm_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 242171, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "acm_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261564, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "acm_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 245539, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "acm_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 242300, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "acm_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 259435, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "acm_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 262909, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "acm_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 237400, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "acm_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 266920, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "acm_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 231533, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, 
+ "acm_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 228686, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "acm_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 274324, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "acm_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 244865, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "acm_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 252202, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "acm_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249105, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "acm_Arab-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 246624, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "acm_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 274569, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "acm_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 244353, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "acm_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 254050, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "acm_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 225650, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "acm_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 242615, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "acm_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 243032, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "acm_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 259529, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "acm_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 259335, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "acm_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 239972, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "acm_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 253056, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "acm_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 262299, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "acm_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 248019, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "acm_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 253263, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "acm_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 242184, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "acm_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 253395, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "acm_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 248133, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "acm_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 243945, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "acm_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 254736, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "acm_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 256991, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "acm_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 250419, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "acm_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 248320, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "acm_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 222619, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "acm_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 235803, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "acm_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 241021, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "acm_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 273253, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "acm_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 229573, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "acm_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251400, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "acm_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 234216, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "acm_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 245746, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "acm_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 243492, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "acm_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 246223, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "acm_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 251755, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "acm_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 247847, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "acm_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 255891, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "acm_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 252545, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "acm_Arab-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 259861, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "acm_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 254600, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "acm_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 251677, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "acm_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 262896, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "acm_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 260637, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "acm_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 261724, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "acm_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 268750, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "acm_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 243356, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "acm_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 263467, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "acm_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 250355, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "acm_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 262144, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "acm_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 250944, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "acm_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 246810, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "acm_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 200705, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "acm_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 245760, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "acm_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 253701, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "acm_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 269711, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "acm_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 179538, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "acm_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 238230, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "acm_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 260838, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "acm_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 267706, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "acm_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 252168, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "acm_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 221554, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "acm_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 252948, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "acm_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 248833, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "acm_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 250501, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "acm_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 243633, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "acm_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 260100, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "acm_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255669, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "acm_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 237990, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "acm_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 278319, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "acm_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 229880, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "acm_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251397, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "acm_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 271211, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "acm_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 170516, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "acm_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 259567, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "acm_Arab-mya_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 276742, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "acm_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 256194, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "acm_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 237477, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "acm_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 240746, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "acm_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 267331, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "acm_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 259354, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "acm_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 258092, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "acm_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 244127, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "acm_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 251259, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "acm_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 261080, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "acm_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 241830, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "acm_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246855, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "acm_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 252696, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "acm_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 230001, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "acm_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 272258, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "acm_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 235735, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "acm_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 282970, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "acm_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 255770, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "acm_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 247016, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "acm_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 253364, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "acm_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 246043, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "acm_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 255231, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "acm_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 227136, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "acm_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 240830, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "acm_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 269991, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "acm_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 241929, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "acm_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 243902, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "acm_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 245711, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "acm_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 252144, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "acm_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259444, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "acm_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 240417, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "acm_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 227343, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "acm_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 252834, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "acm_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 276170, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "acm_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 251613, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "acm_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 255239, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "acm_Arab-npi_Deva": { + "num_samples": 1012, + 
"number_of_characters": 240476, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "acm_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 301857, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "acm_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 279742, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "acm_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 153592, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "acm_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 239509, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "acm_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 242158, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "acm_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 266054, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "acm_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 239678, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "acm_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 243421, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "acm_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 264682, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "acm_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 244653, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "acm_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 239682, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "acm_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 156821, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "acm_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 249936, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "acm_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 247611, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "acm_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 259716, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "acm_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 239974, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "acm_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 261118, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "acm_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 255913, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "acm_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 245283, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "acm_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 204273, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "acm_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 154268, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "acm_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 241744, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "acm_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 254830, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "acm_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 246388, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "acm_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 257503, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "acm_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 256571, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "acm_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 260745, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "acm_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 244428, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "acm_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 282584, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "acm_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 259774, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "acm_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 252190, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "acm_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 248656, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "acm_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 240001, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "acm_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 248977, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "acm_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 248651, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "acm_Arab-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 264224, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "acm_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 266297, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "acm_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 276608, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "acm_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 261935, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "acm_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 230634, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "acm_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 267382, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "acm_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 234853, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "acm_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 256530, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "acm_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 251119, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "acm_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 248412, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "acm_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 261176, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "acm_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 271751, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "acm_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 257312, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "acm_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 225362, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "acm_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 253640, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "acm_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 244373, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "acm_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 259828, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "acm_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 243472, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "acm_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 233161, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "acm_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 253835, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "acm_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246795, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "acm_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 246428, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "acm_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 215627, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "acm_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250640, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "acm_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 247730, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "acm_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 246998, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "acm_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 265235, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "acm_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 286118, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 112.22628458498023, + "max_sentence1_length": 303, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 259612, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bel_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 274378, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "bel_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 311349, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, 
+ "bel_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 279049, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bel_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 301203, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "bel_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 274686, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bel_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 287145, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bel_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 306723, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283752, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 289924, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 293561, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 305530, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bel_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 274924, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "bel_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 301422, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bel_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 277399, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bel_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 270446, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bel_Cyrl-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 304988, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bel_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 276589, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bel_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 261564, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bel_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 279957, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bel_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 276718, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 293853, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bel_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 297327, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bel_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 271818, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "bel_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 301338, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 265951, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "bel_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 263104, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bel_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 308742, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "bel_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 279283, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bel_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 286620, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283523, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bel_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 281042, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bel_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 308987, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bel_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278771, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bel_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 288468, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "bel_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 260068, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 277033, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bel_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 277450, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bel_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 293947, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + 
}, + "bel_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 293753, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bel_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 274390, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bel_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 287474, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 296717, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282437, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bel_Cyrl-afr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 287681, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bel_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 276602, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bel_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 287813, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 282551, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 278363, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bel_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 289154, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bel_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 291409, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bel_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 284837, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bel_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 282738, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 257037, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bel_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 270221, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 275439, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 307671, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bel_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 263991, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "bel_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285818, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "bel_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 268634, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "bel_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 280164, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bel_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 277910, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bel_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 280641, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bel_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 286173, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bel_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 282265, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 290309, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bel_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 286963, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bel_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 294279, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bel_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 289018, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bel_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 286095, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bel_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 297314, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bel_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 295055, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bel_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 296142, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "bel_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 303168, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bel_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 277774, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, 
+ "bel_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 297885, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bel_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 284773, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 296562, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bel_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 285362, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bel_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 281228, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bel_Cyrl-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 235123, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bel_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 280178, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bel_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 288119, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 304129, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 213956, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "bel_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 272648, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bel_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 295256, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bel_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 302124, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bel_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 286586, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bel_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 255972, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bel_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 287366, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bel_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 283251, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "bel_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 284919, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bel_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 278051, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bel_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 294518, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bel_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290087, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bel_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 272408, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bel_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 312737, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bel_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 264298, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bel_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285815, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bel_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 305629, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + 
"average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bel_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 204934, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bel_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 293985, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bel_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 311160, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bel_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 290612, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bel_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 271895, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bel_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 275164, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bel_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 301749, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bel_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 293772, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bel_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 292510, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 278545, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + 
}, + "bel_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 285677, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bel_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 295498, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bel_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 276248, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bel_Cyrl-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281273, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bel_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 287114, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ars_Arab": { + "num_samples": 1012, + 
"number_of_characters": 264419, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 306676, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bel_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 270153, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 317388, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "bel_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 290188, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bel_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 281434, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bel_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 287782, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 280461, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 289649, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 261554, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bel_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 275248, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bel_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 304409, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 276347, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "bel_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 278320, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bel_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 280129, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "bel_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 286562, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293862, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "bel_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 274835, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bel_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 261761, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "bel_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 287252, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bel_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 310588, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bel_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 286031, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bel_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 289657, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bel_Cyrl-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 274894, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bel_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 336275, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 314160, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + 
"max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bel_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 188010, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "bel_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 273927, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 276576, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bel_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 300472, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 274096, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, 
+ "bel_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 277839, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bel_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 299100, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bel_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 279071, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 274100, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bel_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 191239, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ast_Latn": { + "num_samples": 1012, + 
"number_of_characters": 284354, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bel_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 282029, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bel_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 294134, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 274392, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 295536, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "bel_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 290331, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bel_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 279701, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 238691, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bel_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 188686, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bel_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 276162, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bel_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 289248, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bel_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 280806, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bel_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 291921, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bel_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 290989, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bel_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 295163, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bel_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 278846, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 317002, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "bel_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 294192, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 286608, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "bel_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 283074, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bel_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 274419, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283395, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bel_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 283069, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bel_Cyrl-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 298642, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bel_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 300715, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bel_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 311026, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bel_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 296353, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "bel_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 265052, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bel_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 301800, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bel_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 269271, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bel_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 290948, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + 
}, + "bel_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 285537, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bel_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 282830, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bel_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 295594, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 306169, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bel_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 291730, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bel_Cyrl-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 259780, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bel_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 288058, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bel_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 278791, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bel_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 294246, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bel_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 277890, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "bel_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 267579, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 288253, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bel_Cyrl-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281213, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "bel_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 280846, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bel_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 250045, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bel_Cyrl-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285058, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bel_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 282148, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bel_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 281416, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bel_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 299653, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bel_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 320536, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.23616600790513, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "eng_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 243587, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "eng_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 258353, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "eng_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 295324, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "eng_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 263024, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "eng_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 285178, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "eng_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 258661, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "eng_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 271120, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "eng_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 290698, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "eng_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 267727, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "eng_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 273899, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "eng_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 277536, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + 
"unique_sentence2": 1012 + }, + "eng_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 289505, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "eng_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 258899, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "eng_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 285397, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "eng_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 261374, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "eng_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 254421, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "eng_Latn-spa_Latn": { + 
"num_samples": 1012, + "number_of_characters": 288963, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "eng_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 260564, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "eng_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 245539, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "eng_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279957, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "eng_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 260693, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "eng_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 277828, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "eng_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 281302, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "eng_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 255793, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "eng_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 285313, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "eng_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 249926, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "eng_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 247079, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "eng_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 292717, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "eng_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 263258, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "eng_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 270595, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "eng_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267498, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "eng_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 265017, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "eng_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 292962, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "eng_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262746, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "eng_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 272443, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "eng_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 244043, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "eng_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 261008, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "eng_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 261425, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "eng_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 277922, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "eng_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 277728, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "eng_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 258365, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "eng_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 271449, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "eng_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 280692, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "eng_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266412, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "eng_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 271656, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "eng_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 260577, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "eng_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 271788, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "eng_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 266526, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "eng_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 262338, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "eng_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 273129, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "eng_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 275384, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "eng_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 268812, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "eng_Latn-umb_Latn": { + "num_samples": 
1012, + "number_of_characters": 266713, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "eng_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 241012, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "eng_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 254196, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "eng_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 259414, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "eng_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 291646, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "eng_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 247966, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "eng_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269793, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "eng_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 252609, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "eng_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 264139, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "eng_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 261885, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "eng_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 264616, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "eng_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 270148, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 266240, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "eng_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 274284, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "eng_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 270938, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "eng_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 278254, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 
1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "eng_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 272993, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "eng_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 270070, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "eng_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 281289, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "eng_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 279030, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "eng_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 280117, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "eng_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 287143, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "eng_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 261749, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "eng_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 281860, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "eng_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 268748, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "eng_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 280537, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + 
"unique_sentence2": 1012 + }, + "eng_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 269337, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "eng_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 265203, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "eng_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 219098, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "eng_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 264153, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "eng_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 272094, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "eng_Latn-ita_Latn": { + "num_samples": 
1012, + "number_of_characters": 288104, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "eng_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 197931, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "eng_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 256623, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "eng_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 279231, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "eng_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 286099, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "eng_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 270561, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "eng_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 239947, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "eng_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 271341, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "eng_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 267226, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "eng_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 268894, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "eng_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 262026, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 278493, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "eng_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274062, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 256383, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "eng_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 296712, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "eng_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 248273, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 
1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "eng_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269790, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "eng_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 289604, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "eng_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 188909, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "eng_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 277960, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "eng_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 295135, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 
161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "eng_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 274587, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "eng_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 255870, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "eng_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 259139, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "eng_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 285724, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "eng_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 277747, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + 
"unique_sentence2": 1012 + }, + "eng_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 276485, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "eng_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 262520, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "eng_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 269652, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "eng_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 279473, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "eng_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 260223, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "eng_Latn-tat_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 265248, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "eng_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 271089, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "eng_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 248394, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "eng_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 290651, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "eng_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 254128, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "eng_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 301363, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "eng_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 274163, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "eng_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 265409, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "eng_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 271757, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "eng_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 264436, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "eng_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 273624, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "eng_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 245529, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "eng_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 259223, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "eng_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 288384, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "eng_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 260322, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "eng_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 262295, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "eng_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 264104, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "eng_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 270537, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "eng_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277837, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "eng_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 258810, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "eng_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 245736, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "eng_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 271227, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "eng_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 294563, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "eng_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 270006, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "eng_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 273632, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "eng_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 258869, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "eng_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 320250, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "eng_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 298135, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "eng_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 171985, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "eng_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 257902, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "eng_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 260551, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + 
"unique_sentence2": 1012 + }, + "eng_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 284447, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "eng_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 258071, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "eng_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 261814, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "eng_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 283075, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "eng_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 263046, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "eng_Latn-tha_Thai": { + "num_samples": 
1012, + "number_of_characters": 258075, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "eng_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 175214, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "eng_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 268329, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "eng_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 266004, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "eng_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 278109, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "eng_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 258367, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "eng_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 279511, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "eng_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 274306, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "eng_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 263676, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "eng_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 222666, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "eng_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 172661, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "eng_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 260137, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "eng_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 273223, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "eng_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 264781, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "eng_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 275896, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "eng_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 274964, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 
1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "eng_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 279138, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "eng_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 262821, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "eng_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 300977, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "eng_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 278167, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "eng_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 270583, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "eng_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 267049, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "eng_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 258394, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "eng_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267370, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "eng_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 267044, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "eng_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 282617, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + 
"unique_sentence2": 1012 + }, + "eng_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 284690, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "eng_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 295001, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "eng_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 280328, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "eng_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 249027, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "eng_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 285775, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "eng_Latn-hat_Latn": { + 
"num_samples": 1012, + "number_of_characters": 253246, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "eng_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 274923, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "eng_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 269512, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "eng_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 266805, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "eng_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 279569, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "eng_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 290144, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "eng_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 275705, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "eng_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 243755, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "eng_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 272033, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "eng_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 262766, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "eng_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 278221, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "eng_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 261865, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "eng_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 251554, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "eng_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 272228, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "eng_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265188, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "eng_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 264821, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 
368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "eng_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 234020, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "eng_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269033, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "eng_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 266123, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "eng_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 265391, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "eng_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 283628, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "eng_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 304511, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "hrv_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 240348, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "hrv_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 255114, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "hrv_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 292085, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hrv_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 259785, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + 
"max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "hrv_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 281939, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "hrv_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 255422, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "hrv_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 267881, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hrv_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 287459, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "hrv_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 264488, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + 
"hrv_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 270660, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hrv_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 274297, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hrv_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 286266, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "hrv_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 255660, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "hrv_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 282158, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "hrv_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 
258135, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hrv_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 251182, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hrv_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 285724, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hrv_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 257325, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "hrv_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 242300, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "hrv_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276718, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hrv_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 260693, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hrv_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 274589, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hrv_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 278063, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hrv_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 252554, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "hrv_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 282074, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + 
"max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hrv_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 246687, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "hrv_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 243840, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "hrv_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 289478, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "hrv_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 260019, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hrv_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 267356, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "hrv_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264259, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hrv_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 261778, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hrv_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 289723, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "hrv_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259507, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hrv_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 269204, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "hrv_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 240804, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "hrv_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 257769, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hrv_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 258186, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "hrv_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 274683, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hrv_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 274489, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + 
"unique_sentence2": 1012 + }, + "hrv_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 255126, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hrv_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 268210, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hrv_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 277453, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hrv_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263173, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hrv_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 268417, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hrv_Latn-bho_Deva": { + 
"num_samples": 1012, + "number_of_characters": 257338, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hrv_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 268549, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hrv_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 263287, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hrv_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 259099, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hrv_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 269890, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hrv_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 272145, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hrv_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 265573, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hrv_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 263474, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hrv_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 237773, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "hrv_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 250957, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "hrv_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 256175, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hrv_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 288407, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "hrv_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 244727, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "hrv_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266554, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "hrv_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 249370, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "hrv_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 260900, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + 
"max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hrv_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 258646, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hrv_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 261377, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hrv_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 266909, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hrv_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 263001, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hrv_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 271045, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hrv_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 267699, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hrv_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 275015, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "hrv_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 269754, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hrv_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 266831, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "hrv_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 278050, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hrv_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 275791, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hrv_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 276878, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "hrv_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 283904, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "hrv_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 258510, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hrv_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 278621, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + 
"unique_sentence2": 1012 + }, + "hrv_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 265509, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hrv_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 277298, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hrv_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 266098, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "hrv_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 261964, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hrv_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 215859, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "hrv_Latn-bos_Latn": { + 
"num_samples": 1012, + "number_of_characters": 260914, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hrv_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 268855, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hrv_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 284865, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "hrv_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 194692, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "hrv_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 253384, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "hrv_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 275992, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "hrv_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 282860, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "hrv_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 267322, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "hrv_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 236708, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "hrv_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 268102, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hrv_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 263987, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "hrv_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 265655, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hrv_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 258787, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hrv_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 275254, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hrv_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270823, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hrv_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 253144, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + 
"max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hrv_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 293473, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hrv_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 245034, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hrv_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266551, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hrv_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 286365, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "hrv_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 185670, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "hrv_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 274721, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hrv_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 291896, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hrv_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 271348, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "hrv_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 252631, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hrv_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 255900, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hrv_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 282485, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "hrv_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 274508, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hrv_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 273246, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hrv_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 259281, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "hrv_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 266413, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + 
"unique_sentence2": 1012 + }, + "hrv_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 276234, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hrv_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 256984, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "hrv_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262009, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hrv_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 267850, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "hrv_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 245155, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hrv_Latn-ceb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 287412, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hrv_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 250889, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "hrv_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 298124, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "hrv_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 270924, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hrv_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 262170, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hrv_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 268518, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hrv_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 261197, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hrv_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 270385, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hrv_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 242290, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "hrv_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 255984, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hrv_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 285145, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "hrv_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 257083, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "hrv_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 259056, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "hrv_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 260865, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "hrv_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 267298, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hrv_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274598, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + 
"max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "hrv_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 255571, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hrv_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 242497, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "hrv_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 267988, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hrv_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 291324, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "hrv_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 266767, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hrv_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 270393, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hrv_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 255630, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hrv_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 317011, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "hrv_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 294896, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hrv_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 168746, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 
39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "hrv_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 254663, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hrv_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 257312, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hrv_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 281208, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "hrv_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 254832, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "hrv_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 258575, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + 
"unique_sentence2": 1012 + }, + "hrv_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 279836, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "hrv_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 259807, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hrv_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 254836, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hrv_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 171975, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "hrv_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 265090, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hrv_Latn-crh_Latn": { + 
"num_samples": 1012, + "number_of_characters": 262765, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hrv_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 274870, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hrv_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 255128, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "hrv_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 276272, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "hrv_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 271067, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "hrv_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 260437, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hrv_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 219427, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "hrv_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 169422, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "hrv_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 256898, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hrv_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 269984, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hrv_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 261542, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "hrv_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 272657, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hrv_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 271725, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "hrv_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 275899, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hrv_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 259582, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hrv_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 297738, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + 
"max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "hrv_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 274928, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hrv_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 267344, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "hrv_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 263810, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hrv_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 255155, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hrv_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264131, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hrv_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 263805, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "hrv_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 279378, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hrv_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 281451, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "hrv_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 291762, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hrv_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 277089, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "hrv_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 245788, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "hrv_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 282536, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hrv_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 250007, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "hrv_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 271684, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "hrv_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 266273, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "hrv_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 263566, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hrv_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 276330, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "hrv_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 286905, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hrv_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 272466, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "hrv_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 240516, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "hrv_Latn-hau_Latn": { + 
"num_samples": 1012, + "number_of_characters": 268794, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "hrv_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 259527, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hrv_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 274982, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hrv_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 258626, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "hrv_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 248315, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "hrv_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 268989, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hrv_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261949, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "hrv_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 261582, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "hrv_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 230781, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "hrv_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265794, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hrv_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 262884, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hrv_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 262152, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hrv_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 280389, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "hrv_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 301272, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 127.2005928853755, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kin_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257483, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kin_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272249, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + 
"max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kin_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309220, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kin_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 276920, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kin_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 299074, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kin_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 272557, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kin_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 285016, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kin_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 304594, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kin_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 281623, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kin_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 287795, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kin_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 291432, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kin_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303401, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + 
"max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kin_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 272795, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kin_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299293, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kin_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275270, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kin_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268317, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kin_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 302859, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + 
"kin_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274460, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kin_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 259435, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kin_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293853, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kin_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 277828, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kin_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 274589, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kin_Latn-mal_Mlym": { + "num_samples": 1012, + 
"number_of_characters": 295198, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kin_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 269689, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kin_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299209, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kin_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 263822, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kin_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 260975, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kin_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 306613, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kin_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 277154, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kin_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 284491, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kin_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281394, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kin_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 278913, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kin_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 306858, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kin_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276642, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kin_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286339, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kin_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 257939, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kin_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 274904, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kin_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275321, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kin_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 291818, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kin_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 291624, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kin_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 272261, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kin_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285345, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kin_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 294588, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kin_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280308, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kin_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 285552, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kin_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274473, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kin_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 285684, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kin_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280422, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + 
"max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kin_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276234, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kin_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 287025, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kin_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289280, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kin_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 282708, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kin_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 280609, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + 
"kin_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 254908, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kin_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 268092, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kin_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273310, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kin_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 305542, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kin_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 261862, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kin_Latn-mkd_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 283689, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kin_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266505, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kin_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 278035, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kin_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 275781, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kin_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278512, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kin_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 284044, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kin_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 280136, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kin_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 288180, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kin_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 284834, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kin_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 292150, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kin_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 286889, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kin_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 283966, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kin_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295185, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kin_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 292926, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kin_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 294013, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kin_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 301039, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kin_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 275645, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kin_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 295756, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kin_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 282644, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kin_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 294433, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kin_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 283233, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kin_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 279099, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kin_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 232994, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kin_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 278049, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kin_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 285990, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kin_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 302000, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + 
"max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kin_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 211827, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kin_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270519, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kin_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293127, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kin_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 299995, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kin_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284457, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + 
"kin_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 253843, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kin_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285237, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kin_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281122, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kin_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 282790, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kin_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 275922, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kin_Latn-mri_Latn": { + "num_samples": 1012, + 
"number_of_characters": 292389, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kin_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287958, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kin_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270279, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kin_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 310608, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kin_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262169, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kin_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283686, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kin_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 303500, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kin_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 202805, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kin_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 291856, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kin_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 309031, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kin_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288483, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kin_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 269766, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kin_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 273035, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kin_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 299620, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kin_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 291643, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kin_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290381, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + 
"unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kin_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276416, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kin_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283548, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kin_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293369, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kin_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 274119, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kin_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279144, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kin_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 284985, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kin_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 262290, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kin_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304547, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kin_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 268024, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kin_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315259, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + 
"max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kin_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 288059, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kin_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279305, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kin_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 285653, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kin_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278332, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kin_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287520, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + 
"kin_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 259425, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kin_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273119, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kin_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302280, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kin_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 274218, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kin_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276191, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kin_Latn-nob_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278000, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kin_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284433, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kin_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291733, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kin_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 272706, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kin_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 259632, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kin_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285123, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kin_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308459, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kin_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 283902, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kin_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 287528, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kin_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 272765, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kin_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334146, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kin_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 312031, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kin_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 185881, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kin_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 271798, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kin_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274447, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kin_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298343, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kin_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 271967, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kin_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 275710, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kin_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 296971, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kin_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 276942, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kin_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 271971, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kin_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 189110, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kin_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282225, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kin_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 279900, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kin_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 292005, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kin_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272263, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + 
"max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kin_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293407, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kin_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288202, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kin_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277572, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kin_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236562, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kin_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186557, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + 
"kin_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 274033, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kin_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 287119, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kin_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 278677, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kin_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 289792, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kin_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 288860, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kin_Latn-nya_Latn": { + "num_samples": 1012, + 
"number_of_characters": 293034, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kin_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 276717, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kin_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 314873, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kin_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 292063, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kin_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284479, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kin_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 280945, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kin_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272290, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kin_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281266, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kin_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 280940, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kin_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 296513, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kin_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 298586, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kin_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 308897, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kin_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294224, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kin_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 262923, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kin_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 299671, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kin_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267142, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kin_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 288819, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kin_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283408, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kin_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 280701, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kin_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 293465, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kin_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 304040, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kin_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 289601, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kin_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 257651, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kin_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 285929, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kin_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 276662, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kin_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 292117, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + 
"max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kin_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 275761, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kin_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265450, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kin_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286124, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kin_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279084, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kin_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 278717, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + 
"kin_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 247916, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kin_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282929, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kin_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 280019, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kin_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 279287, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kin_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297524, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kin_Latn-tum_Latn": { + "num_samples": 1012, + 
"number_of_characters": 318407, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.1324110671937, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "mal_Mlym-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 260957, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "mal_Mlym-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 275723, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "mal_Mlym-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 312694, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mal_Mlym-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 280394, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "mal_Mlym-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 302548, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "mal_Mlym-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 276031, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "mal_Mlym-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 288490, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mal_Mlym-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 308068, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "mal_Mlym-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 285097, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "mal_Mlym-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 291269, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mal_Mlym-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 294906, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mal_Mlym-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 306875, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "mal_Mlym-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 276269, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "mal_Mlym-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 302767, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "mal_Mlym-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 278744, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mal_Mlym-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 271791, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mal_Mlym-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 306333, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mal_Mlym-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 277934, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "mal_Mlym-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 262909, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "mal_Mlym-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297327, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mal_Mlym-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 281302, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mal_Mlym-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 278063, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mal_Mlym-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 295198, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mal_Mlym-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 273163, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "mal_Mlym-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 302683, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + 
"max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mal_Mlym-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 267296, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "mal_Mlym-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 264449, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "mal_Mlym-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 310087, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "mal_Mlym-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 280628, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mal_Mlym-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 287965, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + 
}, + "mal_Mlym-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284868, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mal_Mlym-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 282387, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mal_Mlym-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 310332, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "mal_Mlym-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280116, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mal_Mlym-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 289813, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "mal_Mlym-aeb_Arab": { + "num_samples": 1012, + 
"number_of_characters": 261413, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "mal_Mlym-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 278378, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mal_Mlym-est_Latn": { + "num_samples": 1012, + "number_of_characters": 278795, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "mal_Mlym-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 295292, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mal_Mlym-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 295098, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "mal_Mlym-min_Arab": { + "num_samples": 1012, + "number_of_characters": 275735, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mal_Mlym-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 288819, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mal_Mlym-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 298062, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mal_Mlym-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283782, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mal_Mlym-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 289026, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mal_Mlym-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 277947, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mal_Mlym-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 289158, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mal_Mlym-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 283896, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mal_Mlym-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 279708, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mal_Mlym-min_Latn": { + "num_samples": 1012, + "number_of_characters": 290499, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mal_Mlym-por_Latn": { + "num_samples": 1012, + "number_of_characters": 292754, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mal_Mlym-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 286182, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mal_Mlym-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 284083, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mal_Mlym-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 258382, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "mal_Mlym-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 271566, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "mal_Mlym-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 276784, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mal_Mlym-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 309016, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "mal_Mlym-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 265336, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "mal_Mlym-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287163, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "mal_Mlym-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 269979, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "mal_Mlym-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 281509, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + 
"max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mal_Mlym-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 279255, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mal_Mlym-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 281986, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mal_Mlym-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 287518, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mal_Mlym-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 283610, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mal_Mlym-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 291654, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, 
+ "mal_Mlym-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 288308, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mal_Mlym-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 295624, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "mal_Mlym-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 290363, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mal_Mlym-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 287440, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "mal_Mlym-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 298659, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mal_Mlym-als_Latn": { + "num_samples": 1012, + 
"number_of_characters": 296400, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mal_Mlym-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 297487, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "mal_Mlym-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 304513, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "mal_Mlym-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 279119, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mal_Mlym-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 299230, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mal_Mlym-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 286118, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mal_Mlym-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 297907, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mal_Mlym-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 286707, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "mal_Mlym-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 282573, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mal_Mlym-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 236468, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "mal_Mlym-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 281523, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mal_Mlym-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 289464, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mal_Mlym-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 305474, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "mal_Mlym-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 215301, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "mal_Mlym-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 273993, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "mal_Mlym-run_Latn": { + "num_samples": 1012, + "number_of_characters": 296601, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "mal_Mlym-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 303469, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "mal_Mlym-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 287931, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "mal_Mlym-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 257317, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "mal_Mlym-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 288711, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mal_Mlym-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 284596, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "mal_Mlym-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 286264, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mal_Mlym-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 279396, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mal_Mlym-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 295863, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mal_Mlym-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291432, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mal_Mlym-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 273753, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mal_Mlym-war_Latn": { + "num_samples": 1012, + "number_of_characters": 314082, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mal_Mlym-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 265643, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mal_Mlym-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287160, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mal_Mlym-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 306974, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "mal_Mlym-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 206279, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + 
"mal_Mlym-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 295330, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mal_Mlym-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 312505, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mal_Mlym-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 291957, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "mal_Mlym-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 273240, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mal_Mlym-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 276509, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mal_Mlym-arb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 303094, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "mal_Mlym-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 295117, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mal_Mlym-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 293855, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mal_Mlym-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 279890, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "mal_Mlym-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 287022, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "mal_Mlym-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 296843, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mal_Mlym-san_Deva": { + "num_samples": 1012, + "number_of_characters": 277593, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "mal_Mlym-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282618, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mal_Mlym-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 288459, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "mal_Mlym-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 265764, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mal_Mlym-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 308021, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mal_Mlym-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 271498, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "mal_Mlym-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 318733, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "mal_Mlym-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 291533, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mal_Mlym-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 282779, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mal_Mlym-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 289127, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mal_Mlym-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 281806, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mal_Mlym-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 290994, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mal_Mlym-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 262899, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "mal_Mlym-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 276593, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mal_Mlym-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 305754, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "mal_Mlym-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 277692, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "mal_Mlym-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 279665, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "mal_Mlym-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 281474, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "mal_Mlym-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 287907, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mal_Mlym-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295207, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + 
"max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "mal_Mlym-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 276180, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mal_Mlym-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 263106, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "mal_Mlym-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 288597, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mal_Mlym-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 311933, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "mal_Mlym-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 287376, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + 
"mal_Mlym-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 291002, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mal_Mlym-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 276239, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mal_Mlym-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 337620, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "mal_Mlym-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 315505, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mal_Mlym-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 189355, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "mal_Mlym-asm_Beng": { + "num_samples": 1012, + 
"number_of_characters": 275272, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mal_Mlym-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 277921, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mal_Mlym-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 301817, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "mal_Mlym-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 275441, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "mal_Mlym-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 279184, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mal_Mlym-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 300445, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "mal_Mlym-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 280416, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mal_Mlym-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 275445, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mal_Mlym-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 192584, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "mal_Mlym-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 285699, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mal_Mlym-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 283374, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mal_Mlym-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 295479, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mal_Mlym-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 275737, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "mal_Mlym-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 296881, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "mal_Mlym-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 291676, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "mal_Mlym-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 281046, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mal_Mlym-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 240036, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "mal_Mlym-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 190031, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "mal_Mlym-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 277507, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mal_Mlym-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 290593, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mal_Mlym-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 282151, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "mal_Mlym-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 293266, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mal_Mlym-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 292334, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "mal_Mlym-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 296508, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mal_Mlym-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 280191, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mal_Mlym-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 318347, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + 
"max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "mal_Mlym-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 295537, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mal_Mlym-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 287953, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "mal_Mlym-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 284419, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mal_Mlym-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 275764, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mal_Mlym-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284740, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, 
+ "mal_Mlym-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 284414, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "mal_Mlym-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 299987, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mal_Mlym-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 302060, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "mal_Mlym-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 312371, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mal_Mlym-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 297698, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "mal_Mlym-azb_Arab": { + "num_samples": 1012, + 
"number_of_characters": 266397, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "mal_Mlym-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 303145, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mal_Mlym-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 270616, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "mal_Mlym-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 292293, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "mal_Mlym-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 286882, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mal_Mlym-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 284175, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mal_Mlym-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 296939, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "mal_Mlym-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 307514, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mal_Mlym-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 293075, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "mal_Mlym-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 261125, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "mal_Mlym-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 289403, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "mal_Mlym-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 280136, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mal_Mlym-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 295591, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mal_Mlym-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 279235, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "mal_Mlym-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 268924, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "mal_Mlym-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 289598, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mal_Mlym-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282558, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "mal_Mlym-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 282191, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "mal_Mlym-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 251390, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "mal_Mlym-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286403, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mal_Mlym-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 283493, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mal_Mlym-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 282761, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mal_Mlym-som_Latn": { + "num_samples": 1012, + "number_of_characters": 300998, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "mal_Mlym-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 321881, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "pes_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 235448, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "pes_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 250214, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + 
"max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "pes_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 287185, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pes_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 254885, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "pes_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 277039, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "pes_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 250522, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "pes_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 262981, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + 
"pes_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 282559, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "pes_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 259588, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "pes_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 265760, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pes_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 269397, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pes_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 281366, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "pes_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 250760, 
+ "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "pes_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 277258, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "pes_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 253235, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pes_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 246282, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pes_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 280824, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pes_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 252425, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "pes_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 237400, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "pes_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271818, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pes_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 255793, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pes_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 252554, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pes_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 269689, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + 
"max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pes_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 273163, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "pes_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 277174, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pes_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 241787, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "pes_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 238940, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "pes_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 284578, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + 
"min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "pes_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 255119, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pes_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 262456, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "pes_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259359, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pes_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 256878, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pes_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 284823, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 
159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "pes_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254607, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pes_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 264304, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "pes_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 235904, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "pes_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 252869, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pes_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 253286, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + 
"unique_sentence2": 1012 + }, + "pes_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 269783, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pes_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 269589, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "pes_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 250226, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pes_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 263310, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pes_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 272553, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pes_Arab-ukr_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 258273, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pes_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 263517, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pes_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 252438, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pes_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 263649, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "pes_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 258387, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pes_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 254199, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pes_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 264990, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pes_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 267245, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pes_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 260673, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pes_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 258574, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pes_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 232873, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "pes_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 246057, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "pes_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 251275, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pes_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 283507, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "pes_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 239827, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "pes_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261654, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + 
"max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "pes_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 244470, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "pes_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 256000, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pes_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 253746, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pes_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 256477, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pes_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 262009, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + 
"min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pes_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 258101, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pes_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 266145, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pes_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 262799, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pes_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 270115, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "pes_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 264854, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pes_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 261931, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "pes_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 273150, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pes_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 270891, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "pes_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 271978, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "pes_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 279004, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + 
"unique_sentence2": 1012 + }, + "pes_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 253610, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pes_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 273721, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pes_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 260609, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pes_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 272398, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pes_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 261198, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "pes_Arab-vec_Latn": { + 
"num_samples": 1012, + "number_of_characters": 257064, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pes_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 210959, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "pes_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 256014, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "pes_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 263955, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pes_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 279965, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "pes_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 189792, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "pes_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 248484, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "pes_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 271092, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "pes_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 277960, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "pes_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 262422, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "pes_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 231808, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "pes_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 263202, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pes_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 259087, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "pes_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 260755, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pes_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 253887, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pes_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 270354, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + 
"max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pes_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265923, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pes_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 248244, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pes_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 288573, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pes_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 240134, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pes_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261651, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pes_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 281465, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "pes_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 180770, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "pes_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 269821, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pes_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 286996, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pes_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 266448, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "pes_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 247731, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pes_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 251000, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pes_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 277585, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "pes_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 269608, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pes_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 268346, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + 
"unique_sentence2": 1012 + }, + "pes_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 254381, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "pes_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 261513, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "pes_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 271334, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pes_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 252084, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "pes_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257109, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pes_Arab-xho_Latn": { + 
"num_samples": 1012, + "number_of_characters": 262950, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "pes_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 240255, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pes_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 282512, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pes_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 245989, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "pes_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 293224, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "pes_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 266024, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pes_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 257270, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pes_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 263618, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pes_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 256297, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pes_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 265485, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pes_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 237390, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "pes_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 251084, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pes_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 280245, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "pes_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 252183, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "pes_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 254156, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "pes_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 255965, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + 
"max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "pes_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 262398, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pes_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269698, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "pes_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 250671, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pes_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 237597, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "pes_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 263088, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pes_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 286424, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "pes_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 261867, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pes_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 265493, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pes_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 250730, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pes_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 312111, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 
186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "pes_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 289996, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pes_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 163846, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "pes_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 249763, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pes_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 252412, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pes_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 276308, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + 
"unique_sentence2": 1012 + }, + "pes_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 249932, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "pes_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 253675, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pes_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 274936, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "pes_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 254907, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pes_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 249936, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pes_Arab-zho_Hans": { + 
"num_samples": 1012, + "number_of_characters": 167075, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "pes_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 260190, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pes_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 257865, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pes_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 269970, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pes_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 250228, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "pes_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 271372, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "pes_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 266167, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "pes_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 255537, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pes_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 214527, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "pes_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 164522, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "pes_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 251998, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pes_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 265084, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pes_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 256642, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "pes_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 267757, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pes_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 266825, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "pes_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 270999, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + 
"max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "pes_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 254682, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pes_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 292838, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "pes_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 270028, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pes_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 262444, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "pes_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 258910, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + 
"min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pes_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 250255, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pes_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259231, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pes_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 258905, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "pes_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 274478, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pes_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 276551, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 
150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "pes_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 286862, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pes_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 272189, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "pes_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 240888, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "pes_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 277636, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pes_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 245107, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + 
"unique_sentence2": 1010 + }, + "pes_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 266784, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "pes_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 261373, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pes_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 258666, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pes_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 271430, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "pes_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 282005, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pes_Arab-azj_Latn": { + 
"num_samples": 1012, + "number_of_characters": 267566, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "pes_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 235616, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "pes_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 263894, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "pes_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 254627, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pes_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 270082, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pes_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 253726, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "pes_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 243415, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "pes_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 264089, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pes_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257049, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "pes_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 256682, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "pes_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 225881, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "pes_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260894, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pes_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 257984, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pes_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 257252, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pes_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 275489, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + "max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "pes_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 296372, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 122.3586956521739, + 
"max_sentence1_length": 324, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "srd_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 264968, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "srd_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 279734, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "srd_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 316705, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "srd_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 284405, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "srd_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 306559, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + 
"min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "srd_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 280042, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "srd_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 292501, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "srd_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 312079, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "srd_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 289108, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "srd_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 295280, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "srd_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 298917, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "srd_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 310886, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "srd_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 280280, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "srd_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 306778, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "srd_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 282755, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "srd_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 275802, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "srd_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 310344, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "srd_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 281945, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "srd_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 266920, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "srd_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301338, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "srd_Latn-eng_Latn": { + 
"num_samples": 1012, + "number_of_characters": 285313, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "srd_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 282074, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "srd_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 299209, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "srd_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 302683, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "srd_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 277174, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "srd_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 271307, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "srd_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 268460, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "srd_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 314098, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "srd_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 284639, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "srd_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 291976, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "srd_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288879, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "srd_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 286398, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "srd_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 314343, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "srd_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284127, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "srd_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 293824, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "srd_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 265424, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + 
"max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "srd_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 282389, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "srd_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 282806, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "srd_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 299303, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "srd_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 299109, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "srd_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 279746, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "srd_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 292830, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "srd_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 302073, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "srd_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287793, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "srd_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 293037, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "srd_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 281958, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "srd_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 293169, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "srd_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 287907, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "srd_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 283719, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "srd_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 294510, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "srd_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 296765, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + 
"unique_sentence2": 1012 + }, + "srd_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 290193, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "srd_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 288094, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "srd_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 262393, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "srd_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 275577, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "srd_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 280795, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "srd_Latn-ilo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 313027, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "srd_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 269347, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "srd_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291174, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "srd_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 273990, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "srd_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 285520, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "srd_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 283266, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "srd_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 285997, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "srd_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 291529, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "srd_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 287621, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "srd_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 295665, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "srd_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 292319, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "srd_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 299635, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "srd_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 294374, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "srd_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 291451, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "srd_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 302670, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "srd_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 300411, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + 
"max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "srd_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 301498, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "srd_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 308524, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "srd_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 283130, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "srd_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 303241, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "srd_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 290129, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "srd_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 301918, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "srd_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 290718, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "srd_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 286584, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "srd_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 240479, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "srd_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 285534, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "srd_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 293475, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "srd_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 309485, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "srd_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 219312, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "srd_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 278004, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "srd_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 300612, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + 
"unique_sentence2": 1011 + }, + "srd_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 307480, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "srd_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 291942, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "srd_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 261328, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "srd_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 292722, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "srd_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 288607, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "srd_Latn-jav_Latn": { + 
"num_samples": 1012, + "number_of_characters": 290275, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "srd_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 283407, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "srd_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 299874, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "srd_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295443, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "srd_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 277764, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "srd_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 318093, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "srd_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 269654, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "srd_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291171, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "srd_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 310985, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "srd_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 210290, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "srd_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 299341, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "srd_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 316516, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "srd_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 295968, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "srd_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 277251, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "srd_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 280520, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "srd_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 307105, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + 
"max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "srd_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 299128, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "srd_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 297866, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "srd_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 283901, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "srd_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 291033, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "srd_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 300854, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "srd_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 281604, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "srd_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286629, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "srd_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 292470, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "srd_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 269775, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "srd_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 312032, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 
156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "srd_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 275509, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "srd_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 322744, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "srd_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 295544, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "srd_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 286790, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "srd_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 293138, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + 
"unique_sentence2": 1012 + }, + "srd_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 285817, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "srd_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 295005, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "srd_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 266910, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "srd_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 280604, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "srd_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 309765, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "srd_Latn-kam_Latn": { + 
"num_samples": 1012, + "number_of_characters": 281703, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "srd_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 283676, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "srd_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 285485, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "srd_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 291918, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "srd_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299218, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "srd_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 280191, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "srd_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 267117, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "srd_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 292608, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "srd_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 315944, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "srd_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 291387, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "srd_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 295013, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "srd_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 280250, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "srd_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 341631, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "srd_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 319516, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "srd_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 193366, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "srd_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 279283, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + 
"max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "srd_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 281932, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "srd_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 305828, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "srd_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 279452, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "srd_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 283195, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "srd_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 304456, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "srd_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 284427, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "srd_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 279456, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "srd_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 196595, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "srd_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 289710, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "srd_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 287385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 
132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "srd_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 299490, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "srd_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 279748, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "srd_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 300892, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "srd_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 295687, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "srd_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 285057, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + 
"unique_sentence2": 1012 + }, + "srd_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 244047, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "srd_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 194042, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "srd_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 281518, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "srd_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 294604, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "srd_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 286162, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "srd_Latn-kat_Geor": { + 
"num_samples": 1012, + "number_of_characters": 297277, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "srd_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 296345, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "srd_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 300519, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "srd_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 284202, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "srd_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 322358, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "srd_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 299548, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "srd_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 291964, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "srd_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 288430, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "srd_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 279775, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "srd_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288751, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "srd_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 288425, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "srd_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 303998, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "srd_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 306071, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "srd_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 316382, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "srd_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 301709, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "srd_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 270408, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + 
"max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "srd_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 307156, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "srd_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 274627, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "srd_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 296304, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "srd_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 290893, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "srd_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 288186, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "srd_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 300950, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "srd_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 311525, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "srd_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 297086, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "srd_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 265136, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "srd_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 293414, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "srd_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 284147, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "srd_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 299602, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "srd_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 283246, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "srd_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 272935, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "srd_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 293609, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + 
"unique_sentence2": 1012 + }, + "srd_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286569, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "srd_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 286202, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "srd_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 255401, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "srd_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290414, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "srd_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 287504, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "srd_Latn-pan_Guru": { + 
"num_samples": 1012, + "number_of_characters": 286772, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "srd_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 305009, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "srd_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 325892, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 151.52865612648222, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 229581, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 244347, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tzm_Tfng-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 281318, + 
"unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tzm_Tfng-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 249018, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tzm_Tfng-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 271172, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tzm_Tfng-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 244655, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tzm_Tfng-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 257114, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tzm_Tfng-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 276692, + "unique_pairs": 1012, + "min_sentence1_length": 31, + 
"average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tzm_Tfng-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 253721, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 259893, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 263530, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 275499, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tzm_Tfng-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 244893, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 
330, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tzm_Tfng-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 271391, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tzm_Tfng-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 247368, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tzm_Tfng-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 240415, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tzm_Tfng-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 274957, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tzm_Tfng-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 246558, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 
28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tzm_Tfng-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 231533, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265951, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tzm_Tfng-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 249926, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tzm_Tfng-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 246687, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 263822, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tzm_Tfng-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 267296, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tzm_Tfng-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 241787, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tzm_Tfng-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 271307, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tzm_Tfng-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 233073, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 278711, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, 
+ "tzm_Tfng-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 249252, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tzm_Tfng-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 256589, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253492, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tzm_Tfng-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 251011, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tzm_Tfng-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 278956, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tzm_Tfng-srp_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 248740, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tzm_Tfng-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 258437, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tzm_Tfng-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 230037, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 247002, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tzm_Tfng-est_Latn": { + "num_samples": 1012, + "number_of_characters": 247419, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tzm_Tfng-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 263916, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 263722, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tzm_Tfng-min_Arab": { + "num_samples": 1012, + "number_of_characters": 244359, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tzm_Tfng-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 257443, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 266686, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 252406, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tzm_Tfng-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 257650, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 246571, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tzm_Tfng-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 257782, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 252520, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 248332, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tzm_Tfng-min_Latn": { + "num_samples": 1012, + "number_of_characters": 259123, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tzm_Tfng-por_Latn": { + "num_samples": 1012, + "number_of_characters": 261378, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tzm_Tfng-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 254806, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tzm_Tfng-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 252707, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 227006, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + 
"average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 240190, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 245408, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 277640, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tzm_Tfng-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 233960, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tzm_Tfng-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255787, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + 
"max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tzm_Tfng-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 238603, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tzm_Tfng-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 250133, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tzm_Tfng-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 247879, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tzm_Tfng-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 250610, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 256142, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + 
}, + "tzm_Tfng-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 252234, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 260278, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tzm_Tfng-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 256932, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tzm_Tfng-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 264248, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tzm_Tfng-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 258987, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tzm_Tfng-swh_Latn": { + "num_samples": 1012, + 
"number_of_characters": 256064, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tzm_Tfng-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 267283, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tzm_Tfng-als_Latn": { + "num_samples": 1012, + "number_of_characters": 265024, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 266111, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tzm_Tfng-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 273137, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tzm_Tfng-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 247743, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 267854, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tzm_Tfng-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 254742, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 266531, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tzm_Tfng-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 255331, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tzm_Tfng-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 251197, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tzm_Tfng-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 205092, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 250147, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tzm_Tfng-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 258088, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 274098, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 183925, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + 
"unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tzm_Tfng-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 242617, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tzm_Tfng-run_Latn": { + "num_samples": 1012, + "number_of_characters": 265225, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tzm_Tfng-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 272093, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tzm_Tfng-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 256555, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tzm_Tfng-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 225941, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + 
"average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 257335, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tzm_Tfng-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 253220, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tzm_Tfng-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 254888, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tzm_Tfng-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 248020, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tzm_Tfng-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 264487, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + 
"max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tzm_Tfng-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260056, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tzm_Tfng-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 242377, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tzm_Tfng-war_Latn": { + "num_samples": 1012, + "number_of_characters": 282706, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tzm_Tfng-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 234267, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255784, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, 
+ "tzm_Tfng-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 275598, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tzm_Tfng-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 174903, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tzm_Tfng-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 263954, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tzm_Tfng-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 281129, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tzm_Tfng-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 260581, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tzm_Tfng-taq_Tfng": { + "num_samples": 1012, + 
"number_of_characters": 241864, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tzm_Tfng-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 245133, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tzm_Tfng-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 271718, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tzm_Tfng-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 263741, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tzm_Tfng-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 262479, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 248514, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tzm_Tfng-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 255646, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tzm_Tfng-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 265467, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tzm_Tfng-san_Deva": { + "num_samples": 1012, + "number_of_characters": 246217, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tzm_Tfng-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251242, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tzm_Tfng-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 257083, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 234388, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 276645, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tzm_Tfng-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 240122, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 287357, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tzm_Tfng-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 260157, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tzm_Tfng-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 251403, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tzm_Tfng-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 257751, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tzm_Tfng-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 250430, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 259618, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 231523, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tzm_Tfng-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 245217, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tzm_Tfng-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 274378, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 246316, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tzm_Tfng-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 248289, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tzm_Tfng-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 250098, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + 
"max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tzm_Tfng-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 256531, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tzm_Tfng-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263831, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tzm_Tfng-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 244804, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tzm_Tfng-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 231730, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tzm_Tfng-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 257221, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + 
"tzm_Tfng-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 280557, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tzm_Tfng-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 256000, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tzm_Tfng-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 259626, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tzm_Tfng-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 244863, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tzm_Tfng-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 306244, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tzm_Tfng-tgl_Latn": { + "num_samples": 1012, + 
"number_of_characters": 284129, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tzm_Tfng-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 157979, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tzm_Tfng-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 243896, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 246545, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tzm_Tfng-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 270441, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 244065, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 247808, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tzm_Tfng-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 269069, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tzm_Tfng-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 249040, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tzm_Tfng-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 244069, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tzm_Tfng-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 161208, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 254323, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tzm_Tfng-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 251998, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tzm_Tfng-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 264103, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 244361, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 265505, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tzm_Tfng-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 260300, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tzm_Tfng-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 249670, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tzm_Tfng-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 208660, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tzm_Tfng-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 158655, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tzm_Tfng-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 246131, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tzm_Tfng-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 259217, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tzm_Tfng-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 250775, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tzm_Tfng-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 261890, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tzm_Tfng-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 260958, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tzm_Tfng-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 265132, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + 
"max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tzm_Tfng-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 248815, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tzm_Tfng-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 286971, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tzm_Tfng-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 264161, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 256577, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tzm_Tfng-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 253043, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, 
+ "tzm_Tfng-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 244388, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253364, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tzm_Tfng-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 253038, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tzm_Tfng-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 268611, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tzm_Tfng-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 270684, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tzm_Tfng-tsn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280995, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tzm_Tfng-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 266322, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tzm_Tfng-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 235021, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tzm_Tfng-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 271769, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tzm_Tfng-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 239240, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tzm_Tfng-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 260917, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tzm_Tfng-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 255506, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tzm_Tfng-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 252799, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tzm_Tfng-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 265563, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tzm_Tfng-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 276138, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tzm_Tfng-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 261699, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tzm_Tfng-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 229749, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tzm_Tfng-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 258027, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tzm_Tfng-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 248760, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tzm_Tfng-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 264215, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tzm_Tfng-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 247859, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tzm_Tfng-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 237548, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tzm_Tfng-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 258222, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tzm_Tfng-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251182, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tzm_Tfng-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 250815, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tzm_Tfng-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 220014, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tzm_Tfng-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255027, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tzm_Tfng-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 252117, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tzm_Tfng-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 251385, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tzm_Tfng-som_Latn": { + "num_samples": 1012, + "number_of_characters": 269622, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tzm_Tfng-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 290505, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 116.56126482213439, + "max_sentence1_length": 330, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + 
"max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "acq_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 226734, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "acq_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 241500, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "acq_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 278471, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "acq_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 246171, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "acq_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 268325, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + 
}, + "acq_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 241808, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "acq_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 254267, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "acq_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 273845, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "acq_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 250874, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "acq_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 257046, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "acq_Arab-ban_Latn": { + "num_samples": 1012, + 
"number_of_characters": 260683, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "acq_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 272652, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "acq_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 242046, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "acq_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 268544, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "acq_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 244521, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "acq_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 237568, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "acq_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 272110, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "acq_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 243711, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "acq_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 228686, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "acq_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263104, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "acq_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 247079, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "acq_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 243840, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "acq_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 260975, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "acq_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 264449, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "acq_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 238940, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "acq_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 268460, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "acq_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 233073, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "acq_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 275864, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "acq_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 246405, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "acq_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 253742, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "acq_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250645, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "acq_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 248164, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "acq_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 276109, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "acq_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 245893, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "acq_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 255590, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "acq_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 227190, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + 
"max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "acq_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 244155, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "acq_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 244572, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "acq_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 261069, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "acq_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 260875, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "acq_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 241512, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, 
+ "acq_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 254596, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "acq_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 263839, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "acq_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249559, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "acq_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 254803, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "acq_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 243724, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "acq_Arab-eus_Latn": { + "num_samples": 1012, + 
"number_of_characters": 254935, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "acq_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 249673, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "acq_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 245485, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "acq_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 256276, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "acq_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 258531, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "acq_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 251959, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "acq_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 249860, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "acq_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 224159, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "acq_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 237343, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "acq_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 242561, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "acq_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 274793, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "acq_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 231113, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "acq_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 252940, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "acq_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 235756, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "acq_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 247286, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "acq_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 245032, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "acq_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 247763, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "acq_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 253295, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "acq_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 249387, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "acq_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 257431, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "acq_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 254085, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "acq_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 261401, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "acq_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 256140, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "acq_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 253217, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "acq_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 264436, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "acq_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 262177, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + 
"max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "acq_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 263264, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "acq_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 270290, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "acq_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 244896, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "acq_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 265007, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "acq_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 251895, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, 
+ "acq_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 263684, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "acq_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 252484, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "acq_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 248350, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "acq_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 202245, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "acq_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 247300, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "acq_Arab-fin_Latn": { + "num_samples": 1012, + 
"number_of_characters": 255241, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "acq_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 271251, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "acq_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 181078, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "acq_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 239770, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "acq_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 262378, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "acq_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 269246, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "acq_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 253708, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "acq_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 223094, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "acq_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 254488, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "acq_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 250373, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "acq_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 252041, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "acq_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 245173, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "acq_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 261640, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "acq_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257209, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "acq_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 239530, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "acq_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 279859, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "acq_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 231420, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "acq_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 252937, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "acq_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 272751, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "acq_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 172056, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "acq_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 261107, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "acq_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 278282, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "acq_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 257734, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "acq_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 239017, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "acq_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 242286, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "acq_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 268871, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + 
"max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "acq_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 260894, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "acq_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 259632, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "acq_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 245667, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "acq_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 252799, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "acq_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 262620, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "acq_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 243370, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "acq_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 248395, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "acq_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 254236, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "acq_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 231541, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "acq_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 273798, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "acq_Arab-fuv_Latn": { + "num_samples": 1012, + 
"number_of_characters": 237275, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "acq_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 284510, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "acq_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 257310, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "acq_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 248556, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "acq_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 254904, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "acq_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 247583, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "acq_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 256771, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "acq_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 228676, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "acq_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 242370, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "acq_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 271531, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "acq_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 243469, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "acq_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 245442, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "acq_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 247251, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "acq_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 253684, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "acq_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260984, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "acq_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 241957, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + 
"unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "acq_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 228883, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "acq_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 254374, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "acq_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 277710, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "acq_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 253153, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "acq_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 256779, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "acq_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 242016, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "acq_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 303397, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "acq_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 281282, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "acq_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 155132, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "acq_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 241049, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + 
"max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "acq_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 243698, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "acq_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 267594, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "acq_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 241218, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "acq_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 244961, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "acq_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 266222, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + 
"acq_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 246193, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "acq_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 241222, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "acq_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 158361, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "acq_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 251476, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "acq_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 249151, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "acq_Arab-glg_Latn": { + "num_samples": 1012, + 
"number_of_characters": 261256, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "acq_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 241514, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "acq_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 262658, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "acq_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 257453, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "acq_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 246823, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "acq_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 205813, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "acq_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 155808, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "acq_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 243284, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "acq_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 256370, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "acq_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 247928, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "acq_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 259043, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "acq_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 258111, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "acq_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 262285, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "acq_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 245968, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "acq_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 284124, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "acq_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 261314, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "acq_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 253730, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "acq_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 250196, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "acq_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 241541, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "acq_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250517, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "acq_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 250191, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "acq_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 265764, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "acq_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 267837, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "acq_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 278148, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "acq_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 263475, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "acq_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 232174, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + 
"max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "acq_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 268922, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "acq_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 236393, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "acq_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 258070, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "acq_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 252659, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "acq_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 249952, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, 
+ "acq_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 262716, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "acq_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 273291, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "acq_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 258852, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "acq_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 226902, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "acq_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 255180, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "acq_Arab-kea_Latn": { + "num_samples": 1012, + 
"number_of_characters": 245913, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "acq_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 261368, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "acq_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 245012, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "acq_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 234701, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "acq_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 255375, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "acq_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 248335, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "acq_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 247968, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "acq_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 217167, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "acq_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 252180, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "acq_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 249270, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "acq_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 248538, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "acq_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 266775, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "acq_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 287658, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 113.74802371541502, + "max_sentence1_length": 318, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bem_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 272372, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bem_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 287138, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "bem_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 324109, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bem_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 291809, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bem_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 313963, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "bem_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 287446, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bem_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 299905, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bem_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 319483, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bem_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 296512, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bem_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 302684, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bem_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 306321, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bem_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 318290, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bem_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 287684, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 
326, + "unique_sentence2": 1011 + }, + "bem_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 314182, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bem_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 290159, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bem_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 283206, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bem_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 317748, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bem_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 289349, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bem_Latn-acm_Arab": 
{ + "num_samples": 1012, + "number_of_characters": 274324, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bem_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 308742, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bem_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 292717, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bem_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 289478, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bem_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 306613, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bem_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 310087, + 
"unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bem_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 284578, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "bem_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 314098, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bem_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 278711, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "bem_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 275864, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bem_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 292043, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bem_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 299380, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bem_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296283, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bem_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 293802, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bem_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 321747, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bem_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291531, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + 
"max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bem_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 301228, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "bem_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 272828, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bem_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 289793, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bem_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 290210, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bem_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 306707, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bem_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 306513, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bem_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 287150, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bem_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 300234, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bem_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 309477, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bem_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295197, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bem_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 300441, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bem_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 289362, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bem_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 300573, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bem_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 295311, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bem_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 291123, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + 
"unique_sentence2": 1012 + }, + "bem_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 301914, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bem_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 304169, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bem_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 297597, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bem_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 295498, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bem_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 269797, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bem_Latn-bjn_Arab": { + 
"num_samples": 1012, + "number_of_characters": 282981, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "bem_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 288199, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bem_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 320431, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bem_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 276751, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "bem_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298578, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "bem_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 281394, + 
"unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "bem_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 292924, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bem_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 290670, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bem_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 293401, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bem_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 298933, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bem_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 295025, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bem_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 303069, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bem_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 299723, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bem_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 307039, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bem_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 301778, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bem_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 298855, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + 
"max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bem_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 310074, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bem_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 307815, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bem_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 308902, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "bem_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 315928, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bem_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 290534, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bem_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 310645, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bem_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 297533, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bem_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 309322, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bem_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 298122, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bem_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 293988, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bem_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 247883, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bem_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 292938, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bem_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 300879, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bem_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 316889, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "bem_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 226716, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + 
"unique_sentence2": 1012 + }, + "bem_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 285408, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bem_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 308016, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bem_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 314884, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bem_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 299346, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bem_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 268732, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bem_Latn-bug_Latn": { + 
"num_samples": 1012, + "number_of_characters": 300126, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bem_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 296011, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "bem_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 297679, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bem_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 290811, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bem_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 307278, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bem_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302847, + 
"unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bem_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 285168, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bem_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 325497, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bem_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 277058, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bem_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298575, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bem_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 318389, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bem_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 217694, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bem_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 306745, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bem_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 323920, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bem_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 303372, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bem_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 284655, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + 
"max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bem_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 287924, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bem_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 314509, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bem_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 306532, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bem_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 305270, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bem_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 291305, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "bem_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 298437, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bem_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 308258, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bem_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 289008, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bem_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294033, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bem_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 299874, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bem_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 277179, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bem_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 319436, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bem_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 282913, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "bem_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 330148, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "bem_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 302948, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + 
"unique_sentence2": 1012 + }, + "bem_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 294194, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bem_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 300542, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bem_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 293221, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bem_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 302409, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bem_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 274314, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bem_Latn-ces_Latn": { + 
"num_samples": 1012, + "number_of_characters": 288008, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bem_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 317169, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bem_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 289107, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "bem_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 291080, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bem_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 292889, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "bem_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 299322, + 
"unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bem_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306622, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "bem_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 287595, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bem_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 274521, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "bem_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 300012, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bem_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 323348, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bem_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 298791, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bem_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 302417, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bem_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 287654, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bem_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 349035, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bem_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 326920, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + 
"max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bem_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 200770, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "bem_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 286687, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bem_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 289336, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bem_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 313232, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bem_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 286856, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "bem_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 290599, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bem_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 311860, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bem_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 291831, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bem_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 286860, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bem_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 203999, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 
42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bem_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 297114, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bem_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 294789, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bem_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 306894, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bem_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 287152, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "bem_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 308296, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + 
"unique_sentence2": 1012 + }, + "bem_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 303091, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bem_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 292461, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bem_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 251451, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bem_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 201446, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bem_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 288922, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bem_Latn-cym_Latn": { + 
"num_samples": 1012, + "number_of_characters": 302008, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bem_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 293566, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bem_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 304681, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bem_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 303749, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bem_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 307923, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bem_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 291606, + 
"unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bem_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 329762, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "bem_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 306952, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bem_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 299368, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "bem_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 295834, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bem_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 287179, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bem_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296155, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bem_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 295829, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bem_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 311402, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bem_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 313475, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bem_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 323786, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + 
"max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bem_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 309113, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "bem_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 277812, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bem_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 314560, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bem_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 282031, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bem_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 303708, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "bem_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 298297, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bem_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 295590, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bem_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 308354, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bem_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 318929, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bem_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 304490, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bem_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 272540, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bem_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 300818, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bem_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 291551, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bem_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 307006, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bem_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 290650, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + 
"unique_sentence2": 1012 + }, + "bem_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 280339, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bem_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 301013, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bem_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293973, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "bem_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 293606, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bem_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 262805, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bem_Latn-khk_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 297818, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bem_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 294908, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bem_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 294176, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bem_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 312413, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bem_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 333296, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 158.84486166007906, + "max_sentence1_length": 422, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "epo_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 242913, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "epo_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 257679, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "epo_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 294650, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "epo_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 262350, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "epo_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 284504, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "epo_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 257987, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "epo_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 270446, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "epo_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 290024, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "epo_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 267053, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "epo_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 273225, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "epo_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 276862, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 
353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "epo_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 288831, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "epo_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 258225, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "epo_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 284723, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "epo_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 260700, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "epo_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 253747, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 
35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "epo_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 288289, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "epo_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 259890, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "epo_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 244865, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "epo_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279283, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "epo_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 263258, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "epo_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 260019, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "epo_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 277154, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "epo_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 280628, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "epo_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 255119, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "epo_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 284639, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"epo_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 249252, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "epo_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 246405, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "epo_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 292043, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "epo_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 269921, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "epo_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266824, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "epo_Latn-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 264343, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "epo_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 292288, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "epo_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262072, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "epo_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 271769, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "epo_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 243369, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "epo_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 260334, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "epo_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 260751, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "epo_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 277248, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "epo_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 277054, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "epo_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 257691, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "epo_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 270775, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "epo_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 280018, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "epo_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265738, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "epo_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 270982, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "epo_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 259903, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "epo_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 271114, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "epo_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 265852, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "epo_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 261664, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "epo_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 272455, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "epo_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 274710, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "epo_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 268138, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "epo_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 266039, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "epo_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 240338, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "epo_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 253522, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "epo_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 258740, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "epo_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 290972, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "epo_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 247292, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "epo_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269119, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "epo_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 251935, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "epo_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 263465, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "epo_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 261211, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "epo_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 263942, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "epo_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 269474, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "epo_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 265566, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "epo_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 273610, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "epo_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 270264, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "epo_Latn-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277580, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "epo_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 272319, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "epo_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 269396, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "epo_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 280615, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "epo_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 278356, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "epo_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 279443, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "epo_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 286469, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "epo_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 261075, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "epo_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 281186, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "epo_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 268074, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "epo_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 279863, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "epo_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 268663, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "epo_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 264529, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "epo_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 218424, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "epo_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 263479, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "epo_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 271420, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "epo_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 287430, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "epo_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 197257, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "epo_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 255949, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "epo_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 278557, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "epo_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 285425, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "epo_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 269887, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "epo_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 239273, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "epo_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 270667, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "epo_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 266552, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "epo_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 268220, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "epo_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 261352, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "epo_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 277819, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "epo_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273388, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "epo_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 255709, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "epo_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 296038, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "epo_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 247599, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "epo_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269116, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "epo_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 288930, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "epo_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 188235, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "epo_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 277286, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "epo_Latn-mya_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 294461, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "epo_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 273913, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "epo_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 255196, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "epo_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 258465, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "epo_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 285050, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "epo_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 277073, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "epo_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 275811, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "epo_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 261846, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "epo_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 268978, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "epo_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 278799, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "epo_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 259549, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "epo_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264574, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "epo_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 270415, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "epo_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 247720, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "epo_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 289977, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "epo_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 253454, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "epo_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 300689, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "epo_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 273489, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "epo_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 264735, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "epo_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 271083, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "epo_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 263762, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "epo_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 272950, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "epo_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 244855, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "epo_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 258549, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "epo_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 287710, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "epo_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 259648, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "epo_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 261621, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "epo_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 263430, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "epo_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 269863, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "epo_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277163, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "epo_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 258136, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "epo_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 245062, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "epo_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 270553, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "epo_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 293889, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "epo_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 269332, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "epo_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 272958, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "epo_Latn-npi_Deva": { + "num_samples": 1012, + 
"number_of_characters": 258195, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "epo_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 319576, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "epo_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 297461, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "epo_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 171311, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "epo_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 257228, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "epo_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 259877, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "epo_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 283773, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "epo_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 257397, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "epo_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 261140, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "epo_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 282401, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "epo_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 262372, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "epo_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 257401, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "epo_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 174540, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "epo_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 267655, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "epo_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 265330, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "epo_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 277435, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "epo_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 257693, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "epo_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 278837, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "epo_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 273632, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "epo_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 263002, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "epo_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 221992, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "epo_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 171987, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "epo_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 259463, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "epo_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 272549, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "epo_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 264107, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "epo_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 275222, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "epo_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 274290, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "epo_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 278464, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "epo_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 262147, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "epo_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 300303, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "epo_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 277493, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "epo_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 269909, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "epo_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 266375, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "epo_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 257720, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "epo_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266696, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "epo_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 266370, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "epo_Latn-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 281943, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "epo_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 284016, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "epo_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 294327, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "epo_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 279654, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "epo_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 248353, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "epo_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 285101, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "epo_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 252572, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "epo_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 274249, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "epo_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 268838, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "epo_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 266131, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "epo_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 278895, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "epo_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 289470, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "epo_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 275031, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "epo_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 243081, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "epo_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 271359, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "epo_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 262092, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "epo_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 277547, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "epo_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 261191, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "epo_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 250880, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "epo_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 271554, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "epo_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264514, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "epo_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 264147, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "epo_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 233346, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "epo_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268359, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "epo_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 265449, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "epo_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 264717, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "epo_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 282954, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "epo_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 303837, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.73517786561266, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "hun_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 250250, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "hun_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 265016, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "hun_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 301987, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, 
+ "hun_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 269687, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "hun_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 291841, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "hun_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 265324, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "hun_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 277783, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hun_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 297361, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "hun_Latn-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 274390, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "hun_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 280562, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hun_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 284199, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hun_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 296168, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "hun_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 265562, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "hun_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 292060, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "hun_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 268037, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hun_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 261084, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hun_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 295626, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hun_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 267227, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "hun_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 252202, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "hun_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286620, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hun_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 270595, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hun_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 267356, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hun_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 284491, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hun_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 287965, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hun_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 262456, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "hun_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 291976, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hun_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 256589, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "hun_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 253742, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "hun_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 299380, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "hun_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 269921, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hun_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274161, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hun_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 271680, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hun_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 299625, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "hun_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269409, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hun_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 279106, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "hun_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 250706, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "hun_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 267671, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hun_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 268088, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "hun_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 284585, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + 
}, + "hun_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 284391, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "hun_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 265028, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hun_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 278112, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hun_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 287355, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hun_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273075, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hun_Latn-afr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278319, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hun_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 267240, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hun_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 278451, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hun_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 273189, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hun_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 269001, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hun_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 279792, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hun_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 282047, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hun_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 275475, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hun_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 273376, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hun_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 247675, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "hun_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 260859, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "hun_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 266077, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hun_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 298309, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "hun_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 254629, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "hun_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276456, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "hun_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 259272, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "hun_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 270802, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hun_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 268548, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hun_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 271279, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hun_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 276811, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hun_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 272903, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hun_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 280947, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hun_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 277601, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hun_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 284917, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "hun_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 279656, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hun_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 276733, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "hun_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 287952, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hun_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 285693, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hun_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 286780, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "hun_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 293806, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "hun_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 268412, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, 
+ "hun_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 288523, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hun_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 275411, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hun_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 287200, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hun_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 276000, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "hun_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 271866, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hun_Latn-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 225761, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "hun_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 270816, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hun_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 278757, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hun_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 294767, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "hun_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 204594, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "hun_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 263286, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "hun_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 285894, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "hun_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 292762, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "hun_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 277224, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "hun_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 246610, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "hun_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 278004, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hun_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 273889, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "hun_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 275557, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hun_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 268689, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hun_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 285156, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hun_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280725, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hun_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 263046, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hun_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 303375, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hun_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 254936, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hun_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276453, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hun_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 296267, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + 
"average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "hun_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 195572, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "hun_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 284623, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hun_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 301798, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hun_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 281250, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "hun_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 262533, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hun_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 265802, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hun_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 292387, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "hun_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 284410, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hun_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 283148, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hun_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 269183, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + 
}, + "hun_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 276315, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "hun_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 286136, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hun_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 266886, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "hun_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271911, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hun_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 277752, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "hun_Latn-ars_Arab": { + "num_samples": 1012, + 
"number_of_characters": 255057, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hun_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 297314, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hun_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 260791, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "hun_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 308026, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "hun_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 280826, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hun_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 272072, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hun_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 278420, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hun_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 271099, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hun_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 280287, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hun_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 252192, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "hun_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 265886, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hun_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 295047, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "hun_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 266985, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "hun_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 268958, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "hun_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 270767, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "hun_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 277200, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hun_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284500, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "hun_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 265473, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hun_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 252399, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "hun_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 277890, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hun_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 301226, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "hun_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 276669, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hun_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 280295, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hun_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 265532, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hun_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 326913, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "hun_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 304798, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + 
"max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hun_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 178648, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "hun_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 264565, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hun_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 267214, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hun_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 291110, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "hun_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 264734, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, 
+ "hun_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 268477, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hun_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 289738, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "hun_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 269709, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hun_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 264738, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hun_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 181877, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "hun_Latn-ast_Latn": { + "num_samples": 1012, + 
"number_of_characters": 274992, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hun_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 272667, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hun_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 284772, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hun_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 265030, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "hun_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 286174, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "hun_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 280969, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "hun_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 270339, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hun_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 229329, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "hun_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 179324, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "hun_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 266800, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hun_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 279886, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hun_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 271444, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "hun_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 282559, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hun_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 281627, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "hun_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 285801, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hun_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 269484, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hun_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 307640, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "hun_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 284830, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hun_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 277246, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "hun_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 273712, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hun_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 265057, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hun_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274033, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hun_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 273707, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "hun_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 289280, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hun_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 291353, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "hun_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 301664, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hun_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 286991, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "hun_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 255690, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "hun_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 292438, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hun_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 259909, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "hun_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 281586, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + 
}, + "hun_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 276175, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hun_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 273468, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hun_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 286232, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "hun_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 296807, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hun_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 282368, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "hun_Latn-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 250418, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "hun_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 278696, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "hun_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 269429, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hun_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 284884, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hun_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 268528, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "hun_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 258217, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "hun_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 278891, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hun_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271851, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "hun_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 271484, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "hun_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 240683, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "hun_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275696, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hun_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 272786, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hun_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 272054, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hun_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 290291, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "hun_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 311174, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 136.98517786561266, + "max_sentence1_length": 393, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 247153, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 261919, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kir_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 298890, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kir_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 266590, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kir_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 288744, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kir_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 262227, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kir_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 274686, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kir_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 294264, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 271293, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 277465, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 281102, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 
394, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 293071, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kir_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 262465, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kir_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 288963, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kir_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 264940, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kir_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 257987, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kir_Cyrl-spa_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 292529, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kir_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 264130, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kir_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 249105, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283523, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kir_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 267498, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kir_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 264259, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kir_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 281394, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kir_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 284868, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kir_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 259359, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kir_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 288879, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 253492, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kir_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 250645, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 296283, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kir_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 266824, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kir_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 274161, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kir_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 268583, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kir_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 296528, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kir_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266312, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kir_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 276009, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kir_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 247609, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 264574, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kir_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 264991, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kir_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 281488, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kir_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 281294, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kir_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 261931, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kir_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 275015, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 284258, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269978, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kir_Cyrl-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 275222, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 264143, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kir_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 275354, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "kir_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 270092, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kir_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 265904, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kir_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 276695, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kir_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 278950, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kir_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 272378, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kir_Cyrl-umb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 270279, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 244578, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 257762, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 262980, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 295212, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kir_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 251532, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kir_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273359, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kir_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 256175, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kir_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 267705, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kir_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 265451, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kir_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 268182, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 273714, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kir_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 269806, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 277850, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kir_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 274504, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kir_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 281820, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kir_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 276559, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kir_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 273636, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kir_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 284855, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kir_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 282596, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 283683, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kir_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 290709, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kir_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 265315, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kir_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 285426, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kir_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 272314, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 284103, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kir_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 272903, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kir_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 268769, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kir_Cyrl-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 222664, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 267719, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kir_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 275660, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + 
"unique_sentence2": 1012 + }, + "kir_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 291670, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kir_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 201497, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kir_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 260189, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kir_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 282797, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kir_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 289665, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kir_Cyrl-vie_Latn": { + 
"num_samples": 1012, + "number_of_characters": 274127, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kir_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 243513, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 274907, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kir_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 270792, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kir_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 272460, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kir_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 265592, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kir_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 282059, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kir_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277628, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kir_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 259949, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kir_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 300278, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kir_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 251839, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273356, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kir_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 293170, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kir_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 192475, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kir_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 281526, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kir_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 298701, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kir_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 278153, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kir_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 259436, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kir_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 262705, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kir_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 289290, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kir_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 281313, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kir_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 280051, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kir_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 266086, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kir_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 273218, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kir_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 283039, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kir_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 263789, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kir_Cyrl-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268814, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kir_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 274655, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 251960, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 294217, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kir_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 257694, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + 
"unique_sentence2": 1012 + }, + "kir_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 304929, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kir_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 277729, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kir_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 268975, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kir_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 275323, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 268002, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ydd_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 277190, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 249095, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kir_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 262789, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kir_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 291950, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kir_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 263888, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kir_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 265861, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kir_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 267670, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kir_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 274103, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281403, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kir_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 262376, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kir_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 249302, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kir_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 274793, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kir_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 298129, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kir_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 273572, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kir_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 277198, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kir_Cyrl-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 262435, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kir_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 323816, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 301701, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kir_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 175551, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kir_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 261468, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 264117, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kir_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 288013, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kir_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 261637, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 265380, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kir_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 286641, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kir_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 266612, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 261641, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kir_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 178780, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 271895, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kir_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 269570, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kir_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 281675, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "kir_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 261933, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 283077, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kir_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 277872, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kir_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 267242, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 226232, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kir_Cyrl-zho_Hant": { + 
"num_samples": 1012, + "number_of_characters": 176227, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kir_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 263703, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kir_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 276789, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kir_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 268347, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kir_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 279462, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kir_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 278530, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kir_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 282704, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kir_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 266387, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 304543, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kir_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 281733, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 274149, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kir_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 270615, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kir_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 261960, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kir_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270936, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kir_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 270610, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kir_Cyrl-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 286183, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kir_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 288256, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kir_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 298567, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kir_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 283894, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kir_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 252593, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kir_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 289341, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kir_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 256812, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kir_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 278489, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kir_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 273078, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kir_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 270371, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kir_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 283135, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 293710, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kir_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 279271, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kir_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 247321, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kir_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 275599, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kir_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 266332, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + 
"unique_sentence2": 1012 + }, + "kir_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 281787, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kir_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 265431, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kir_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 255120, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 275794, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kir_Cyrl-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268754, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kir_Cyrl-dyu_Latn": { + 
"num_samples": 1012, + "number_of_characters": 268387, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kir_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 237586, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kir_Cyrl-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272599, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kir_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 269689, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kir_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 268957, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kir_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 287194, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kir_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 308077, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.92490118577075, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "mar_Deva-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 244672, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "mar_Deva-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 259438, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "mar_Deva-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 296409, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mar_Deva-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 264109, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "mar_Deva-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 286263, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "mar_Deva-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 259746, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "mar_Deva-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 272205, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mar_Deva-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 291783, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "mar_Deva-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 268812, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "mar_Deva-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 274984, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mar_Deva-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 278621, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mar_Deva-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 290590, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "mar_Deva-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 259984, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "mar_Deva-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 286482, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "mar_Deva-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 262459, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mar_Deva-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 255506, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mar_Deva-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 290048, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mar_Deva-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 261649, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "mar_Deva-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 246624, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "mar_Deva-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281042, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mar_Deva-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 265017, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mar_Deva-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 261778, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mar_Deva-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 278913, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mar_Deva-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 282387, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "mar_Deva-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 256878, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "mar_Deva-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 286398, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mar_Deva-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 251011, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "mar_Deva-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 248164, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "mar_Deva-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 293802, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "mar_Deva-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 264343, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mar_Deva-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 271680, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "mar_Deva-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268583, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mar_Deva-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 294047, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "mar_Deva-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263831, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mar_Deva-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 273528, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "mar_Deva-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 245128, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "mar_Deva-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 262093, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mar_Deva-est_Latn": { + "num_samples": 1012, + "number_of_characters": 262510, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "mar_Deva-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 279007, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mar_Deva-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 278813, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "mar_Deva-min_Arab": { + "num_samples": 1012, + "number_of_characters": 259450, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mar_Deva-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 272534, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mar_Deva-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 281777, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mar_Deva-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267497, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mar_Deva-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 272741, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mar_Deva-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 261662, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mar_Deva-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 272873, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mar_Deva-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 267611, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mar_Deva-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 263423, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mar_Deva-min_Latn": { + "num_samples": 1012, + "number_of_characters": 274214, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mar_Deva-por_Latn": { + "num_samples": 1012, + "number_of_characters": 276469, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mar_Deva-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 269897, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mar_Deva-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 267798, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mar_Deva-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 242097, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "mar_Deva-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 255281, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "mar_Deva-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 260499, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mar_Deva-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 292731, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "mar_Deva-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 249051, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "mar_Deva-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270878, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "mar_Deva-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 253694, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + 
"unique_sentence2": 1012 + }, + "mar_Deva-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 265224, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mar_Deva-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 262970, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mar_Deva-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 265701, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mar_Deva-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 271233, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mar_Deva-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 267325, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mar_Deva-ind_Latn": { + 
"num_samples": 1012, + "number_of_characters": 275369, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mar_Deva-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 272023, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mar_Deva-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 279339, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "mar_Deva-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 274078, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mar_Deva-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 271155, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "mar_Deva-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 282374, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mar_Deva-als_Latn": { + "num_samples": 1012, + "number_of_characters": 280115, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mar_Deva-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 281202, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "mar_Deva-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 288228, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "mar_Deva-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 262834, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mar_Deva-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 282945, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mar_Deva-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 269833, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mar_Deva-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 281622, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mar_Deva-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 270422, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "mar_Deva-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 266288, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mar_Deva-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 220183, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "mar_Deva-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 265238, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mar_Deva-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 273179, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mar_Deva-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 289189, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "mar_Deva-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 199016, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "mar_Deva-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 257708, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "mar_Deva-run_Latn": { + "num_samples": 1012, + "number_of_characters": 280316, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "mar_Deva-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 287184, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "mar_Deva-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 271646, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "mar_Deva-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 241032, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "mar_Deva-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 272426, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mar_Deva-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 268311, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "mar_Deva-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 269979, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mar_Deva-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 263111, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mar_Deva-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 279578, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mar_Deva-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275147, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "mar_Deva-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 257468, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mar_Deva-war_Latn": { + "num_samples": 1012, + "number_of_characters": 297797, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mar_Deva-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 249358, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mar_Deva-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270875, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mar_Deva-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 290689, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "mar_Deva-jpn_Jpan": { + 
"num_samples": 1012, + "number_of_characters": 189994, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "mar_Deva-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 279045, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mar_Deva-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 296220, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mar_Deva-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 275672, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "mar_Deva-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 256955, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mar_Deva-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 260224, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mar_Deva-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 286809, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "mar_Deva-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 278832, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mar_Deva-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 277570, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mar_Deva-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 263605, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "mar_Deva-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 270737, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "mar_Deva-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 280558, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mar_Deva-san_Deva": { + "num_samples": 1012, + "number_of_characters": 261308, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "mar_Deva-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266333, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mar_Deva-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 272174, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "mar_Deva-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 249479, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mar_Deva-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 291736, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mar_Deva-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 255213, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "mar_Deva-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 302448, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "mar_Deva-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 275248, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mar_Deva-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 266494, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mar_Deva-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 272842, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mar_Deva-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 265521, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mar_Deva-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 274709, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mar_Deva-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 246614, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "mar_Deva-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 260308, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mar_Deva-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 289469, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "mar_Deva-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 261407, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "mar_Deva-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 263380, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "mar_Deva-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 265189, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "mar_Deva-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 271622, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "mar_Deva-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278922, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "mar_Deva-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 259895, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mar_Deva-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 246821, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "mar_Deva-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 272312, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mar_Deva-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 295648, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "mar_Deva-kan_Knda": { + 
"num_samples": 1012, + "number_of_characters": 271091, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mar_Deva-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 274717, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mar_Deva-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 259954, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mar_Deva-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 321335, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "mar_Deva-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 299220, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mar_Deva-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 173070, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "mar_Deva-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 258987, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mar_Deva-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 261636, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mar_Deva-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 285532, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "mar_Deva-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 259156, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "mar_Deva-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 262899, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mar_Deva-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 284160, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "mar_Deva-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 264131, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mar_Deva-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 259160, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mar_Deva-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 176299, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "mar_Deva-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 269414, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mar_Deva-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 267089, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mar_Deva-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 279194, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mar_Deva-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 259452, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "mar_Deva-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 280596, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "mar_Deva-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 275391, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "mar_Deva-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 264761, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mar_Deva-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 223751, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "mar_Deva-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 173746, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "mar_Deva-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 261222, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mar_Deva-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 274308, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mar_Deva-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 265866, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "mar_Deva-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 276981, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mar_Deva-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 276049, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "mar_Deva-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 280223, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mar_Deva-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 263906, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + 
"unique_sentence2": 1012 + }, + "mar_Deva-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 302062, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "mar_Deva-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 279252, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mar_Deva-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 271668, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "mar_Deva-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 268134, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mar_Deva-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 259479, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mar_Deva-kaz_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 268455, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mar_Deva-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 268129, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "mar_Deva-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 283702, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mar_Deva-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 285775, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "mar_Deva-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 296086, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mar_Deva-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 281413, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "mar_Deva-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 250112, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "mar_Deva-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 286860, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mar_Deva-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 254331, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "mar_Deva-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 276008, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "mar_Deva-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 270597, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mar_Deva-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 267890, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mar_Deva-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 280654, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "mar_Deva-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 291229, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mar_Deva-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 276790, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "mar_Deva-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 244840, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "mar_Deva-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 273118, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "mar_Deva-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 263851, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mar_Deva-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 279306, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mar_Deva-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 262950, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "mar_Deva-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 252639, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "mar_Deva-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 273313, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mar_Deva-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266273, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "mar_Deva-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 265906, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "mar_Deva-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 235105, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "mar_Deva-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270118, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mar_Deva-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 267208, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mar_Deva-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 266476, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mar_Deva-som_Latn": { + "num_samples": 1012, + "number_of_characters": 284713, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "mar_Deva-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 305596, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.47332015810278, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "plt_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 272617, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + 
"unique_sentence2": 1012 + }, + "plt_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 287383, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "plt_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 324354, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "plt_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 292054, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "plt_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 314208, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "plt_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 287691, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "plt_Latn-pap_Latn": { + 
"num_samples": 1012, + "number_of_characters": 300150, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "plt_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 319728, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "plt_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 296757, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "plt_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 302929, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "plt_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 306566, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "plt_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 318535, + "unique_pairs": 
1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "plt_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 287929, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "plt_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 314427, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "plt_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 290404, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "plt_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 283451, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "plt_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 317993, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "plt_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 289594, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "plt_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 274569, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "plt_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 308987, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "plt_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 292962, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "plt_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 289723, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + 
"max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "plt_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 306858, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "plt_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 310332, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "plt_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 284823, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "plt_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 314343, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "plt_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 278956, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "plt_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 276109, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "plt_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 321747, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "plt_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 292288, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "plt_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 299625, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "plt_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296528, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "plt_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 294047, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "plt_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291776, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "plt_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 301473, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "plt_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 273073, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "plt_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 290038, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + 
"unique_sentence2": 1012 + }, + "plt_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 290455, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "plt_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 306952, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "plt_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 306758, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "plt_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 287395, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "plt_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 300479, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "plt_Latn-ssw_Latn": { + 
"num_samples": 1012, + "number_of_characters": 309722, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "plt_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295442, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "plt_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 300686, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "plt_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 289607, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "plt_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 300818, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "plt_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 295556, + 
"unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "plt_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 291368, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "plt_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 302159, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "plt_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 304414, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "plt_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 297842, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "plt_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 295743, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "plt_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 270042, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "plt_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 283226, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "plt_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 288444, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "plt_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 320676, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "plt_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 276996, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + 
"max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "plt_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298823, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "plt_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 281639, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "plt_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 293169, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "plt_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 290915, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "plt_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 293646, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "plt_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 299178, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "plt_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 295270, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "plt_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 303314, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "plt_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 299968, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "plt_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 307284, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "plt_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 302023, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "plt_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 299100, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "plt_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 310319, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "plt_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 308060, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "plt_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 309147, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + 
"unique_sentence2": 1009 + }, + "plt_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 316173, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "plt_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 290779, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "plt_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 310890, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "plt_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 297778, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "plt_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 309567, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "plt_Latn-szl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 298367, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "plt_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 294233, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "plt_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 248128, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "plt_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 293183, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "plt_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 301124, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "plt_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 317134, + 
"unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "plt_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 226961, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "plt_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 285653, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "plt_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 308261, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "plt_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 315129, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "plt_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 299591, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "plt_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 268977, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "plt_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 300371, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "plt_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 296256, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "plt_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 297924, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "plt_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 291056, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + 
"max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "plt_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 307523, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "plt_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 303092, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "plt_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 285413, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "plt_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 325742, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "plt_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 277303, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "plt_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298820, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "plt_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 318634, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "plt_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 217939, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "plt_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 306990, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "plt_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 324165, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 
161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "plt_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 303617, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "plt_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 284900, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "plt_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 288169, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "plt_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 314754, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "plt_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 306777, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + 
"unique_sentence2": 1012 + }, + "plt_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 305515, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "plt_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 291550, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "plt_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 298682, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "plt_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 308503, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "plt_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 289253, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "plt_Latn-tat_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 294278, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "plt_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 300119, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "plt_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 277424, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "plt_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 319681, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "plt_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 283158, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "plt_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 330393, + 
"unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "plt_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 303193, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "plt_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 294439, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "plt_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 300787, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "plt_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 293466, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "plt_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 302654, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "plt_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 274559, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "plt_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 288253, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "plt_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 317414, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "plt_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 289352, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "plt_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 291325, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + 
"max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "plt_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 293134, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "plt_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 299567, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "plt_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306867, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "plt_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 287840, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "plt_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 274766, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "plt_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 300257, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "plt_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 323593, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "plt_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 299036, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "plt_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 302662, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "plt_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 287899, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "plt_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 349280, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "plt_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 327165, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "plt_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 201015, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "plt_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 286932, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "plt_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 289581, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + 
"unique_sentence2": 1012 + }, + "plt_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 313477, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "plt_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 287101, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "plt_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 290844, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "plt_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 312105, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "plt_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 292076, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "plt_Latn-tha_Thai": { + 
"num_samples": 1012, + "number_of_characters": 287105, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "plt_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 204244, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "plt_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 297359, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "plt_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 295034, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "plt_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 307139, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "plt_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 287397, + 
"unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "plt_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 308541, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "plt_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 303336, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "plt_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 292706, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "plt_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 251696, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "plt_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 201691, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "plt_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 289167, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "plt_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 302253, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "plt_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 293811, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "plt_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 304926, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "plt_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 303994, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + 
"max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "plt_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 308168, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "plt_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 291851, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "plt_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 330007, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "plt_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 307197, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "plt_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 299613, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "plt_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 296079, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "plt_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 287424, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "plt_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296400, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "plt_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 296074, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "plt_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 311647, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "plt_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 313720, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "plt_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 324031, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "plt_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 309358, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "plt_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 278057, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "plt_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 314805, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + 
"unique_sentence2": 1012 + }, + "plt_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 282276, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "plt_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 303953, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "plt_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 298542, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "plt_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 295835, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "plt_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 308599, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "plt_Latn-tso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 319174, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "plt_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 304735, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "plt_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 272785, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "plt_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 301063, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "plt_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 291796, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "plt_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 307251, + 
"unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "plt_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 290895, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "plt_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 280584, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "plt_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 301258, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "plt_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294218, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "plt_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 293851, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "plt_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 263050, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "plt_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298063, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "plt_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 295153, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "plt_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 294421, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "plt_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 312658, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + 
"max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "plt_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 333541, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 159.08695652173913, + "max_sentence1_length": 479, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 242401, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 257167, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "srp_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 294138, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "srp_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 261838, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "srp_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 283992, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "srp_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 257475, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "srp_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 269934, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "srp_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 289512, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "srp_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 266541, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 272713, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 276350, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 288319, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "srp_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 257713, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "srp_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 284211, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 
1009 + }, + "srp_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 260188, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "srp_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 253235, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "srp_Cyrl-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 287777, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "srp_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 259378, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "srp_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 244353, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 278771, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "srp_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 262746, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "srp_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 259507, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 276642, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "srp_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 280116, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "srp_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 254607, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "srp_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 284127, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "srp_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 248740, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "srp_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 245893, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 291531, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "srp_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 262072, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "srp_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 269409, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266312, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "srp_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 263831, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "srp_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 291776, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "srp_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 271257, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "srp_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 242857, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 259822, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "srp_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 260239, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "srp_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 276736, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 276542, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "srp_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 257179, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "srp_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 270263, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 279506, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265226, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "srp_Cyrl-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 270470, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + 
"max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 259391, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "srp_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 270602, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 265340, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 261152, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "srp_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 271943, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, 
+ "srp_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 274198, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "srp_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 267626, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "srp_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 265527, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 239826, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 253010, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ewe_Latn": { + "num_samples": 1012, + 
"number_of_characters": 258228, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 290460, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "srp_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 246780, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "srp_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268607, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "srp_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 251423, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "srp_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 262953, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "srp_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 260699, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "srp_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 263430, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 268962, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "srp_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 265054, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 273098, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "srp_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 269752, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "srp_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 277068, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "srp_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 271807, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "srp_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 268884, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "srp_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 280103, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "srp_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 277844, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 278931, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "srp_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 285957, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "srp_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 260563, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 280674, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "srp_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 267562, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 279351, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "srp_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 268151, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "srp_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 264017, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "srp_Cyrl-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 217912, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + 
"max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 262967, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "srp_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 270908, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 286918, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 196745, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "srp_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 255437, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, 
+ "srp_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 278045, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "srp_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 284913, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "srp_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 269375, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "srp_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 238761, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 270155, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "srp_Cyrl-fon_Latn": { + "num_samples": 1012, + 
"number_of_characters": 266040, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "srp_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 267708, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "srp_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 260840, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "srp_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 277307, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "srp_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272876, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "srp_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 255197, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "srp_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 295526, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "srp_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 247087, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268604, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "srp_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 288418, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "srp_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 187723, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "srp_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 276774, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "srp_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 293949, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "srp_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 273401, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "srp_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 254684, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "srp_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 257953, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "srp_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 284538, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "srp_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 276561, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "srp_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 275299, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 261334, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "srp_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 268466, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "srp_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 278287, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "srp_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 259037, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "srp_Cyrl-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264062, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "srp_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 269903, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 247208, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + 
"max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 289465, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "srp_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 252942, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 300177, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "srp_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 272977, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "srp_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 264223, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, 
+ "srp_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 270571, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "srp_Cyrl-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 263250, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 272438, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 244343, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "srp_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 258037, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "srp_Cyrl-gaz_Latn": { + "num_samples": 1012, + 
"number_of_characters": 287198, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 259136, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "srp_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 261109, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "srp_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 262918, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "srp_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 269351, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "srp_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276651, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "srp_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 257624, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "srp_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 244550, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "srp_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 270041, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "srp_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 293377, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "srp_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 268820, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "srp_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 272446, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "srp_Cyrl-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 257683, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "srp_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 319064, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "srp_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 296949, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "srp_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 170799, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "srp_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 256716, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 259365, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "srp_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 283261, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 256885, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 260628, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "srp_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 281889, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "srp_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 261860, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "srp_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 256889, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "srp_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 174028, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 267143, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + 
"max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "srp_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 264818, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "srp_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 276923, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 257181, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 278325, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "srp_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 273120, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, 
+ "srp_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 262490, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "srp_Cyrl-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 221480, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "srp_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 171475, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "srp_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 258951, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "srp_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 272037, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "srp_Cyrl-grn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 263595, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "srp_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 274710, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "srp_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 273778, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "srp_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 277952, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "srp_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 261635, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "srp_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 299791, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "srp_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 276981, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 269397, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "srp_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 265863, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "srp_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 257208, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266184, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "srp_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 265858, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "srp_Cyrl-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 281431, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "srp_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 283504, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "srp_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 293815, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "srp_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 279142, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "srp_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 247841, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "srp_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 284589, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "srp_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 252060, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "srp_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 273737, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "srp_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 268326, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "srp_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 265619, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "srp_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 278383, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "srp_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 288958, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "srp_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 274519, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "srp_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 242569, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "srp_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 270847, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "srp_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 261580, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "srp_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 277035, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "srp_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 260679, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "srp_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 250368, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"srp_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 271042, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "srp_Cyrl-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264002, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "srp_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 263635, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "srp_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 232834, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "srp_Cyrl-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267847, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "srp_Cyrl-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 264937, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "srp_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 264205, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "srp_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 282442, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "srp_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 303325, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 129.22924901185772, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "uig_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 252098, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "uig_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 266864, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "uig_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 303835, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "uig_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 271535, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "uig_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 293689, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "uig_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 267172, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "uig_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 279631, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "uig_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 299209, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "uig_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 276238, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "uig_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 282410, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "uig_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 286047, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "uig_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 298016, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 
1011, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "uig_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 267410, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "uig_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 293908, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "uig_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 269885, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "uig_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 262932, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "uig_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 297474, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + 
"average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "uig_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 269075, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "uig_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 254050, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "uig_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288468, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "uig_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 272443, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "uig_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 269204, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "uig_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 286339, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "uig_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 289813, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "uig_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 264304, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "uig_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 293824, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "uig_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 258437, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, 
+ "uig_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 255590, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "uig_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 301228, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "uig_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 271769, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "uig_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 279106, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "uig_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276009, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "uig_Arab-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 273528, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "uig_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 301473, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "uig_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271257, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "uig_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 252554, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "uig_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 269519, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "uig_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 269936, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "uig_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 286433, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "uig_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 286239, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "uig_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 266876, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "uig_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 279960, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "uig_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 289203, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "uig_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274923, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "uig_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 280167, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "uig_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 269088, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "uig_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 280299, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "uig_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 275037, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + 
"unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "uig_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 270849, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "uig_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 281640, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "uig_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 283895, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "uig_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 277323, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "uig_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 275224, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "uig_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 249523, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "uig_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 262707, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "uig_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 267925, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "uig_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 300157, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "uig_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 256477, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + 
"max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "uig_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278304, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "uig_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 261120, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "uig_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 272650, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "uig_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 270396, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "uig_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 273127, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + 
}, + "uig_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 278659, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "uig_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 274751, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "uig_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 282795, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "uig_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 279449, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "uig_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 286765, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "uig_Arab-quy_Latn": { + "num_samples": 1012, + 
"number_of_characters": 281504, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "uig_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 278581, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "uig_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 289800, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "uig_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 287541, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "uig_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 288628, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "uig_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 295654, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "uig_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 270260, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "uig_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 290371, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "uig_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 277259, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "uig_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 289048, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "uig_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 277848, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "uig_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 273714, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "uig_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 227609, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "uig_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 272664, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "uig_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 280605, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "uig_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 296615, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + 
"unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "uig_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 206442, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "uig_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 265134, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "uig_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 287742, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "uig_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 294610, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "uig_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 279072, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + 
"average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "uig_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 248458, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "uig_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 279852, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "uig_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 275737, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "uig_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 277405, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "uig_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 270537, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "uig_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 287004, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "uig_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282573, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "uig_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 264894, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "uig_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 305223, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "uig_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 256784, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, 
+ "uig_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278301, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "uig_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 298115, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "uig_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 197420, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "uig_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 286471, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "uig_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 303646, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "uig_Arab-sag_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283098, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "uig_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 264381, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "uig_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 267650, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "uig_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 294235, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "uig_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 286258, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "uig_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 284996, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "uig_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 271031, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "uig_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 278163, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "uig_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 287984, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "uig_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 268734, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "uig_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273759, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "uig_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 279600, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "uig_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 256905, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "uig_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 299162, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "uig_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 262639, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "uig_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 309874, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + 
"unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "uig_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 282674, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "uig_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 273920, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "uig_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 280268, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "uig_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 272947, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "uig_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 282135, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + 
"average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "uig_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 254040, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "uig_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 267734, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "uig_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 296895, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "uig_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 268833, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "uig_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 270806, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + 
"max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "uig_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 272615, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "uig_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 279048, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "uig_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286348, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "uig_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 267321, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "uig_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 254247, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + 
"uig_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 279738, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "uig_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 303074, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "uig_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 278517, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "uig_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 282143, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "uig_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 267380, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "uig_Arab-shn_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 328761, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "uig_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 306646, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "uig_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 180496, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "uig_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 266413, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "uig_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 269062, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "uig_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 292958, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "uig_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 266582, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "uig_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 270325, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "uig_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 291586, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "uig_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 271557, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "uig_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 266586, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "uig_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 183725, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "uig_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 276840, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "uig_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 274515, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "uig_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 286620, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "uig_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 266878, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + 
"unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "uig_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 288022, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "uig_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 282817, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "uig_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 272187, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "uig_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 231177, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "uig_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 181172, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + 
"average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "uig_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 268648, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "uig_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 281734, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "uig_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 273292, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "uig_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 284407, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "uig_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 283475, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + 
"max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "uig_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 287649, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "uig_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 271332, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "uig_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 309488, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "uig_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 286678, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "uig_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 279094, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, 
+ "uig_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 275560, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "uig_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 266905, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "uig_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275881, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "uig_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 275555, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "uig_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 291128, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "uig_Arab-smo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 293201, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "uig_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 303512, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "uig_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 288839, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "uig_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 257538, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "uig_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 294286, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "uig_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 261757, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "uig_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 283434, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "uig_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 278023, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "uig_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 275316, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "uig_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 288080, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "uig_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 298655, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "uig_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 284216, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "uig_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 252266, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "uig_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 280544, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "uig_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 271277, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "uig_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 286732, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + 
"unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "uig_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 270376, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "uig_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 260065, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "uig_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 280739, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "uig_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273699, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "uig_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 273332, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "uig_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 242531, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "uig_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277544, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "uig_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 274634, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "uig_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 273902, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "uig_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 292139, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + 
"max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "uig_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 313022, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 138.81126482213438, + "max_sentence1_length": 354, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "aeb_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 223698, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "aeb_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 238464, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "aeb_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 275435, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "aeb_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 243135, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, 
+ "aeb_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 265289, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "aeb_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 238772, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "aeb_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 251231, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "aeb_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 270809, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "aeb_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 247838, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "aeb_Arab-ace_Latn": { + "num_samples": 1012, + 
"number_of_characters": 254010, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "aeb_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 257647, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "aeb_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 269616, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "aeb_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 239010, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "aeb_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 265508, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "aeb_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 241485, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "aeb_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 234532, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "aeb_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 269074, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "aeb_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 240675, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "aeb_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 225650, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "aeb_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260068, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "aeb_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 244043, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "aeb_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 240804, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "aeb_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 257939, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "aeb_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 261413, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "aeb_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 235904, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "aeb_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 265424, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "aeb_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 230037, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "aeb_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 227190, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "aeb_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 272828, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "aeb_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 243369, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "aeb_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 250706, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "aeb_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247609, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "aeb_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 245128, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "aeb_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 273073, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "aeb_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 242857, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "aeb_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 252554, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "aeb_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 241119, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "aeb_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 241536, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "aeb_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 258033, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "aeb_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 257839, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + 
}, + "aeb_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 238476, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "aeb_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 251560, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "aeb_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 260803, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "aeb_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246523, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "aeb_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 251767, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "aeb_Arab-bho_Deva": { + "num_samples": 1012, + 
"number_of_characters": 240688, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "aeb_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 251899, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "aeb_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 246637, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "aeb_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 242449, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "aeb_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 253240, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "aeb_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 255495, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "aeb_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 248923, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "aeb_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 246824, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "aeb_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 221123, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "aeb_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 234307, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "aeb_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 239525, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "aeb_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 271757, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "aeb_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 228077, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "aeb_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249904, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "aeb_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 232720, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "aeb_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 244250, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "aeb_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 241996, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "aeb_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 244727, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "aeb_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 250259, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "aeb_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 246351, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "aeb_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 254395, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "aeb_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 251049, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "aeb_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 258365, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "aeb_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 253104, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "aeb_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 250181, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "aeb_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 261400, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "aeb_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 259141, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "aeb_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 260228, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "aeb_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 267254, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "aeb_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 241860, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "aeb_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 261971, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, 
+ "aeb_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 248859, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "aeb_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 260648, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "aeb_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 249448, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "aeb_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 245314, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "aeb_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 199209, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "aeb_Arab-bos_Latn": { + "num_samples": 1012, + 
"number_of_characters": 244264, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "aeb_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 252205, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "aeb_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 268215, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "aeb_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 178042, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "aeb_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 236734, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "aeb_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 259342, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "aeb_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 266210, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "aeb_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 250672, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "aeb_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 220058, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "aeb_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 251452, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "aeb_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 247337, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "aeb_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 249005, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "aeb_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 242137, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "aeb_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 258604, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "aeb_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254173, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "aeb_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 236494, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "aeb_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 276823, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "aeb_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 228384, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "aeb_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249901, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "aeb_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 269715, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "aeb_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 169020, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + 
"average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "aeb_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 258071, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "aeb_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 275246, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "aeb_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 254698, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "aeb_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 235981, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "aeb_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 239250, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + 
"max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "aeb_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 265835, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "aeb_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 257858, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "aeb_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 256596, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "aeb_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 242631, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "aeb_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 249763, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + 
}, + "aeb_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 259584, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "aeb_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 240334, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "aeb_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 245359, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "aeb_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 251200, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "aeb_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 228505, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "aeb_Arab-ceb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 270762, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "aeb_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 234239, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "aeb_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 281474, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "aeb_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 254274, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "aeb_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 245520, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "aeb_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 251868, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "aeb_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 244547, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "aeb_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 253735, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "aeb_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 225640, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "aeb_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 239334, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "aeb_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 268495, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "aeb_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 240433, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "aeb_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 242406, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "aeb_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 244215, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "aeb_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 250648, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "aeb_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257948, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "aeb_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 238921, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "aeb_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 225847, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "aeb_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 251338, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "aeb_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 274674, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "aeb_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 250117, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "aeb_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 253743, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "aeb_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 238980, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "aeb_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 300361, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "aeb_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 278246, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "aeb_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 152096, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + 
"max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "aeb_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 238013, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "aeb_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 240662, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "aeb_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 264558, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "aeb_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 238182, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "aeb_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 241925, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, 
+ "aeb_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 263186, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "aeb_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 243157, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "aeb_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 238186, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "aeb_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 155325, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "aeb_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 248440, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "aeb_Arab-crh_Latn": { + "num_samples": 1012, + 
"number_of_characters": 246115, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "aeb_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 258220, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "aeb_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 238478, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "aeb_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 259622, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "aeb_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 254417, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "aeb_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 243787, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "aeb_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 202777, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "aeb_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 152772, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "aeb_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 240248, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "aeb_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 253334, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "aeb_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 244892, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "aeb_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 256007, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "aeb_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 255075, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "aeb_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 259249, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "aeb_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 242932, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "aeb_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 281088, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "aeb_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 258278, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "aeb_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 250694, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "aeb_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 247160, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "aeb_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 238505, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "aeb_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247481, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "aeb_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 247155, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "aeb_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 262728, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "aeb_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 264801, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "aeb_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 275112, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "aeb_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 260439, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + 
"max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "aeb_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 229138, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "aeb_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 265886, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "aeb_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 233357, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "aeb_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 255034, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "aeb_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 249623, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "aeb_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 246916, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "aeb_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 259680, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "aeb_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 270255, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "aeb_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 255816, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "aeb_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 223866, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "aeb_Arab-hau_Latn": { + "num_samples": 1012, + 
"number_of_characters": 252144, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "aeb_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 242877, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "aeb_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 258332, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "aeb_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 241976, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "aeb_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 231665, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "aeb_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 252339, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "aeb_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 245299, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "aeb_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 244932, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "aeb_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 214131, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "aeb_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249144, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "aeb_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 246234, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "aeb_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 245502, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "aeb_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 263739, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "aeb_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 284622, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 110.74802371541502, + "max_sentence1_length": 305, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ben_Beng-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 240663, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ben_Beng-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 255429, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ben_Beng-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 292400, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ben_Beng-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 260100, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ben_Beng-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 282254, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ben_Beng-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 255737, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ben_Beng-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 268196, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ben_Beng-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 287774, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ben_Beng-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 264803, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ben_Beng-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 270975, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ben_Beng-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 274612, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ben_Beng-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 286581, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 
464, + "unique_sentence2": 1012 + }, + "ben_Beng-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 255975, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ben_Beng-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 282473, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ben_Beng-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 258450, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ben_Beng-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 251497, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ben_Beng-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 286039, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ben_Beng-twi_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 257640, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ben_Beng-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 242615, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ben_Beng-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277033, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ben_Beng-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 261008, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ben_Beng-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 257769, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ben_Beng-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 274904, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ben_Beng-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 278378, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ben_Beng-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 252869, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ben_Beng-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 282389, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ben_Beng-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 247002, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ben_Beng-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 244155, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ben_Beng-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 289793, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ben_Beng-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 260334, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ben_Beng-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 267671, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ben_Beng-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264574, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ben_Beng-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 262093, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + 
"max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ben_Beng-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 290038, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ben_Beng-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259822, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ben_Beng-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 269519, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ben_Beng-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 241119, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ben_Beng-est_Latn": { + "num_samples": 1012, + "number_of_characters": 258501, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ben_Beng-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 274998, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ben_Beng-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 274804, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ben_Beng-min_Arab": { + "num_samples": 1012, + "number_of_characters": 255441, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ben_Beng-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 268525, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ben_Beng-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 277768, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ben_Beng-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263488, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ben_Beng-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 268732, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ben_Beng-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 257653, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ben_Beng-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 268864, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ben_Beng-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 263602, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "ben_Beng-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 259414, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ben_Beng-min_Latn": { + "num_samples": 1012, + "number_of_characters": 270205, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ben_Beng-por_Latn": { + "num_samples": 1012, + "number_of_characters": 272460, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ben_Beng-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 265888, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ben_Beng-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 263789, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ben_Beng-ajp_Arab": { + 
"num_samples": 1012, + "number_of_characters": 238088, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ben_Beng-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 251272, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ben_Beng-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 256490, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ben_Beng-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 288722, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ben_Beng-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 245042, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ben_Beng-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266869, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ben_Beng-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 249685, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ben_Beng-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 261215, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ben_Beng-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 258961, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ben_Beng-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 261692, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ben_Beng-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 267224, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ben_Beng-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 263316, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ben_Beng-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 271360, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ben_Beng-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 268014, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ben_Beng-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 275330, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ben_Beng-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 270069, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + 
"max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ben_Beng-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 267146, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ben_Beng-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 278365, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ben_Beng-als_Latn": { + "num_samples": 1012, + "number_of_characters": 276106, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ben_Beng-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 277193, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ben_Beng-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 284219, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ben_Beng-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 258825, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ben_Beng-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 278936, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ben_Beng-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 265824, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ben_Beng-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 277613, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ben_Beng-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 266413, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ben_Beng-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 262279, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ben_Beng-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 216174, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ben_Beng-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 261229, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ben_Beng-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 269170, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ben_Beng-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 285180, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + 
"unique_sentence2": 1012 + }, + "ben_Beng-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 195007, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ben_Beng-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 253699, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ben_Beng-run_Latn": { + "num_samples": 1012, + "number_of_characters": 276307, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ben_Beng-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 283175, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ben_Beng-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 267637, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ben_Beng-apc_Arab": { + 
"num_samples": 1012, + "number_of_characters": 237023, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ben_Beng-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 268417, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ben_Beng-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 264302, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ben_Beng-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 265970, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ben_Beng-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 259102, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ben_Beng-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 275569, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ben_Beng-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271138, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ben_Beng-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 253459, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ben_Beng-war_Latn": { + "num_samples": 1012, + "number_of_characters": 293788, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ben_Beng-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 245349, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ben_Beng-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266866, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ben_Beng-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 286680, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ben_Beng-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 185985, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ben_Beng-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 275036, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ben_Beng-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 292211, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ben_Beng-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 271663, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + 
"max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ben_Beng-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 252946, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ben_Beng-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 256215, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ben_Beng-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 282800, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ben_Beng-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 274823, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ben_Beng-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 273561, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + 
"min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ben_Beng-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 259596, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ben_Beng-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 266728, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ben_Beng-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 276549, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ben_Beng-san_Deva": { + "num_samples": 1012, + "number_of_characters": 257299, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ben_Beng-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262324, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ben_Beng-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 268165, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ben_Beng-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 245470, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ben_Beng-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 287727, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ben_Beng-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 251204, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ben_Beng-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 298439, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + 
"unique_sentence2": 1012 + }, + "ben_Beng-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 271239, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ben_Beng-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 262485, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ben_Beng-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 268833, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ben_Beng-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 261512, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ben_Beng-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 270700, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ben_Beng-ary_Arab": { + 
"num_samples": 1012, + "number_of_characters": 242605, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ben_Beng-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 256299, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ben_Beng-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 285460, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ben_Beng-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 257398, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ben_Beng-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 259371, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ben_Beng-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 261180, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ben_Beng-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 267613, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ben_Beng-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274913, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ben_Beng-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 255886, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ben_Beng-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 242812, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ben_Beng-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 268303, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ben_Beng-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 291639, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ben_Beng-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 267082, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ben_Beng-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 270708, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ben_Beng-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 255945, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ben_Beng-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 317326, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + 
"max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ben_Beng-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 295211, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ben_Beng-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 169061, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ben_Beng-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 254978, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ben_Beng-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 257627, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ben_Beng-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 281523, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ben_Beng-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 255147, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ben_Beng-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 258890, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ben_Beng-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 280151, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ben_Beng-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 260122, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ben_Beng-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 255151, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ben_Beng-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 172290, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ben_Beng-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 265405, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ben_Beng-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 263080, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ben_Beng-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 275185, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ben_Beng-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 255443, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + 
"unique_sentence2": 1012 + }, + "ben_Beng-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 276587, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ben_Beng-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 271382, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ben_Beng-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 260752, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ben_Beng-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 219742, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ben_Beng-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 169737, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ben_Beng-awa_Deva": { + 
"num_samples": 1012, + "number_of_characters": 257213, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ben_Beng-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 270299, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ben_Beng-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 261857, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ben_Beng-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 272972, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ben_Beng-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 272040, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ben_Beng-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 276214, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ben_Beng-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 259897, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ben_Beng-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 298053, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ben_Beng-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 275243, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ben_Beng-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 267659, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ben_Beng-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 264125, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ben_Beng-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 255470, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ben_Beng-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264446, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ben_Beng-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 264120, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ben_Beng-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 279693, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ben_Beng-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 281766, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + 
"max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ben_Beng-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 292077, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ben_Beng-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 277404, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ben_Beng-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 246103, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ben_Beng-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 282851, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ben_Beng-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 250322, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ben_Beng-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 271999, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ben_Beng-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 266588, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ben_Beng-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 263881, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ben_Beng-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 276645, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ben_Beng-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 287220, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ben_Beng-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 272781, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ben_Beng-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 240831, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ben_Beng-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 269109, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ben_Beng-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 259842, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ben_Beng-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 275297, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + 
"unique_sentence2": 1012 + }, + "ben_Beng-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 258941, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ben_Beng-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 248630, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ben_Beng-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 269304, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ben_Beng-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262264, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ben_Beng-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 261897, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ben_Beng-heb_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 231096, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ben_Beng-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266109, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ben_Beng-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 263199, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ben_Beng-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 262467, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ben_Beng-som_Latn": { + "num_samples": 1012, + "number_of_characters": 280704, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ben_Beng-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 301587, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "est_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 241080, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "est_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 255846, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "est_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 292817, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "est_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 260517, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "est_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 282671, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "est_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 256154, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "est_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 268613, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "est_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 288191, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "est_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 265220, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "est_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 271392, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + 
"max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "est_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 275029, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "est_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 286998, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "est_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 256392, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "est_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 282890, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "est_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 258867, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "est_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 251914, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "est_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 286456, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "est_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 258057, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "est_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 243032, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "est_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277450, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "est_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 261425, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "est_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 258186, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "est_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 275321, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "est_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 278795, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "est_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 253286, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + 
"unique_sentence2": 1012 + }, + "est_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 282806, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "est_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 247419, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "est_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 244572, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "est_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 290210, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "est_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 260751, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "est_Latn-hun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 268088, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "est_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264991, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "est_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 262510, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "est_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 290455, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "est_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260239, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "est_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 269936, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "est_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 241536, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "est_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 258501, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "est_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 275415, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "est_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 275221, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "est_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 255858, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "est_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 268942, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "est_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 278185, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "est_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263905, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "est_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 269149, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "est_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 258070, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + 
"max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "est_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 269281, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "est_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 264019, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "est_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 259831, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "est_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 270622, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "est_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 272877, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "est_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 266305, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "est_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 264206, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "est_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 238505, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "est_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 251689, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "est_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 256907, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "est_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 289139, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "est_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 245459, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "est_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267286, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "est_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 250102, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "est_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 261632, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + 
"unique_sentence2": 1012 + }, + "est_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 259378, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "est_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 262109, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "est_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 267641, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "est_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 263733, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "est_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 271777, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "est_Latn-knc_Latn": { + 
"num_samples": 1012, + "number_of_characters": 268431, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "est_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 275747, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "est_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 270486, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "est_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 267563, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "est_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 278782, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "est_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 276523, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "est_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 277610, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "est_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 284636, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "est_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 259242, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "est_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 279353, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "est_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 266241, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "est_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 278030, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "est_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 266830, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "est_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 262696, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "est_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 216591, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "est_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 261646, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + 
"max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "est_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 269587, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "est_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 285597, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "est_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 195424, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "est_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 254116, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "est_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 276724, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "est_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 283592, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "est_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 268054, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "est_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 237440, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "est_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 268834, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "est_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 264719, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "est_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 266387, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "est_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 259519, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "est_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 275986, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "est_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271555, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "est_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 253876, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + 
"unique_sentence2": 1012 + }, + "est_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 294205, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "est_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 245766, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "est_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267283, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "est_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 287097, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "est_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 186402, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "est_Latn-lij_Latn": { + 
"num_samples": 1012, + "number_of_characters": 275453, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "est_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 292628, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "est_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 272080, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "est_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 253363, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "est_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 256632, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "est_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 283217, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "est_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 275240, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "est_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 273978, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "est_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 260013, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "est_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 267145, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "est_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 276966, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "est_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 257716, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "est_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262741, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "est_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 268582, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "est_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 245887, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "est_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 288144, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + 
"max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "est_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 251621, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "est_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 298856, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "est_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 271656, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "est_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 262902, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "est_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 269250, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "est_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 261929, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "est_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 271117, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "est_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 243022, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "est_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 256716, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "est_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 285877, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "est_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 257815, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "est_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 259788, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "est_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 261597, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "est_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 268030, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "est_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275330, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + 
"unique_sentence2": 1011 + }, + "est_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 256303, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "est_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 243229, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "est_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 268720, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "est_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 292056, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "est_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 267499, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "est_Latn-lmo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 271125, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "est_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 256362, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "est_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 317743, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "est_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 295628, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "est_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 169478, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "est_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 255395, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "est_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 258044, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "est_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 281940, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "est_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 255564, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "est_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 259307, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "est_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 280568, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "est_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 260539, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "est_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 255568, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "est_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 172707, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "est_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 265822, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "est_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 263497, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + 
"max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "est_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 275602, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "est_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 255860, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "est_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 277004, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "est_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 271799, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "est_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 261169, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "est_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 220159, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "est_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 170154, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "est_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 257630, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "est_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 270716, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "est_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 262274, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "est_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 273389, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "est_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 272457, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "est_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 276631, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "est_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 260314, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "est_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 298470, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + 
"unique_sentence2": 1012 + }, + "est_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 275660, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "est_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 268076, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "est_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 264542, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "est_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 255887, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "est_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264863, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "est_Latn-lug_Latn": { + 
"num_samples": 1012, + "number_of_characters": 264537, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "est_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 280110, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "est_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 282183, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "est_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 292494, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "est_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 277821, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "est_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 246520, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "est_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 283268, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "est_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 250739, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "est_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 272416, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "est_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 267005, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "est_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 264298, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "est_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 277062, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "est_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 287637, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "est_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 273198, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "est_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 241248, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "est_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 269526, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + 
"max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "est_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 260259, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "est_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 275714, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "est_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 259358, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "est_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 249047, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "est_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 269721, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "est_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262681, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "est_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 262314, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "est_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 231513, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "est_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266526, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "est_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 263616, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "est_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 262884, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "est_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 281121, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "est_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 302004, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.92391304347827, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "hye_Armn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257577, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "hye_Armn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272343, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + 
"unique_sentence2": 1009 + }, + "hye_Armn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309314, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hye_Armn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 277014, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "hye_Armn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 299168, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "hye_Armn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 272651, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "hye_Armn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 285110, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hye_Armn-sot_Latn": { + 
"num_samples": 1012, + "number_of_characters": 304688, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "hye_Armn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 281717, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "hye_Armn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 287889, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hye_Armn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 291526, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hye_Armn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303495, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "hye_Armn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 272889, + "unique_pairs": 
1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "hye_Armn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299387, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "hye_Armn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275364, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hye_Armn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268411, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hye_Armn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 302953, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hye_Armn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274554, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "hye_Armn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 259529, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "hye_Armn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293947, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hye_Armn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 277922, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hye_Armn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 274683, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hye_Armn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 291818, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hye_Armn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 295292, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hye_Armn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 269783, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "hye_Armn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299303, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hye_Armn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 263916, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "hye_Armn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 261069, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + 
"min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "hye_Armn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 306707, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "hye_Armn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 277248, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hye_Armn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 284585, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "hye_Armn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281488, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hye_Armn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 279007, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hye_Armn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 306952, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "hye_Armn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276736, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hye_Armn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286433, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "hye_Armn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 258033, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "hye_Armn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 274998, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + 
"unique_sentence2": 1012 + }, + "hye_Armn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275415, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "hye_Armn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 291718, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "hye_Armn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 272355, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hye_Armn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285439, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hye_Armn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 294682, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hye_Armn-ukr_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 280402, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hye_Armn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 285646, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hye_Armn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274567, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hye_Armn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 285778, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hye_Armn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280516, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hye_Armn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276328, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hye_Armn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 287119, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hye_Armn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289374, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hye_Armn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 282802, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hye_Armn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 280703, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hye_Armn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 255002, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "hye_Armn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 268186, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "hye_Armn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273404, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hye_Armn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 305636, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "hye_Armn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 261956, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "hye_Armn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283783, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "hye_Armn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266599, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "hye_Armn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 278129, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hye_Armn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 275875, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hye_Armn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278606, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hye_Armn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 284138, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + 
"min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hye_Armn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 280230, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hye_Armn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 288274, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hye_Armn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 284928, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hye_Armn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 292244, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "hye_Armn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 286983, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hye_Armn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 284060, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "hye_Armn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295279, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hye_Armn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 293020, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hye_Armn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 294107, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "hye_Armn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 301133, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + 
"unique_sentence2": 1012 + }, + "hye_Armn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 275739, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hye_Armn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 295850, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hye_Armn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 282738, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hye_Armn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 294527, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hye_Armn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 283327, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "hye_Armn-vec_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279193, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hye_Armn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 233088, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "hye_Armn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 278143, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hye_Armn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 286084, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hye_Armn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 302094, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "hye_Armn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 211921, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "hye_Armn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270613, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "hye_Armn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293221, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "hye_Armn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 300089, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "hye_Armn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284551, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "hye_Armn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 253937, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "hye_Armn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285331, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hye_Armn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281216, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "hye_Armn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 282884, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hye_Armn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 276016, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hye_Armn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 292483, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hye_Armn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288052, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hye_Armn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270373, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hye_Armn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 310702, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hye_Armn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262263, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hye_Armn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283780, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hye_Armn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 303594, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "hye_Armn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 202899, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "hye_Armn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 291950, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hye_Armn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 309125, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hye_Armn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288577, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "hye_Armn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 269860, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hye_Armn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 273129, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hye_Armn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 299714, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "hye_Armn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 291737, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hye_Armn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290475, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + 
"unique_sentence2": 1012 + }, + "hye_Armn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276510, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "hye_Armn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283642, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "hye_Armn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293463, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hye_Armn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 274213, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "hye_Armn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279238, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hye_Armn-xho_Latn": { + 
"num_samples": 1012, + "number_of_characters": 285079, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "hye_Armn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 262384, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hye_Armn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304641, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hye_Armn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 268118, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "hye_Armn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315353, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "hye_Armn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 288153, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hye_Armn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279399, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hye_Armn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 285747, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hye_Armn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278426, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hye_Armn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287614, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hye_Armn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 259519, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "hye_Armn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273213, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hye_Armn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302374, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "hye_Armn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 274312, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "hye_Armn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276285, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "hye_Armn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 278094, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "hye_Armn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284527, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hye_Armn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291827, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "hye_Armn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 272800, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hye_Armn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 259726, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "hye_Armn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285217, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hye_Armn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308553, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "hye_Armn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 283996, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hye_Armn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 287622, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hye_Armn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 272859, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hye_Armn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334240, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 
186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "hye_Armn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 312125, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hye_Armn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 185975, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "hye_Armn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 271892, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hye_Armn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274541, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hye_Armn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298437, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + 
"unique_sentence2": 1012 + }, + "hye_Armn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 272061, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "hye_Armn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 275804, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hye_Armn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 297065, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "hye_Armn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 277036, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hye_Armn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 272065, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hye_Armn-zho_Hans": { + 
"num_samples": 1012, + "number_of_characters": 189204, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "hye_Armn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282319, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hye_Armn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 279994, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hye_Armn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 292099, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hye_Armn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272357, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "hye_Armn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293501, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "hye_Armn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288296, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "hye_Armn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277666, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hye_Armn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236656, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "hye_Armn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186651, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "hye_Armn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 274127, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hye_Armn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 287213, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hye_Armn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 278771, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "hye_Armn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 289886, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hye_Armn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 288954, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "hye_Armn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 293128, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hye_Armn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 276811, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hye_Armn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 314967, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "hye_Armn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 292157, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hye_Armn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284573, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "hye_Armn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 281039, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + 
"min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hye_Armn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272384, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hye_Armn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281360, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hye_Armn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 281034, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "hye_Armn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 296607, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hye_Armn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 298680, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 
150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "hye_Armn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 308991, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hye_Armn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294318, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "hye_Armn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 263017, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "hye_Armn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 299765, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hye_Armn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267236, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + 
"unique_sentence2": 1010 + }, + "hye_Armn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 288913, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "hye_Armn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283502, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hye_Armn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 280795, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hye_Armn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 293559, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "hye_Armn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 304134, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hye_Armn-azj_Latn": { + 
"num_samples": 1012, + "number_of_characters": 289695, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "hye_Armn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 257745, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "hye_Armn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 286023, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "hye_Armn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 276756, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hye_Armn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 292211, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hye_Armn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 275855, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "hye_Armn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265544, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "hye_Armn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286218, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hye_Armn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279178, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "hye_Armn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 278811, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "hye_Armn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 248010, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "hye_Armn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283023, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hye_Armn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 280113, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hye_Armn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 279381, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hye_Armn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297618, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "hye_Armn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 318501, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 144.22529644268775, + 
"max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kmb_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257383, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kmb_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272149, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kmb_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309120, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kmb_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 276820, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kmb_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 298974, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + 
"min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kmb_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 272457, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kmb_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 284916, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kmb_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 304494, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kmb_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 281523, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kmb_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 287695, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kmb_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 291332, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kmb_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303301, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kmb_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 272695, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kmb_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299193, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kmb_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275170, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "kmb_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268217, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kmb_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 302759, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kmb_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274360, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kmb_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 259335, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kmb_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293753, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kmb_Latn-eng_Latn": { + 
"num_samples": 1012, + "number_of_characters": 277728, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kmb_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 274489, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kmb_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 291624, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kmb_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 295098, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kmb_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 269589, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kmb_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299109, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kmb_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 263722, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kmb_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 260875, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kmb_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 306513, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kmb_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 277054, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kmb_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 284391, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kmb_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281294, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kmb_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 278813, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kmb_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 306758, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kmb_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276542, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kmb_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286239, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + 
"max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kmb_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 257839, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kmb_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 274804, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kmb_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275221, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kmb_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 291718, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kmb_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 272161, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kmb_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285245, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kmb_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 294488, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kmb_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280208, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kmb_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 285452, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kmb_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274373, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kmb_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 285584, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kmb_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280322, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kmb_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276134, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kmb_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 286925, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kmb_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289180, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + 
"unique_sentence2": 1012 + }, + "kmb_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 282608, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kmb_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 280509, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kmb_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 254808, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kmb_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 267992, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kmb_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273210, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kmb_Latn-ilo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 305442, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kmb_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 261762, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kmb_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283589, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kmb_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266405, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kmb_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 277935, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kmb_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 275681, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kmb_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278412, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kmb_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 283944, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kmb_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 280036, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kmb_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 288080, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kmb_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 284734, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kmb_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 292050, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kmb_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 286789, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kmb_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 283866, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kmb_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295085, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kmb_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 292826, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + 
"max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kmb_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 293913, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kmb_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 300939, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kmb_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 275545, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kmb_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 295656, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kmb_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 282544, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kmb_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 294333, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kmb_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 283133, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kmb_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 278999, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kmb_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 232894, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kmb_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 277949, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kmb_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 285890, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kmb_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 301900, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kmb_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 211727, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kmb_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270419, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kmb_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293027, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + 
"unique_sentence2": 1011 + }, + "kmb_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 299895, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kmb_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284357, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kmb_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 253743, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kmb_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285137, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kmb_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281022, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kmb_Latn-jav_Latn": { + 
"num_samples": 1012, + "number_of_characters": 282690, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kmb_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 275822, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kmb_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 292289, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kmb_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287858, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kmb_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270179, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kmb_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 310508, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kmb_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262069, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kmb_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283586, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kmb_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 303400, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kmb_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 202705, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kmb_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 291756, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kmb_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 308931, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kmb_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288383, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kmb_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 269666, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kmb_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 272935, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kmb_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 299520, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + 
"max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kmb_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 291543, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kmb_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290281, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kmb_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276316, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kmb_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283448, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kmb_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293269, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kmb_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 274019, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kmb_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279044, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kmb_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 284885, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kmb_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 262190, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kmb_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304447, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 
156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kmb_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 267924, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kmb_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315159, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kmb_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 287959, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kmb_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279205, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kmb_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 285553, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + 
"unique_sentence2": 1012 + }, + "kmb_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278232, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kmb_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287420, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kmb_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 259325, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kmb_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273019, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kmb_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302180, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kmb_Latn-kam_Latn": { + 
"num_samples": 1012, + "number_of_characters": 274118, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kmb_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276091, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kmb_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 277900, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kmb_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284333, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kmb_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291633, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kmb_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 272606, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kmb_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 259532, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kmb_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285023, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kmb_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308359, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kmb_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 283802, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kmb_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 287428, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kmb_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 272665, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kmb_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334046, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kmb_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 311931, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kmb_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 185781, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kmb_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 271698, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + 
"max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kmb_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274347, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kmb_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298243, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kmb_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 271867, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kmb_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 275610, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kmb_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 296871, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kmb_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 276842, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kmb_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 271871, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kmb_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 189010, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kmb_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282125, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kmb_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 279800, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 
132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kmb_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 291905, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kmb_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272163, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kmb_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293307, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kmb_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288102, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kmb_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277472, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + 
"unique_sentence2": 1012 + }, + "kmb_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236462, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kmb_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186457, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kmb_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 273933, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kmb_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 287019, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kmb_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 278577, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kmb_Latn-kat_Geor": { + 
"num_samples": 1012, + "number_of_characters": 289692, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kmb_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 288760, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kmb_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 292934, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kmb_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 276617, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kmb_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 314773, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kmb_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 291963, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kmb_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284379, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kmb_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 280845, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kmb_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272190, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kmb_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281166, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kmb_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 280840, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kmb_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 296413, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kmb_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 298486, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kmb_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 308797, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kmb_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294124, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kmb_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 262823, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + 
"max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kmb_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 299571, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kmb_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267042, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kmb_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 288719, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kmb_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283308, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kmb_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 280601, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kmb_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 293365, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kmb_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 303940, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kmb_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 289501, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kmb_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 257551, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kmb_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 285829, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kmb_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 276562, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kmb_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 292017, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kmb_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 275661, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kmb_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265350, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kmb_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286024, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + 
"unique_sentence2": 1012 + }, + "kmb_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278984, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kmb_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 278617, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kmb_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 247816, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kmb_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282829, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kmb_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 279919, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kmb_Latn-pan_Guru": { + 
"num_samples": 1012, + "number_of_characters": 279187, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kmb_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297424, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kmb_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 318307, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 144.03359683794466, + "max_sentence1_length": 426, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "min_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 238020, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "min_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 252786, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "min_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 289757, + 
"unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "min_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 257457, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "min_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 279611, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "min_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 253094, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "min_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 265553, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "min_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 285131, + "unique_pairs": 1012, + "min_sentence1_length": 31, + 
"average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "min_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 262160, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "min_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 268332, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "min_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 271969, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "min_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 283938, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "min_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 253332, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + 
"unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "min_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 279830, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "min_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 255807, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "min_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 248854, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "min_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 283396, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "min_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 254997, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + 
"average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "min_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 239972, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "min_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274390, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "min_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 258365, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "min_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 255126, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "min_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 272261, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "min_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 275735, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "min_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 250226, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "min_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 279746, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "min_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 244359, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "min_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 241512, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + 
"min_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 287150, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "min_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 257691, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "min_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 265028, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "min_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261931, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "min_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 259450, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "min_Arab-plt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 287395, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "min_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257179, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "min_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 266876, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "min_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 238476, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "min_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 255441, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "min_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 255858, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "min_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 272355, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "min_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 272161, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "min_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 265882, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "min_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 275125, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "min_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260845, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "min_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 266089, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "min_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 255010, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "min_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 266221, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "min_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 260959, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "min_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 256771, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + 
"unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "min_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 267562, + "unique_pairs": 1011, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "min_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 269817, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "min_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 263245, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "min_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 261146, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "min_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 235445, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + 
"average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "min_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 248629, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "min_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 253847, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "min_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 286079, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "min_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 242399, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "min_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264226, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + 
"max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "min_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 247042, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "min_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 258572, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "min_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 256318, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "min_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 259049, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "min_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 264581, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + 
"min_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 260673, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "min_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 268717, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "min_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 265371, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "min_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 272687, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "min_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 267426, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "min_Arab-swh_Latn": { + "num_samples": 1012, + 
"number_of_characters": 264503, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "min_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 275722, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "min_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273463, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "min_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 274550, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "min_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 281576, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "min_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 256182, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "min_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 276293, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "min_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 263181, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "min_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 274970, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "min_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 263770, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "min_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 259636, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "min_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 213531, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "min_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 258586, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "min_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 266527, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "min_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 282537, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "min_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 192364, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + 
"unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "min_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 251056, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "min_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 273664, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "min_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 280532, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "min_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 264994, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "min_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 234380, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + 
"average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "min_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 265774, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "min_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 261659, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "min_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 263327, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "min_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 256459, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "min_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 272926, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + 
"max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "min_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268495, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "min_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 250816, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "min_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 291145, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "min_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 242706, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "min_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264223, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + 
"min_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 284037, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "min_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 183342, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "min_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 272393, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "min_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 289568, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "min_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 269020, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "min_Arab-taq_Tfng": { + "num_samples": 1012, + 
"number_of_characters": 250303, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "min_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 253572, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "min_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 280157, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "min_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 272180, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "min_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 270918, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "min_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 256953, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "min_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 264085, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "min_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 273906, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "min_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 254656, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "min_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259681, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "min_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 265522, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "min_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 242827, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "min_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 285084, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "min_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 248561, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "min_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 295796, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "min_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 268596, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + 
"unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "min_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 259842, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "min_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 266190, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "min_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 258869, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "min_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 268057, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "min_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 239962, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + 
"average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "min_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 253656, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "min_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 282817, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "min_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 254755, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "min_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 256728, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "min_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 258537, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + 
"max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "min_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 264970, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "min_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272270, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "min_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 253243, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "min_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 240169, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "min_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 265660, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + 
"min_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 288996, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "min_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 264439, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "min_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 268065, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "min_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 253302, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "min_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 314683, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "min_Arab-tgl_Latn": { + "num_samples": 1012, + 
"number_of_characters": 292568, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "min_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 166418, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "min_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 252335, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "min_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 254984, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "min_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 278880, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "min_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 252504, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "min_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 256247, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "min_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 277508, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "min_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 257479, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "min_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 252508, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "min_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 169647, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "min_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 262762, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "min_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 260437, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "min_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 272542, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "min_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 252800, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "min_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 273944, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + 
"unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "min_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 268739, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "min_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 258109, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "min_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 217099, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "min_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 167094, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "min_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 254570, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + 
"average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "min_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 267656, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "min_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 259214, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "min_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 270329, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "min_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 269397, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "min_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 273571, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + 
"max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "min_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 257254, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "min_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 295410, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "min_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 272600, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "min_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 265016, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "min_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 261482, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + 
"min_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 252827, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "min_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261803, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "min_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 261477, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "min_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 277050, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "min_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 279123, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "min_Arab-tsn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 289434, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "min_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 274761, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "min_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 243460, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "min_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 280208, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "min_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 247679, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "min_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 269356, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "min_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 263945, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "min_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 261238, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "min_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 274002, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "min_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 284577, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "min_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 270138, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "min_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 238188, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "min_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 266466, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "min_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 257199, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "min_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 272654, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "min_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 256298, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + 
"unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "min_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 245987, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "min_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 266661, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "min_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259621, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "min_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 259254, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "min_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 228453, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + 
"average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "min_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263466, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "min_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 260556, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "min_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 259824, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "min_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 278061, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "min_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 298944, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 124.9001976284585, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + 
"max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "pol_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 251104, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "pol_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 265870, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "pol_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 302841, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pol_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 270541, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "pol_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 292695, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + 
}, + "pol_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 266178, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "pol_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 278637, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pol_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 298215, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "pol_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 275244, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "pol_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 281416, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pol_Latn-ban_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285053, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pol_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 297022, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "pol_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 266416, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "pol_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 292914, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "pol_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 268891, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pol_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 261938, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pol_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 296480, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pol_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 268081, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "pol_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 253056, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "pol_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287474, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pol_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 271449, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pol_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 268210, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pol_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 285345, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pol_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 288819, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "pol_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 263310, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "pol_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 292830, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pol_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 257443, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "pol_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 254596, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "pol_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 300234, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "pol_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 270775, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pol_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 278112, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "pol_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275015, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pol_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 272534, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pol_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 300479, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "pol_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270263, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pol_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 279960, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + 
"max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "pol_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 251560, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "pol_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 268525, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pol_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 268942, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "pol_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 285439, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pol_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 285245, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + 
}, + "pol_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 265882, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pol_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 288209, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pol_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273929, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pol_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 279173, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pol_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 268094, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pol_Latn-eus_Latn": { + "num_samples": 1012, + 
"number_of_characters": 279305, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "pol_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 274043, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pol_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 269855, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pol_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 280646, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pol_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 282901, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pol_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 276329, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pol_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 274230, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pol_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 248529, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "pol_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 261713, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "pol_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 266931, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pol_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 299163, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "pol_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 255483, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "pol_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277310, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "pol_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 260126, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "pol_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 271656, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pol_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 269402, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pol_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 272133, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pol_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 277665, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pol_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 273757, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pol_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 281801, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pol_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 278455, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pol_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 285771, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "pol_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 280510, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pol_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 277587, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "pol_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 288806, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pol_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 286547, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + 
"max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "pol_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 287634, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "pol_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 294660, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "pol_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 269266, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pol_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 289377, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pol_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 276265, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, 
+ "pol_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 288054, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pol_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 276854, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "pol_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 272720, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pol_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 226615, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "pol_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 271670, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "pol_Latn-fin_Latn": { + "num_samples": 1012, + 
"number_of_characters": 279611, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pol_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 295621, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "pol_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 205448, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "pol_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 264140, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "pol_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 286748, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "pol_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 293616, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "pol_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 278078, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "pol_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 247464, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "pol_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 278858, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pol_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 274743, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "pol_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 276411, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pol_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 269543, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pol_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 286010, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pol_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281579, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pol_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 263900, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pol_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 304229, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pol_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 255790, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pol_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277307, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pol_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 297121, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "pol_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 196426, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "pol_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 285477, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pol_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 302652, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pol_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 282104, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "pol_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 263387, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pol_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 266656, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pol_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 293241, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + 
"max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "pol_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 285264, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pol_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 284002, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "pol_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 270037, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "pol_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 277169, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "pol_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 286990, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "pol_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 267740, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "pol_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272765, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pol_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 278606, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "pol_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 255911, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pol_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 298168, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pol_Latn-fuv_Latn": { + "num_samples": 1012, + 
"number_of_characters": 261645, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "pol_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 308880, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "pol_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 281680, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pol_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 272926, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pol_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 279274, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pol_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 271953, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pol_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 281141, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pol_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 253046, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "pol_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 266740, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pol_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 295901, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "pol_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 267839, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "pol_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 269812, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "pol_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 271621, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "pol_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 278054, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pol_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285354, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "pol_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 266327, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + 
"unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pol_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 253253, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "pol_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 278744, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pol_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 302080, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "pol_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 277523, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pol_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 281149, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pol_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 266386, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pol_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 327767, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "pol_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 305652, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pol_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 179502, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "pol_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 265419, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + 
"max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pol_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 268068, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pol_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 291964, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "pol_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 265588, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "pol_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 269331, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pol_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 290592, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + 
"pol_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 270563, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pol_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 265592, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pol_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 182731, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "pol_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 275846, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pol_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 273521, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pol_Latn-glg_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285626, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pol_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 265884, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "pol_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 287028, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "pol_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 281823, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "pol_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 271193, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pol_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 230183, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "pol_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 180178, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "pol_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 267654, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pol_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 280740, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pol_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 272298, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "pol_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 283413, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pol_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 282481, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "pol_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 286655, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "pol_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 270338, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pol_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 308494, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "pol_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 285684, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pol_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 278100, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "pol_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 274566, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pol_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 265911, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pol_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274887, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pol_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 274561, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "pol_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 290134, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pol_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 292207, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "pol_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 302518, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pol_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 287845, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "pol_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 256544, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + 
"max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "pol_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 293292, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pol_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 260763, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "pol_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 282440, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "pol_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 277029, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pol_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 274322, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, 
+ "pol_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 287086, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "pol_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 297661, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pol_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 283222, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "pol_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 251272, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "pol_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 279550, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "pol_Latn-kea_Latn": { + "num_samples": 1012, + 
"number_of_characters": 270283, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pol_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 285738, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pol_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 269382, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "pol_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 259071, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "pol_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 279745, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pol_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272705, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "pol_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 272338, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "pol_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 241537, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "pol_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276550, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pol_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 273640, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pol_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 272908, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pol_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 291145, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "pol_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 312028, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 137.82905138339922, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ssw_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 260347, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ssw_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 275113, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ssw_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 312084, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ssw_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 279784, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ssw_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 301938, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ssw_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 275421, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ssw_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 287880, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ssw_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 307458, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ssw_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 284487, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ssw_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 290659, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ssw_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 294296, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ssw_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 306265, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ssw_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 275659, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 
326, + "unique_sentence2": 1011 + }, + "ssw_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 302157, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ssw_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 278134, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ssw_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 271181, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ssw_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 305723, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ssw_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 277324, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ssw_Latn-acm_Arab": 
{ + "num_samples": 1012, + "number_of_characters": 262299, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ssw_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296717, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ssw_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 280692, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ssw_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 277453, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ssw_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 294588, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ssw_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 298062, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ssw_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 272553, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ssw_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 302073, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ssw_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 266686, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ssw_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 263839, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ssw_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 309477, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ssw_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 280018, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ssw_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 287355, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ssw_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284258, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ssw_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 281777, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ssw_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 309722, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + 
"max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ssw_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279506, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ssw_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 289203, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ssw_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 260803, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ssw_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 277768, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ssw_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 278185, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ssw_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 294682, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ssw_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 294488, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ssw_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 275125, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ssw_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 288209, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ssw_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283172, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ssw_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 288416, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ssw_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 277337, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ssw_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 288548, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ssw_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 283286, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ssw_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 279098, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + 
"unique_sentence2": 1012 + }, + "ssw_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 289889, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ssw_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 292144, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ssw_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 285572, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ssw_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 283473, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ssw_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 257772, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ssw_Latn-bjn_Arab": { + 
"num_samples": 1012, + "number_of_characters": 270956, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ssw_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 276174, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ssw_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 308406, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ssw_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 264726, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ssw_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286553, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ssw_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 269369, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ssw_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 280899, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ssw_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 278645, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ssw_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 281376, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ssw_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 286908, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ssw_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 283000, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ssw_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 291044, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ssw_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 287698, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ssw_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 295014, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ssw_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 289753, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ssw_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 286830, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + 
"max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ssw_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 298049, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ssw_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 295790, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ssw_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 296877, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ssw_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 303903, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ssw_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 278509, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ssw_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 298620, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ssw_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 285508, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ssw_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 297297, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ssw_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 286097, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ssw_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 281963, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ssw_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 235858, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ssw_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 280913, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ssw_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 288854, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ssw_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 304864, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ssw_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 214691, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + 
"unique_sentence2": 1012 + }, + "ssw_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 273383, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ssw_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 295991, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ssw_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 302859, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ssw_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 287321, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ssw_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 256707, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ssw_Latn-bug_Latn": { + 
"num_samples": 1012, + "number_of_characters": 288101, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ssw_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 283986, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ssw_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 285654, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ssw_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 278786, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ssw_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 295253, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ssw_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290822, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ssw_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 273143, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ssw_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 313472, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ssw_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 265033, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ssw_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286550, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ssw_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 306364, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ssw_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 205669, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ssw_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 294720, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ssw_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 311895, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ssw_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 291347, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ssw_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 272630, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + 
"max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ssw_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 275899, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ssw_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 302484, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ssw_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 294507, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ssw_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 293245, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ssw_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 279280, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ssw_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 286412, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ssw_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 296233, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ssw_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 276983, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ssw_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282008, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ssw_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 287849, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ssw_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 265154, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ssw_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 307411, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ssw_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 270888, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ssw_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 318123, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ssw_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 290923, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + 
"unique_sentence2": 1012 + }, + "ssw_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 282169, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ssw_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 288517, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ssw_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 281196, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ssw_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 290384, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ssw_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 262289, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ssw_Latn-ces_Latn": { + 
"num_samples": 1012, + "number_of_characters": 275983, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ssw_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 305144, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ssw_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 277082, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ssw_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 279055, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ssw_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 280864, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ssw_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 287297, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ssw_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294597, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ssw_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 275570, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ssw_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 262496, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ssw_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 287987, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ssw_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 311323, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ssw_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 286766, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ssw_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 290392, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ssw_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 275629, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ssw_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 337010, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ssw_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 314895, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + 
"max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ssw_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 188745, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ssw_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 274662, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ssw_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 277311, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ssw_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 301207, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ssw_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 274831, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ssw_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 278574, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ssw_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 299835, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ssw_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 279806, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ssw_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 274835, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ssw_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 191974, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 
42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ssw_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 285089, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ssw_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 282764, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ssw_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 294869, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ssw_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 275127, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ssw_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 296271, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + 
"unique_sentence2": 1012 + }, + "ssw_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 291066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ssw_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 280436, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ssw_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 239426, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ssw_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 189421, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ssw_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 276897, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ssw_Latn-cym_Latn": { + 
"num_samples": 1012, + "number_of_characters": 289983, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ssw_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 281541, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ssw_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 292656, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ssw_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 291724, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ssw_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 295898, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ssw_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 279581, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ssw_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 317737, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ssw_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 294927, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ssw_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 287343, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ssw_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 283809, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ssw_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 275154, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ssw_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284130, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ssw_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 283804, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ssw_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 299377, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ssw_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 301450, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ssw_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 311761, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + 
"max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ssw_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 297088, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ssw_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 265787, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ssw_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 302535, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ssw_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 270006, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ssw_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 291683, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ssw_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 286272, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ssw_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 283565, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ssw_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 296329, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ssw_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 306904, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ssw_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 292465, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ssw_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 260515, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ssw_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 288793, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ssw_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 279526, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ssw_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 294981, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ssw_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 278625, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + 
"unique_sentence2": 1012 + }, + "ssw_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 268314, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ssw_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 288988, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ssw_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281948, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ssw_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 281581, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ssw_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 250780, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ssw_Latn-khk_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 285793, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ssw_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 282883, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ssw_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 282151, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ssw_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 300388, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ssw_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 321271, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 146.96245059288538, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 246067, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 260833, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ukr_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 297804, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 265504, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 287658, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 261141, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 273600, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 293178, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 270207, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 276379, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 280016, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 
349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 291985, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 261379, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 287877, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ukr_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 263854, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 256901, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 
35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 291443, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 263044, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 248019, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282437, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 266412, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 263173, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 280308, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 283782, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 258273, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 287793, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"ukr_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 252406, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 249559, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 295197, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 265738, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 273075, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kir_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 269978, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 267497, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 295442, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265226, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 274923, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 246523, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 263488, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 263905, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 280402, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 280208, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 260845, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 273929, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 283172, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 274136, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 263057, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 274268, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 269006, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 264818, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 275609, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 277864, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 271292, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 269193, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 243492, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 256676, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 261894, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 294126, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 250446, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272273, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 255089, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 266619, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 264365, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "ukr_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 267096, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 272628, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 268720, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 276764, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 273418, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280734, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 275473, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 272550, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 283769, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 281510, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 282597, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ukr_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 289623, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 264229, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 284340, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 271228, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 283017, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 271817, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 267683, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 221578, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 266633, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 274574, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 290584, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 200411, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 259103, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 281711, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 288579, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 273041, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 242427, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 273821, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 269706, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 271374, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 264506, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 280973, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276542, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 258863, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 299192, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "ukr_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 250753, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272270, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 292084, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 191389, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 280440, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-mya_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 297615, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 277067, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 258350, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 261619, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 288204, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 280227, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 278965, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 265000, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 272132, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 281953, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 262703, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267728, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 273569, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 250874, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 293131, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 256608, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 303843, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 276643, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 267889, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 274237, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 266916, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 276104, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 248009, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 261703, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 290864, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 262802, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ukr_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 264775, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 266584, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 273017, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280317, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 261290, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "ukr_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 248216, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 273707, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 297043, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 272486, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 276112, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-npi_Deva": { + "num_samples": 1012, + 
"number_of_characters": 261349, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 322730, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 300615, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 174465, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 260382, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 263031, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 286927, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 260551, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 264294, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 285555, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 265526, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 260555, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 177694, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 270809, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 268484, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 280589, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 260847, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 281991, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 276786, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 266156, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 225146, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 175141, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 262617, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 275703, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 267261, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ukr_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 278376, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 277444, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ukr_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 281618, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 265301, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 303457, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 280647, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "ukr_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 273063, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 269529, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 260874, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269850, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 269524, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285097, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 287170, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ukr_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 297481, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 282808, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 251507, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 288255, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 255726, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ukr_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 277403, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 271992, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 269285, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 282049, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 292624, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 278185, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 246235, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 274513, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 265246, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 280701, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 264345, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 254034, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 274708, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267668, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 267301, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ukr_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 236500, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271513, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 268603, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 267871, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 286108, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ukr_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 306991, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 132.85177865612647, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "afr_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 251311, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "afr_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 266077, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "afr_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 303048, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, 
+ "afr_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 270748, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "afr_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 292902, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "afr_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 266385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "afr_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 278844, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "afr_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 298422, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "afr_Latn-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 275451, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "afr_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 281623, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "afr_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 285260, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "afr_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 297229, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "afr_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 266623, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "afr_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 293121, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "afr_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 269098, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "afr_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 262145, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "afr_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 296687, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "afr_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 268288, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "afr_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 253263, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "afr_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287681, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "afr_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 271656, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "afr_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 268417, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "afr_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 285552, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "afr_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 289026, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "afr_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 263517, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "afr_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 293037, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "afr_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 257650, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "afr_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 254803, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "afr_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 300441, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "afr_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 270982, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "afr_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 278319, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "afr_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275222, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "afr_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 272741, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "afr_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 300686, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + 
"max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "afr_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270470, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "afr_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 280167, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "afr_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 251767, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "afr_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 268732, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "afr_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 269149, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + 
}, + "afr_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 285646, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "afr_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 285452, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "afr_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 266089, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "afr_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 279173, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "afr_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 288416, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "afr_Latn-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 274136, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "afr_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 268301, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "afr_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 279512, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "afr_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 274250, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "afr_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 270062, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "afr_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 280853, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "afr_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 283108, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "afr_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 276536, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "afr_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 274437, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "afr_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 248736, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "afr_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 261920, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "afr_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 267138, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "afr_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 299370, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "afr_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 255690, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "afr_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277517, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "afr_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 260333, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "afr_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 271863, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "afr_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 269609, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "afr_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 272340, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "afr_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 277872, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "afr_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 273964, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "afr_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 282008, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "afr_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 278662, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "afr_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 285978, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "afr_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 280717, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "afr_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 277794, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "afr_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 289013, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "afr_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 286754, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "afr_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 287841, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "afr_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 294867, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "afr_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 269473, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, 
+ "afr_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 289584, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "afr_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 276472, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "afr_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 288261, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "afr_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 277061, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "afr_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 272927, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "afr_Latn-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 226822, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "afr_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 271877, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "afr_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 279818, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "afr_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 295828, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "afr_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 205655, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "afr_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 264347, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "afr_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 286955, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "afr_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 293823, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "afr_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 278285, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "afr_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 247671, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "afr_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 279065, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "afr_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 274950, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "afr_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 276618, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "afr_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 269750, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "afr_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 286217, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "afr_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281786, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "afr_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 264107, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "afr_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 304436, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "afr_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 255997, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "afr_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277514, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "afr_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 297328, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + 
"average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "afr_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 196633, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "afr_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 285684, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "afr_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 302859, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "afr_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 282311, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "afr_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 263594, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "afr_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 266863, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "afr_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 293448, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "afr_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 285471, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "afr_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 284209, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "afr_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 270244, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + 
}, + "afr_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 277376, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "afr_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 287197, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "afr_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 267947, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "afr_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272972, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "afr_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 278813, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "afr_Latn-ars_Arab": { + "num_samples": 1012, + 
"number_of_characters": 256118, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "afr_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 298375, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "afr_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 261852, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "afr_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 309087, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "afr_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 281887, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "afr_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 273133, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "afr_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 279481, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "afr_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 272160, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "afr_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 281348, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "afr_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 253253, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "afr_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 266947, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "afr_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 296108, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "afr_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 268046, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "afr_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 270019, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "afr_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 271828, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "afr_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 278261, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "afr_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285561, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "afr_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 266534, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "afr_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 253460, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "afr_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 278951, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "afr_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 302287, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "afr_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 277730, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "afr_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 281356, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "afr_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 266593, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "afr_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 327974, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "afr_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 305859, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + 
"max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "afr_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 179709, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "afr_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 265626, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "afr_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 268275, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "afr_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 292171, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "afr_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 265795, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, 
+ "afr_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 269538, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "afr_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 290799, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "afr_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 270770, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "afr_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 265799, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "afr_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 182938, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "afr_Latn-ast_Latn": { + "num_samples": 1012, + 
"number_of_characters": 276053, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "afr_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 273728, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "afr_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 285833, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "afr_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 266091, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "afr_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 287235, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "afr_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 282030, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "afr_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 271400, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "afr_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 230390, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "afr_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 180385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "afr_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 267861, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "afr_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 280947, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "afr_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 272505, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "afr_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 283620, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "afr_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 282688, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "afr_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 286862, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "afr_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 270545, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "afr_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 308701, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "afr_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 285891, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "afr_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 278307, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "afr_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 274773, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "afr_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 266118, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "afr_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275094, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "afr_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 274768, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "afr_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 290341, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "afr_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 292414, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "afr_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 302725, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "afr_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 288052, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "afr_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 256751, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "afr_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 293499, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "afr_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 260970, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "afr_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 282647, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + 
}, + "afr_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 277236, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "afr_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 274529, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "afr_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 287293, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "afr_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 297868, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "afr_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 283429, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "afr_Latn-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 251479, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "afr_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 279757, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "afr_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 270490, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "afr_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 285945, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "afr_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 269589, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "afr_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 259278, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "afr_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 279952, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "afr_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272912, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "afr_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 272545, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "afr_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 241744, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "afr_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276757, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "afr_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 273847, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "afr_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 273115, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "afr_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 291352, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "afr_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 312235, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.03359683794466, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bho_Deva-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 240232, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bho_Deva-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 254998, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "bho_Deva-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 291969, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bho_Deva-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 259669, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bho_Deva-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 281823, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "bho_Deva-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 255306, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bho_Deva-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 267765, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bho_Deva-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 287343, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bho_Deva-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 264372, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bho_Deva-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 270544, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bho_Deva-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 274181, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 
394, + "unique_sentence2": 1012 + }, + "bho_Deva-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 286150, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bho_Deva-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 255544, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "bho_Deva-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 282042, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bho_Deva-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 258019, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bho_Deva-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 251066, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bho_Deva-spa_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 285608, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bho_Deva-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 257209, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bho_Deva-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 242184, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bho_Deva-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276602, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bho_Deva-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 260577, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bho_Deva-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 257338, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bho_Deva-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 274473, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bho_Deva-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 277947, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bho_Deva-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 252438, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "bho_Deva-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 281958, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bho_Deva-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 246571, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "bho_Deva-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 243724, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bho_Deva-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 289362, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "bho_Deva-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 259903, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bho_Deva-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 267240, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bho_Deva-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264143, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + 
"max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bho_Deva-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 261662, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bho_Deva-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 289607, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bho_Deva-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259391, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bho_Deva-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 269088, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "bho_Deva-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 240688, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + 
"min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bho_Deva-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 257653, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bho_Deva-est_Latn": { + "num_samples": 1012, + "number_of_characters": 258070, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bho_Deva-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 274567, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bho_Deva-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 274373, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bho_Deva-min_Arab": { + "num_samples": 1012, + "number_of_characters": 255010, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 
124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bho_Deva-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 268094, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bho_Deva-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 277337, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bho_Deva-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263057, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bho_Deva-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 268301, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bho_Deva-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 268433, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "bho_Deva-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 263171, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bho_Deva-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 258983, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bho_Deva-min_Latn": { + "num_samples": 1012, + "number_of_characters": 269774, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bho_Deva-por_Latn": { + "num_samples": 1012, + "number_of_characters": 272029, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bho_Deva-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 265457, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bho_Deva-umb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 263358, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bho_Deva-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 237657, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bho_Deva-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 250841, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "bho_Deva-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 256059, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bho_Deva-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 288291, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bho_Deva-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 244611, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "bho_Deva-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266438, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "bho_Deva-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 249254, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "bho_Deva-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 260784, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bho_Deva-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 258530, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bho_Deva-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 261261, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bho_Deva-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 266793, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bho_Deva-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 262885, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bho_Deva-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 270929, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bho_Deva-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 267583, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bho_Deva-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 274899, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + 
"max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bho_Deva-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 269638, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bho_Deva-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 266715, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bho_Deva-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 277934, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bho_Deva-als_Latn": { + "num_samples": 1012, + "number_of_characters": 275675, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bho_Deva-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 276762, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "bho_Deva-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 283788, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bho_Deva-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 258394, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bho_Deva-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 278505, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bho_Deva-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 265393, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bho_Deva-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 277182, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bho_Deva-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 265982, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bho_Deva-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 261848, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bho_Deva-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 215743, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bho_Deva-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 260798, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bho_Deva-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 268739, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + 
"unique_sentence2": 1012 + }, + "bho_Deva-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 284749, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "bho_Deva-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 194576, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "bho_Deva-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 253268, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bho_Deva-run_Latn": { + "num_samples": 1012, + "number_of_characters": 275876, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bho_Deva-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 282744, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bho_Deva-vie_Latn": { + 
"num_samples": 1012, + "number_of_characters": 267206, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bho_Deva-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 236592, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bho_Deva-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 267986, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bho_Deva-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 263871, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "bho_Deva-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 265539, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bho_Deva-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 258671, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bho_Deva-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 275138, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bho_Deva-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270707, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bho_Deva-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 253028, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bho_Deva-war_Latn": { + "num_samples": 1012, + "number_of_characters": 293357, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bho_Deva-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 244918, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bho_Deva-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266435, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bho_Deva-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 286249, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bho_Deva-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 185554, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bho_Deva-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 274605, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bho_Deva-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 291780, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + 
"max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bho_Deva-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 271232, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bho_Deva-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 252515, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bho_Deva-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 255784, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bho_Deva-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 282369, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bho_Deva-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 274392, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bho_Deva-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 273130, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bho_Deva-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 259165, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "bho_Deva-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 266297, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bho_Deva-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 276118, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bho_Deva-san_Deva": { + "num_samples": 1012, + "number_of_characters": 256868, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bho_Deva-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261893, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bho_Deva-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 267734, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bho_Deva-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 245039, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bho_Deva-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 287296, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bho_Deva-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 250773, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + 
"unique_sentence2": 1012 + }, + "bho_Deva-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 298008, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "bho_Deva-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 270808, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bho_Deva-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 262054, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bho_Deva-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 268402, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bho_Deva-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 261081, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bho_Deva-ydd_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 270269, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bho_Deva-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 242174, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bho_Deva-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 255868, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bho_Deva-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 285029, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bho_Deva-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 256967, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "bho_Deva-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 258940, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bho_Deva-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 260749, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "bho_Deva-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 267182, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bho_Deva-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274482, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "bho_Deva-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 255455, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bho_Deva-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 242381, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "bho_Deva-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 267872, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bho_Deva-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 291208, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bho_Deva-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 266651, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bho_Deva-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 270277, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bho_Deva-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 255514, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + 
"max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bho_Deva-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 316895, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bho_Deva-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 294780, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bho_Deva-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 168630, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "bho_Deva-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 254547, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bho_Deva-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 257196, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bho_Deva-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 281092, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bho_Deva-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 254716, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "bho_Deva-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 258459, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bho_Deva-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 279720, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bho_Deva-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 259691, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bho_Deva-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 254720, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bho_Deva-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 171859, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bho_Deva-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 264974, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bho_Deva-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 262649, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bho_Deva-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 274754, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "bho_Deva-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 255012, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "bho_Deva-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 276156, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "bho_Deva-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 270951, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bho_Deva-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 260321, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bho_Deva-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 219311, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bho_Deva-zho_Hant": { + 
"num_samples": 1012, + "number_of_characters": 169306, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bho_Deva-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 256782, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bho_Deva-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 269868, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bho_Deva-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 261426, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bho_Deva-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 272541, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bho_Deva-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 271609, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bho_Deva-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 275783, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bho_Deva-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 259466, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bho_Deva-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 297622, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "bho_Deva-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 274812, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bho_Deva-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 267228, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "bho_Deva-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 263694, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bho_Deva-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 255039, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bho_Deva-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264015, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bho_Deva-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 263689, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bho_Deva-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 279262, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + 
"max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bho_Deva-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 281335, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bho_Deva-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 291646, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bho_Deva-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 276973, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "bho_Deva-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 245672, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bho_Deva-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 282420, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bho_Deva-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 249891, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bho_Deva-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 271568, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "bho_Deva-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 266157, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bho_Deva-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 263450, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bho_Deva-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 276214, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bho_Deva-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 286789, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bho_Deva-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 272350, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bho_Deva-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 240400, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bho_Deva-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 268678, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bho_Deva-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 259411, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + 
"unique_sentence2": 1012 + }, + "bho_Deva-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 274866, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bho_Deva-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 258510, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "bho_Deva-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 248199, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bho_Deva-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 268873, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bho_Deva-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261833, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "bho_Deva-dyu_Latn": { + 
"num_samples": 1012, + "number_of_characters": 261466, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bho_Deva-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 230665, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bho_Deva-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265678, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bho_Deva-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 262768, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bho_Deva-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 262036, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bho_Deva-som_Latn": { + "num_samples": 1012, + "number_of_characters": 280273, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bho_Deva-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 301156, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08596837944664, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "eus_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 251443, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "eus_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 266209, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "eus_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 303180, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "eus_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 270880, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "eus_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 293034, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "eus_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 266517, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "eus_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 278976, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "eus_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 298554, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "eus_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 275583, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "eus_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 281755, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "eus_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 285392, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "eus_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 297361, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "eus_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 266755, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "eus_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 293253, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "eus_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 269230, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "eus_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 262277, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "eus_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 296819, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "eus_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 268420, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "eus_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 253395, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "eus_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287813, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "eus_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 271788, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eus_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 268549, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "eus_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 285684, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "eus_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 289158, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "eus_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 263649, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "eus_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 293169, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "eus_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 257782, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "eus_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 254935, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "eus_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 300573, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "eus_Latn-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 271114, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "eus_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 278451, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "eus_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275354, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "eus_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 272873, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "eus_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 300818, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "eus_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270602, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "eus_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 280299, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "eus_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 251899, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "eus_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 268864, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "eus_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 269281, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "eus_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 285778, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "eus_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 285584, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "eus_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 266221, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "eus_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 279305, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "eus_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 288548, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "eus_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274268, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "eus_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 279512, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "eus_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 268433, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "eus_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 274382, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "eus_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 270194, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "eus_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 280985, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "eus_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 283240, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "eus_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 276668, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "eus_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 274569, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "eus_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 248868, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "eus_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 262052, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "eus_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 267270, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "eus_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 299502, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "eus_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 255822, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "eus_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277649, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "eus_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 260465, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + 
"unique_sentence2": 1012 + }, + "eus_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 271995, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "eus_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 269741, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "eus_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 272472, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "eus_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 278004, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eus_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 274096, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "eus_Latn-ind_Latn": { + 
"num_samples": 1012, + "number_of_characters": 282140, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "eus_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 278794, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "eus_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 286110, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "eus_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 280849, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "eus_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 277926, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "eus_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 289145, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "eus_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 286886, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "eus_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 287973, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "eus_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 294999, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "eus_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 269605, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "eus_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 289716, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "eus_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 276604, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "eus_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 288393, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "eus_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 277193, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "eus_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 273059, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "eus_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 226954, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "eus_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 272009, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "eus_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 279950, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "eus_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 295960, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "eus_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 205787, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "eus_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 264479, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "eus_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 287087, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "eus_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 293955, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "eus_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 278417, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "eus_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 247803, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "eus_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 279197, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "eus_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 275082, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "eus_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 276750, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "eus_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 269882, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eus_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 286349, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "eus_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281918, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "eus_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 264239, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "eus_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 304568, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "eus_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 256129, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "eus_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277646, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "eus_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 297460, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "eus_Latn-jpn_Jpan": { + 
"num_samples": 1012, + "number_of_characters": 196765, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "eus_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 285816, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "eus_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 302991, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "eus_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 282443, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "eus_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 263726, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "eus_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 266995, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "eus_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 293580, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "eus_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 285603, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "eus_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 284341, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "eus_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 270376, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "eus_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 277508, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "eus_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 287329, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "eus_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 268079, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "eus_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273104, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "eus_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 278945, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "eus_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 256250, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "eus_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 298507, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "eus_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 261984, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "eus_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 309219, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "eus_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 282019, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "eus_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 273265, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "eus_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 279613, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "eus_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 272292, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "eus_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 281480, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "eus_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 253385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "eus_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 267079, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "eus_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 296240, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "eus_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 268178, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "eus_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 270151, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "eus_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 271960, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "eus_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 278393, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "eus_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285693, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "eus_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 266666, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "eus_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 253592, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "eus_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 279083, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "eus_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 302419, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "eus_Latn-kan_Knda": { + 
"num_samples": 1012, + "number_of_characters": 277862, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "eus_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 281488, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "eus_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 266725, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "eus_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 328106, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "eus_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 305991, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "eus_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 179841, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "eus_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 265758, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "eus_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 268407, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "eus_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 292303, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "eus_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 265927, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "eus_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 269670, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "eus_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 290931, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "eus_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 270902, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "eus_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 265931, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "eus_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 183070, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "eus_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 276185, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "eus_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 273860, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "eus_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 285965, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "eus_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 266223, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "eus_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 287367, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "eus_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 282162, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "eus_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 271532, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "eus_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 230522, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "eus_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 180517, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "eus_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 267993, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "eus_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 281079, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "eus_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 272637, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "eus_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 283752, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "eus_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 282820, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "eus_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 286994, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "eus_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 270677, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + 
"unique_sentence2": 1012 + }, + "eus_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 308833, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "eus_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 286023, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "eus_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 278439, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "eus_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 274905, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "eus_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 266250, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "eus_Latn-kaz_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 275226, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "eus_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 274900, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "eus_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 290473, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "eus_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 292546, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "eus_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 302857, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "eus_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 288184, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "eus_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 256883, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "eus_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 293631, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "eus_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 261102, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "eus_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 282779, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "eus_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 277368, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "eus_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 274661, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "eus_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 287425, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "eus_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 298000, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "eus_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 283561, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "eus_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 251611, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "eus_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 279889, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "eus_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 270622, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "eus_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 286077, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "eus_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 269721, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "eus_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 259410, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "eus_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 280084, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "eus_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273044, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "eus_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 272677, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "eus_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 241876, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "eus_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276889, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "eus_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 273979, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "eus_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 273247, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "eus_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 291484, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "eus_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 312367, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.16403162055337, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ibo_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 246181, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + 
"unique_sentence2": 1012 + }, + "ibo_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 260947, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ibo_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 297918, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ibo_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 265618, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ibo_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 287772, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ibo_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 261255, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ibo_Latn-pap_Latn": { + 
"num_samples": 1012, + "number_of_characters": 273714, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ibo_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 293292, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ibo_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 270321, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ibo_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 276493, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ibo_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 280130, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ibo_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 292099, + "unique_pairs": 
1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ibo_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 261493, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ibo_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 287991, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ibo_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 263968, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ibo_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 257015, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ibo_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 291557, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ibo_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 263158, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ibo_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 248133, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ibo_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282551, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ibo_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 266526, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ibo_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 263287, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + 
"max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ibo_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 280422, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ibo_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 283896, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ibo_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 258387, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ibo_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 287907, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ibo_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 252520, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ibo_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 249673, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ibo_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 295311, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ibo_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 265852, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ibo_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 273189, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ibo_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270092, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ibo_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 267611, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ibo_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 295556, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ibo_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265340, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ibo_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 275037, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ibo_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 246637, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + 
"unique_sentence2": 1012 + }, + "ibo_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 263602, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ibo_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 264019, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ibo_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 280516, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ibo_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 280322, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ibo_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 260959, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ibo_Latn-pol_Latn": { + 
"num_samples": 1012, + "number_of_characters": 274043, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ibo_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 283286, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ibo_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269006, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ibo_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 274250, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ibo_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 263171, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ibo_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 274382, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ibo_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 264932, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ibo_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 275723, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ibo_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 277978, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ibo_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 271406, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ibo_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 269307, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ibo_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 243606, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ibo_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 256790, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ibo_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 262008, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ibo_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 294240, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ibo_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 250560, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + 
"max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ibo_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272387, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ibo_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 255203, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ibo_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 266733, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ibo_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 264479, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ibo_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 267210, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ibo_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 272742, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ibo_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 268834, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ibo_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 276878, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ibo_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 273532, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ibo_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 280848, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ibo_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 275587, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ibo_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 272664, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ibo_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 283883, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ibo_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 281624, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ibo_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 282711, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + 
"unique_sentence2": 1009 + }, + "ibo_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 289737, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ibo_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 264343, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ibo_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 284454, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ibo_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 271342, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ibo_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 283131, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ibo_Latn-szl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 271931, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ibo_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 267797, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ibo_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 221692, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ibo_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 266747, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ibo_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 274688, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ibo_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 290698, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ibo_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 200525, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ibo_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 259217, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ibo_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 281825, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ibo_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 288693, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ibo_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 273155, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ibo_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 242541, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ibo_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 273935, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ibo_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 269820, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ibo_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 271488, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ibo_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 264620, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + 
"max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ibo_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 281087, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ibo_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276656, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ibo_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 258977, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ibo_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 299306, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ibo_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 250867, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ibo_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272384, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ibo_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 292198, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ibo_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 191503, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ibo_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 280554, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ibo_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 297729, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 
161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ibo_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 277181, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ibo_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 258464, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ibo_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 261733, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ibo_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 288318, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ibo_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 280341, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + 
"unique_sentence2": 1012 + }, + "ibo_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 279079, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ibo_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 265114, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ibo_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 272246, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ibo_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 282067, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ibo_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 262817, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ibo_Latn-tat_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 267842, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ibo_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 273683, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ibo_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 250988, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ibo_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 293245, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ibo_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 256722, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ibo_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 303957, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ibo_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 276757, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ibo_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 268003, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ibo_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 274351, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ibo_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 267030, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ibo_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 276218, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ibo_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 248123, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ibo_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 261817, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ibo_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 290978, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ibo_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 262916, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ibo_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 264889, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + 
"max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ibo_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 266698, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ibo_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 273131, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ibo_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280431, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ibo_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 261404, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ibo_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 248330, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ibo_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 273821, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ibo_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 297157, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ibo_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 272600, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ibo_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 276226, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ibo_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 261463, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ibo_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 322844, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ibo_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 300729, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ibo_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 174579, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ibo_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 260496, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ibo_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 263145, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + 
"unique_sentence2": 1012 + }, + "ibo_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 287041, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ibo_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 260665, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ibo_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 264408, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ibo_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 285669, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ibo_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 265640, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ibo_Latn-tha_Thai": { + 
"num_samples": 1012, + "number_of_characters": 260669, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ibo_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 177808, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ibo_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 270923, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ibo_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 268598, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ibo_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 280703, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ibo_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 260961, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ibo_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 282105, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ibo_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 276900, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ibo_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 266270, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ibo_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 225260, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ibo_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 175255, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ibo_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 262731, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ibo_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 275817, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ibo_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 267375, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ibo_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 278490, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ibo_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 277558, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + 
"max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ibo_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 281732, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ibo_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 265415, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ibo_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 303571, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ibo_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 280761, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ibo_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 273177, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ibo_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 269643, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ibo_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 260988, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ibo_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269964, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ibo_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 269638, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ibo_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 285211, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ibo_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 287284, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ibo_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 297595, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ibo_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 282922, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ibo_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 251621, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ibo_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 288369, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + 
"unique_sentence2": 1012 + }, + "ibo_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 255840, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ibo_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 277517, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ibo_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 272106, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ibo_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 269399, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ibo_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 282163, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ibo_Latn-tso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 292738, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ibo_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 278299, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ibo_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 246349, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ibo_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 274627, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ibo_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 265360, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ibo_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 280815, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ibo_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 264459, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ibo_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 254148, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ibo_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 274822, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ibo_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267782, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ibo_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 267415, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ibo_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 236614, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ibo_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271627, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ibo_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 268717, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ibo_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 267985, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ibo_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 286222, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + 
"max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ibo_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 307105, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 132.96442687747034, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kmr_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 241993, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kmr_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 256759, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kmr_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 293730, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kmr_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 261430, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kmr_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 283584, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kmr_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 257067, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kmr_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 269526, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kmr_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 289104, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kmr_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 266133, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kmr_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 272305, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kmr_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 275942, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kmr_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 287911, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kmr_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 257305, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kmr_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 283803, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 
1009 + }, + "kmr_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 259780, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kmr_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 252827, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kmr_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 287369, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kmr_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 258970, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kmr_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 243945, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kmr_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 278363, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kmr_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 262338, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kmr_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 259099, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kmr_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 276234, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kmr_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 279708, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kmr_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 254199, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kmr_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 283719, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kmr_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 248332, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kmr_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 245485, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kmr_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 291123, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kmr_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 261664, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kmr_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 269001, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kmr_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265904, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kmr_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 263423, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kmr_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 291368, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kmr_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261152, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kmr_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 270849, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kmr_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 242449, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kmr_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 259414, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kmr_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 259831, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kmr_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 276328, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kmr_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 276134, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kmr_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 256771, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kmr_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 269855, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kmr_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 279098, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kmr_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264818, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kmr_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 270062, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kmr_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 258983, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kmr_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 270194, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kmr_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 264932, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kmr_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 271535, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, 
+ "kmr_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 273790, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kmr_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 267218, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kmr_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 265119, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kmr_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 239418, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kmr_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 252602, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kmr_Latn-ewe_Latn": { + "num_samples": 1012, + 
"number_of_characters": 257820, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kmr_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 290052, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kmr_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 246372, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kmr_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268199, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kmr_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 251015, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kmr_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 262545, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kmr_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 260291, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kmr_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 263022, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kmr_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 268554, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kmr_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 264646, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kmr_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 272690, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kmr_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 269344, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kmr_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 276660, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kmr_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 271399, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kmr_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 268476, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kmr_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 279695, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kmr_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 277436, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kmr_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 278523, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kmr_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 285549, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kmr_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 260155, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kmr_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 280266, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kmr_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 267154, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kmr_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 278943, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kmr_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 267743, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kmr_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 263609, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kmr_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 217504, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + 
"max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kmr_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 262559, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kmr_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 270500, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kmr_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 286510, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kmr_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 196337, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kmr_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 255029, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, 
+ "kmr_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 277637, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kmr_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 284505, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kmr_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 268967, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kmr_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 238353, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kmr_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 269747, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kmr_Latn-fon_Latn": { + "num_samples": 1012, + 
"number_of_characters": 265632, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kmr_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 267300, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kmr_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 260432, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kmr_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 276899, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kmr_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272468, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kmr_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 254789, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kmr_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 295118, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kmr_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 246679, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kmr_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268196, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kmr_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 288010, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kmr_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 187315, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kmr_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 276366, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kmr_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 293541, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kmr_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 272993, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kmr_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 254276, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kmr_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 257545, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kmr_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 284130, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kmr_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 276153, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kmr_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 274891, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kmr_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 260926, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kmr_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 268058, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kmr_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 277879, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kmr_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 258629, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kmr_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263654, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kmr_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 269495, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kmr_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 246800, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + 
"max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kmr_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 289057, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kmr_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 252534, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kmr_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 299769, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kmr_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 272569, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kmr_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 263815, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, 
+ "kmr_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 270163, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kmr_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 262842, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kmr_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 272030, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kmr_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 243935, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kmr_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 257629, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kmr_Latn-gaz_Latn": { + "num_samples": 1012, + 
"number_of_characters": 286790, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kmr_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 258728, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kmr_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 260701, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kmr_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 262510, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kmr_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 268943, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kmr_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276243, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kmr_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 257216, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kmr_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 244142, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kmr_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 269633, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kmr_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 292969, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kmr_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 268412, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kmr_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 272038, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kmr_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 257275, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kmr_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 318656, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kmr_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 296541, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kmr_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 170391, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + 
"unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kmr_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 256308, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kmr_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 258957, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kmr_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 282853, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kmr_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 256477, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kmr_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 260220, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kmr_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 281481, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kmr_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 261452, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kmr_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 256481, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kmr_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 173620, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kmr_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 266735, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + 
"max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kmr_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 264410, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kmr_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 276515, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kmr_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 256773, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kmr_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 277917, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kmr_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 272712, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, 
+ "kmr_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 262082, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kmr_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 221072, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kmr_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 171067, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kmr_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 258543, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kmr_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 271629, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kmr_Latn-grn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 263187, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kmr_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 274302, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kmr_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 273370, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kmr_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 277544, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kmr_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 261227, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kmr_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 299383, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kmr_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 276573, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kmr_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 268989, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kmr_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 265455, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kmr_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 256800, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kmr_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265776, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kmr_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 265450, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kmr_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 281023, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kmr_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 283096, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kmr_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 293407, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kmr_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 278734, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kmr_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 247433, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kmr_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 284181, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kmr_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 251652, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kmr_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 273329, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kmr_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 267918, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kmr_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 265211, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kmr_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 277975, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kmr_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 288550, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kmr_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 274111, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kmr_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 242161, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kmr_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 270439, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kmr_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 261172, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kmr_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 276627, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kmr_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 260271, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kmr_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 249960, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"kmr_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 270634, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kmr_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263594, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kmr_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 263227, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kmr_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 232426, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kmr_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267439, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kmr_Latn-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 264529, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kmr_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 263797, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kmr_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 282034, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kmr_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 302917, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 128.82608695652175, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "min_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 252784, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "min_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 267550, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "min_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 304521, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "min_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 272221, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "min_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 294375, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "min_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 267858, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "min_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 280317, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "min_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 299895, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "min_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 276924, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "min_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 283096, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "min_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 286733, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "min_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 298702, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + 
"min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "min_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 268096, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "min_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 294594, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "min_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 270571, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "min_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 263618, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "min_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 298160, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 
155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "min_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 269761, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "min_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 254736, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "min_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289154, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "min_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 273129, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "min_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 269890, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "min_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 287025, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "min_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 290499, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "min_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 264990, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "min_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 294510, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "min_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 259123, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "min_Latn-acq_Arab": { + 
"num_samples": 1012, + "number_of_characters": 256276, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "min_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 301914, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "min_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 272455, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "min_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 279792, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "min_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276695, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "min_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 274214, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "min_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 302159, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "min_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271943, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "min_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 281640, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "min_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 253240, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "min_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 270205, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "min_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 270622, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "min_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 287119, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "min_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 286925, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "min_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 267562, + "unique_pairs": 1011, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "min_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 280646, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + 
"max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "min_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 289889, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "min_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275609, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "min_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 280853, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "min_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 269774, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "min_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 280985, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + 
"min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "min_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 275723, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "min_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 271535, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "min_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 284581, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "min_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 278009, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "min_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 275910, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "min_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 250209, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "min_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 263393, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "min_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 268611, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "min_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 300843, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "min_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 257163, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + 
"unique_sentence2": 1012 + }, + "min_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278990, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "min_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 261806, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "min_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 273336, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "min_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 271082, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "min_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 273813, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "min_Latn-bjn_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279345, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "min_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 275437, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "min_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 283481, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "min_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 280135, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "min_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 287451, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "min_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 282190, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "min_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 279267, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "min_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 290486, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "min_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 288227, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "min_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 289314, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "min_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 296340, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "min_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 270946, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "min_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 291057, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "min_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 277945, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "min_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 289734, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "min_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 278534, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + 
"max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "min_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 274400, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "min_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 228295, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "min_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 273350, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "min_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 281291, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "min_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 297301, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + 
"min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "min_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 207128, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "min_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 265820, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "min_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 288428, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "min_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 295296, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "min_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 279758, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 
136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "min_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 249144, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "min_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 280538, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "min_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 276423, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "min_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 278091, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "min_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 271223, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "min_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 287690, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "min_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283259, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "min_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 265580, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "min_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 305909, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "min_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 257470, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "min_Latn-bul_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 278987, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "min_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 298801, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "min_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 198106, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "min_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 287157, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "min_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 304332, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "min_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 283784, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "min_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 265067, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "min_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 268336, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "min_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 294921, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "min_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 286944, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "min_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 285682, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "min_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 271717, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "min_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 278849, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "min_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 288670, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "min_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 269420, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "min_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274445, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + 
"max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "min_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 280286, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "min_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 257591, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "min_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 299848, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "min_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 263325, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "min_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 310560, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + 
"min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "min_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 283360, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "min_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 274606, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "min_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 280954, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "min_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 273633, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "min_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 282821, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 
139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "min_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 254726, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "min_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 268420, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "min_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 297581, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "min_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 269519, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "min_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 271492, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + 
"unique_sentence2": 1012 + }, + "min_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 273301, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "min_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 279734, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "min_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287034, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "min_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 268007, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "min_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 254933, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "min_Latn-cjk_Latn": { + 
"num_samples": 1012, + "number_of_characters": 280424, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "min_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 303760, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "min_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 279203, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "min_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 282829, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "min_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 268066, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "min_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 329447, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "min_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 307332, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "min_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 181182, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "min_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 267099, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "min_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 269748, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "min_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 293644, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "min_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 267268, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "min_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 271011, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "min_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 292272, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "min_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 272243, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "min_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 267272, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + 
"max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "min_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 184411, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "min_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 277526, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "min_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 275201, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "min_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 287306, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "min_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 267564, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + 
"min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "min_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 288708, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "min_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 283503, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "min_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 272873, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "min_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 231863, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "min_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 181858, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + "average_sentence2_length": 
40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "min_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 269334, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "min_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 282420, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "min_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 273978, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "min_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 285093, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "min_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 284161, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + 
"unique_sentence2": 1010 + }, + "min_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 288335, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "min_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 272018, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "min_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 310174, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "min_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 287364, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "min_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 279780, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "min_Latn-dan_Latn": { + 
"num_samples": 1012, + "number_of_characters": 276246, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "min_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 267591, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "min_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276567, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "min_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 276241, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "min_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 291814, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "min_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 293887, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "min_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 304198, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "min_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 289525, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "min_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 258224, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "min_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 294972, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "min_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 262443, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "min_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 284120, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "min_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 278709, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "min_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 276002, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "min_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 288766, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "min_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 299341, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + 
"max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "min_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 284902, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "min_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 252952, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "min_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 281230, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "min_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 271963, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "min_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 287418, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + 
"min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "min_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 271062, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "min_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 260751, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "min_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 281425, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "min_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274385, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "min_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 274018, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 
131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "min_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 243217, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "min_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278230, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "min_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 275320, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "min_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 274588, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "min_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 292825, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + 
"unique_sentence2": 1012 + }, + "min_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 313708, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 139.4891304347826, + "max_sentence1_length": 363, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "por_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 255039, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "por_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 269805, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "por_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 306776, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "por_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 274476, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "por_Latn-khm_Khmr": { + 
"num_samples": 1012, + "number_of_characters": 296630, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "por_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 270113, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "por_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 282572, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "por_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 302150, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "por_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 279179, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "por_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 285351, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "por_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 288988, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "por_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 300957, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "por_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 270351, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "por_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 296849, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "por_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 272826, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "por_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 265873, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "por_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 300415, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "por_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 272016, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "por_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 256991, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "por_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291409, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "por_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 275384, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "por_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 272145, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "por_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 289280, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "por_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 292754, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "por_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 267245, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "por_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 296765, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "por_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 261378, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "por_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 258531, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "por_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 304169, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "por_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 274710, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "por_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 282047, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "por_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278950, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "por_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 276469, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "por_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 304414, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "por_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274198, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "por_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 283895, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "por_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 255495, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "por_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 272460, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "por_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 272877, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "por_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 289374, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "por_Latn-kmb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 289180, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "por_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 269817, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "por_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 282901, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "por_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 292144, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "por_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277864, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "por_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 283108, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "por_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 272029, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "por_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 283240, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "por_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 277978, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "por_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 273790, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "por_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 284581, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "por_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 280264, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "por_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 278165, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "por_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 252464, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "por_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 265648, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "por_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 270866, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "por_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 303098, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "por_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 259418, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "por_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281245, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "por_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 264061, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "por_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 275591, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "por_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 273337, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "por_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 276068, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "por_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 281600, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "por_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 277692, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "por_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 285736, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "por_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 282390, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "por_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 289706, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "por_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 284445, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "por_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 281522, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "por_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 292741, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + 
"unique_sentence2": 1012 + }, + "por_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 290482, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "por_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 291569, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "por_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 298595, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "por_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 273201, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "por_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 293312, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "por_Latn-mni_Beng": { + 
"num_samples": 1012, + "number_of_characters": 280200, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "por_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 291989, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "por_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 280789, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "por_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 276655, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "por_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 230550, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "por_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 275605, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "por_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 283546, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "por_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 299556, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "por_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 209383, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "por_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 268075, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "por_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 290683, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "por_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 297551, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "por_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 282013, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "por_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 251399, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "por_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 282793, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "por_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 278678, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "por_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 280346, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "por_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 273478, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "por_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 289945, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "por_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285514, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "por_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 267835, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "por_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 308164, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "por_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 259725, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "por_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281242, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "por_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 301056, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "por_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 200361, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 
56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "por_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 289412, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "por_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 306587, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "por_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 286039, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "por_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 267322, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "por_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 270591, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + 
"unique_sentence2": 1012 + }, + "por_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 297176, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "por_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 289199, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "por_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 287937, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "por_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 273972, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "por_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 281104, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "por_Latn-nld_Latn": { + 
"num_samples": 1012, + "number_of_characters": 290925, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "por_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 271675, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "por_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276700, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "por_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 282541, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "por_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 259846, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "por_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 302103, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "por_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 265580, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "por_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 312815, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "por_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 285615, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "por_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 276861, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "por_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 283209, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "por_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 275888, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "por_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 285076, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "por_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 256981, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "por_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 270675, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "por_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 299836, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "por_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 271774, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "por_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 273747, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "por_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 275556, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "por_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 281989, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "por_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289289, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "por_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 270262, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "por_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 257188, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "por_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 282679, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "por_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 306015, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "por_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 281458, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "por_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 285084, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "por_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 270321, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "por_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 331702, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "por_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 309587, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "por_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 183437, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + 
"unique_sentence2": 1012 + }, + "por_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 269354, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "por_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 272003, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "por_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 295899, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "por_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 269523, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "por_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 273266, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "por_Latn-nso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 294527, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "por_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 274498, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "por_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 269527, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "por_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 186666, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "por_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 279781, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "por_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 277456, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "por_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 289561, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "por_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 269819, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "por_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 290963, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "por_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 285758, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "por_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 275128, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "por_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 234118, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "por_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 184113, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "por_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 271589, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "por_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 284675, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "por_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 276233, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "por_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 287348, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "por_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 286416, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "por_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 290590, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "por_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 274273, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "por_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 312429, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "por_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 289619, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "por_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 282035, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "por_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 278501, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "por_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 269846, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "por_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278822, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "por_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 278496, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "por_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 294069, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "por_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 296142, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "por_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 306453, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "por_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 291780, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + 
"unique_sentence2": 1012 + }, + "por_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 260479, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "por_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 297227, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "por_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 264698, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "por_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 286375, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "por_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 280964, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "por_Latn-ory_Orya": { + 
"num_samples": 1012, + "number_of_characters": 278257, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "por_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 291021, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "por_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 301596, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "por_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 287157, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "por_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 255207, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "por_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 283485, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "por_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 274218, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "por_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 289673, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "por_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 273317, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "por_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 263006, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "por_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 283680, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "por_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276640, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "por_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 276273, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "por_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 245472, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "por_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280485, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "por_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 277575, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "por_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 276843, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "por_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 295080, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "por_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 315963, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 141.7173913043478, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "sun_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 248467, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "sun_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 263233, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "sun_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 300204, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sun_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 267904, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "sun_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 290058, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "sun_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 263541, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "sun_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 276000, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sun_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 295578, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "sun_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 272607, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "sun_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 278779, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sun_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 282416, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sun_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 294385, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 
1012 + }, + "sun_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 263779, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "sun_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 290277, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "sun_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 266254, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sun_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 259301, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sun_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 293843, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sun_Latn-twi_Latn": { + "num_samples": 1012, + 
"number_of_characters": 265444, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "sun_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 250419, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "sun_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284837, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sun_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 268812, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sun_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 265573, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sun_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 282708, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sun_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 286182, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sun_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 260673, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "sun_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 290193, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "sun_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 254806, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "sun_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 251959, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "sun_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 297597, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "sun_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 268138, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sun_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 275475, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "sun_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272378, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sun_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 269897, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sun_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 297842, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "sun_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267626, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sun_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 277323, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "sun_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 248923, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "sun_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 265888, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sun_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 266305, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "sun_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 282802, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sun_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 282608, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "sun_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 263245, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sun_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 276329, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + 
"max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sun_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 285572, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sun_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271292, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sun_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 276536, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sun_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 265457, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sun_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 276668, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + 
}, + "sun_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 271406, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sun_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 267218, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sun_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 278009, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sun_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 280264, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sun_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 271593, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sun_Latn-ajp_Arab": { + "num_samples": 1012, + 
"number_of_characters": 245892, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "sun_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 259076, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "sun_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 264294, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sun_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 296526, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "sun_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 252846, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "sun_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274673, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "sun_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 257489, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "sun_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 269019, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sun_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 266765, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sun_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 269496, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sun_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 275028, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sun_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 271120, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sun_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 279164, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sun_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 275818, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sun_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 283134, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "sun_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 277873, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sun_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 274950, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "sun_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 286169, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sun_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 283910, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sun_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 284997, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "sun_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 292023, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + 
"average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "sun_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 266629, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sun_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 286740, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sun_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 273628, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sun_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 285417, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sun_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 274217, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + 
"max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "sun_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 270083, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sun_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 223978, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "sun_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 269033, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sun_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 276974, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sun_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 292984, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, 
+ "sun_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 202811, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "sun_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 261503, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "sun_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 284111, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "sun_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 290979, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "sun_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 275441, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "sun_Latn-apc_Arab": { + "num_samples": 1012, + 
"number_of_characters": 244827, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "sun_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 276221, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sun_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 272106, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "sun_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 273774, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sun_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 266906, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sun_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 283373, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sun_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278942, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sun_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 261263, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sun_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 301592, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sun_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 253153, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sun_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274670, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sun_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 294484, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "sun_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 193789, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "sun_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 282840, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sun_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 300015, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sun_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 279467, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "sun_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 260750, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sun_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 264019, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sun_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 290604, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "sun_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 282627, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sun_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 281365, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sun_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 267400, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "sun_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 274532, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "sun_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 284353, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sun_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 265103, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "sun_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270128, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + 
"max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sun_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 275969, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "sun_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 253274, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sun_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 295531, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sun_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 259008, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "sun_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 306243, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + 
}, + "sun_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 279043, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sun_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 270289, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sun_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 276637, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sun_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 269316, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sun_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 278504, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sun_Latn-ary_Arab": { + "num_samples": 1012, + 
"number_of_characters": 250409, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "sun_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 264103, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sun_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 293264, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "sun_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 265202, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "sun_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 267175, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "sun_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 268984, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "sun_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 275417, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sun_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282717, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "sun_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 263690, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sun_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 250616, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "sun_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 276107, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sun_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 299443, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "sun_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 274886, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sun_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 278512, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sun_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 263749, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sun_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 325130, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + 
"unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "sun_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 303015, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sun_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 176865, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "sun_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 262782, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sun_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 265431, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sun_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 289327, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "sun_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 262951, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "sun_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 266694, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sun_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 287955, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "sun_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 267926, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sun_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 262955, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + 
"max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sun_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 180094, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "sun_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 273209, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sun_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 270884, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sun_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 282989, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sun_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 263247, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + 
"sun_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 284391, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "sun_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 279186, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "sun_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 268556, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sun_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 227546, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "sun_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 177541, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "sun_Latn-awa_Deva": { + "num_samples": 1012, + 
"number_of_characters": 265017, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sun_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 278103, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sun_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 269661, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "sun_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 280776, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sun_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 279844, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "sun_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 284018, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "sun_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 267701, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sun_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 305857, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "sun_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 283047, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sun_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 275463, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "sun_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 271929, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sun_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 263274, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sun_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272250, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sun_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 271924, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "sun_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 287497, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sun_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 289570, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + 
"unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "sun_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 299881, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sun_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 285208, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "sun_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 253907, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "sun_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 290655, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sun_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 258126, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "sun_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 279803, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "sun_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 274392, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sun_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 271685, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sun_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 284449, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "sun_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 295024, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + 
"max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sun_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 280585, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "sun_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 248635, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "sun_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 276913, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "sun_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 267646, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sun_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 283101, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, 
+ "sun_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 266745, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "sun_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 256434, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "sun_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 277108, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sun_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270068, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "sun_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 269701, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "sun_Latn-heb_Hebr": { + "num_samples": 1012, + 
"number_of_characters": 238900, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "sun_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273913, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sun_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 271003, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sun_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 270271, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sun_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 288508, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "sun_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 309391, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 135.22332015810278, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "umb_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 246368, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "umb_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 261134, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "umb_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 298105, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "umb_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 265805, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "umb_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 287959, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "umb_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 261442, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "umb_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 273901, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "umb_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 293479, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "umb_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 270508, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "umb_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 276680, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 
1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "umb_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 280317, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "umb_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 292286, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "umb_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 261680, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "umb_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 288178, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "umb_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 264155, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "umb_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 257202, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "umb_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 291744, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "umb_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 263345, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "umb_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 248320, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "umb_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282738, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + 
"unique_sentence2": 1012 + }, + "umb_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 266713, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "umb_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 263474, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "umb_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 280609, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "umb_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 284083, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "umb_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 258574, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "umb_Latn-srd_Latn": { + "num_samples": 
1012, + "number_of_characters": 288094, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "umb_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 252707, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "umb_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 249860, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "umb_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 295498, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "umb_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 266039, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "umb_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 273376, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "umb_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270279, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "umb_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 267798, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "umb_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 295743, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "umb_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265527, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "umb_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 275224, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "umb_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 246824, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "umb_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 263789, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "umb_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 264206, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "umb_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 280703, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "umb_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 280509, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + 
"unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "umb_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 261146, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "umb_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 274230, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "umb_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 283473, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "umb_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269193, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "umb_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 274437, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "umb_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 263358, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "umb_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 274569, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "umb_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 269307, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "umb_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 265119, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "umb_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 275910, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + 
"max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "umb_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 278165, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "umb_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 271593, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "umb_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 243793, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "umb_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 256977, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "umb_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 262195, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + 
"umb_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 294427, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "umb_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 250747, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "umb_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272574, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "umb_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 255390, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "umb_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 266920, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "umb_Latn-urd_Arab": { + "num_samples": 1012, + 
"number_of_characters": 264666, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "umb_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 267397, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "umb_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 272929, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "umb_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 269021, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "umb_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 277065, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "umb_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 273719, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "umb_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 281035, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "umb_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 275774, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "umb_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 272851, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "umb_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 284070, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "umb_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 281811, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "umb_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 282898, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "umb_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 289924, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "umb_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 264530, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "umb_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 284641, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "umb_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 271529, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 
1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "umb_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 283318, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "umb_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 272118, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "umb_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 267984, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "umb_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 221879, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "umb_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 266934, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "umb_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 274875, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "umb_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 290885, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "umb_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 200712, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "umb_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 259404, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "umb_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 282012, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 
1011 + }, + "umb_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 288880, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "umb_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 273342, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "umb_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 242728, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "umb_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 274122, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "umb_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 270007, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "umb_Latn-jav_Latn": { + "num_samples": 1012, + 
"number_of_characters": 271675, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "umb_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 264807, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "umb_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 281274, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "umb_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276843, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "umb_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 259164, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "umb_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 299493, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "umb_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 251054, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "umb_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272571, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "umb_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 292385, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "umb_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 191690, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "umb_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 280741, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "umb_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 297916, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "umb_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 277368, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "umb_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 258651, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "umb_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 261920, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "umb_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 288505, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 
1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "umb_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 280528, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "umb_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 279266, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "umb_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 265301, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "umb_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 272433, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "umb_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 282254, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "umb_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 263004, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "umb_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268029, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "umb_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 273870, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "umb_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 251175, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "umb_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 293432, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + 
"unique_sentence2": 1012 + }, + "umb_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 256909, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "umb_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 304144, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "umb_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 276944, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "umb_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 268190, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "umb_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 274538, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "umb_Latn-tel_Telu": { + "num_samples": 
1012, + "number_of_characters": 267217, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "umb_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 276405, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "umb_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 248310, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "umb_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 262004, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "umb_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 291165, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "umb_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 263103, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "umb_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 265076, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "umb_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 266885, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "umb_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 273318, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "umb_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280618, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "umb_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 261591, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "umb_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 248517, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "umb_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 274008, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "umb_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 297344, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "umb_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 272787, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "umb_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 276413, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 
1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "umb_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 261650, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "umb_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 323031, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "umb_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 300916, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "umb_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 174766, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "umb_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 260683, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "umb_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 263332, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "umb_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 287228, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "umb_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 260852, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "umb_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 264595, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "umb_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 285856, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 
1012 + }, + "umb_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 265827, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "umb_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 260856, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "umb_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 177995, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "umb_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 271110, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "umb_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 268785, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "umb_Latn-glg_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280890, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "umb_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 261148, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "umb_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 282292, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "umb_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 277087, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "umb_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 266457, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "umb_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 225447, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "umb_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 175442, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "umb_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 262918, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "umb_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 276004, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "umb_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 267562, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "umb_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 278677, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "umb_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 277745, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "umb_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 281919, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "umb_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 265602, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "umb_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 303758, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "umb_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 280948, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "umb_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 273364, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "umb_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 269830, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "umb_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 261175, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "umb_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270151, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "umb_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 269825, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "umb_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 285398, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "umb_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 287471, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "umb_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 297782, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "umb_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 283109, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "umb_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 251808, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + 
"max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "umb_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 288556, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "umb_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 256027, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "umb_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 277704, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "umb_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 272293, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "umb_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 269586, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + 
"umb_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 282350, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "umb_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 292925, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "umb_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 278486, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "umb_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 246536, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "umb_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 274814, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "umb_Latn-kea_Latn": { + "num_samples": 1012, + 
"number_of_characters": 265547, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "umb_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 281002, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "umb_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 264646, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "umb_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 254335, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "umb_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 275009, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "umb_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267969, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "umb_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 267602, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "umb_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 236801, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "umb_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271814, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "umb_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 268904, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "umb_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 268172, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "umb_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 286409, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "umb_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 307292, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.149209486166, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ajp_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 220667, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ajp_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 235433, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ajp_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 272404, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ajp_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 240104, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ajp_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 262258, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ajp_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 235741, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ajp_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 248200, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ajp_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 267778, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ajp_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 244807, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ajp_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 250979, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ajp_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 254616, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ajp_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 266585, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ajp_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 235979, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 
326, + "unique_sentence2": 1011 + }, + "ajp_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 262477, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ajp_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 238454, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ajp_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 231501, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ajp_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 266043, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ajp_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 237644, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ajp_Arab-acm_Arab": 
{ + "num_samples": 1012, + "number_of_characters": 222619, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ajp_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257037, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ajp_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 241012, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ajp_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 237773, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ajp_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 254908, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ajp_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 258382, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ajp_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 232873, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ajp_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 262393, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ajp_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 227006, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ajp_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 224159, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ajp_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 269797, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ajp_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 240338, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ajp_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 247675, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ajp_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 244578, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ajp_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 242097, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ajp_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 270042, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + 
"max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ajp_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 239826, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ajp_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 249523, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ajp_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 221123, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ajp_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 238088, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ajp_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 238505, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ajp_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 255002, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ajp_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 254808, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ajp_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 235445, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ajp_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 248529, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ajp_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 257772, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ajp_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 243492, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ajp_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 248736, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ajp_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 237657, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ajp_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 248868, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ajp_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 243606, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "ajp_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 239418, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ajp_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 250209, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ajp_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 252464, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ajp_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 245892, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ajp_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 243793, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ajp_Arab-bjn_Arab": { + 
"num_samples": 1012, + "number_of_characters": 231276, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ajp_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 236494, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ajp_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 268726, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ajp_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 225046, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ajp_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246873, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ajp_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 229689, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ajp_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 241219, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ajp_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 238965, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ajp_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 241696, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ajp_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 247228, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ajp_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 243320, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ajp_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 251364, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ajp_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 248018, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ajp_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 255334, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ajp_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 250073, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ajp_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 247150, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + 
"max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ajp_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 258369, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ajp_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 256110, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ajp_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 257197, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ajp_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 264223, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ajp_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 238829, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ajp_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 258940, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ajp_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 245828, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ajp_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 257617, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ajp_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 246417, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ajp_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 242283, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ajp_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 196178, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ajp_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 241233, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ajp_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 249174, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ajp_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 265184, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ajp_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 175011, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + 
"unique_sentence2": 1012 + }, + "ajp_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 233703, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ajp_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 256311, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ajp_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 263179, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ajp_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 247641, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ajp_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 217027, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ajp_Arab-bug_Latn": { + 
"num_samples": 1012, + "number_of_characters": 248421, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ajp_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 244306, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ajp_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 245974, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ajp_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 239106, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ajp_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 255573, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ajp_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251142, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ajp_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 233463, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ajp_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 273792, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ajp_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 225353, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ajp_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246870, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ajp_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 266684, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ajp_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 165989, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ajp_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 255040, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ajp_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 272215, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ajp_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 251667, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ajp_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 232950, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + 
"max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ajp_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 236219, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ajp_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 262804, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ajp_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 254827, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ajp_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 253565, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ajp_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 239600, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ajp_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 246732, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ajp_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 256553, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ajp_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 237303, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ajp_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 242328, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ajp_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 248169, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ajp_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 225474, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ajp_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 267731, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ajp_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 231208, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ajp_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 278443, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ajp_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 251243, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + 
"unique_sentence2": 1012 + }, + "ajp_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 242489, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ajp_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 248837, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ajp_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 241516, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ajp_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 250704, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ajp_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 222609, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ajp_Arab-ces_Latn": { + 
"num_samples": 1012, + "number_of_characters": 236303, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ajp_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 265464, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ajp_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 237402, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ajp_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 239375, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ajp_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 241184, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ajp_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 247617, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ajp_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254917, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ajp_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 235890, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ajp_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 222816, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ajp_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 248307, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ajp_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 271643, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ajp_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 247086, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ajp_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 250712, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ajp_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 235949, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ajp_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 297330, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ajp_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 275215, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + 
"max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ajp_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 149065, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ajp_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 234982, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ajp_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 237631, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ajp_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 261527, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ajp_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 235151, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ajp_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 238894, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ajp_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 260155, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ajp_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 240126, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ajp_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 235155, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ajp_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 152294, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 
42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ajp_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 245409, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ajp_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 243084, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ajp_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 255189, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ajp_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 235447, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ajp_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 256591, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + 
"unique_sentence2": 1012 + }, + "ajp_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 251386, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ajp_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 240756, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ajp_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 199746, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ajp_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 149741, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ajp_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 237217, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ajp_Arab-cym_Latn": { + 
"num_samples": 1012, + "number_of_characters": 250303, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ajp_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 241861, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ajp_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 252976, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ajp_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 252044, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ajp_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 256218, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ajp_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 239901, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ajp_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 278057, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ajp_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 255247, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ajp_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 247663, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ajp_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 244129, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ajp_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 235474, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ajp_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 244450, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ajp_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 244124, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ajp_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 259697, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ajp_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 261770, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ajp_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 272081, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + 
"max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ajp_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 257408, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ajp_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 226107, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ajp_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 262855, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ajp_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 230326, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ajp_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 252003, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ajp_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 246592, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ajp_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 243885, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ajp_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 256649, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ajp_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 267224, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ajp_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 252785, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ajp_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 220835, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ajp_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 249113, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ajp_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 239846, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ajp_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 255301, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ajp_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 238945, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + 
"unique_sentence2": 1012 + }, + "ajp_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 228634, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ajp_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 249308, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ajp_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 242268, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ajp_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 241901, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ajp_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 211100, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ajp_Arab-khk_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 246113, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ajp_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 243203, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ajp_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 242471, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ajp_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 260708, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ajp_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 281591, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 107.75296442687747, + "max_sentence1_length": 310, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bjn_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 233851, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bjn_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 248617, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "bjn_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 285588, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bjn_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 253288, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bjn_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 275442, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "bjn_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 248925, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bjn_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 261384, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bjn_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 280962, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bjn_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 257991, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bjn_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 264163, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bjn_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 267800, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 
319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bjn_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 279769, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bjn_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 249163, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "bjn_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 275661, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bjn_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 251638, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bjn_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 244685, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 
35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bjn_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 279227, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bjn_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 250828, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bjn_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 235803, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bjn_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270221, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bjn_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 254196, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bjn_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 250957, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bjn_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 268092, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bjn_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 271566, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bjn_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 246057, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "bjn_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 275577, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"bjn_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 240190, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "bjn_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 237343, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bjn_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 282981, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "bjn_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 253522, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bjn_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 260859, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bjn_Arab-kir_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 257762, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bjn_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 255281, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bjn_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 283226, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bjn_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253010, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bjn_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 262707, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "bjn_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 234307, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bjn_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 251272, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bjn_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 251689, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bjn_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 268186, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bjn_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 267992, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bjn_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 248629, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bjn_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 261713, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bjn_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 270956, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bjn_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256676, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bjn_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 261920, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bjn_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 250841, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bjn_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 262052, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bjn_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 256790, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bjn_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 252602, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bjn_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 263393, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bjn_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 265648, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bjn_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 259076, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bjn_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 256977, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bjn_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 231276, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bjn_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 249678, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bjn_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 281910, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bjn_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 238230, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "bjn_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260057, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "bjn_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 242873, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "bjn_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 254403, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bjn_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 252149, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "bjn_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 254880, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bjn_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 260412, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bjn_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 256504, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bjn_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 264548, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bjn_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 261202, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bjn_Arab-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 268518, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bjn_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 263257, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bjn_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 260334, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bjn_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 271553, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bjn_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 269294, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bjn_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 270381, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "bjn_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 277407, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bjn_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 252013, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bjn_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 272124, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bjn_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 259012, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bjn_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 270801, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bjn_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 259601, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bjn_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 255467, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bjn_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 209362, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bjn_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 254417, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bjn_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 262358, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bjn_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 278368, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "bjn_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 188195, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "bjn_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 246887, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bjn_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 269495, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bjn_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 276363, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bjn_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 260825, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bjn_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 230211, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bjn_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 261605, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bjn_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 257490, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "bjn_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 259158, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bjn_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 252290, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bjn_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 268757, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bjn_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264326, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bjn_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 246647, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bjn_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 286976, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "bjn_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 238537, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bjn_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260054, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bjn_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 279868, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bjn_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 179173, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bjn_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 268224, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bjn_Arab-mya_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 285399, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bjn_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 264851, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bjn_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 246134, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bjn_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 249403, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bjn_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 275988, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bjn_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 268011, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bjn_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 266749, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bjn_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 252784, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "bjn_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 259916, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bjn_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 269737, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bjn_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 250487, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bjn_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255512, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bjn_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 261353, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bjn_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 238658, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bjn_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 280915, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bjn_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 244392, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "bjn_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 291627, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "bjn_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 264427, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bjn_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 255673, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bjn_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 262021, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bjn_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 254700, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bjn_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 263888, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bjn_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 235793, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bjn_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 249487, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bjn_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 278648, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bjn_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 250586, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "bjn_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 252559, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bjn_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 254368, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "bjn_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 260801, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bjn_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268101, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "bjn_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 249074, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "bjn_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 236000, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "bjn_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 261491, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bjn_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 284827, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bjn_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 260270, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bjn_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 263896, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bjn_Arab-npi_Deva": { + "num_samples": 1012, + 
"number_of_characters": 249133, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bjn_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 310514, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bjn_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 288399, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bjn_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 162249, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "bjn_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 248166, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bjn_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 250815, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bjn_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 274711, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bjn_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 248335, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "bjn_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 252078, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bjn_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 273339, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bjn_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 253310, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bjn_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 248339, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bjn_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 165478, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bjn_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 258593, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bjn_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 256268, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bjn_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 268373, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bjn_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 248631, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "bjn_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 269775, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "bjn_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 264570, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bjn_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 253940, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bjn_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 212930, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bjn_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 162925, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bjn_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 250401, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bjn_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 263487, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bjn_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 255045, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bjn_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 266160, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bjn_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 265228, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bjn_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 269402, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bjn_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 253085, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bjn_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 291241, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "bjn_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 268431, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "bjn_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 260847, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "bjn_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 257313, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bjn_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 248658, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bjn_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257634, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bjn_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 257308, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bjn_Arab-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 272881, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bjn_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 274954, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bjn_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 285265, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bjn_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 270592, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "bjn_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 239291, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bjn_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 276039, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bjn_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 243510, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bjn_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 265187, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "bjn_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 259776, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bjn_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 257069, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bjn_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 269833, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bjn_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 280408, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bjn_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 265969, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bjn_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 234019, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bjn_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 262297, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bjn_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 253030, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bjn_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 268485, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bjn_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 252129, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "bjn_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 241818, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bjn_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 262492, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bjn_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255452, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "bjn_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 255085, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bjn_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 224284, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bjn_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259297, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bjn_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 256387, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bjn_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 255655, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bjn_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 273892, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bjn_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 294775, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 120.78063241106719, + "max_sentence1_length": 319, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ewe_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 239069, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ewe_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 253835, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ewe_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 290806, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, 
+ "ewe_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 258506, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ewe_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 280660, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ewe_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 254143, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ewe_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 266602, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ewe_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 286180, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ewe_Latn-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 263209, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ewe_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 269381, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ewe_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 273018, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ewe_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 284987, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ewe_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 254381, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ewe_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 280879, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ewe_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 256856, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ewe_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 249903, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ewe_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 284445, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ewe_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 256046, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ewe_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 241021, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ewe_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275439, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ewe_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 259414, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ewe_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 256175, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ewe_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 273310, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ewe_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 276784, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ewe_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 251275, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ewe_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 280795, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ewe_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 245408, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ewe_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 242561, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ewe_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 288199, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ewe_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 258740, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ewe_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 266077, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ewe_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262980, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ewe_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 260499, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ewe_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 288444, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + 
"max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ewe_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258228, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ewe_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 267925, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ewe_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 239525, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ewe_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 256490, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ewe_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 256907, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + 
}, + "ewe_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 273404, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ewe_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 273210, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ewe_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 253847, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ewe_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 266931, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ewe_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 276174, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ewe_Latn-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 261894, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ewe_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 267138, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ewe_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 256059, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ewe_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 267270, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ewe_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 262008, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ewe_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 257820, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ewe_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 268611, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ewe_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 270866, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ewe_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 264294, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ewe_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 262195, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ewe_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 236494, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ewe_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 249678, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ewe_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 287128, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ewe_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 243448, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ewe_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265275, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ewe_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 248091, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ewe_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 259621, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ewe_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 257367, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ewe_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 260098, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ewe_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 265630, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ewe_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 261722, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ewe_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 269766, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ewe_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 266420, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ewe_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 273736, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ewe_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 268475, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ewe_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 265552, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ewe_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 276771, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ewe_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 274512, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ewe_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 275599, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ewe_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 282625, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ewe_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 257231, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, 
+ "ewe_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 277342, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ewe_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 264230, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ewe_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 276019, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ewe_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 264819, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ewe_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 260685, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ewe_Latn-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 214580, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ewe_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 259635, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ewe_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 267576, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ewe_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 283586, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ewe_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 193413, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ewe_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 252105, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ewe_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 274713, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ewe_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 281581, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ewe_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 266043, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ewe_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 235429, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ewe_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 266823, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ewe_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 262708, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ewe_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 264376, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ewe_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 257508, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ewe_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 273975, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ewe_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269544, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ewe_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 251865, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ewe_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 292194, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ewe_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 243755, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ewe_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265272, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ewe_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 285086, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + 
"average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ewe_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 184391, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ewe_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 273442, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ewe_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 290617, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ewe_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 270069, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ewe_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 251352, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ewe_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 254621, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ewe_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 281206, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ewe_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 273229, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ewe_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 271967, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ewe_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 258002, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + 
}, + "ewe_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 265134, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ewe_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 274955, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ewe_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 255705, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ewe_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260730, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ewe_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 266571, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ewe_Latn-ars_Arab": { + "num_samples": 1012, + 
"number_of_characters": 243876, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ewe_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 286133, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ewe_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 249610, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ewe_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 296845, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ewe_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 269645, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ewe_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 260891, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ewe_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 267239, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ewe_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 259918, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ewe_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 269106, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ewe_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 241011, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ewe_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 254705, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ewe_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 283866, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ewe_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 255804, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ewe_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 257777, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ewe_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 259586, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ewe_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 266019, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ewe_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273319, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ewe_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 254292, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ewe_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 241218, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ewe_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 266709, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ewe_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 290045, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ewe_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 265488, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ewe_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 269114, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ewe_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 254351, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ewe_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 315732, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ewe_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 293617, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + 
"max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ewe_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 167467, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ewe_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 253384, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ewe_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 256033, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ewe_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 279929, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ewe_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 253553, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, 
+ "ewe_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 257296, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ewe_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 278557, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ewe_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 258528, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ewe_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 253557, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ewe_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 170696, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ewe_Latn-ast_Latn": { + "num_samples": 1012, + 
"number_of_characters": 263811, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ewe_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 261486, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ewe_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 273591, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ewe_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 253849, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ewe_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 274993, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ewe_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 269788, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ewe_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 259158, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ewe_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 218148, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ewe_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 168143, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ewe_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 255619, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ewe_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 268705, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ewe_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 260263, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ewe_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 271378, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ewe_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 270446, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ewe_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 274620, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ewe_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 258303, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ewe_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 296459, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ewe_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 273649, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ewe_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 266065, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ewe_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 262531, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ewe_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 253876, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ewe_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262852, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ewe_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 262526, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ewe_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 278099, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ewe_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 280172, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ewe_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 290483, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ewe_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 275810, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ewe_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 244509, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ewe_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 281257, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ewe_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 248728, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ewe_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 270405, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + 
}, + "ewe_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 264994, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ewe_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 262287, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ewe_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 275051, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ewe_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 285626, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ewe_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 271187, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ewe_Latn-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 239237, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ewe_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 267515, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ewe_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 258248, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ewe_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 273703, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ewe_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 257347, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ewe_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 247036, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ewe_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 267710, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ewe_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260670, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ewe_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 260303, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ewe_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 229502, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ewe_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264515, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ewe_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 261605, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ewe_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 260873, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ewe_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 279110, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ewe_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 299993, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 125.93675889328063, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ilo_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 271301, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ilo_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 286067, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ilo_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 323038, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ilo_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 290738, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ilo_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 312892, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ilo_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 286375, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ilo_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 298834, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ilo_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 318412, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ilo_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 295441, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ilo_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 301613, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ilo_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 305250, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 
394, + "unique_sentence2": 1012 + }, + "ilo_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 317219, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ilo_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 286613, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ilo_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 313111, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ilo_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 289088, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ilo_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 282135, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ilo_Latn-spa_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 316677, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ilo_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 288278, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ilo_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 273253, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ilo_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 307671, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ilo_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 291646, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ilo_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 288407, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ilo_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 305542, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ilo_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 309016, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ilo_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 283507, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ilo_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 313027, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ilo_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 277640, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ilo_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 274793, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ilo_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 320431, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ilo_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 290972, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ilo_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 298309, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ilo_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295212, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + 
"max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ilo_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 292731, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ilo_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 320676, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ilo_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290460, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ilo_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 300157, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ilo_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 271757, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + 
"min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ilo_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 288722, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ilo_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 289139, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ilo_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 305636, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ilo_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 305442, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ilo_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 286079, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 
124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ilo_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 299163, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ilo_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 308406, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ilo_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294126, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ilo_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 299370, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ilo_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 288291, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + 
"unique_sentence2": 1012 + }, + "ilo_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 299502, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ilo_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 294240, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ilo_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 290052, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ilo_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 300843, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ilo_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 303098, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ilo_Latn-sun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 296526, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ilo_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 294427, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ilo_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 268726, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ilo_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 281910, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ilo_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 287128, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ilo_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 275680, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ilo_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297507, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ilo_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 280323, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ilo_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 291853, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ilo_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 289599, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ilo_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 292330, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ilo_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 297862, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ilo_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 293954, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ilo_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 301998, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ilo_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 298652, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ilo_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 305968, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + 
"max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ilo_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 300707, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ilo_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 297784, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ilo_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 309003, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ilo_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 306744, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ilo_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 307831, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ilo_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 314857, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ilo_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 289463, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ilo_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 309574, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ilo_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 296462, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ilo_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 308251, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ilo_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 297051, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ilo_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 292917, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ilo_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 246812, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ilo_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 291867, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ilo_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 299808, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + 
"unique_sentence2": 1012 + }, + "ilo_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 315818, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ilo_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 225645, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ilo_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 284337, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ilo_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 306945, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ilo_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 313813, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ilo_Latn-vie_Latn": { + 
"num_samples": 1012, + "number_of_characters": 298275, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ilo_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 267661, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ilo_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 299055, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ilo_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 294940, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ilo_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 296608, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ilo_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 289740, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ilo_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 306207, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ilo_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301776, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ilo_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 284097, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ilo_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 324426, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ilo_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 275987, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ilo_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297504, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ilo_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 317318, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ilo_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 216623, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ilo_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 305674, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ilo_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 322849, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + 
"max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ilo_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 302301, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ilo_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 283584, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ilo_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 286853, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ilo_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 313438, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ilo_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 305461, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ilo_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 304199, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ilo_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 290234, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ilo_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 297366, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ilo_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 307187, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ilo_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 287937, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ilo_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292962, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ilo_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 298803, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ilo_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 276108, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ilo_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 318365, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ilo_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 281842, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + 
"unique_sentence2": 1012 + }, + "ilo_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 329077, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ilo_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 301877, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ilo_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 293123, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ilo_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 299471, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ilo_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 292150, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ilo_Latn-ydd_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 301338, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ilo_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 273243, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ilo_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 286937, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ilo_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 316098, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ilo_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 288036, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ilo_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 290009, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ilo_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 291818, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ilo_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 298251, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ilo_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 305551, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ilo_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 286524, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ilo_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 273450, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ilo_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 298941, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ilo_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 322277, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ilo_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 297720, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ilo_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 301346, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ilo_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 286583, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + 
"max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ilo_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 347964, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ilo_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 325849, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ilo_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 199699, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ilo_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 285616, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ilo_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 288265, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ilo_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 312161, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ilo_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 285785, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ilo_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 289528, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ilo_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 310789, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ilo_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 290760, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ilo_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 285789, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ilo_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 202928, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ilo_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 296043, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ilo_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 293718, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ilo_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 305823, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "ilo_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 286081, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ilo_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 307225, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ilo_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 302020, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ilo_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 291390, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ilo_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 250380, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ilo_Latn-zho_Hant": { + 
"num_samples": 1012, + "number_of_characters": 200375, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ilo_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 287851, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ilo_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 300937, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ilo_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 292495, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ilo_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 303610, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ilo_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 302678, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ilo_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 306852, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ilo_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 290535, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ilo_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 328691, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ilo_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 305881, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ilo_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 298297, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ilo_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 294763, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ilo_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 286108, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ilo_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295084, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ilo_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 294758, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ilo_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 310331, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + 
"max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ilo_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 312404, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ilo_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 322715, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ilo_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 308042, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ilo_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 276741, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ilo_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 313489, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ilo_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 280960, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ilo_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 302637, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ilo_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 297226, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ilo_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 294519, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ilo_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 307283, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ilo_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 317858, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ilo_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 303419, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ilo_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 271469, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ilo_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 299747, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ilo_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 290480, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + 
"unique_sentence2": 1012 + }, + "ilo_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 305935, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ilo_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 289579, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ilo_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 279268, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ilo_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 299942, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ilo_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292902, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ilo_Latn-dyu_Latn": { + 
"num_samples": 1012, + "number_of_characters": 292535, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ilo_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 261734, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ilo_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296747, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ilo_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 293837, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ilo_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 293105, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ilo_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 311342, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ilo_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 332225, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 157.78656126482213, + "max_sentence1_length": 432, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "knc_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 227621, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "knc_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 242387, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "knc_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 279358, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "knc_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 247058, + "unique_pairs": 1012, + "min_sentence1_length": 25, + 
"average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "knc_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 269212, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "knc_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 242695, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "knc_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 255154, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "knc_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 274732, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "knc_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 251761, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + 
"max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "knc_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 257933, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "knc_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 261570, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "knc_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 273539, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "knc_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 242933, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "knc_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 269431, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "knc_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 245408, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "knc_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 238455, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "knc_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 272997, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "knc_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 244598, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "knc_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 229573, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "knc_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263991, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "knc_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 247966, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "knc_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 244727, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "knc_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 261862, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "knc_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 265336, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "knc_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 239827, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "knc_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 269347, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "knc_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 233960, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "knc_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 231113, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "knc_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 276751, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "knc_Arab-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 247292, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "knc_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 254629, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "knc_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251532, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "knc_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 249051, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "knc_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 276996, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "knc_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246780, + 
"unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "knc_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 256477, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "knc_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 228077, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "knc_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 245042, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "knc_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 245459, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "knc_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 261956, + "unique_pairs": 1012, + "min_sentence1_length": 25, + 
"average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "knc_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 261762, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "knc_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 242399, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "knc_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 255483, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "knc_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 264726, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "knc_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250446, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + 
"max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "knc_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 255690, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "knc_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 244611, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "knc_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 255822, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "knc_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 250560, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "knc_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 246372, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "knc_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 257163, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "knc_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 259418, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "knc_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 252846, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "knc_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 250747, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "knc_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 225046, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "knc_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 238230, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "knc_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 243448, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "knc_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 275680, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "knc_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253827, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "knc_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 236643, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + 
"unique_sentence2": 1012 + }, + "knc_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 248173, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "knc_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 245919, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "knc_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 248650, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "knc_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 254182, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "knc_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 250274, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "knc_Arab-ind_Latn": { + 
"num_samples": 1012, + "number_of_characters": 258318, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "knc_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 254972, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "knc_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 262288, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "knc_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 257027, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "knc_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 254104, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "knc_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 265323, + 
"unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "knc_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 263064, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "knc_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 264151, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "knc_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 271177, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "knc_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 245783, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "knc_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 265894, + "unique_pairs": 1012, + "min_sentence1_length": 25, + 
"average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "knc_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 252782, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "knc_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 264571, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "knc_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 253371, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "knc_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 249237, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "knc_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 203132, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + 
"max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "knc_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 248187, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "knc_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 256128, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "knc_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 272138, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "knc_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 181965, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "knc_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 240657, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "knc_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 263265, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "knc_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 270133, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "knc_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 254595, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "knc_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 223981, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "knc_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 255375, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "knc_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 251260, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "knc_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 252928, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "knc_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 246060, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "knc_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 262527, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "knc_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258096, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "knc_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 240417, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "knc_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 280746, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "knc_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 232307, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "knc_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253824, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "knc_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 273638, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "knc_Arab-jpn_Jpan": { + 
"num_samples": 1012, + "number_of_characters": 172943, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "knc_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 261994, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "knc_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 279169, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "knc_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 258621, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "knc_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 239904, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "knc_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 243173, + 
"unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "knc_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 269758, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "knc_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 261781, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "knc_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 260519, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "knc_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 246554, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "knc_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 253686, + "unique_pairs": 1012, + "min_sentence1_length": 25, + 
"average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "knc_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 263507, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "knc_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 244257, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "knc_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249282, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "knc_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 255123, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "knc_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 232428, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + 
"max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "knc_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 274685, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "knc_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 238162, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "knc_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 285397, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "knc_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 258197, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "knc_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 249443, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "knc_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 255791, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "knc_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 248470, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "knc_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 257658, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "knc_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 229563, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "knc_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 243257, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "knc_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 272418, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "knc_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 244356, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "knc_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 246329, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "knc_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 248138, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "knc_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 254571, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "knc_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261871, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "knc_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 242844, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "knc_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 229770, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "knc_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 255261, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "knc_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 278597, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "knc_Arab-kan_Knda": { + 
"num_samples": 1012, + "number_of_characters": 254040, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "knc_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 257666, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "knc_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 242903, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "knc_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 304284, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "knc_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 282169, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "knc_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 156019, + 
"unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "knc_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 241936, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "knc_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 244585, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "knc_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 268481, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "knc_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 242105, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "knc_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 245848, + "unique_pairs": 1012, + "min_sentence1_length": 25, + 
"average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "knc_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 267109, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "knc_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 247080, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "knc_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 242109, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "knc_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 159248, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "knc_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 252363, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + 
"max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "knc_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 250038, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "knc_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 262143, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "knc_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 242401, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "knc_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 263545, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "knc_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 258340, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + 
"min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "knc_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 247710, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "knc_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 206700, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "knc_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 156695, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "knc_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 244171, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "knc_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 257257, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "knc_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 248815, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "knc_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 259930, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "knc_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 258998, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "knc_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 263172, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "knc_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 246855, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + 
"unique_sentence2": 1012 + }, + "knc_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 285011, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "knc_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 262201, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "knc_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 254617, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "knc_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 251083, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "knc_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 242428, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "knc_Arab-kaz_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 251404, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "knc_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 251078, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "knc_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 266651, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "knc_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 268724, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "knc_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 279035, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "knc_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 264362, + 
"unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "knc_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 233061, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "knc_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 269809, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "knc_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 237280, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "knc_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 258957, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "knc_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 253546, + "unique_pairs": 1012, + "min_sentence1_length": 25, + 
"average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "knc_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 250839, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "knc_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 263603, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "knc_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 274178, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "knc_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 259739, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "knc_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 227789, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + 
"max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "knc_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 256067, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "knc_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 246800, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "knc_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 262255, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "knc_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 245899, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "knc_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 235588, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "knc_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 256262, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "knc_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249222, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "knc_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 248855, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "knc_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 218054, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "knc_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253067, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "knc_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 250157, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "knc_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 249425, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "knc_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 267662, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "knc_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 288545, + "unique_pairs": 1012, + "min_sentence1_length": 25, + "average_sentence1_length": 114.62450592885375, + "max_sentence1_length": 405, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 249448, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + 
"unique_sentence2": 1012 + }, + "mkd_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 264214, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "mkd_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 301185, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 268885, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 291039, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 264522, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-pap_Latn": { + 
"num_samples": 1012, + "number_of_characters": 276981, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 296559, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 273588, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 279760, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 283397, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 295366, + "unique_pairs": 
1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 264760, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 291258, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "mkd_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 267235, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 260282, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 294824, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 266425, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 251400, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285818, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 269793, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 266554, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + 
"max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 283689, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 287163, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 261654, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 291174, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 255787, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 252940, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 298578, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 269119, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 276456, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273359, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 270878, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 298823, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268607, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 278304, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 249904, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + 
"unique_sentence2": 1012 + }, + "mkd_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 266869, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 267286, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 283783, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 283589, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 264226, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-pol_Latn": { + 
"num_samples": 1012, + "number_of_characters": 277310, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 286553, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272273, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 277517, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 266438, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 277649, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 272387, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 268199, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 278990, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 281245, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 274673, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 272574, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 246873, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 260057, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 265275, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 297507, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + 
"max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 253827, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 258470, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 270000, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 267746, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 270477, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 276009, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 272101, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 280145, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 276799, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 284115, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 278854, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 275931, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 287150, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 284891, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 285978, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + 
"unique_sentence2": 1009 + }, + "mkd_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 293004, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 267610, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 287721, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 274609, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 286398, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-szl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 275198, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 271064, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 224959, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 270014, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 277955, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 293965, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 203792, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 262484, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 285092, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 291960, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 276422, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 245808, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 277202, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 273087, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 274755, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 267887, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + 
"max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 284354, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279923, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 262244, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 302573, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 254134, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275651, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 295465, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 194770, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 283821, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 300996, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 
161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 280448, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 261731, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 265000, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 291585, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 283608, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + 
"unique_sentence2": 1012 + }, + "mkd_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 282346, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 268381, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 275513, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 285334, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 266084, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-tat_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 271109, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 276950, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 254255, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 296512, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 259989, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 307224, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 280024, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 271270, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 277618, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 270297, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 279485, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 251390, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 265084, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 294245, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 266183, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "mkd_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 268156, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + 
"max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 269965, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 276398, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283698, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 264671, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 251597, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 277088, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 300424, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 275867, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 279493, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 264730, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 326111, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 303996, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 177846, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 263763, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 266412, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + 
"unique_sentence2": 1012 + }, + "mkd_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 290308, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 263932, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 267675, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 288936, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 268907, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tha_Thai": { + 
"num_samples": 1012, + "number_of_characters": 263936, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 181075, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 274190, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 271865, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 283970, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 264228, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 285372, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 280167, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 269537, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 228527, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 178522, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 265998, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 279084, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 270642, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "mkd_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 281757, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 280825, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + 
"max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "mkd_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 284999, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 268682, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 306838, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 284028, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 276444, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 272910, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 264255, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273231, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 272905, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 288478, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 290551, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "mkd_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 300862, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 286189, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 254888, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 291636, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + 
"unique_sentence2": 1012 + }, + "mkd_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 259107, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "mkd_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 280784, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 275373, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 272666, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 285430, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 296005, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 281566, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 249616, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 277894, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 268627, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 284082, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 267726, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 257415, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 278089, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271049, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 270682, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "mkd_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 239881, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274894, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 271984, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 271252, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 289489, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + 
"max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "mkd_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 310372, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.19268774703556, + "max_sentence1_length": 364, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "prs_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 232264, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "prs_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 247030, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "prs_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 284001, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "prs_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 251701, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "prs_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 273855, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "prs_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 247338, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "prs_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 259797, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "prs_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 279375, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "prs_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 256404, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "prs_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 262576, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "prs_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 266213, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "prs_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 278182, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "prs_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 247576, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "prs_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 274074, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 
1009 + }, + "prs_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 250051, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "prs_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 243098, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "prs_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 277640, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "prs_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 249241, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "prs_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 234216, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "prs_Arab-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 268634, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "prs_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 252609, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "prs_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 249370, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "prs_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 266505, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "prs_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 269979, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "prs_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 244470, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "prs_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 273990, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "prs_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 238603, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "prs_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 235756, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "prs_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 281394, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "prs_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 251935, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "prs_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 259272, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "prs_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256175, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "prs_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 253694, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "prs_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 281639, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "prs_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251423, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "prs_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 261120, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "prs_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 232720, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "prs_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 249685, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "prs_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 250102, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "prs_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 266599, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "prs_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 266405, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "prs_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 247042, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "prs_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 260126, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "prs_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 269369, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "prs_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255089, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "prs_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 260333, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "prs_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 249254, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "prs_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 260465, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "prs_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 255203, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "prs_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 251015, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + 
}, + "prs_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 261806, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "prs_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 264061, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "prs_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 257489, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "prs_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 255390, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "prs_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 229689, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "prs_Arab-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 242873, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "prs_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 248091, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "prs_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 280323, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "prs_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 236643, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "prs_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258470, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "prs_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 252816, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "prs_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 250562, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "prs_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 253293, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "prs_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 258825, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "prs_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 254917, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "prs_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 262961, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "prs_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 259615, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "prs_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 266931, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "prs_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 261670, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "prs_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 258747, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "prs_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 269966, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "prs_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 267707, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "prs_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 268794, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "prs_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 275820, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "prs_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 250426, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "prs_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 270537, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "prs_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 257425, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "prs_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 269214, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "prs_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 258014, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "prs_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 253880, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "prs_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 207775, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + 
"max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "prs_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 252830, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "prs_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 260771, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "prs_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 276781, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "prs_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 186608, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "prs_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 245300, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, 
+ "prs_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 267908, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "prs_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 274776, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "prs_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 259238, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "prs_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 228624, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "prs_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 260018, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "prs_Arab-fon_Latn": { + "num_samples": 1012, + 
"number_of_characters": 255903, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "prs_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 257571, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "prs_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 250703, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "prs_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 267170, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "prs_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262739, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "prs_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 245060, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "prs_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 285389, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "prs_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 236950, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "prs_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258467, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "prs_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 278281, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "prs_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 177586, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "prs_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 266637, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "prs_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 283812, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "prs_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 263264, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "prs_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 244547, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "prs_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 247816, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "prs_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 274401, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "prs_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 266424, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "prs_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 265162, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "prs_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 251197, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "prs_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 258329, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "prs_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 268150, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "prs_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 248900, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "prs_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253925, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "prs_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 259766, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "prs_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 237071, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + 
"max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "prs_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 279328, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "prs_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 242805, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "prs_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 290040, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "prs_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 262840, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "prs_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 254086, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, 
+ "prs_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 260434, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "prs_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 253113, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "prs_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 262301, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "prs_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 234206, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "prs_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 247900, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "prs_Arab-gaz_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277061, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "prs_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 248999, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "prs_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 250972, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "prs_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 252781, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "prs_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 259214, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "prs_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266514, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "prs_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 247487, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "prs_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 234413, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "prs_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 259904, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "prs_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 283240, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "prs_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 258683, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "prs_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 262309, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "prs_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 247546, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "prs_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 308927, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "prs_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 286812, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "prs_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 160662, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + 
"unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "prs_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 246579, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "prs_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 249228, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "prs_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 273124, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "prs_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 246748, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "prs_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 250491, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "prs_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 271752, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "prs_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 251723, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "prs_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 246752, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "prs_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 163891, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "prs_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 257006, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + 
"max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "prs_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 254681, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "prs_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 266786, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "prs_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 247044, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "prs_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 268188, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "prs_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 262983, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, 
+ "prs_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 252353, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "prs_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 211343, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "prs_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 161338, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "prs_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 248814, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "prs_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 261900, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "prs_Arab-grn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 253458, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "prs_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 264573, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "prs_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 263641, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "prs_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 267815, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "prs_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 251498, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "prs_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 289654, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "prs_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 266844, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "prs_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 259260, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "prs_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 255726, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "prs_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 247071, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "prs_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256047, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "prs_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 255721, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "prs_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 271294, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "prs_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 273367, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "prs_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 283678, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "prs_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 269005, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "prs_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 237704, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "prs_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 274452, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "prs_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 241923, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "prs_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 263600, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "prs_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 258189, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "prs_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 255482, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "prs_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 268246, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "prs_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 278821, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "prs_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 264382, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "prs_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 232432, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "prs_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 260710, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "prs_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 251443, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "prs_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 266898, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "prs_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 250542, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "prs_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 240231, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"prs_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 260905, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "prs_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253865, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "prs_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 253498, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "prs_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 222697, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "prs_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257710, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "prs_Arab-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 254800, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "prs_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 254068, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "prs_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 272305, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "prs_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 293188, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.21245059288538, + "max_sentence1_length": 320, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "swe_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 243794, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "swe_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 258560, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "swe_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 295531, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "swe_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 263231, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "swe_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 285385, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "swe_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 258868, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "swe_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 271327, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "swe_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 290905, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "swe_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 267934, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "swe_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 274106, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "swe_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 277743, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "swe_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 289712, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 
1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "swe_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 259106, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "swe_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 285604, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "swe_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 261581, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "swe_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 254628, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "swe_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 289170, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "swe_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 260771, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "swe_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 245746, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "swe_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280164, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "swe_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 264139, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "swe_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 260900, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "swe_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 278035, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "swe_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 281509, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "swe_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 256000, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "swe_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 285520, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "swe_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 250133, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, 
+ "swe_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 247286, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "swe_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 292924, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "swe_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 263465, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "swe_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 270802, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "swe_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267705, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "swe_Latn-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 265224, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "swe_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 293169, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "swe_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262953, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "swe_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 272650, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "swe_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 244250, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "swe_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 261215, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "swe_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 261632, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "swe_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 278129, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "swe_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 277935, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "swe_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 258572, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "swe_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 271656, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "swe_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 280899, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "swe_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266619, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "swe_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 271863, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "swe_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 260784, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "swe_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 271995, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "swe_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 266733, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "swe_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 262545, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "swe_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 273336, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "swe_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 275591, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "swe_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 269019, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "swe_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 266920, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "swe_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 241219, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "swe_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 254403, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "swe_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 259621, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "swe_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 291853, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "swe_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 248173, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "swe_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270000, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "swe_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 252816, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "swe_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 262092, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "swe_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 264823, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + 
}, + "swe_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 270355, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "swe_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 266447, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "swe_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 274491, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "swe_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 271145, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "swe_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 278461, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "swe_Latn-quy_Latn": { + "num_samples": 1012, + 
"number_of_characters": 273200, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "swe_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 270277, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "swe_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 281496, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "swe_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 279237, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "swe_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 280324, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "swe_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 287350, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "swe_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 261956, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "swe_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 282067, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "swe_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 268955, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "swe_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 280744, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "swe_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 269544, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "swe_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 265410, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "swe_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 219305, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "swe_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 264360, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "swe_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 272301, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "swe_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 288311, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "swe_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 198138, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "swe_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 256830, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "swe_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 279438, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "swe_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 286306, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "swe_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 270768, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "swe_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 240154, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "swe_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 271548, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "swe_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 267433, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "swe_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 269101, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "swe_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 262233, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "swe_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 278700, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "swe_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274269, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "swe_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 256590, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "swe_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 296919, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "swe_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 248480, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, 
+ "swe_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269997, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "swe_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 289811, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "swe_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 189116, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "swe_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 278167, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "swe_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 295342, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "swe_Latn-sag_Latn": { + "num_samples": 1012, + 
"number_of_characters": 274794, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "swe_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 256077, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "swe_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 259346, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "swe_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 285931, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "swe_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 277954, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "swe_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 276692, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "swe_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 262727, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "swe_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 269859, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "swe_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 279680, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "swe_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 260430, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "swe_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265455, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "swe_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 271296, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "swe_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 248601, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "swe_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 290858, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "swe_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 254335, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "swe_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 301570, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "swe_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 274370, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "swe_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 265616, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "swe_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 271964, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "swe_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 264643, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "swe_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 273831, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "swe_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 245736, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "swe_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 259430, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "swe_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 288591, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "swe_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 260529, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "swe_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 262502, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + 
"max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "swe_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 264311, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "swe_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 270744, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "swe_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278044, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "swe_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 259017, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "swe_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 245943, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + 
"swe_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 271434, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "swe_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 294770, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "swe_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 270213, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "swe_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 273839, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "swe_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 259076, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "swe_Latn-shn_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 320457, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "swe_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 298342, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "swe_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 172192, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "swe_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 258109, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "swe_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 260758, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "swe_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 284654, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "swe_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 258278, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "swe_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 262021, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "swe_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 283282, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "swe_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 263253, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "swe_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 258282, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "swe_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 175421, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "swe_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 268536, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "swe_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 266211, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "swe_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 278316, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "swe_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 258574, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "swe_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 279718, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "swe_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 274513, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "swe_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 263883, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "swe_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 222873, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "swe_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 172868, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + 
"average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "swe_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 260344, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "swe_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 273430, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "swe_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 264988, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "swe_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 276103, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "swe_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 275171, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + 
"max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "swe_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 279345, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "swe_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 263028, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "swe_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 301184, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "swe_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 278374, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "swe_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 270790, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, 
+ "swe_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 267256, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "swe_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 258601, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "swe_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267577, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "swe_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 267251, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "swe_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 282824, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "swe_Latn-smo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 284897, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "swe_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 295208, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "swe_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 280535, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "swe_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 249234, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "swe_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 285982, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "swe_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 253453, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "swe_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 275130, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "swe_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 269719, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "swe_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 267012, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "swe_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 279776, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "swe_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 290351, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "swe_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 275912, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "swe_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 243962, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "swe_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 272240, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "swe_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 262973, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "swe_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 278428, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "swe_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 262072, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "swe_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 251761, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "swe_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 272435, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "swe_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265395, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "swe_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 265028, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "swe_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 234227, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "swe_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269240, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "swe_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 266330, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "swe_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 265598, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "swe_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 283835, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + 
"max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "swe_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 304718, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.60573122529644, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "urd_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 241540, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "urd_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 256306, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "urd_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 293277, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "urd_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 260977, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, 
+ "urd_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 283131, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "urd_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 256614, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "urd_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 269073, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "urd_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 288651, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "urd_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 265680, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "urd_Arab-ace_Latn": { + "num_samples": 1012, + 
"number_of_characters": 271852, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "urd_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 275489, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "urd_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 287458, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "urd_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 256852, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "urd_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 283350, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "urd_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 259327, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "urd_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 252374, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "urd_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 286916, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "urd_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 258517, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "urd_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 243492, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "urd_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277910, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "urd_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 261885, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "urd_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 258646, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "urd_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 275781, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "urd_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 279255, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "urd_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 253746, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "urd_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 283266, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "urd_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 247879, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "urd_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 245032, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "urd_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 290670, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "urd_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 261211, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "urd_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 268548, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "urd_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265451, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "urd_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 262970, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "urd_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 290915, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "urd_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260699, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "urd_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 270396, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "urd_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 241996, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "urd_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 258961, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "urd_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 259378, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "urd_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 275875, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + 
}, + "urd_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 275681, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "urd_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 256318, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "urd_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 269402, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "urd_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 278645, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "urd_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264365, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "urd_Arab-afr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269609, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "urd_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 258530, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "urd_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 269741, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "urd_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 264479, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "urd_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 260291, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "urd_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 271082, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "urd_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 273337, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "urd_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 266765, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "urd_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 264666, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "urd_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 238965, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "urd_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 252149, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "urd_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 257367, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "urd_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 289599, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "urd_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 245919, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "urd_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267746, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "urd_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 250562, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "urd_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 262092, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "urd_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 262569, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "urd_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 268101, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "urd_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 264193, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "urd_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 272237, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "urd_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 268891, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "urd_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 276207, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "urd_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 270946, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "urd_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 268023, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "urd_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 279242, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "urd_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 276983, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "urd_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 278070, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "urd_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 285096, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "urd_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 259702, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "urd_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 279813, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, 
+ "urd_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 266701, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "urd_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 278490, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "urd_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 267290, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "urd_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 263156, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "urd_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 217051, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "urd_Arab-bos_Latn": { + "num_samples": 1012, + 
"number_of_characters": 262106, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "urd_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 270047, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "urd_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 286057, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "urd_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 195884, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "urd_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 254576, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "urd_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 277184, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "urd_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 284052, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "urd_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 268514, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "urd_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 237900, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "urd_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 269294, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "urd_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 265179, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "urd_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 266847, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "urd_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 259979, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "urd_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 276446, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "urd_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272015, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "urd_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 254336, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "urd_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 294665, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "urd_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 246226, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "urd_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267743, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "urd_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 287557, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "urd_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 186862, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + 
"average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "urd_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 275913, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "urd_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 293088, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "urd_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 272540, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "urd_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 253823, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "urd_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 257092, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + 
"max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "urd_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 283677, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "urd_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 275700, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "urd_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 274438, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "urd_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 260473, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "urd_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 267605, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + 
}, + "urd_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 277426, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "urd_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 258176, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "urd_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263201, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "urd_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 269042, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "urd_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 246347, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "urd_Arab-ceb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 288604, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "urd_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 252081, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "urd_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 299316, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "urd_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 272116, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "urd_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 263362, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "urd_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 269710, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "urd_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 262389, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "urd_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 271577, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "urd_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 243482, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "urd_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 257176, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "urd_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 286337, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "urd_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 258275, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "urd_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 260248, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "urd_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 262057, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "urd_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 268490, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "urd_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275790, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "urd_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 256763, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "urd_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 243689, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "urd_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 269180, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "urd_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 292516, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "urd_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 267959, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "urd_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 271585, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "urd_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 256822, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "urd_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 318203, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "urd_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 296088, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "urd_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 169938, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + 
"max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "urd_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 255855, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "urd_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 258504, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "urd_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 282400, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "urd_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 256024, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "urd_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 259767, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, 
+ "urd_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 281028, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "urd_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 260999, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "urd_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 256028, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "urd_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 173167, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "urd_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 266282, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "urd_Arab-crh_Latn": { + "num_samples": 1012, + 
"number_of_characters": 263957, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "urd_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 276062, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "urd_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 256320, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "urd_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 277464, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "urd_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 272259, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "urd_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 261629, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "urd_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 220619, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "urd_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 170614, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "urd_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 258090, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "urd_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 271176, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "urd_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 262734, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "urd_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 273849, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "urd_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 272917, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "urd_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 277091, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "urd_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 260774, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "urd_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 298930, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "urd_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 276120, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "urd_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 268536, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "urd_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 265002, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "urd_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 256347, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "urd_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265323, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "urd_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 264997, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "urd_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 280570, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "urd_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 282643, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "urd_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 292954, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "urd_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 278281, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + 
"max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "urd_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 246980, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "urd_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 283728, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "urd_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 251199, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "urd_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 272876, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "urd_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 267465, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "urd_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 264758, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "urd_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 277522, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "urd_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 288097, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "urd_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 273658, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "urd_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 241708, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "urd_Arab-hau_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269986, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "urd_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 260719, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "urd_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 276174, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "urd_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 259818, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "urd_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 249507, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "urd_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 270181, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "urd_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263141, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "urd_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 262774, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "urd_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 231973, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "urd_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266986, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "urd_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 264076, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "urd_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 263344, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "urd_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 281581, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "urd_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 302464, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37845849802372, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "aka_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 244271, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "aka_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 259037, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "aka_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 296008, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "aka_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 263708, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "aka_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 285862, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "aka_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 259345, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "aka_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 271804, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "aka_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 291382, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "aka_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 268411, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "aka_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 274583, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "aka_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 278220, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "aka_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 290189, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 
464, + "unique_sentence2": 1012 + }, + "aka_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 259583, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "aka_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 286081, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "aka_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 262058, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "aka_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 255105, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "aka_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 289647, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "aka_Latn-twi_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 261248, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "aka_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 246223, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "aka_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280641, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "aka_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 264616, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "aka_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 261377, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "aka_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 278512, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "aka_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 281986, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "aka_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 256477, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "aka_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 285997, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "aka_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 250610, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "aka_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 247763, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "aka_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 293401, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "aka_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 263942, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "aka_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 271279, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "aka_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268182, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "aka_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 265701, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + 
"max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "aka_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 293646, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "aka_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263430, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "aka_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 273127, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "aka_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 244727, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "aka_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 261692, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "aka_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 262109, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "aka_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 278606, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "aka_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 278412, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "aka_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 259049, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "aka_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 272133, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "aka_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 281376, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "aka_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267096, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "aka_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 272340, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "aka_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 261261, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "aka_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 272472, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "aka_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 267210, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "aka_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 263022, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "aka_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 273813, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "aka_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 276068, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "aka_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 269496, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "aka_Latn-umb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 267397, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "aka_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 241696, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "aka_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 254880, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "aka_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 260098, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "aka_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 292330, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "aka_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 248650, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "aka_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270477, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "aka_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 253293, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "aka_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 264823, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "aka_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 262569, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "aka_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 270832, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "aka_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 266924, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "aka_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 274968, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "aka_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 271622, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "aka_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 278938, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "aka_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 273677, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + 
"max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "aka_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 270754, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "aka_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 281973, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "aka_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 279714, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "aka_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 280801, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "aka_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 287827, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "aka_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 262433, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "aka_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 282544, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "aka_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 269432, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "aka_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 281221, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "aka_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 270021, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "aka_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 265887, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "aka_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 219782, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "aka_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 264837, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "aka_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 272778, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "aka_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 288788, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + 
"unique_sentence2": 1012 + }, + "aka_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 198615, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "aka_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 257307, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "aka_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 279915, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "aka_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 286783, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "aka_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 271245, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "aka_Latn-apc_Arab": { + 
"num_samples": 1012, + "number_of_characters": 240631, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "aka_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 272025, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "aka_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 267910, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "aka_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 269578, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "aka_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 262710, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "aka_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 279177, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "aka_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274746, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "aka_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 257067, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "aka_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 297396, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "aka_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 248957, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "aka_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270474, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "aka_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 290288, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "aka_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 189593, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "aka_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 278644, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "aka_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 295819, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "aka_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 275271, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + 
"max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "aka_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 256554, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "aka_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 259823, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "aka_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 286408, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "aka_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 278431, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "aka_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 277169, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "aka_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 263204, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "aka_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 270336, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "aka_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 280157, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "aka_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 260907, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "aka_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265932, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "aka_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 271773, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "aka_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 249078, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "aka_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 291335, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "aka_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 254812, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "aka_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 302047, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + 
"unique_sentence2": 1012 + }, + "aka_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 274847, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "aka_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 266093, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "aka_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 272441, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "aka_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 265120, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "aka_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 274308, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "aka_Latn-ary_Arab": { + 
"num_samples": 1012, + "number_of_characters": 246213, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "aka_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 259907, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "aka_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 289068, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "aka_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 261006, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "aka_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 262979, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "aka_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 264788, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "aka_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 271221, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "aka_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278521, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "aka_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 259494, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "aka_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 246420, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "aka_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 271911, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "aka_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 295247, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "aka_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 270690, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "aka_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 274316, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "aka_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 259553, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "aka_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 320934, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + 
"max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "aka_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 298819, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "aka_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 172669, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "aka_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 258586, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "aka_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 261235, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "aka_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 285131, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "aka_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 258755, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "aka_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 262498, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "aka_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 283759, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "aka_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 263730, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "aka_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 258759, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "aka_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 175898, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "aka_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 269013, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "aka_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 266688, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "aka_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 278793, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "aka_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 259051, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + 
"unique_sentence2": 1012 + }, + "aka_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 280195, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "aka_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 274990, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "aka_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 264360, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "aka_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 223350, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "aka_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 173345, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "aka_Latn-awa_Deva": { + 
"num_samples": 1012, + "number_of_characters": 260821, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "aka_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 273907, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "aka_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 265465, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "aka_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 276580, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "aka_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 275648, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "aka_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 279822, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "aka_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 263505, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "aka_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 301661, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "aka_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 278851, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "aka_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 271267, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "aka_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 267733, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "aka_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 259078, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "aka_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268054, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "aka_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 267728, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "aka_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 283301, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "aka_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 285374, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + 
"max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "aka_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 295685, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "aka_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 281012, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "aka_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 249711, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "aka_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 286459, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "aka_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 253930, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "aka_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 275607, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "aka_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 270196, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "aka_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 267489, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "aka_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 280253, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "aka_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 290828, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "aka_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 276389, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "aka_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 244439, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "aka_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 272717, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "aka_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 263450, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "aka_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 278905, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + 
"unique_sentence2": 1012 + }, + "aka_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 262549, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "aka_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 252238, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "aka_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 272912, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "aka_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265872, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "aka_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 265505, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "aka_Latn-heb_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 234704, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "aka_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269717, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "aka_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 266807, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "aka_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 266075, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "aka_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 284312, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "aka_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 305195, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 131.07707509881422, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bjn_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 249803, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bjn_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 264569, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "bjn_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 301540, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bjn_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 269240, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bjn_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 291394, + "unique_pairs": 1012, + "min_sentence1_length": 29, + 
"average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "bjn_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 264877, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bjn_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 277336, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bjn_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 296914, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bjn_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 273943, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bjn_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 280115, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bjn_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 283752, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bjn_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 295721, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bjn_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 265115, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "bjn_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 291613, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bjn_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 267590, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bjn_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 260637, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bjn_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 295179, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bjn_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 266780, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bjn_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 251755, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bjn_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286173, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bjn_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 270148, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bjn_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 266909, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bjn_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 284044, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bjn_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 287518, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bjn_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 262009, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + 
"unique_sentence2": 1012 + }, + "bjn_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 291529, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bjn_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 256142, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "bjn_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 253295, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bjn_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 298933, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "bjn_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 269474, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bjn_Latn-hun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 276811, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bjn_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273714, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bjn_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 271233, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bjn_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 299178, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bjn_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268962, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bjn_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 278659, + 
"unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "bjn_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 250259, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bjn_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 267224, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bjn_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 267641, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bjn_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 284138, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bjn_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 283944, + "unique_pairs": 1012, + "min_sentence1_length": 29, + 
"average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bjn_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 264581, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bjn_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 277665, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bjn_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 286908, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bjn_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272628, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bjn_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 277872, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bjn_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 266793, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bjn_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 278004, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bjn_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 272742, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bjn_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 268554, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bjn_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 279345, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bjn_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 281600, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bjn_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 275028, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bjn_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 272929, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bjn_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 247228, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bjn_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 260412, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "bjn_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 265630, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bjn_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 297862, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bjn_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 254182, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "bjn_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276009, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "bjn_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 258825, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + 
"unique_sentence2": 1012 + }, + "bjn_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 270355, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bjn_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 268101, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bjn_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 270832, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bjn_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 272456, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bjn_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 280500, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bjn_Latn-knc_Latn": { + 
"num_samples": 1012, + "number_of_characters": 277154, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bjn_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 284470, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bjn_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 279209, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bjn_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 276286, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bjn_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 287505, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bjn_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 285246, + 
"unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bjn_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 286333, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "bjn_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 293359, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bjn_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 267965, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bjn_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 288076, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bjn_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 274964, + "unique_pairs": 1012, + "min_sentence1_length": 29, + 
"average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bjn_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 286753, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bjn_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 275553, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bjn_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 271419, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bjn_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 225314, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bjn_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 270369, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bjn_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 278310, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bjn_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 294320, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "bjn_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 204147, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "bjn_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 262839, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bjn_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 285447, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bjn_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 292315, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bjn_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 276777, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bjn_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 246163, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bjn_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 277557, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bjn_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 273442, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "bjn_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 275110, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bjn_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 268242, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bjn_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 284709, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bjn_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280278, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bjn_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 262599, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + 
"unique_sentence2": 1012 + }, + "bjn_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 302928, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bjn_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 254489, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bjn_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276006, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bjn_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 295820, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bjn_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 195125, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bjn_Latn-lij_Latn": { + 
"num_samples": 1012, + "number_of_characters": 284176, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bjn_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 301351, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bjn_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 280803, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bjn_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 262086, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bjn_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 265355, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bjn_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 291940, + 
"unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bjn_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 283963, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bjn_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 282701, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bjn_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 268736, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "bjn_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 275868, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bjn_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 285689, + "unique_pairs": 1012, + "min_sentence1_length": 29, + 
"average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bjn_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 266439, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bjn_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271464, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bjn_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 277305, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bjn_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 254610, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bjn_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 296867, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bjn_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 260344, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "bjn_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 307579, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "bjn_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 280379, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bjn_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 271625, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bjn_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 277973, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bjn_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 270652, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bjn_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 279840, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bjn_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 251745, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bjn_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 265439, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bjn_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 294600, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bjn_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 266538, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "bjn_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 268511, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bjn_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 270320, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "bjn_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 276753, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bjn_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284053, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + 
"unique_sentence2": 1011 + }, + "bjn_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 265026, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bjn_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 251952, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "bjn_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 277443, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bjn_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 300779, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bjn_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 276222, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bjn_Latn-lmo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279848, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bjn_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 265085, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bjn_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 326466, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bjn_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 304351, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bjn_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 178201, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "bjn_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 264118, + 
"unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bjn_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 266767, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bjn_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 290663, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bjn_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 264287, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "bjn_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 268030, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bjn_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 289291, + "unique_pairs": 1012, + "min_sentence1_length": 29, + 
"average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bjn_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 269262, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bjn_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 264291, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bjn_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 181430, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bjn_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 274545, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bjn_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 272220, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bjn_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 284325, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bjn_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 264583, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "bjn_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 285727, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "bjn_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 280522, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bjn_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 269892, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bjn_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 228882, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bjn_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 178877, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bjn_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 266353, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bjn_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 279439, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bjn_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 270997, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bjn_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 282112, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bjn_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 281180, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bjn_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 285354, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bjn_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 269037, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bjn_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 307193, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + 
"unique_sentence2": 1012 + }, + "bjn_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 284383, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bjn_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 276799, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "bjn_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 273265, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bjn_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 264610, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bjn_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273586, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bjn_Latn-lug_Latn": { + 
"num_samples": 1012, + "number_of_characters": 273260, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bjn_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 288833, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bjn_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 290906, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bjn_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 301217, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bjn_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 286544, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "bjn_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 255243, + 
"unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bjn_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 291991, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bjn_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 259462, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bjn_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 281139, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "bjn_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 275728, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bjn_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 273021, + "unique_pairs": 1012, + "min_sentence1_length": 29, + 
"average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bjn_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 285785, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bjn_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 296360, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bjn_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 281921, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bjn_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 249971, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bjn_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 278249, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bjn_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 268982, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bjn_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 284437, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bjn_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 268081, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "bjn_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 257770, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bjn_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 278444, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bjn_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271404, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "bjn_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 271037, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bjn_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 240236, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bjn_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275249, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bjn_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 272339, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bjn_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 271607, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bjn_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 289844, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bjn_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 310727, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 136.54347826086956, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "fao_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 245895, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "fao_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 260661, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + 
"unique_sentence2": 1009 + }, + "fao_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 297632, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fao_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 265332, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "fao_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 287486, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "fao_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 260969, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "fao_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 273428, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fao_Latn-sot_Latn": { + 
"num_samples": 1012, + "number_of_characters": 293006, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "fao_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 270035, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "fao_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 276207, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fao_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 279844, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fao_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 291813, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "fao_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 261207, + "unique_pairs": 1012, 
+ "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "fao_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 287705, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "fao_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 263682, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fao_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 256729, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fao_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 291271, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fao_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 262872, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "fao_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 247847, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "fao_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282265, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fao_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 266240, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fao_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 263001, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fao_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 280136, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fao_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 283610, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fao_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 258101, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "fao_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 287621, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fao_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 252234, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "fao_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 249387, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "fao_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 295025, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "fao_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 265566, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fao_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 272903, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "fao_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269806, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fao_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 267325, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + 
"max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fao_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 295270, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "fao_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265054, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fao_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 274751, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "fao_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 246351, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "fao_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 263316, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + 
"fao_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 263733, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "fao_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 280230, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fao_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 280036, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "fao_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 260673, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fao_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 273757, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fao_Latn-ssw_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283000, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fao_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268720, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fao_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 273964, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fao_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 262885, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fao_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 274096, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fao_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 268834, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fao_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 264646, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fao_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 275437, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fao_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 277692, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fao_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 271120, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fao_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 269021, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fao_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 243320, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "fao_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 256504, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "fao_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 261722, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fao_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 293954, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "fao_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 250274, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "fao_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272101, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "fao_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 254917, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "fao_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 266447, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fao_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 264193, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fao_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 266924, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fao_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 272456, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fao_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 276592, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fao_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 273246, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fao_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 280562, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "fao_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 275301, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + 
"max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fao_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 272378, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "fao_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 283597, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fao_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 281338, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "fao_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 282425, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "fao_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 289451, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + 
"fao_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 264057, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fao_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 284168, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fao_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 271056, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fao_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 282845, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fao_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 271645, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "fao_Latn-vec_Latn": { + "num_samples": 1012, + 
"number_of_characters": 267511, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fao_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 221406, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "fao_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 266461, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fao_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 274402, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fao_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 290412, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "fao_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 200239, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "fao_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 258931, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "fao_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 281539, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "fao_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 288407, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "fao_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 272869, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "fao_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 242255, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "fao_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 273649, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fao_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 269534, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "fao_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 271202, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fao_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 264334, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fao_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 280801, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fao_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276370, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fao_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 258691, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fao_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 299020, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fao_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 250581, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fao_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272098, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fao_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 291912, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "fao_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 191217, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "fao_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 280268, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fao_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 297443, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fao_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 276895, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + 
"max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "fao_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 258178, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fao_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 261447, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fao_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 288032, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "fao_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 280055, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fao_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 278793, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + 
"fao_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 264828, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "fao_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 271960, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "fao_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 281781, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fao_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 262531, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "fao_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267556, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fao_Latn-xho_Latn": { + "num_samples": 1012, + 
"number_of_characters": 273397, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "fao_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 250702, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fao_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 292959, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fao_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 256436, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "fao_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 303671, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "fao_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 276471, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fao_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 267717, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fao_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 274065, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fao_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 266744, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fao_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 275932, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fao_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 247837, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "fao_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 261531, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fao_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 290692, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "fao_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 262630, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "fao_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 264603, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "fao_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 266412, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "fao_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 272845, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fao_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280145, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "fao_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 261118, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fao_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 248044, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "fao_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 273535, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fao_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 296871, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "fao_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 272314, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fao_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 275940, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fao_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 261177, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fao_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 322558, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + 
"max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "fao_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 300443, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fao_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 174293, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "fao_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 260210, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fao_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 262859, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fao_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 286755, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + 
"fao_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 260379, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "fao_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 264122, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fao_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 285383, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "fao_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 265354, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fao_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 260383, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fao_Latn-zho_Hans": { + "num_samples": 1012, + 
"number_of_characters": 177522, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "fao_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 270637, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fao_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 268312, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fao_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 280417, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fao_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 260675, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "fao_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 281819, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "fao_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 276614, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "fao_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 265984, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fao_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 224974, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "fao_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 174969, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "fao_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 262445, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fao_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 275531, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fao_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 267089, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "fao_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 278204, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fao_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 277272, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "fao_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 281446, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fao_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 265129, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fao_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 303285, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "fao_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 280475, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fao_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 272891, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "fao_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 269357, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fao_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 260702, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fao_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269678, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fao_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 269352, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "fao_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 284925, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fao_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 286998, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + 
"max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "fao_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 297309, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fao_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 282636, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "fao_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 251335, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "fao_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 288083, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fao_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 255554, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + 
"fao_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 277231, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "fao_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 271820, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fao_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 269113, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fao_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 281877, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "fao_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 292452, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fao_Latn-azj_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278013, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "fao_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 246063, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "fao_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 274341, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "fao_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 265074, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fao_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 280529, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fao_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 264173, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "fao_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 253862, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "fao_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 274536, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fao_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267496, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "fao_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 267129, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "fao_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 236328, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "fao_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271341, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fao_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 268431, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fao_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 267699, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fao_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 285936, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "fao_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 306819, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 132.6818181818182, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ind_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 253939, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ind_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 268705, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ind_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 305676, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ind_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 273376, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ind_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 295530, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + 
"average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ind_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 269013, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ind_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 281472, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ind_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 301050, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ind_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 278079, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ind_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 284251, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, 
+ "unique_sentence2": 1012 + }, + "ind_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 287888, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ind_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 299857, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ind_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 269251, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ind_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 295749, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ind_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 271726, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ind_Latn-pbt_Arab": { + 
"num_samples": 1012, + "number_of_characters": 264773, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ind_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 299315, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ind_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 270916, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ind_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 255891, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ind_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290309, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ind_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 274284, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ind_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 271045, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ind_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 288180, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ind_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 291654, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ind_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 266145, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ind_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 295665, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ind_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 260278, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ind_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 257431, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ind_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 303069, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ind_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 273610, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ind_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 280947, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + 
"max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ind_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277850, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ind_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 275369, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ind_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 303314, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ind_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273098, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ind_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 282795, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ind_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 254395, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ind_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 271360, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ind_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 271777, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ind_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 288274, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ind_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 288080, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ind_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 268717, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ind_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 281801, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ind_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 291044, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ind_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276764, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ind_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 282008, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + 
"unique_sentence2": 1012 + }, + "ind_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 270929, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ind_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 282140, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ind_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 276878, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ind_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 272690, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ind_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 283481, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ind_Latn-por_Latn": { + 
"num_samples": 1012, + "number_of_characters": 285736, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ind_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 279164, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ind_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 277065, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ind_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 251364, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ind_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 264548, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ind_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 269766, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ind_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 301998, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ind_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 258318, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ind_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280145, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ind_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 262961, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ind_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 274491, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ind_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 272237, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ind_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 274968, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ind_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 280500, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ind_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 276592, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ind_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 281290, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + 
"max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ind_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 288606, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ind_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 283345, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ind_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 280422, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ind_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 291641, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ind_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 289382, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + 
"min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ind_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 290469, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ind_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 297495, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ind_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 272101, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ind_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 292212, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ind_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 279100, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ind_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 290889, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ind_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 279689, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ind_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 275555, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ind_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 229450, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ind_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 274505, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "ind_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 282446, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ind_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 298456, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ind_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 208283, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ind_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 266975, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ind_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 289583, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ind_Latn-tam_Taml": { + 
"num_samples": 1012, + "number_of_characters": 296451, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ind_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 280913, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ind_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 250299, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ind_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 281693, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ind_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 277578, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ind_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 279246, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ind_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 272378, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ind_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 288845, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ind_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284414, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ind_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 266735, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ind_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 307064, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ind_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 258625, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ind_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280142, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ind_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 299956, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ind_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 199261, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ind_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 288312, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + 
"max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ind_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 305487, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ind_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 284939, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ind_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 266222, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ind_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 269491, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ind_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 296076, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ind_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 288099, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ind_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 286837, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ind_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 272872, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ind_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 280004, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ind_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 289825, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ind_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 270575, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ind_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275600, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ind_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 281441, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ind_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 258746, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ind_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 301003, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + 
"unique_sentence2": 1012 + }, + "ind_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 264480, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ind_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 311715, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ind_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 284515, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ind_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 275761, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ind_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 282109, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ind_Latn-tel_Telu": { + 
"num_samples": 1012, + "number_of_characters": 274788, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ind_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 283976, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ind_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 255881, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ind_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 269575, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ind_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 298736, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ind_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 270674, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ind_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 272647, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ind_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 274456, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ind_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 280889, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ind_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288189, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ind_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 269162, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ind_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 256088, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ind_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 281579, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ind_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 304915, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ind_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 280358, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ind_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 283984, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + 
"max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ind_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 269221, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ind_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 330602, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ind_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 308487, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ind_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 182337, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ind_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 268254, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ind_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 270903, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ind_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 294799, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ind_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 268423, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ind_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 272166, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ind_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 293427, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ind_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 273398, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ind_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 268427, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ind_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 185566, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ind_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 278681, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ind_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 276356, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + 
"unique_sentence2": 1012 + }, + "ind_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 288461, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ind_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 268719, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ind_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 289863, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ind_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 284658, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ind_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 274028, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ind_Latn-tir_Ethi": { + 
"num_samples": 1012, + "number_of_characters": 233018, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ind_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 183013, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ind_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 270489, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ind_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 283575, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ind_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 275133, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ind_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 286248, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ind_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 285316, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ind_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 289490, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ind_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 273173, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ind_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 311329, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ind_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 288519, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ind_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 280935, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ind_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 277401, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ind_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 268746, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ind_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277722, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ind_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 277396, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + 
"max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ind_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 292969, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ind_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 295042, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ind_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 305353, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ind_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 290680, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ind_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 259379, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + 
"min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ind_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 296127, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ind_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 263598, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ind_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 285275, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ind_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 279864, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ind_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 277157, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ind_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 289921, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ind_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 300496, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ind_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 286057, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ind_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 254107, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ind_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 282385, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + 
"unique_sentence2": 1012 + }, + "ind_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 273118, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ind_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 288573, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ind_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 272217, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ind_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 261906, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ind_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 282580, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ind_Latn-bak_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 275540, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ind_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 275173, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ind_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 244372, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ind_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279385, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ind_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 276475, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ind_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 275743, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ind_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 293980, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ind_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 314863, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 140.6304347826087, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "knc_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 250593, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "knc_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 265359, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "knc_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 302330, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "knc_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 270030, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "knc_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 292184, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "knc_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 265667, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "knc_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 278126, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "knc_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 297704, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "knc_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 274733, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "knc_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 280905, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "knc_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 284542, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "knc_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 296511, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "knc_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 265905, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "knc_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 292403, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "knc_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 268380, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "knc_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 261427, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "knc_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 295969, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "knc_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 267570, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "knc_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 252545, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "knc_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286963, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "knc_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 270938, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "knc_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 267699, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "knc_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 284834, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + 
"unique_sentence2": 1012 + }, + "knc_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 288308, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "knc_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 262799, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "knc_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 292319, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "knc_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 256932, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "knc_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 254085, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "knc_Latn-bem_Latn": { + 
"num_samples": 1012, + "number_of_characters": 299723, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "knc_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 270264, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "knc_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 277601, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "knc_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274504, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "knc_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 272023, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "knc_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 299968, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "knc_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269752, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "knc_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 279449, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "knc_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 251049, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "knc_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 268014, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "knc_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 268431, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "knc_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 284928, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "knc_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 284734, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "knc_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 265371, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "knc_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 278455, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "knc_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 287698, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "knc_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273418, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "knc_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 278662, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "knc_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 267583, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "knc_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 278794, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "knc_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 273532, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "knc_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 269344, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "knc_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 280135, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "knc_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 282390, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "knc_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 275818, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "knc_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 273719, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "knc_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 248018, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "knc_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 261202, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "knc_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 266420, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "knc_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 298652, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "knc_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 254972, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + 
"unique_sentence2": 1012 + }, + "knc_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276799, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "knc_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 259615, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "knc_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 271145, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "knc_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 268891, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "knc_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 271622, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "knc_Latn-bjn_Latn": { + 
"num_samples": 1012, + "number_of_characters": 277154, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "knc_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 273246, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "knc_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 281290, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "knc_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 285260, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "knc_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 279999, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "knc_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 277076, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "knc_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 288295, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "knc_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 286036, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "knc_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 287123, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "knc_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 294149, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "knc_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 268755, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "knc_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 288866, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "knc_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 275754, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "knc_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 287543, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "knc_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 276343, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "knc_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 272209, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "knc_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 226104, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "knc_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 271159, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "knc_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 279100, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "knc_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 295110, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "knc_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 204937, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "knc_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 263629, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "knc_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 286237, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "knc_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 293105, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "knc_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 277567, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "knc_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 246953, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "knc_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 278347, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "knc_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 274232, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "knc_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 275900, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "knc_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 269032, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "knc_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 285499, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "knc_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281068, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "knc_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 263389, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "knc_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 303718, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "knc_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 255279, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "knc_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276796, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "knc_Latn-fra_Latn": { + 
"num_samples": 1012, + "number_of_characters": 296610, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "knc_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 195915, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "knc_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 284966, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "knc_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 302141, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "knc_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 281593, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "knc_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 262876, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "knc_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 266145, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "knc_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 292730, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "knc_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 284753, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "knc_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 283491, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "knc_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 269526, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "knc_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 276658, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "knc_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 286479, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "knc_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 267229, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "knc_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272254, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "knc_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 278095, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "knc_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 255400, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "knc_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 297657, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "knc_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 261134, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "knc_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 308369, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "knc_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 281169, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "knc_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 272415, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "knc_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 278763, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "knc_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 271442, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "knc_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 280630, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "knc_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 252535, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "knc_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 266229, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "knc_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 295390, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "knc_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 267328, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "knc_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 269301, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "knc_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 271110, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + 
"unique_sentence2": 1012 + }, + "knc_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 277543, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "knc_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284843, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "knc_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 265816, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "knc_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 252742, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "knc_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 278233, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "knc_Latn-gla_Latn": { + 
"num_samples": 1012, + "number_of_characters": 301569, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "knc_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 277012, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "knc_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 280638, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "knc_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 265875, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "knc_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 327256, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "knc_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 305141, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "knc_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 178991, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "knc_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 264908, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "knc_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 267557, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "knc_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 291453, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "knc_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 265077, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "knc_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 268820, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "knc_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 290081, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "knc_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 270052, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "knc_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 265081, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "knc_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 182220, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "knc_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 275335, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "knc_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 273010, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "knc_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 285115, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "knc_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 265373, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "knc_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 286517, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "knc_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 281312, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "knc_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 270682, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "knc_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 229672, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "knc_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 179667, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "knc_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 267143, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "knc_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 280229, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "knc_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 271787, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "knc_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 282902, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "knc_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 281970, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "knc_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 286144, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "knc_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 269827, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "knc_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 307983, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "knc_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 285173, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "knc_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 277589, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "knc_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 274055, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "knc_Latn-guj_Gujr": { + 
"num_samples": 1012, + "number_of_characters": 265400, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "knc_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274376, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "knc_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 274050, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "knc_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 289623, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "knc_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 291696, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "knc_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 302007, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "knc_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 287334, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "knc_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 256033, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "knc_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 292781, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "knc_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 260252, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "knc_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 281929, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "knc_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 276518, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "knc_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 273811, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "knc_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 286575, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "knc_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 297150, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "knc_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 282711, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "knc_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 250761, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "knc_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 279039, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "knc_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 269772, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "knc_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 285227, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "knc_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 268871, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "knc_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 258560, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "knc_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 279234, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "knc_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272194, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "knc_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 271827, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "knc_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 241026, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "knc_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276039, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "knc_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 273129, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "knc_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 272397, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "knc_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 290634, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "knc_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 311517, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.32411067193675, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + 
"unique_sentence2": 1012 + }, + "mlt_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257909, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "mlt_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272675, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "mlt_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309646, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mlt_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 277346, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "mlt_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 299500, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "mlt_Latn-mag_Deva": { + 
"num_samples": 1012, + "number_of_characters": 272983, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "mlt_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 285442, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mlt_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 305020, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "mlt_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 282049, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "mlt_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 288221, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mlt_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 291858, + "unique_pairs": 
1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mlt_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303827, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "mlt_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 273221, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "mlt_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299719, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "mlt_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275696, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mlt_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268743, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mlt_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 303285, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mlt_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274886, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "mlt_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 259861, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "mlt_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294279, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mlt_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 278254, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + 
"max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mlt_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 275015, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mlt_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 292150, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mlt_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 295624, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mlt_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 270115, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "mlt_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299635, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mlt_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 264248, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "mlt_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 261401, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "mlt_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 307039, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "mlt_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 277580, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mlt_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 284917, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "mlt_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281820, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mlt_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 279339, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mlt_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 307284, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "mlt_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277068, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mlt_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286765, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + 
"unique_sentence2": 1011 + }, + "mlt_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 258365, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "mlt_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 275330, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mlt_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275747, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "mlt_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 292244, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mlt_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 292050, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "mlt_Latn-min_Arab": { + 
"num_samples": 1012, + "number_of_characters": 272687, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mlt_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285771, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mlt_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 295014, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mlt_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280734, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mlt_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 285978, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mlt_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274899, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mlt_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 286110, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mlt_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280848, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mlt_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276660, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mlt_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 287451, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mlt_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289706, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mlt_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 283134, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mlt_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 281035, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mlt_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 255334, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "mlt_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 268518, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "mlt_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273736, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + 
"max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mlt_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 305968, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "mlt_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 262288, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "mlt_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284115, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "mlt_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266931, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "mlt_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 278461, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mlt_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 276207, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mlt_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278938, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mlt_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 284470, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mlt_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 280562, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mlt_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 288606, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mlt_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 285260, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mlt_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 287315, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mlt_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 284392, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "mlt_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295611, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mlt_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 293352, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + 
"unique_sentence2": 1012 + }, + "mlt_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 294439, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "mlt_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 301465, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "mlt_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 276071, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mlt_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 296182, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mlt_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 283070, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mlt_Latn-ron_Latn": { + 
"num_samples": 1012, + "number_of_characters": 294859, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mlt_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 283659, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "mlt_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 279525, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mlt_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 233420, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "mlt_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 278475, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mlt_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 286416, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mlt_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 302426, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "mlt_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 212253, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "mlt_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270945, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "mlt_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293553, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "mlt_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 300421, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "mlt_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284883, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "mlt_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 254269, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "mlt_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285663, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mlt_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281548, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "mlt_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 283216, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + 
"max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mlt_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 276348, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mlt_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 292815, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mlt_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288384, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mlt_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270705, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mlt_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 311034, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + 
"min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mlt_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262595, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mlt_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284112, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mlt_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 303926, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "mlt_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 203231, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "mlt_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 292282, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mlt_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 309457, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mlt_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288909, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "mlt_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 270192, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mlt_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 273461, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mlt_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 300046, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + 
"unique_sentence2": 1012 + }, + "mlt_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 292069, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mlt_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290807, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mlt_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276842, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "mlt_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283974, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "mlt_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293795, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mlt_Latn-san_Deva": { + 
"num_samples": 1012, + "number_of_characters": 274545, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "mlt_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279570, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mlt_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 285411, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "mlt_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 262716, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mlt_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304973, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mlt_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 268450, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "mlt_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315685, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "mlt_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 288485, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mlt_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279731, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mlt_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 286079, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mlt_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278758, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mlt_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287946, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mlt_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 259851, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "mlt_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273545, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mlt_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302706, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "mlt_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 274644, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + 
"max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "mlt_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276617, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "mlt_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 278426, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "mlt_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284859, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mlt_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292159, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "mlt_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 273132, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + 
"min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mlt_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 260058, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "mlt_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285549, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mlt_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308885, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "mlt_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 284328, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mlt_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 287954, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mlt_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 273191, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mlt_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334572, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "mlt_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 312457, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mlt_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 186307, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "mlt_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 272224, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + 
"unique_sentence2": 1012 + }, + "mlt_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274873, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mlt_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298769, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "mlt_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 272393, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "mlt_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 276136, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mlt_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 297397, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "mlt_Latn-sin_Sinh": { + 
"num_samples": 1012, + "number_of_characters": 277368, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mlt_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 272397, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mlt_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 189536, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "mlt_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282651, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mlt_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 280326, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mlt_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 292431, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mlt_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272689, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "mlt_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293833, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "mlt_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288628, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "mlt_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277998, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mlt_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236988, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "mlt_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186983, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "mlt_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 274459, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mlt_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 287545, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mlt_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 279103, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "mlt_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 290218, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + 
"max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mlt_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 289286, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "mlt_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 293460, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mlt_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 277143, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mlt_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 315299, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "mlt_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 292489, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mlt_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284905, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "mlt_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 281371, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mlt_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272716, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mlt_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281692, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mlt_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 281366, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "mlt_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 296939, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mlt_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 299012, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "mlt_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 309323, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mlt_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294650, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "mlt_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 263349, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + 
"unique_sentence2": 1012 + }, + "mlt_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 300097, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mlt_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267568, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "mlt_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 289245, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "mlt_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283834, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mlt_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 281127, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mlt_Latn-sna_Latn": { + 
"num_samples": 1012, + "number_of_characters": 293891, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "mlt_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 304466, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mlt_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 290027, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "mlt_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 258077, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "mlt_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 286355, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "mlt_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 277088, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mlt_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 292543, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mlt_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 276187, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "mlt_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265876, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "mlt_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286550, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mlt_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279510, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "mlt_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 279143, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "mlt_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 248342, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "mlt_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283355, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mlt_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 280445, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mlt_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 279713, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + 
"max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mlt_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297950, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "mlt_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 318833, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.55335968379447, + "max_sentence1_length": 400, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "quy_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 252648, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "quy_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 267414, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "quy_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 304385, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + 
"min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "quy_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 272085, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "quy_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 294239, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "quy_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 267722, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "quy_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 280181, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "quy_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 299759, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "quy_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 276788, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "quy_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 282960, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "quy_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 286597, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "quy_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 298566, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "quy_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 267960, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 
1011 + }, + "quy_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 294458, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "quy_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 270435, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "quy_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 263482, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "quy_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 298024, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "quy_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 269625, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "quy_Latn-acm_Arab": { + "num_samples": 1012, + 
"number_of_characters": 254600, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "quy_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289018, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "quy_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 272993, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "quy_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 269754, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "quy_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 286889, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "quy_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 290363, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "quy_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 264854, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "quy_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 294374, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "quy_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 258987, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "quy_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 256140, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "quy_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 301778, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "quy_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 272319, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "quy_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 279656, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "quy_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276559, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "quy_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 274078, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "quy_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 302023, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "quy_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271807, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "quy_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 281504, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "quy_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 253104, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "quy_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 270069, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "quy_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 270486, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "quy_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 286983, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "quy_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 286789, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "quy_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 267426, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "quy_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 280510, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "quy_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 289753, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "quy_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275473, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "quy_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 280717, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "quy_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 269638, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "quy_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 280849, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "quy_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 275587, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "quy_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 271399, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "quy_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 282190, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "quy_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 284445, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "quy_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 277873, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "quy_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 275774, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "quy_Latn-ajp_Arab": { + "num_samples": 1012, + 
"number_of_characters": 250073, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "quy_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 263257, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "quy_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 268475, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "quy_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 300707, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "quy_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 257027, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "quy_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278854, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "quy_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 261670, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "quy_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 273200, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "quy_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 270946, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "quy_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 273677, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "quy_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 279209, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "quy_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 275301, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "quy_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 283345, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "quy_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 279999, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "quy_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 287315, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "quy_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 279131, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "quy_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 290350, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "quy_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 288091, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "quy_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 289178, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "quy_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 296204, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "quy_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 270810, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "quy_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 290921, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "quy_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 277809, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "quy_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 289598, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "quy_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 278398, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "quy_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 274264, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "quy_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 228159, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "quy_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 273214, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "quy_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 281155, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "quy_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 297165, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "quy_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 206992, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"quy_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 265684, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "quy_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 288292, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "quy_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 295160, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "quy_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 279622, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "quy_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 249008, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "quy_Latn-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280402, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "quy_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 276287, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "quy_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 277955, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "quy_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 271087, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "quy_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 287554, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "quy_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283123, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "quy_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 265444, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "quy_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 305773, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "quy_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 257334, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "quy_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278851, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "quy_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 298665, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "quy_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 197970, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "quy_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 287021, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "quy_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 304196, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "quy_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 283648, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "quy_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 264931, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "quy_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 268200, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "quy_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 294785, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "quy_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 286808, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "quy_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 285546, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "quy_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 271581, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "quy_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 278713, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "quy_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 288534, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "quy_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 269284, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "quy_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274309, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "quy_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 280150, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "quy_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 257455, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "quy_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 299712, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "quy_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 263189, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "quy_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 310424, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "quy_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 283224, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, 
+ "quy_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 274470, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "quy_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 280818, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "quy_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 273497, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "quy_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 282685, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "quy_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 254590, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "quy_Latn-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 268284, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "quy_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 297445, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "quy_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 269383, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "quy_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 271356, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "quy_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 273165, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "quy_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 279598, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "quy_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286898, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "quy_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 267871, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "quy_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 254797, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "quy_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 280288, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "quy_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 303624, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "quy_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 279067, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "quy_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 282693, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "quy_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 267930, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "quy_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 329311, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "quy_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 307196, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + 
"unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "quy_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 181046, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "quy_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 266963, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "quy_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 269612, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "quy_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 293508, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "quy_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 267132, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "quy_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 270875, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "quy_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 292136, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "quy_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 272107, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "quy_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 267136, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "quy_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 184275, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + 
"max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "quy_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 277390, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "quy_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 275065, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "quy_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 287170, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "quy_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 267428, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "quy_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 288572, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, 
+ "quy_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 283367, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "quy_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 272737, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "quy_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 231727, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "quy_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 181722, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "quy_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 269198, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "quy_Latn-cym_Latn": { + "num_samples": 1012, + 
"number_of_characters": 282284, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "quy_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 273842, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "quy_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 284957, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "quy_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 284025, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "quy_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 288199, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "quy_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 271882, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "quy_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 310038, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "quy_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 287228, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "quy_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 279644, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "quy_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 276110, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "quy_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 267455, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "quy_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276431, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "quy_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 276105, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "quy_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 291678, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "quy_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 293751, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "quy_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 304062, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + 
"unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "quy_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 289389, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "quy_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 258088, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "quy_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 294836, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "quy_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 262307, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "quy_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 283984, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "quy_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 278573, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "quy_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 275866, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "quy_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 288630, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "quy_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 299205, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "quy_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 284766, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + 
"max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "quy_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 252816, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "quy_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 281094, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "quy_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 271827, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "quy_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 287282, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "quy_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 270926, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, 
+ "quy_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 260615, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "quy_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 281289, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "quy_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274249, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "quy_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 273882, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "quy_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 243081, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "quy_Latn-khk_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 278094, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "quy_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 275184, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "quy_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 274452, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "quy_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 292689, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "quy_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 313572, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 139.35474308300394, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "swh_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 249725, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "swh_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 264491, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "swh_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 301462, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "swh_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 269162, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "swh_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 291316, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "swh_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 264799, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "swh_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 277258, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "swh_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 296836, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "swh_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 273865, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "swh_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 280037, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "swh_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 283674, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 
1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "swh_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 295643, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "swh_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 265037, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "swh_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 291535, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "swh_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 267512, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "swh_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 260559, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "swh_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 295101, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "swh_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 266702, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "swh_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 251677, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "swh_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286095, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "swh_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 270070, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "swh_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 266831, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "swh_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 283966, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "swh_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 287440, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "swh_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 261931, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "swh_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 291451, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"swh_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 256064, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "swh_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 253217, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "swh_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 298855, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "swh_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 269396, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "swh_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 276733, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "swh_Latn-kir_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 273636, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "swh_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 271155, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "swh_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 299100, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "swh_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268884, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "swh_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 278581, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "swh_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 250181, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "swh_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 267146, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "swh_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 267563, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "swh_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 284060, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "swh_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 283866, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "swh_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 264503, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "swh_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 277587, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "swh_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 286830, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "swh_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272550, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "swh_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 277794, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "swh_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 266715, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "swh_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 277926, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "swh_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 272664, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "swh_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 268476, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "swh_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 279267, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "swh_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 281522, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "swh_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 274950, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "swh_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 272851, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "swh_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 247150, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "swh_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 260334, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "swh_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 265552, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "swh_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 297784, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "swh_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 254104, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "swh_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275931, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "swh_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 258747, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "swh_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 270277, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + 
}, + "swh_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 268023, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "swh_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 270754, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "swh_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 276286, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "swh_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 272378, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "swh_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 280422, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "swh_Latn-knc_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277076, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "swh_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 284392, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "swh_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 279131, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "swh_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 287427, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "swh_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 285168, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "swh_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 286255, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "swh_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 293281, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "swh_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 267887, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "swh_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 287998, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "swh_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 274886, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "swh_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 286675, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "swh_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 275475, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "swh_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 271341, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "swh_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 225236, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "swh_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 270291, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "swh_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 278232, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "swh_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 294242, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "swh_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 204069, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "swh_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 262761, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "swh_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 285369, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "swh_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 292237, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "swh_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 276699, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "swh_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 246085, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "swh_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 277479, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "swh_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 273364, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "swh_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 275032, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "swh_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 268164, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "swh_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 284631, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "swh_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280200, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "swh_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 262521, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "swh_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 302850, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "swh_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 254411, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "swh_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275928, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "swh_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 295742, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "swh_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 195047, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "swh_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 284098, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "swh_Latn-mya_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 301273, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "swh_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 280725, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "swh_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 262008, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "swh_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 265277, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "swh_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 291862, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "swh_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 283885, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "swh_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 282623, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "swh_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 268658, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "swh_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 275790, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "swh_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 285611, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "swh_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 266361, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "swh_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271386, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "swh_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 277227, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "swh_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 254532, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "swh_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 296789, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "swh_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 260266, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "swh_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 307501, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "swh_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 280301, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "swh_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 271547, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "swh_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 277895, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "swh_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 270574, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "swh_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 279762, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "swh_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 251667, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "swh_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 265361, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "swh_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 294522, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "swh_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 266460, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "swh_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 268433, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "swh_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 270242, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "swh_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 276675, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "swh_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283975, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "swh_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 264948, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "swh_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 251874, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "swh_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 277365, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "swh_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 300701, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "swh_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 276144, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "swh_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 279770, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "swh_Latn-npi_Deva": { + "num_samples": 1012, + 
"number_of_characters": 265007, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "swh_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 326388, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "swh_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 304273, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "swh_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 178123, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "swh_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 264040, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "swh_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 266689, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "swh_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 290585, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "swh_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 264209, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "swh_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 267952, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "swh_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 289213, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "swh_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 269184, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "swh_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 264213, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "swh_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 181352, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "swh_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 274467, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "swh_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 272142, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "swh_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 284247, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "swh_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 264505, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "swh_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 285649, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "swh_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 280444, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "swh_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 269814, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "swh_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 228804, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "swh_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 178799, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "swh_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 266275, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "swh_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 279361, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "swh_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 270919, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "swh_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 282034, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "swh_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 281102, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "swh_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 285276, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "swh_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 268959, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "swh_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 307115, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "swh_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 284305, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "swh_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 276721, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "swh_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 273187, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "swh_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 264532, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "swh_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273508, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "swh_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 273182, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "swh_Latn-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 288755, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "swh_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 290828, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "swh_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 301139, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "swh_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 286466, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "swh_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 255165, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "swh_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 291913, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "swh_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 259384, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "swh_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 281061, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "swh_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 275650, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "swh_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 272943, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "swh_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 285707, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "swh_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 296282, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "swh_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 281843, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "swh_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 249893, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "swh_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 278171, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "swh_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 268904, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "swh_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 284359, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "swh_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 268003, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "swh_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 257692, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "swh_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 278366, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "swh_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271326, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "swh_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 270959, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "swh_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 240158, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "swh_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275171, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "swh_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 272261, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "swh_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 271529, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "swh_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 289766, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "swh_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 310649, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.46640316205534, + "max_sentence1_length": 384, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "uzn_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 260944, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "uzn_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 275710, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "uzn_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 312681, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, 
+ "uzn_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 280381, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "uzn_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 302535, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "uzn_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 276018, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "uzn_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 288477, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "uzn_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 308055, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "uzn_Latn-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285084, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "uzn_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 291256, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "uzn_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 294893, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "uzn_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 306862, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "uzn_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 276256, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "uzn_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 302754, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "uzn_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 278731, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "uzn_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 271778, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "uzn_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 306320, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "uzn_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 277921, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "uzn_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 262896, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "uzn_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297314, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "uzn_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 281289, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "uzn_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 278050, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "uzn_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 295185, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "uzn_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 298659, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "uzn_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 273150, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "uzn_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 302670, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "uzn_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 267283, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "uzn_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 264436, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "uzn_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 310074, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "uzn_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 280615, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "uzn_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 287952, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "uzn_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284855, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "uzn_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 282374, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "uzn_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 310319, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + 
"max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "uzn_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280103, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "uzn_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 289800, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "uzn_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 261400, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "uzn_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 278365, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "uzn_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 278782, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + 
}, + "uzn_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 295279, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "uzn_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 295085, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "uzn_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 275722, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "uzn_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 288806, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "uzn_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 298049, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "uzn_Latn-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 283769, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "uzn_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 289013, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "uzn_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 277934, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "uzn_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 289145, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "uzn_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 283883, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "uzn_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 279695, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "uzn_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 290486, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "uzn_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 292741, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "uzn_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 286169, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "uzn_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 284070, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "uzn_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 258369, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "uzn_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 271553, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "uzn_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 276771, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "uzn_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 309003, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "uzn_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 265323, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "uzn_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287150, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "uzn_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 269966, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "uzn_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 281496, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "uzn_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 279242, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "uzn_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 281973, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "uzn_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 287505, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "uzn_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 283597, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "uzn_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 291641, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "uzn_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 288295, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "uzn_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 295611, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "uzn_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 290350, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + 
"max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "uzn_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 287427, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "uzn_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 296387, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "uzn_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 297474, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "uzn_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 304500, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "uzn_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 279106, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, 
+ "uzn_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 299217, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "uzn_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 286105, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "uzn_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 297894, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "uzn_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 286694, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "uzn_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 282560, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "uzn_Latn-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 236455, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "uzn_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 281510, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "uzn_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 289451, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "uzn_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 305461, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "uzn_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 215288, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "uzn_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 273980, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "uzn_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 296588, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "uzn_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 303456, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "uzn_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 287918, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "uzn_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 257304, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "uzn_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 288698, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "uzn_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 284583, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "uzn_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 286251, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "uzn_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 279383, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "uzn_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 295850, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "uzn_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291419, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "uzn_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 273740, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "uzn_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 314069, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "uzn_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 265630, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "uzn_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287147, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "uzn_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 306961, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + 
"average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "uzn_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 206266, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "uzn_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 295317, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "uzn_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 312492, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "uzn_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 291944, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "uzn_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 273227, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "uzn_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 276496, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "uzn_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 303081, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "uzn_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 295104, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "uzn_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 293842, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "uzn_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 279877, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + 
}, + "uzn_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 287009, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "uzn_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 296830, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "uzn_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 277580, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "uzn_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282605, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "uzn_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 288446, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "uzn_Latn-ars_Arab": { + "num_samples": 1012, + 
"number_of_characters": 265751, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "uzn_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 308008, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "uzn_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 271485, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "uzn_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 318720, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "uzn_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 291520, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "uzn_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 282766, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "uzn_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 289114, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "uzn_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 281793, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "uzn_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 290981, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "uzn_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 262886, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "uzn_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 276580, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "uzn_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 305741, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "uzn_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 277679, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "uzn_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 279652, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "uzn_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 281461, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "uzn_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 287894, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "uzn_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295194, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "uzn_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 276167, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "uzn_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 263093, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "uzn_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 288584, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "uzn_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 311920, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "uzn_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 287363, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "uzn_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 290989, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "uzn_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 276226, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "uzn_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 337607, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "uzn_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 315492, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + 
"max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "uzn_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 189342, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "uzn_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 275259, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "uzn_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 277908, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "uzn_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 301804, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "uzn_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 275428, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, 
+ "uzn_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 279171, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "uzn_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 300432, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "uzn_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 280403, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "uzn_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 275432, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "uzn_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 192571, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "uzn_Latn-ast_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285686, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "uzn_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 283361, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "uzn_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 295466, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "uzn_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 275724, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "uzn_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 296868, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "uzn_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 291663, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "uzn_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 281033, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "uzn_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 240023, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "uzn_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 190018, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "uzn_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 277494, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "uzn_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 290580, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "uzn_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 282138, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "uzn_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 293253, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "uzn_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 292321, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "uzn_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 296495, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "uzn_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 280178, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "uzn_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 318334, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "uzn_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 295524, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "uzn_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 287940, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "uzn_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 284406, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "uzn_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 275751, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "uzn_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284727, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "uzn_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 284401, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "uzn_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 299974, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "uzn_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 302047, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "uzn_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 312358, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "uzn_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 297685, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "uzn_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 266384, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "uzn_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 303132, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "uzn_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 270603, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "uzn_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 292280, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + 
}, + "uzn_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 286869, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "uzn_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 284162, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "uzn_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 296926, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "uzn_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 307501, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "uzn_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 293062, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "uzn_Latn-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 261112, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "uzn_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 289390, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "uzn_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 280123, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "uzn_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 295578, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "uzn_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 279222, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "uzn_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 268911, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "uzn_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 289585, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "uzn_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282545, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "uzn_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 282178, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "uzn_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 251377, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "uzn_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286390, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "uzn_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 283480, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "uzn_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 282748, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "uzn_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 300985, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "uzn_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 321868, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 147.55237154150197, + "max_sentence1_length": 394, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "als_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 258685, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "als_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 273451, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "als_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 310422, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "als_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 278122, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "als_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 300276, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "als_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 273759, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "als_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 286218, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "als_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 305796, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "als_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 282825, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "als_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 288997, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "als_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 292634, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, 
+ "unique_sentence2": 1012 + }, + "als_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 304603, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "als_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 273997, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "als_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 300495, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "als_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 276472, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "als_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 269519, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "als_Latn-spa_Latn": { + 
"num_samples": 1012, + "number_of_characters": 304061, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "als_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 275662, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "als_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 260637, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "als_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295055, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "als_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 279030, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "als_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 275791, + 
"unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "als_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 292926, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "als_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 296400, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "als_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 270891, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "als_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 300411, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "als_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 265024, + "unique_pairs": 1012, + "min_sentence1_length": 46, + 
"average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "als_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 262177, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "als_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 307815, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "als_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 278356, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "als_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 285693, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "als_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282596, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "als_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 280115, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "als_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 308060, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "als_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277844, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "als_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 287541, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "als_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 259141, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + 
"min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "als_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 276106, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "als_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 276523, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "als_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 293020, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "als_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 292826, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "als_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 273463, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 
124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "als_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 286547, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "als_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 295790, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "als_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281510, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "als_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 286754, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "als_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 275675, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + 
"unique_sentence2": 1012 + }, + "als_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 286886, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "als_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 281624, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "als_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 277436, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "als_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 288227, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "als_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 290482, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "als_Latn-sun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 283910, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "als_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 281811, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "als_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 256110, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "als_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 269294, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "als_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 274512, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "als_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 306744, + 
"unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "als_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 263064, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "als_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284891, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "als_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 267707, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "als_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 279237, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "als_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 276983, + "unique_pairs": 1012, + "min_sentence1_length": 46, + 
"average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "als_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 279714, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "als_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 285246, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "als_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 281338, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "als_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 289382, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "als_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 286036, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "als_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 293352, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "als_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 288091, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "als_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 285168, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "als_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 296387, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "als_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 295215, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "als_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 302241, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "als_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 276847, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "als_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 296958, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "als_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 283846, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "als_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 295635, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "als_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 284435, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "als_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 280301, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "als_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 234196, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "als_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 279251, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "als_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 287192, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + 
"unique_sentence2": 1012 + }, + "als_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 303202, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "als_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 213029, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "als_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 271721, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "als_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 294329, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "als_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 301197, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "als_Latn-vie_Latn": { + 
"num_samples": 1012, + "number_of_characters": 285659, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "als_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 255045, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "als_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 286439, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "als_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 282324, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "als_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 283992, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "als_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 277124, + 
"unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "als_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 293591, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "als_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289160, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "als_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 271481, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "als_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 311810, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "als_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 263371, + "unique_pairs": 1012, + "min_sentence1_length": 46, + 
"average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "als_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284888, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "als_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 304702, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "als_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 204007, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "als_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 293058, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "als_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 310233, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "als_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 289685, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "als_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 270968, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "als_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 274237, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "als_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 300822, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "als_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 292845, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "als_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 291583, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "als_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 277618, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "als_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 284750, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "als_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 294571, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "als_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 275321, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "als_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280346, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "als_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 286187, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "als_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 263492, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "als_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 305749, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "als_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 269226, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + 
"unique_sentence2": 1012 + }, + "als_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 316461, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "als_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 289261, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "als_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 280507, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "als_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 286855, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "als_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 279534, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "als_Latn-ydd_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 288722, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "als_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 260627, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "als_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 274321, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "als_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 303482, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "als_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 275420, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "als_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 277393, + 
"unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "als_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 279202, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "als_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 285635, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "als_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292935, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "als_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 273908, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "als_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 260834, + "unique_pairs": 1012, + "min_sentence1_length": 46, + 
"average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "als_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 286325, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "als_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 309661, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "als_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 285104, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "als_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 288730, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "als_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 273967, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "als_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 335348, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "als_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 313233, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "als_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 187083, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "als_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 273000, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "als_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 275649, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "als_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 299545, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "als_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 273169, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "als_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 276912, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "als_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 298173, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "als_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 278144, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "als_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 273173, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "als_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 190312, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "als_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 283427, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "als_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 281102, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "als_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 293207, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "als_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 273465, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "als_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 294609, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "als_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 289404, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "als_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 278774, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "als_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 237764, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "als_Latn-zho_Hant": { + 
"num_samples": 1012, + "number_of_characters": 187759, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "als_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 275235, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "als_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 288321, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "als_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 279879, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "als_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 290994, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "als_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 290062, + 
"unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "als_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 294236, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "als_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 277919, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "als_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 316075, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "als_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 293265, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "als_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 285681, + "unique_pairs": 1012, + "min_sentence1_length": 46, + 
"average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "als_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 282147, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "als_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 273492, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "als_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282468, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "als_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 282142, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "als_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 297715, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "als_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 299788, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "als_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 310099, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "als_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 295426, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "als_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 264125, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "als_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 300873, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "als_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 268344, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "als_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 290021, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "als_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 284610, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "als_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 281903, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "als_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 294667, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "als_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 305242, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "als_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 290803, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "als_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 258853, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "als_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 287131, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "als_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 277864, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + 
"unique_sentence2": 1012 + }, + "als_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 293319, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "als_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 276963, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "als_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 266652, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "als_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 287326, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "als_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280286, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "als_Latn-dyu_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279919, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "als_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 249118, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "als_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284131, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "als_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 281221, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "als_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 280489, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "als_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 298726, + 
"unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "als_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 319609, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 145.3201581027668, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bod_Tibt-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 259772, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bod_Tibt-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 274538, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "bod_Tibt-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 311509, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bod_Tibt-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 279209, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bod_Tibt-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 301363, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "bod_Tibt-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 274846, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bod_Tibt-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 287305, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bod_Tibt-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 306883, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bod_Tibt-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 283912, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + 
"max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bod_Tibt-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 290084, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bod_Tibt-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 293721, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bod_Tibt-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 305690, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bod_Tibt-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 275084, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "bod_Tibt-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 301582, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bod_Tibt-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 277559, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bod_Tibt-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 270606, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bod_Tibt-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 305148, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bod_Tibt-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 276749, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bod_Tibt-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 261724, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bod_Tibt-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296142, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bod_Tibt-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 280117, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bod_Tibt-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 276878, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bod_Tibt-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 294013, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bod_Tibt-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 297487, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "bod_Tibt-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 271978, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "bod_Tibt-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 301498, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bod_Tibt-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 266111, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "bod_Tibt-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 263264, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bod_Tibt-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 308902, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "bod_Tibt-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279443, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bod_Tibt-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 286780, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bod_Tibt-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283683, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bod_Tibt-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 281202, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bod_Tibt-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 309147, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bod_Tibt-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278931, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bod_Tibt-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 288628, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "bod_Tibt-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 260228, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bod_Tibt-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 277193, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bod_Tibt-est_Latn": { + "num_samples": 1012, + "number_of_characters": 277610, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bod_Tibt-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 294107, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bod_Tibt-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 293913, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bod_Tibt-min_Arab": { + "num_samples": 1012, + "number_of_characters": 274550, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bod_Tibt-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 287634, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bod_Tibt-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 296877, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bod_Tibt-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282597, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + 
"max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bod_Tibt-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 287841, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bod_Tibt-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 276762, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bod_Tibt-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 287973, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bod_Tibt-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 282711, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bod_Tibt-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 278523, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + 
"min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bod_Tibt-min_Latn": { + "num_samples": 1012, + "number_of_characters": 289314, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bod_Tibt-por_Latn": { + "num_samples": 1012, + "number_of_characters": 291569, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bod_Tibt-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 284997, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bod_Tibt-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 282898, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bod_Tibt-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 257197, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 
107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bod_Tibt-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 270381, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "bod_Tibt-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 275599, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bod_Tibt-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 307831, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bod_Tibt-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 264151, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "bod_Tibt-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285978, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + 
"unique_sentence2": 1012 + }, + "bod_Tibt-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 268794, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "bod_Tibt-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 280324, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bod_Tibt-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 278070, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bod_Tibt-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 280801, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bod_Tibt-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 286333, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bod_Tibt-fao_Latn": { + 
"num_samples": 1012, + "number_of_characters": 282425, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bod_Tibt-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 290469, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bod_Tibt-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 287123, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bod_Tibt-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 294439, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bod_Tibt-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 289178, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bod_Tibt-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 286255, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bod_Tibt-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 297474, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bod_Tibt-als_Latn": { + "num_samples": 1012, + "number_of_characters": 295215, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bod_Tibt-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 303328, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bod_Tibt-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 277934, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bod_Tibt-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 298045, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bod_Tibt-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 284933, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bod_Tibt-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 296722, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bod_Tibt-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 285522, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bod_Tibt-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 281388, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bod_Tibt-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 235283, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + 
"max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bod_Tibt-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 280338, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bod_Tibt-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 288279, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bod_Tibt-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 304289, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "bod_Tibt-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 214116, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "bod_Tibt-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 272808, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + 
"min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bod_Tibt-run_Latn": { + "num_samples": 1012, + "number_of_characters": 295416, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bod_Tibt-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 302284, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bod_Tibt-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 286746, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bod_Tibt-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 256132, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bod_Tibt-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 287526, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 
137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bod_Tibt-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 283411, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "bod_Tibt-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 285079, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bod_Tibt-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 278211, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bod_Tibt-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 294678, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bod_Tibt-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290247, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "bod_Tibt-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 272568, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bod_Tibt-war_Latn": { + "num_samples": 1012, + "number_of_characters": 312897, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bod_Tibt-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 264458, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bod_Tibt-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285975, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bod_Tibt-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 305789, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bod_Tibt-jpn_Jpan": { + 
"num_samples": 1012, + "number_of_characters": 205094, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bod_Tibt-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 294145, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bod_Tibt-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 311320, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bod_Tibt-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 290772, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bod_Tibt-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 272055, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bod_Tibt-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 275324, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bod_Tibt-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 301909, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bod_Tibt-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 293932, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bod_Tibt-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 292670, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bod_Tibt-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 278705, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "bod_Tibt-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 285837, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bod_Tibt-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 295658, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bod_Tibt-san_Deva": { + "num_samples": 1012, + "number_of_characters": 276408, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bod_Tibt-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281433, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bod_Tibt-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 287274, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bod_Tibt-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 264579, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + 
"max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bod_Tibt-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 306836, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bod_Tibt-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 270313, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "bod_Tibt-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 317548, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "bod_Tibt-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 290348, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bod_Tibt-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 281594, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + 
"min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bod_Tibt-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 287942, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bod_Tibt-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 280621, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bod_Tibt-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 289809, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bod_Tibt-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 261714, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bod_Tibt-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 275408, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 
125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bod_Tibt-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 304569, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bod_Tibt-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 276507, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "bod_Tibt-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 278480, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bod_Tibt-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 280289, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "bod_Tibt-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 286722, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "bod_Tibt-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294022, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "bod_Tibt-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 274995, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bod_Tibt-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 261921, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "bod_Tibt-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 287412, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bod_Tibt-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 310748, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bod_Tibt-kan_Knda": { + 
"num_samples": 1012, + "number_of_characters": 286191, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bod_Tibt-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 289817, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bod_Tibt-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 275054, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bod_Tibt-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 336435, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bod_Tibt-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 314320, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bod_Tibt-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 188170, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "bod_Tibt-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 274087, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bod_Tibt-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 276736, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bod_Tibt-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 300632, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bod_Tibt-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 274256, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "bod_Tibt-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 277999, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bod_Tibt-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 299260, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bod_Tibt-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 279231, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bod_Tibt-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 274260, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bod_Tibt-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 191399, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bod_Tibt-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 284514, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + 
"max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bod_Tibt-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 282189, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bod_Tibt-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 294294, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bod_Tibt-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 274552, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "bod_Tibt-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 295696, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "bod_Tibt-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 290491, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + 
"min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bod_Tibt-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 279861, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bod_Tibt-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 238851, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bod_Tibt-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 188846, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bod_Tibt-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 276322, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bod_Tibt-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 289408, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 
139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bod_Tibt-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 280966, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bod_Tibt-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 292081, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bod_Tibt-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 291149, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bod_Tibt-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 295323, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bod_Tibt-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 279006, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + 
"unique_sentence2": 1012 + }, + "bod_Tibt-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 317162, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "bod_Tibt-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 294352, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bod_Tibt-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 286768, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "bod_Tibt-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 283234, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bod_Tibt-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 274579, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bod_Tibt-kaz_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 283555, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bod_Tibt-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 283229, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bod_Tibt-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 298802, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bod_Tibt-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 300875, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bod_Tibt-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 311186, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bod_Tibt-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 296513, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "bod_Tibt-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 265212, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bod_Tibt-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 301960, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bod_Tibt-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 269431, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bod_Tibt-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 291108, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "bod_Tibt-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 285697, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bod_Tibt-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 282990, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bod_Tibt-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 295754, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bod_Tibt-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 306329, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bod_Tibt-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 291890, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bod_Tibt-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 259940, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + 
"max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bod_Tibt-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 288218, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bod_Tibt-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 278951, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bod_Tibt-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 294406, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bod_Tibt-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 278050, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "bod_Tibt-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 267739, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + 
"min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bod_Tibt-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 288413, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bod_Tibt-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281373, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "bod_Tibt-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 281006, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bod_Tibt-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 250205, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bod_Tibt-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285218, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 44, + "average_sentence2_length": 
135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bod_Tibt-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 282308, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bod_Tibt-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 281576, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bod_Tibt-som_Latn": { + "num_samples": 1012, + "number_of_characters": 299813, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bod_Tibt-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 320696, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.39426877470356, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "fij_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 266798, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + 
"unique_sentence2": 1012 + }, + "fij_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 281564, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "fij_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 318535, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fij_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 286235, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "fij_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 308389, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "fij_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 281872, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "fij_Latn-pap_Latn": { + 
"num_samples": 1012, + "number_of_characters": 294331, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fij_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 313909, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "fij_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 290938, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "fij_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 297110, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fij_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 300747, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fij_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 312716, + "unique_pairs": 
1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "fij_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 282110, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "fij_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 308608, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "fij_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 284585, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fij_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 277632, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fij_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 312174, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fij_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 283775, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "fij_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 268750, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "fij_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 303168, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fij_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 287143, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fij_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 283904, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + 
"max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fij_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 301039, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fij_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 304513, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fij_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 279004, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "fij_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 308524, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fij_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 273137, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "fij_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 270290, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "fij_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 315928, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "fij_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 286469, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fij_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 293806, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "fij_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290709, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fij_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 288228, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fij_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 316173, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "fij_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285957, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fij_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 295654, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "fij_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 267254, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + 
"unique_sentence2": 1012 + }, + "fij_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 284219, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fij_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 284636, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "fij_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 301133, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fij_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 300939, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "fij_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 281576, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fij_Latn-pol_Latn": { + 
"num_samples": 1012, + "number_of_characters": 294660, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fij_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 303903, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fij_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289623, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fij_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 294867, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fij_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 283788, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fij_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 294999, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fij_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 289737, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fij_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 285549, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fij_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 296340, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fij_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 298595, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fij_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 292023, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fij_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 289924, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fij_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 264223, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "fij_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 277407, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "fij_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 282625, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fij_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 314857, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + 
"max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "fij_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 271177, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "fij_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293004, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "fij_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 275820, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "fij_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 287350, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fij_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 285096, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fij_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 287827, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fij_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 293359, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fij_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 289451, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fij_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 297495, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fij_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 294149, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fij_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 301465, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "fij_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 296204, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fij_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 293281, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "fij_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 304500, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fij_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 302241, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + 
"unique_sentence2": 1012 + }, + "fij_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 303328, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "fij_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 284960, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fij_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 305071, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fij_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 291959, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fij_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 303748, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fij_Latn-szl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 292548, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "fij_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 288414, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fij_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 242309, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "fij_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 287364, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fij_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 295305, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fij_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 311315, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "fij_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 221142, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "fij_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 279834, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "fij_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 302442, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "fij_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 309310, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "fij_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 293772, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "fij_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 263158, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "fij_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 294552, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fij_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 290437, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "fij_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 292105, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fij_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 285237, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + 
"max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fij_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 301704, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fij_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297273, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fij_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 279594, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fij_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 319923, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fij_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 271484, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fij_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293001, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fij_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 312815, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "fij_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 212120, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "fij_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 301171, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fij_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 318346, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 
161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fij_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 297798, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "fij_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 279081, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fij_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 282350, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fij_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 308935, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "fij_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 300958, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + 
"unique_sentence2": 1012 + }, + "fij_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 299696, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "fij_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 285731, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "fij_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 292863, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "fij_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 302684, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fij_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 283434, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "fij_Latn-tat_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 288459, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fij_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 294300, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "fij_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 271605, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fij_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 313862, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fij_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 277339, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "fij_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 324574, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "fij_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 297374, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fij_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 288620, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fij_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 294968, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fij_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 287647, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fij_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 296835, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fij_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 268740, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "fij_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 282434, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fij_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 311595, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "fij_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 283533, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "fij_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 285506, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + 
"max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "fij_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 287315, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "fij_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 293748, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fij_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301048, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "fij_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 282021, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fij_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 268947, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "fij_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 294438, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fij_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 317774, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "fij_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 293217, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fij_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 296843, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fij_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 282080, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fij_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 343461, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "fij_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 321346, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fij_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 195196, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "fij_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 281113, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fij_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 283762, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + 
"unique_sentence2": 1012 + }, + "fij_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 307658, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "fij_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 281282, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "fij_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 285025, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fij_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 306286, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "fij_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 286257, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fij_Latn-tha_Thai": { + 
"num_samples": 1012, + "number_of_characters": 281286, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fij_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 198425, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "fij_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 291540, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fij_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 289215, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fij_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 301320, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fij_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 281578, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "fij_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 302722, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "fij_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 297517, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "fij_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 286887, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fij_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 245877, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "fij_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 195872, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "fij_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 283348, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fij_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 296434, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fij_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 287992, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "fij_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 299107, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fij_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 298175, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + 
"max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "fij_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 302349, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fij_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 286032, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fij_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 324188, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "fij_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 301378, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fij_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 293794, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "fij_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 290260, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fij_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 281605, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fij_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290581, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fij_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 290255, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "fij_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 305828, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fij_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 307901, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "fij_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 318212, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fij_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 303539, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "fij_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 272238, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "fij_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 308986, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + 
"unique_sentence2": 1012 + }, + "fij_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 276457, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "fij_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 298134, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "fij_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 292723, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fij_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 290016, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fij_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 302780, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "fij_Latn-tso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 313355, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fij_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 298916, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "fij_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 266966, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "fij_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 295244, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "fij_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 285977, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fij_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 301432, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fij_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 285076, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "fij_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 274765, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "fij_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 295439, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fij_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288399, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "fij_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 288032, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "fij_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 257231, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "fij_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292244, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fij_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 289334, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fij_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 288602, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fij_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 306839, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + 
"max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "fij_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 327722, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 153.33695652173913, + "max_sentence1_length": 543, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "isl_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 241404, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "isl_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 256170, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "isl_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 293141, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "isl_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 260841, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "isl_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 282995, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "isl_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 256478, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "isl_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 268937, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "isl_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 288515, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "isl_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 265544, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "isl_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 271716, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "isl_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 275353, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "isl_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 287322, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "isl_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 256716, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "isl_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 283214, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 
1009 + }, + "isl_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 259191, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "isl_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 252238, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "isl_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 286780, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "isl_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 258381, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "isl_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 243356, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "isl_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 277774, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "isl_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 261749, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "isl_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 258510, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "isl_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 275645, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "isl_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 279119, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "isl_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 253610, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "isl_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 283130, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "isl_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 247743, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "isl_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 244896, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "isl_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 290534, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "isl_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 261075, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "isl_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 268412, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "isl_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265315, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "isl_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 262834, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "isl_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 290779, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "isl_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260563, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "isl_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 270260, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "isl_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 241860, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "isl_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 258825, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "isl_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 259242, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "isl_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 275739, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "isl_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 275545, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "isl_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 256182, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "isl_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 269266, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "isl_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 278509, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "isl_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264229, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "isl_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 269473, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "isl_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 258394, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "isl_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 269605, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "isl_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 264343, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "isl_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 260155, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + 
}, + "isl_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 270946, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "isl_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 273201, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "isl_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 266629, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "isl_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 264530, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "isl_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 238829, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "isl_Latn-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 252013, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "isl_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 257231, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "isl_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 289463, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "isl_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 245783, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "isl_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267610, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "isl_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 250426, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "isl_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 261956, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "isl_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 259702, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "isl_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 262433, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "isl_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 267965, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "isl_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 264057, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "isl_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 272101, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "isl_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 268755, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "isl_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 276071, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "isl_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 270810, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "isl_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 267887, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "isl_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 279106, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "isl_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 276847, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "isl_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 277934, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "isl_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 284960, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "isl_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 279677, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "isl_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 266565, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "isl_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 278354, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "isl_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 267154, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "isl_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 263020, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "isl_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 216915, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + 
"max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "isl_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 261970, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "isl_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 269911, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "isl_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 285921, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "isl_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 195748, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "isl_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 254440, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, 
+ "isl_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 277048, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "isl_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 283916, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "isl_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 268378, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "isl_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 237764, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "isl_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 269158, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "isl_Latn-fon_Latn": { + "num_samples": 1012, + 
"number_of_characters": 265043, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "isl_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 266711, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "isl_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 259843, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "isl_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 276310, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "isl_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271879, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "isl_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 254200, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "isl_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 294529, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "isl_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 246090, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "isl_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267607, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "isl_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 287421, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "isl_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 186726, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "isl_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 275777, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "isl_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 292952, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "isl_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 272404, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "isl_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 253687, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "isl_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 256956, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "isl_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 283541, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "isl_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 275564, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "isl_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 274302, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "isl_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 260337, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "isl_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 267469, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "isl_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 277290, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "isl_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 258040, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "isl_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263065, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "isl_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 268906, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "isl_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 246211, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + 
"max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "isl_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 288468, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "isl_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 251945, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "isl_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 299180, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "isl_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 271980, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "isl_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 263226, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, 
+ "isl_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 269574, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "isl_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 262253, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "isl_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 271441, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "isl_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 243346, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "isl_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 257040, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "isl_Latn-gaz_Latn": { + "num_samples": 1012, + 
"number_of_characters": 286201, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "isl_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 258139, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "isl_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 260112, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "isl_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 261921, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "isl_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 268354, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "isl_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275654, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "isl_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 256627, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "isl_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 243553, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "isl_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 269044, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "isl_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 292380, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "isl_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 267823, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "isl_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 271449, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "isl_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 256686, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "isl_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 318067, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "isl_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 295952, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "isl_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 169802, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + 
"unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "isl_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 255719, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "isl_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 258368, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "isl_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 282264, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "isl_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 255888, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "isl_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 259631, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "isl_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 280892, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "isl_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 260863, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "isl_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 255892, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "isl_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 173031, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "isl_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 266146, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + 
"max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "isl_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 263821, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "isl_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 275926, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "isl_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 256184, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "isl_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 277328, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "isl_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 272123, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, 
+ "isl_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 261493, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "isl_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 220483, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "isl_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 170478, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "isl_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 257954, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "isl_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 271040, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "isl_Latn-grn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 262598, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "isl_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 273713, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "isl_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 272781, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "isl_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 276955, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "isl_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 260638, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "isl_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 298794, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "isl_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 275984, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "isl_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 268400, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "isl_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 264866, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "isl_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 256211, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "isl_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265187, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "isl_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 264861, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "isl_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 280434, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "isl_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 282507, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "isl_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 292818, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "isl_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 278145, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "isl_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 246844, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "isl_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 283592, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "isl_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 251063, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "isl_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 272740, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "isl_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 267329, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "isl_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 264622, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "isl_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 277386, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "isl_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 287961, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "isl_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 273522, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "isl_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 241572, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "isl_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 269850, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "isl_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 260583, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "isl_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 276038, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "isl_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 259682, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "isl_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 249371, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"isl_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 270045, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "isl_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263005, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "isl_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 262638, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "isl_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 231837, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "isl_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266850, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "isl_Latn-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 263940, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "isl_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 263208, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "isl_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 281445, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "isl_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 302328, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.24407114624506, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kon_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 261515, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kon_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 276281, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kon_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 313252, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kon_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 280952, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kon_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 303106, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kon_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 276589, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kon_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 289048, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kon_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 308626, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kon_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 285655, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kon_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 291827, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kon_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 295464, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kon_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 307433, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 
1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kon_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 276827, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kon_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 303325, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kon_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 279302, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kon_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 272349, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kon_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 306891, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kon_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 278492, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kon_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 263467, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kon_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297885, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kon_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 281860, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kon_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 278621, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kon_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 295756, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kon_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 299230, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kon_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 273721, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kon_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 303241, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kon_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 267854, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, 
+ "kon_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 265007, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kon_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 310645, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kon_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 281186, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kon_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 288523, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kon_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285426, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kon_Latn-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 282945, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kon_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 310890, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kon_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280674, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kon_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 290371, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kon_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 261971, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kon_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 278936, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kon_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 279353, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kon_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 295850, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kon_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 295656, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kon_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 276293, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kon_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 289377, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kon_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 298620, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kon_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284340, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kon_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 289584, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kon_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 278505, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kon_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 289716, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kon_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 284454, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kon_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 280266, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kon_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 291057, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kon_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 293312, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kon_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 286740, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kon_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 284641, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kon_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 258940, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kon_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 272124, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kon_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 277342, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kon_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 309574, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kon_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 265894, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kon_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287721, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kon_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 270537, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kon_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 282067, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kon_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 279813, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "kon_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 282544, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kon_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 288076, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kon_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 284168, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kon_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 292212, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kon_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 288866, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kon_Latn-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 296182, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kon_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 290921, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kon_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 287998, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kon_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 299217, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kon_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 296958, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kon_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 298045, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kon_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 305071, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kon_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 279677, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kon_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 286676, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kon_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 298465, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kon_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 287265, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kon_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 283131, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kon_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 237026, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kon_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 282081, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kon_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 290022, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kon_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 306032, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kon_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 215859, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kon_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 274551, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kon_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 297159, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kon_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 304027, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kon_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 288489, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kon_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 257875, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kon_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 289269, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kon_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 285154, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kon_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 286822, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kon_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 279954, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kon_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 296421, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kon_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291990, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kon_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 274311, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kon_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 314640, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kon_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 266201, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, 
+ "kon_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287718, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kon_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 307532, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kon_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 206837, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kon_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 295888, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kon_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 313063, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kon_Latn-sag_Latn": { + "num_samples": 1012, + 
"number_of_characters": 292515, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kon_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 273798, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kon_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 277067, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kon_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 303652, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kon_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 295675, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kon_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 294413, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kon_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 280448, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kon_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 287580, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kon_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 297401, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kon_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 278151, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kon_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283176, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kon_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 289017, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kon_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 266322, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kon_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 308579, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kon_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 272056, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kon_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 319291, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kon_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 292091, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kon_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 283337, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kon_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 289685, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kon_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 282364, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kon_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 291552, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kon_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 263457, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kon_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 277151, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kon_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 306312, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kon_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 278250, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kon_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 280223, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + 
"max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kon_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 282032, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kon_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 288465, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kon_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295765, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kon_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 276738, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kon_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 263664, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + 
"kon_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 289155, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kon_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 312491, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kon_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 287934, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kon_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 291560, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kon_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 276797, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kon_Latn-shn_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 338178, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kon_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 316063, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kon_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 189913, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kon_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 275830, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kon_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 278479, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kon_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 302375, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kon_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 275999, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kon_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 279742, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kon_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 301003, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kon_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 280974, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kon_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 276003, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kon_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 193142, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kon_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 286257, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kon_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 283932, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kon_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 296037, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kon_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 276295, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kon_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 297439, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kon_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 292234, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kon_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 281604, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kon_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 240594, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kon_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 190589, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + 
"average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kon_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 278065, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kon_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 291151, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kon_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 282709, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kon_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 293824, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kon_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 292892, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + 
"max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kon_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 297066, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kon_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 280749, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kon_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 318905, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kon_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 296095, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kon_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 288511, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, 
+ "kon_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 284977, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kon_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 276322, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kon_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285298, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kon_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 284972, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kon_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 300545, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kon_Latn-smo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 302618, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kon_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 312929, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kon_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 298256, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kon_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 266955, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kon_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 303703, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kon_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 271174, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kon_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 292851, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kon_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 287440, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kon_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 284733, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kon_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 297497, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kon_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 308072, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kon_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 293633, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kon_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 261683, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kon_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 289961, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kon_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 280694, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kon_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 296149, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kon_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 279793, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kon_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 269482, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kon_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 290156, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kon_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283116, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kon_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 282749, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kon_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 251948, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kon_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286961, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kon_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 284051, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kon_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 283319, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kon_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 301556, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + 
"max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kon_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 322439, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 148.11660079051384, + "max_sentence1_length": 410, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "mni_Beng-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 248403, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "mni_Beng-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 263169, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "mni_Beng-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 300140, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mni_Beng-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 267840, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + 
"mni_Beng-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 289994, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "mni_Beng-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 263477, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "mni_Beng-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 275936, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mni_Beng-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 295514, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "mni_Beng-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 272543, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "mni_Beng-ace_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278715, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mni_Beng-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 282352, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mni_Beng-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 294321, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "mni_Beng-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 263715, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "mni_Beng-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 290213, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "mni_Beng-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 266190, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mni_Beng-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 259237, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mni_Beng-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 293779, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mni_Beng-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 265380, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "mni_Beng-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 250355, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "mni_Beng-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284773, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mni_Beng-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 268748, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mni_Beng-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 265509, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mni_Beng-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 282644, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mni_Beng-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 286118, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mni_Beng-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 260609, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "mni_Beng-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 290129, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mni_Beng-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 254742, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "mni_Beng-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 251895, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "mni_Beng-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 297533, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "mni_Beng-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 268074, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mni_Beng-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 275411, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "mni_Beng-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272314, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mni_Beng-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 269833, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mni_Beng-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 297778, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "mni_Beng-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267562, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mni_Beng-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 277259, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "mni_Beng-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 248859, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "mni_Beng-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 265824, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mni_Beng-est_Latn": { + "num_samples": 1012, + "number_of_characters": 266241, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "mni_Beng-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 282738, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + 
"mni_Beng-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 282544, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "mni_Beng-min_Arab": { + "num_samples": 1012, + "number_of_characters": 263181, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mni_Beng-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 276265, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mni_Beng-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 285508, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mni_Beng-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271228, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mni_Beng-afr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 276472, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mni_Beng-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 265393, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mni_Beng-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 276604, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mni_Beng-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 271342, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mni_Beng-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 267154, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mni_Beng-min_Latn": { + "num_samples": 1012, + "number_of_characters": 277945, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mni_Beng-por_Latn": { + "num_samples": 1012, + "number_of_characters": 280200, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mni_Beng-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 273628, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mni_Beng-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 271529, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mni_Beng-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 245828, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "mni_Beng-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 259012, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "mni_Beng-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 264230, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mni_Beng-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 296462, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "mni_Beng-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 252782, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "mni_Beng-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274609, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "mni_Beng-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 257425, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "mni_Beng-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 268955, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mni_Beng-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 266701, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mni_Beng-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 269432, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mni_Beng-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 274964, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mni_Beng-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 271056, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mni_Beng-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 279100, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mni_Beng-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 275754, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mni_Beng-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 283070, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "mni_Beng-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 277809, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mni_Beng-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 274886, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "mni_Beng-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 286105, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mni_Beng-als_Latn": { + "num_samples": 1012, + "number_of_characters": 283846, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mni_Beng-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 284933, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "mni_Beng-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 291959, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "mni_Beng-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 266565, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + 
"mni_Beng-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 286676, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mni_Beng-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 285353, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mni_Beng-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 274153, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "mni_Beng-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 270019, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mni_Beng-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 223914, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "mni_Beng-bos_Latn": { + "num_samples": 1012, + 
"number_of_characters": 268969, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mni_Beng-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 276910, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mni_Beng-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 292920, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "mni_Beng-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 202747, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "mni_Beng-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 261439, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "mni_Beng-run_Latn": { + "num_samples": 1012, + "number_of_characters": 284047, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "mni_Beng-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 290915, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "mni_Beng-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 275377, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "mni_Beng-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 244763, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "mni_Beng-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 276157, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mni_Beng-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 272042, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "mni_Beng-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 273710, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mni_Beng-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 266842, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mni_Beng-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 283309, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mni_Beng-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278878, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mni_Beng-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 261199, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mni_Beng-war_Latn": { + "num_samples": 1012, + "number_of_characters": 301528, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mni_Beng-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 253089, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mni_Beng-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274606, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mni_Beng-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 294420, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "mni_Beng-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 193725, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + 
"average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "mni_Beng-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 282776, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mni_Beng-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 299951, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mni_Beng-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 279403, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "mni_Beng-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 260686, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mni_Beng-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 263955, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + 
"max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mni_Beng-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 290540, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "mni_Beng-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 282563, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mni_Beng-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 281301, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mni_Beng-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 267336, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "mni_Beng-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 274468, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + 
"mni_Beng-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 284289, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mni_Beng-san_Deva": { + "num_samples": 1012, + "number_of_characters": 265039, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "mni_Beng-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270064, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mni_Beng-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 275905, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "mni_Beng-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 253210, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mni_Beng-ceb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 295467, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mni_Beng-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 258944, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "mni_Beng-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 306179, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "mni_Beng-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 278979, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mni_Beng-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 270225, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mni_Beng-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 276573, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mni_Beng-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 269252, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mni_Beng-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 278440, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mni_Beng-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 250345, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "mni_Beng-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 264039, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mni_Beng-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 293200, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "mni_Beng-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 265138, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "mni_Beng-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 267111, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "mni_Beng-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 268920, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "mni_Beng-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 275353, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mni_Beng-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282653, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "mni_Beng-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 263626, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mni_Beng-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 250552, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "mni_Beng-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 276043, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mni_Beng-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 299379, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "mni_Beng-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 274822, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mni_Beng-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 278448, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mni_Beng-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 263685, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mni_Beng-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 325066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "mni_Beng-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 302951, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mni_Beng-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 176801, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + 
"max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "mni_Beng-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 262718, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mni_Beng-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 265367, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mni_Beng-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 289263, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "mni_Beng-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 262887, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "mni_Beng-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 266630, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + 
"mni_Beng-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 287891, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "mni_Beng-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 267862, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mni_Beng-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 262891, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mni_Beng-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 180030, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "mni_Beng-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 273145, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mni_Beng-crh_Latn": { + "num_samples": 1012, + 
"number_of_characters": 270820, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mni_Beng-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 282925, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mni_Beng-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 263183, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "mni_Beng-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 284327, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "mni_Beng-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 279122, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "mni_Beng-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 268492, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mni_Beng-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 227482, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "mni_Beng-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 177477, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "mni_Beng-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 264953, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mni_Beng-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 278039, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mni_Beng-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 269597, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "mni_Beng-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 280712, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mni_Beng-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 279780, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "mni_Beng-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 283954, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mni_Beng-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 267637, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mni_Beng-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 305793, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "mni_Beng-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 282983, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mni_Beng-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 275399, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "mni_Beng-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 271865, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mni_Beng-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 263210, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mni_Beng-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272186, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mni_Beng-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 271860, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "mni_Beng-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 287433, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mni_Beng-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 289506, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "mni_Beng-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 299817, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mni_Beng-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 285144, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + 
"max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "mni_Beng-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 253843, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "mni_Beng-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 290591, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mni_Beng-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 258062, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "mni_Beng-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 279739, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "mni_Beng-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 274328, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + 
"mni_Beng-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 271621, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mni_Beng-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 284385, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "mni_Beng-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 294960, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mni_Beng-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 280521, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "mni_Beng-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 248571, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "mni_Beng-hau_Latn": { + "num_samples": 1012, + 
"number_of_characters": 276849, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "mni_Beng-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 267582, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mni_Beng-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 283037, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mni_Beng-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 266681, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "mni_Beng-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 256370, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "mni_Beng-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 277044, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mni_Beng-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270004, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "mni_Beng-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 269637, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "mni_Beng-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 238836, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "mni_Beng-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273849, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mni_Beng-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 270939, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mni_Beng-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 270207, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mni_Beng-som_Latn": { + "num_samples": 1012, + "number_of_characters": 288444, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "mni_Beng-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 309327, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.1600790513834, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ron_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 260192, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ron_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 274958, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ron_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 311929, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ron_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 279629, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ron_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 301783, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ron_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 275266, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ron_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 287725, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ron_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 307303, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ron_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 284332, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ron_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 290504, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ron_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 294141, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ron_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 306110, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, 
+ "unique_sentence2": 1012 + }, + "ron_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 275504, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ron_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 302002, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ron_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 277979, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ron_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 271026, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ron_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 305568, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ron_Latn-twi_Latn": { + 
"num_samples": 1012, + "number_of_characters": 277169, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ron_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 262144, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ron_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296562, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ron_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 280537, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ron_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 277298, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ron_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 294433, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ron_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 297907, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ron_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 272398, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ron_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 301918, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ron_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 266531, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ron_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 263684, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ron_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 309322, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ron_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 279863, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ron_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 287200, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ron_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284103, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ron_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 281622, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ron_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 309567, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ron_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279351, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ron_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 289048, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ron_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 260648, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ron_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 277613, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ron_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 278030, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ron_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 294527, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ron_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 294333, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ron_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 274970, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ron_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 288054, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ron_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 297297, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ron_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283017, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ron_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 288261, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ron_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 277182, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ron_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 288393, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "ron_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 283131, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ron_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 278943, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ron_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 289734, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ron_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 291989, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ron_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 285417, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ron_Latn-umb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 283318, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ron_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 257617, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ron_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 270801, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ron_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 276019, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ron_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 308251, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ron_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 264571, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ron_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286398, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ron_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 269214, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ron_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 280744, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ron_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 278490, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ron_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 281221, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ron_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 286753, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ron_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 282845, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ron_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 290889, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ron_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 287543, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ron_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 294859, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ron_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 289598, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ron_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 286675, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ron_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 297894, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ron_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 295635, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ron_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 296722, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ron_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 303748, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ron_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 278354, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ron_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 298465, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ron_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 285353, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ron_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 285942, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ron_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 281808, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ron_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 235703, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ron_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 280758, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ron_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 288699, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ron_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 304709, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + 
"unique_sentence2": 1012 + }, + "ron_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 214536, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ron_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 273228, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ron_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 295836, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ron_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 302704, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ron_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 287166, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ron_Latn-apc_Arab": { + 
"num_samples": 1012, + "number_of_characters": 256552, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ron_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 287946, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ron_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 283831, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ron_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 285499, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ron_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 278631, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ron_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 295098, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ron_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290667, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ron_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 272988, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ron_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 313317, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ron_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 264878, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ron_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286395, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ron_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 306209, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ron_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 205514, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ron_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 294565, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ron_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 311740, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ron_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 291192, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ron_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 272475, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ron_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 275744, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ron_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 302329, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ron_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 294352, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ron_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 293090, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + 
"min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ron_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 279125, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ron_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 286257, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ron_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 296078, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ron_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 276828, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ron_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281853, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ron_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 287694, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ron_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 264999, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ron_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 307256, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ron_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 270733, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ron_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 317968, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + 
"unique_sentence2": 1012 + }, + "ron_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 290768, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ron_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 282014, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ron_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 288362, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ron_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 281041, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ron_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 290229, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ron_Latn-ary_Arab": { + 
"num_samples": 1012, + "number_of_characters": 262134, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ron_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 275828, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ron_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 304989, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ron_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 276927, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ron_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 278900, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ron_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 280709, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ron_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 287142, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ron_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294442, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ron_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 275415, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ron_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 262341, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ron_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 287832, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ron_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 311168, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ron_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 286611, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ron_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 290237, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ron_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 275474, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ron_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 336855, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ron_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 314740, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ron_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 188590, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ron_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 274507, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ron_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 277156, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ron_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 301052, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ron_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 274676, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ron_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 278419, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ron_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 299680, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ron_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 279651, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ron_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 274680, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ron_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 191819, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ron_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 284934, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ron_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 282609, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ron_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 294714, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ron_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 274972, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + 
"unique_sentence2": 1012 + }, + "ron_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 296116, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ron_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 290911, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ron_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 280281, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ron_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 239271, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ron_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 189266, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ron_Latn-awa_Deva": { + 
"num_samples": 1012, + "number_of_characters": 276742, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ron_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 289828, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ron_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 281386, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ron_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 292501, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ron_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 291569, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ron_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 295743, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ron_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 279426, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ron_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 317582, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ron_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 294772, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ron_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 287188, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ron_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 283654, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ron_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 274999, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ron_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283975, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ron_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 283649, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ron_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 299222, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ron_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 301295, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ron_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 311606, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ron_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 296933, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ron_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 265632, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ron_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 302380, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ron_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 269851, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ron_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 291528, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ron_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 286117, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ron_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 283410, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ron_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 296174, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ron_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 306749, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ron_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 292310, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ron_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 260360, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ron_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 288638, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ron_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 279371, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ron_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 294826, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + 
"unique_sentence2": 1012 + }, + "ron_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 278470, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ron_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 268159, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ron_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 288833, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ron_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281793, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ron_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 281426, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ron_Latn-heb_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 250625, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ron_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285638, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ron_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 282728, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ron_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 281996, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ron_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 300233, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ron_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 321116, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 146.8092885375494, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "szl_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 248992, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "szl_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 263758, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "szl_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 300729, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "szl_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 268429, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "szl_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 290583, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "szl_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 264066, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "szl_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 276525, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "szl_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 296103, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "szl_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 273132, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "szl_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 279304, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + 
"max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "szl_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 282941, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "szl_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 294910, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "szl_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 264304, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "szl_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 290802, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "szl_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 266779, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "szl_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 259826, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "szl_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 294368, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "szl_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 265969, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "szl_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 250944, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "szl_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285362, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 
146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "szl_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 269337, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "szl_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 266098, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "szl_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 283233, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "szl_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 286707, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "szl_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 261198, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + 
"unique_sentence2": 1012 + }, + "szl_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 290718, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "szl_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 255331, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "szl_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 252484, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "szl_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 298122, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "szl_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 268663, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "szl_Latn-hun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 276000, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "szl_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272903, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "szl_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 270422, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "szl_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 298367, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "szl_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268151, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "szl_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 277848, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "szl_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 249448, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "szl_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 266413, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "szl_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 266830, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "szl_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 283327, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "szl_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 283133, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "szl_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 263770, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "szl_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 276854, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "szl_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 286097, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "szl_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271817, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "szl_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 277061, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + 
"max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "szl_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 265982, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "szl_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 277193, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "szl_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 271931, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "szl_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 267743, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "szl_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 278534, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + 
"min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "szl_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 280789, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "szl_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 274217, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "szl_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 272118, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "szl_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 246417, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "szl_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 259601, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 
120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "szl_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 264819, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "szl_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 297051, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "szl_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 253371, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "szl_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275198, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "szl_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 258014, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + 
"unique_sentence2": 1012 + }, + "szl_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 269544, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "szl_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 267290, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "szl_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 270021, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "szl_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 275553, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "szl_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 271645, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "szl_Latn-ind_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279689, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "szl_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 276343, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "szl_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 283659, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "szl_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 278398, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "szl_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 275475, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "szl_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 286694, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "szl_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 284435, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "szl_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 285522, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "szl_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 292548, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "szl_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 267154, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "szl_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 287265, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "szl_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 274153, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "szl_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 285942, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "szl_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 270608, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "szl_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 224503, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "szl_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 269558, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + 
"max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "szl_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 277499, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "szl_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 293509, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "szl_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 203336, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "szl_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 262028, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "szl_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 284636, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + 
"min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "szl_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 291504, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "szl_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 275966, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "szl_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 245352, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "szl_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 276746, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "szl_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 272631, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "szl_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 274299, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "szl_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 267431, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "szl_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 283898, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "szl_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279467, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "szl_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 261788, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + 
"unique_sentence2": 1012 + }, + "szl_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 302117, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "szl_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 253678, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "szl_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275195, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "szl_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 295009, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "szl_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 194314, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "szl_Latn-lij_Latn": { + 
"num_samples": 1012, + "number_of_characters": 283365, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "szl_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 300540, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "szl_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 279992, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "szl_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 261275, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "szl_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 264544, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "szl_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 291129, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "szl_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 283152, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "szl_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 281890, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "szl_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 267925, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "szl_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 275057, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "szl_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 284878, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "szl_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 265628, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "szl_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270653, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "szl_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 276494, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "szl_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 253799, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "szl_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 296056, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + 
"max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "szl_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 259533, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "szl_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 306768, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "szl_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 279568, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "szl_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 270814, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "szl_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 277162, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + 
"min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "szl_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 269841, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "szl_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 279029, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "szl_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 250934, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "szl_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 264628, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "szl_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 293789, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 
154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "szl_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 265727, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "szl_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 267700, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "szl_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 269509, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "szl_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 275942, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "szl_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283242, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + 
"unique_sentence2": 1011 + }, + "szl_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 264215, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "szl_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 251141, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "szl_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 276632, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "szl_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 299968, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "szl_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 275411, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "szl_Latn-lmo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279037, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "szl_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 264274, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "szl_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 325655, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "szl_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 303540, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "szl_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 177390, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "szl_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 263307, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "szl_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 265956, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "szl_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 289852, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "szl_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 263476, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "szl_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 267219, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "szl_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 288480, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "szl_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 268451, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "szl_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 263480, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "szl_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 180619, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "szl_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 273734, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "szl_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 271409, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + 
"max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "szl_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 283514, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "szl_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 263772, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "szl_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 284916, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "szl_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 279711, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "szl_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 269081, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + 
"min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "szl_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 228071, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "szl_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 178066, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "szl_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 265542, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "szl_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 278628, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "szl_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 270186, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 
131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "szl_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 281301, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "szl_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 280369, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "szl_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 284543, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "szl_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 268226, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "szl_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 306382, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + 
"unique_sentence2": 1012 + }, + "szl_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 283572, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "szl_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 275988, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "szl_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 272454, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "szl_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 263799, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "szl_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272775, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "szl_Latn-lug_Latn": { + 
"num_samples": 1012, + "number_of_characters": 272449, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "szl_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 288022, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "szl_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 290095, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "szl_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 300406, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "szl_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 285733, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "szl_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 254432, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "szl_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 291180, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "szl_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 258651, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "szl_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 280328, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "szl_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 274917, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "szl_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 272210, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "szl_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 284974, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "szl_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 295549, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "szl_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 281110, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "szl_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 249160, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "szl_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 277438, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + 
"max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "szl_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 268171, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "szl_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 283626, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "szl_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 267270, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "szl_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 256959, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "szl_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 277633, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + 
"min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "szl_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270593, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "szl_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 270226, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "szl_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 239425, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "szl_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274438, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "szl_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 271528, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 
132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "szl_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 270796, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "szl_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 289033, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "szl_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 309916, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.7420948616601, + "max_sentence1_length": 372, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "vec_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 244858, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "vec_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 259624, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + 
"unique_sentence2": 1009 + }, + "vec_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 296595, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "vec_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 264295, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "vec_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 286449, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "vec_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 259932, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "vec_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 272391, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "vec_Latn-sot_Latn": { + 
"num_samples": 1012, + "number_of_characters": 291969, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "vec_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 268998, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "vec_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 275170, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "vec_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 278807, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "vec_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 290776, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "vec_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 260170, + "unique_pairs": 
1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "vec_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 286668, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "vec_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 262645, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "vec_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 255692, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "vec_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 290234, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "vec_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 261835, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "vec_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 246810, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "vec_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281228, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "vec_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 265203, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "vec_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 261964, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "vec_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 279099, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + 
"max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "vec_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 282573, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "vec_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 257064, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "vec_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 286584, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "vec_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 251197, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "vec_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 248350, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + 
"min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "vec_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 293988, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "vec_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 264529, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "vec_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 271866, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "vec_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268769, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "vec_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 266288, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "vec_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 294233, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "vec_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264017, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "vec_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 273714, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "vec_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 245314, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "vec_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 262279, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + 
"unique_sentence2": 1012 + }, + "vec_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 262696, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "vec_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 279193, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "vec_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 278999, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "vec_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 259636, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "vec_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 272720, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "vec_Latn-ssw_Latn": { + 
"num_samples": 1012, + "number_of_characters": 281963, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "vec_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267683, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "vec_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 272927, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "vec_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 261848, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "vec_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 273059, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "vec_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 267797, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "vec_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 263609, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "vec_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 274400, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "vec_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 276655, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "vec_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 270083, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "vec_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 267984, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "vec_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 242283, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "vec_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 255467, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "vec_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 260685, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "vec_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 292917, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "vec_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 249237, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + 
"max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "vec_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271064, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "vec_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 253880, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "vec_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 265410, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "vec_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 263156, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "vec_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 265887, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "vec_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 271419, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "vec_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 267511, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "vec_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 275555, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "vec_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 272209, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "vec_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 279525, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "vec_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 274264, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "vec_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 271341, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "vec_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 282560, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "vec_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 280301, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "vec_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 281388, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + 
"unique_sentence2": 1009 + }, + "vec_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 288414, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "vec_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 263020, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "vec_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 283131, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "vec_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 270019, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "vec_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 281808, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "vec_Latn-szl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 270608, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "vec_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 220369, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "vec_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 265424, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "vec_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 273365, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "vec_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 289375, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "vec_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 199202, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "vec_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 257894, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "vec_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 280502, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "vec_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 287370, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "vec_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 271832, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "vec_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 241218, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "vec_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 272612, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "vec_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 268497, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "vec_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 270165, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "vec_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 263297, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "vec_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 279764, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + 
"max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "vec_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275333, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "vec_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 257654, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "vec_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 297983, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "vec_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 249544, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "vec_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271061, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "vec_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 290875, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "vec_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 190180, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "vec_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 279231, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "vec_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 296406, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "vec_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 275858, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "vec_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 257141, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "vec_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 260410, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "vec_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 286995, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "vec_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 279018, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "vec_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 277756, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + 
"unique_sentence2": 1012 + }, + "vec_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 263791, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "vec_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 270923, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "vec_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 280744, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "vec_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 261494, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "vec_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266519, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "vec_Latn-xho_Latn": { + 
"num_samples": 1012, + "number_of_characters": 272360, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "vec_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 249665, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "vec_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 291922, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "vec_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 255399, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "vec_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 302634, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "vec_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 275434, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "vec_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 266680, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "vec_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 273028, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "vec_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 265707, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "vec_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 274895, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "vec_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 246800, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "vec_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 260494, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "vec_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 289655, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "vec_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 261593, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "vec_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 263566, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "vec_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 265375, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + 
"max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "vec_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 271808, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "vec_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279108, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "vec_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 260081, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "vec_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 247007, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "vec_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 272498, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "vec_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 295834, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "vec_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 271277, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "vec_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 274903, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "vec_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 260140, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "vec_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 321521, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 
186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "vec_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 299406, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "vec_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 173256, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "vec_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 259173, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "vec_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 261822, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "vec_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 285718, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + 
"unique_sentence2": 1012 + }, + "vec_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 259342, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "vec_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 263085, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "vec_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 284346, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "vec_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 264317, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "vec_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 259346, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "vec_Latn-zho_Hans": { + 
"num_samples": 1012, + "number_of_characters": 176485, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "vec_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 269600, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "vec_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 267275, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "vec_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 279380, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "vec_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 259638, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "vec_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 280782, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "vec_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 275577, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "vec_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 264947, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "vec_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 223937, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "vec_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 173932, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "vec_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 261408, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "vec_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 274494, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "vec_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 266052, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "vec_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 277167, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "vec_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 276235, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "vec_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 280409, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + 
"max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "vec_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 264092, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "vec_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 302248, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "vec_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 279438, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "vec_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 271854, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "vec_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 268320, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + 
"min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "vec_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 259665, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "vec_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268641, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "vec_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 268315, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "vec_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 283888, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "vec_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 285961, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 
150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "vec_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 296272, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "vec_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 281599, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "vec_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 250298, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "vec_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 287046, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "vec_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 254517, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + 
"unique_sentence2": 1010 + }, + "vec_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 276194, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "vec_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 270783, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "vec_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 268076, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "vec_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 280840, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "vec_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 291415, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "vec_Latn-azj_Latn": { + 
"num_samples": 1012, + "number_of_characters": 276976, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "vec_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 245026, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "vec_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 273304, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "vec_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 264037, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "vec_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 279492, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "vec_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 263136, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "vec_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 252825, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "vec_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 273499, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "vec_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266459, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "vec_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 266092, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "vec_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 235291, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "vec_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270304, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "vec_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 267394, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "vec_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 266662, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "vec_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 284899, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "vec_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 305782, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 131.65711462450594, + 
"max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "amh_Ethi-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 198753, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "amh_Ethi-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 213519, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "amh_Ethi-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 250490, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "amh_Ethi-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 218190, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "amh_Ethi-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 240344, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + 
"min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "amh_Ethi-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 213827, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "amh_Ethi-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 226286, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "amh_Ethi-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 245864, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "amh_Ethi-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 222893, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "amh_Ethi-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 229065, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + 
"max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "amh_Ethi-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 232702, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "amh_Ethi-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 244671, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "amh_Ethi-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 214065, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "amh_Ethi-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 240563, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "amh_Ethi-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 216540, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + 
"amh_Ethi-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 209587, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "amh_Ethi-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 244129, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "amh_Ethi-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 215730, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "amh_Ethi-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 200705, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "amh_Ethi-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 235123, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "amh_Ethi-eng_Latn": { + "num_samples": 1012, + 
"number_of_characters": 219098, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "amh_Ethi-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 215859, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "amh_Ethi-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 232994, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "amh_Ethi-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 236468, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "amh_Ethi-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 210959, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "amh_Ethi-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 240479, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "amh_Ethi-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 205092, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "amh_Ethi-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 202245, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "amh_Ethi-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 247883, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "amh_Ethi-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 218424, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "amh_Ethi-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 225761, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "amh_Ethi-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 222664, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "amh_Ethi-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 220183, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "amh_Ethi-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 248128, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "amh_Ethi-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 217912, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "amh_Ethi-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 227609, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + 
"unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "amh_Ethi-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 199209, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "amh_Ethi-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 216174, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "amh_Ethi-est_Latn": { + "num_samples": 1012, + "number_of_characters": 216591, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "amh_Ethi-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 233088, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "amh_Ethi-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 232894, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "amh_Ethi-min_Arab": { + "num_samples": 1012, + "number_of_characters": 213531, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "amh_Ethi-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 226615, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "amh_Ethi-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 235858, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "amh_Ethi-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 221578, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "amh_Ethi-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 226822, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + 
"max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "amh_Ethi-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 215743, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "amh_Ethi-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 226954, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "amh_Ethi-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 221692, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "amh_Ethi-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 217504, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "amh_Ethi-min_Latn": { + "num_samples": 1012, + "number_of_characters": 228295, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + 
"amh_Ethi-por_Latn": { + "num_samples": 1012, + "number_of_characters": 230550, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "amh_Ethi-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 223978, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "amh_Ethi-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 221879, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "amh_Ethi-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 196178, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "amh_Ethi-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 209362, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "amh_Ethi-ewe_Latn": { + "num_samples": 1012, + 
"number_of_characters": 214580, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "amh_Ethi-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 246812, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "amh_Ethi-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 203132, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "amh_Ethi-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 224959, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "amh_Ethi-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 207775, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "amh_Ethi-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 219305, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "amh_Ethi-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 217051, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "amh_Ethi-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 219782, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "amh_Ethi-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 225314, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "amh_Ethi-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 221406, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "amh_Ethi-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 229450, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "amh_Ethi-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 226104, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "amh_Ethi-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 233420, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "amh_Ethi-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 228159, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "amh_Ethi-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 225236, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "amh_Ethi-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 236455, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "amh_Ethi-als_Latn": { + "num_samples": 1012, + "number_of_characters": 234196, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "amh_Ethi-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 235283, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "amh_Ethi-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 242309, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "amh_Ethi-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 216915, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "amh_Ethi-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 237026, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "amh_Ethi-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 223914, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "amh_Ethi-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 235703, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "amh_Ethi-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 224503, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "amh_Ethi-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 220369, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "amh_Ethi-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 219319, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + 
"max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "amh_Ethi-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 227260, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "amh_Ethi-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 243270, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "amh_Ethi-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 153097, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "amh_Ethi-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 211789, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "amh_Ethi-run_Latn": { + "num_samples": 1012, + "number_of_characters": 234397, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + 
"amh_Ethi-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 241265, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "amh_Ethi-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 225727, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "amh_Ethi-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 195113, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "amh_Ethi-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 226507, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "amh_Ethi-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 222392, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "amh_Ethi-jav_Latn": { + "num_samples": 1012, + 
"number_of_characters": 224060, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "amh_Ethi-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 217192, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "amh_Ethi-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 233659, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "amh_Ethi-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 229228, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "amh_Ethi-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 211549, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "amh_Ethi-war_Latn": { + "num_samples": 1012, + "number_of_characters": 251878, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "amh_Ethi-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 203439, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "amh_Ethi-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 224956, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "amh_Ethi-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 244770, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "amh_Ethi-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 144075, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "amh_Ethi-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 233126, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "amh_Ethi-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 250301, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "amh_Ethi-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 229753, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "amh_Ethi-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 211036, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "amh_Ethi-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 214305, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "amh_Ethi-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 240890, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "amh_Ethi-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 232913, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "amh_Ethi-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 231651, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "amh_Ethi-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 217686, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "amh_Ethi-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 224818, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "amh_Ethi-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 234639, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "amh_Ethi-san_Deva": { + "num_samples": 1012, + "number_of_characters": 215389, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "amh_Ethi-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 220414, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "amh_Ethi-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 226255, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "amh_Ethi-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 203560, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "amh_Ethi-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 245817, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + 
"max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "amh_Ethi-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 209294, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "amh_Ethi-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 256529, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "amh_Ethi-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 229329, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "amh_Ethi-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 220575, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "amh_Ethi-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 226923, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + 
"amh_Ethi-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 219602, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "amh_Ethi-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 228790, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "amh_Ethi-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 200695, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "amh_Ethi-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 214389, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "amh_Ethi-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 243550, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "amh_Ethi-kam_Latn": { + "num_samples": 1012, + 
"number_of_characters": 215488, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "amh_Ethi-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 217461, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "amh_Ethi-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 219270, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "amh_Ethi-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 225703, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "amh_Ethi-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 233003, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "amh_Ethi-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 213976, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "amh_Ethi-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 200902, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "amh_Ethi-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 226393, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "amh_Ethi-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 249729, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "amh_Ethi-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 225172, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "amh_Ethi-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 228798, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "amh_Ethi-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 214035, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "amh_Ethi-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 275416, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "amh_Ethi-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 253301, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "amh_Ethi-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 127151, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "amh_Ethi-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 213068, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "amh_Ethi-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 215717, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "amh_Ethi-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 239613, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "amh_Ethi-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 213237, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "amh_Ethi-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 216980, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "amh_Ethi-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 238241, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "amh_Ethi-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 218212, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "amh_Ethi-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 213241, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "amh_Ethi-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 130380, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "amh_Ethi-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 223495, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "amh_Ethi-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 221170, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + 
"max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "amh_Ethi-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 233275, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "amh_Ethi-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 213533, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "amh_Ethi-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 234677, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "amh_Ethi-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 229472, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "amh_Ethi-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 218842, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + 
"amh_Ethi-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 177832, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "amh_Ethi-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 127827, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "amh_Ethi-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 215303, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "amh_Ethi-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 228389, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "amh_Ethi-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 219947, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "amh_Ethi-kat_Geor": { + "num_samples": 1012, + 
"number_of_characters": 231062, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "amh_Ethi-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 230130, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "amh_Ethi-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 234304, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "amh_Ethi-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 217987, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "amh_Ethi-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 256143, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "amh_Ethi-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 233333, + "unique_pairs": 1012, + 
"min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "amh_Ethi-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 225749, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "amh_Ethi-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 222215, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "amh_Ethi-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 213560, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "amh_Ethi-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 222536, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "amh_Ethi-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 222210, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 
86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "amh_Ethi-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 237783, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "amh_Ethi-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 239856, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "amh_Ethi-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 250167, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "amh_Ethi-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 235494, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "amh_Ethi-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 204193, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + 
"unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "amh_Ethi-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 240941, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "amh_Ethi-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 208412, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "amh_Ethi-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 230089, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "amh_Ethi-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 224678, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "amh_Ethi-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 221971, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "amh_Ethi-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 234735, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "amh_Ethi-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 245310, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "amh_Ethi-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 230871, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "amh_Ethi-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 198921, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "amh_Ethi-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 227199, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + 
"max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "amh_Ethi-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 217932, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "amh_Ethi-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 233387, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "amh_Ethi-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 217031, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "amh_Ethi-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 206720, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "amh_Ethi-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 227394, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + 
"amh_Ethi-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 220354, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "amh_Ethi-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 219987, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "amh_Ethi-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 189186, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "amh_Ethi-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 224199, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "amh_Ethi-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 221289, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "amh_Ethi-pan_Guru": { + "num_samples": 1012, + 
"number_of_characters": 220557, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "amh_Ethi-som_Latn": { + "num_samples": 1012, + "number_of_characters": 238794, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "amh_Ethi-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 259677, + "unique_pairs": 1012, + "min_sentence1_length": 26, + "average_sentence1_length": 86.09881422924902, + "max_sentence1_length": 219, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bos_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 243808, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bos_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 258574, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "bos_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 295545, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bos_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 263245, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bos_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 285399, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "bos_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 258882, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bos_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 271341, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bos_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 290919, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bos_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 267948, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bos_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 274120, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bos_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 277757, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bos_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 289726, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bos_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 259120, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "bos_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 285618, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bos_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 261595, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bos_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 254642, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bos_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 289184, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bos_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 260785, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bos_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 245760, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bos_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280178, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bos_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 264153, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bos_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 260914, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bos_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 278049, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + 
"unique_sentence2": 1012 + }, + "bos_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 281523, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bos_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 256014, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "bos_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 285534, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bos_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 250147, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "bos_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 247300, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bos_Latn-bem_Latn": { + 
"num_samples": 1012, + "number_of_characters": 292938, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "bos_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 263479, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bos_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 270816, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bos_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267719, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bos_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 265238, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bos_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 293183, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bos_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262967, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bos_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 272664, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "bos_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 244264, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bos_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 261229, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bos_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 261646, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bos_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 278143, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bos_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 277949, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bos_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 258586, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bos_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 271670, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bos_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 280913, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + 
"max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bos_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266633, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bos_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 271877, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bos_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 260798, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bos_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 272009, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bos_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 266747, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bos_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 262559, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bos_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 273350, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bos_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 275605, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bos_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 269033, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bos_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 266934, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bos_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 241233, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bos_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 254417, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "bos_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 259635, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bos_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 291867, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bos_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 248187, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + 
"unique_sentence2": 1012 + }, + "bos_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270014, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "bos_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 252830, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "bos_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 264360, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bos_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 262106, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bos_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 264837, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bos_Latn-bjn_Latn": { + 
"num_samples": 1012, + "number_of_characters": 270369, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bos_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 266461, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bos_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 274505, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bos_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 271159, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bos_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 278475, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bos_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 273214, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bos_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 270291, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bos_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 281510, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bos_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 279251, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bos_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 280338, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "bos_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 287364, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bos_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 261970, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bos_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 282081, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bos_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 268969, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bos_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 280758, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bos_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 269558, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + 
"max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bos_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 265424, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bos_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 219319, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bos_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 272315, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bos_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 288325, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "bos_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 198152, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + 
"min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "bos_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 256844, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bos_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 279452, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bos_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 286320, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bos_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 270782, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bos_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 240168, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bos_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 271562, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bos_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 267447, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "bos_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 269115, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bos_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 262247, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bos_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 278714, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "bos_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274283, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bos_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 256604, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bos_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 296933, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bos_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 248494, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bos_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270011, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bos_Latn-fra_Latn": { + 
"num_samples": 1012, + "number_of_characters": 289825, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bos_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 189130, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bos_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 278181, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bos_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 295356, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bos_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 274808, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bos_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 256091, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bos_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 259360, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bos_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 285945, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bos_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 277968, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bos_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 276706, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bos_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 262741, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "bos_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 269873, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bos_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 279694, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bos_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 260444, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bos_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265469, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bos_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 271310, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + 
"max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bos_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 248615, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bos_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 290872, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bos_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 254349, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "bos_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 301584, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "bos_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 274384, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bos_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 265630, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bos_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 271978, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bos_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 264657, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bos_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 273845, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bos_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 245750, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bos_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 259444, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bos_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 288605, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bos_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 260543, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "bos_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 262516, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bos_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 264325, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + 
"unique_sentence2": 1012 + }, + "bos_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 270758, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bos_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278058, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "bos_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 259031, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bos_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 245957, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "bos_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 271448, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bos_Latn-gla_Latn": { + 
"num_samples": 1012, + "number_of_characters": 294784, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bos_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 270227, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bos_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 273853, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bos_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 259090, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bos_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 320471, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bos_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 298356, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bos_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 172206, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "bos_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 258123, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bos_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 260772, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bos_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 284668, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bos_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 258292, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "bos_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 262035, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bos_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 283296, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bos_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 263267, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bos_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 258296, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bos_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 175435, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + 
"max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bos_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 268550, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bos_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 266225, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bos_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 278330, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bos_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 258588, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "bos_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 279732, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "bos_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 274527, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bos_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 263897, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bos_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 222887, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bos_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 172882, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bos_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 260358, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bos_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 273444, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bos_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 265002, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bos_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 276117, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bos_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 275185, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bos_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 279359, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "bos_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 263042, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bos_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 301198, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "bos_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 278388, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bos_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 270804, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "bos_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 267270, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bos_Latn-guj_Gujr": { + 
"num_samples": 1012, + "number_of_characters": 258615, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bos_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267591, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bos_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 267265, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bos_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 282838, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bos_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 284911, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bos_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 295222, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bos_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 280549, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "bos_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 249248, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bos_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 285996, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bos_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 253467, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bos_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 275144, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "bos_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 269733, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bos_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 267026, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bos_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 279790, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bos_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 290365, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bos_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 275926, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + 
"max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bos_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 243976, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bos_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 272254, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bos_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 262987, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bos_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 278442, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bos_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 262086, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "bos_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 251775, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bos_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 272449, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bos_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265409, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "bos_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 265042, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bos_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 234241, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bos_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269254, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bos_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 266344, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bos_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 265612, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bos_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 283849, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bos_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 304732, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 130.6195652173913, + "max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + 
"unique_sentence2": 1012 + }, + "fin_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 251749, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "fin_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 266515, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "fin_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 303486, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fin_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 271186, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "fin_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 293340, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "fin_Latn-mag_Deva": { + 
"num_samples": 1012, + "number_of_characters": 266823, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "fin_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 279282, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fin_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 298860, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "fin_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 275889, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "fin_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 282061, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fin_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 285698, + "unique_pairs": 
1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fin_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 297667, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "fin_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 267061, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "fin_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 293559, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "fin_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 269536, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fin_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 262583, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fin_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 297125, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fin_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 268726, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "fin_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 253701, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "fin_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288119, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fin_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 272094, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + 
"max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fin_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 268855, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fin_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 285990, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fin_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 289464, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fin_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 263955, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "fin_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 293475, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fin_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 258088, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "fin_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 255241, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "fin_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 300879, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "fin_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 271420, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fin_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 278757, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "fin_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275660, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fin_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 273179, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fin_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 301124, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "fin_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270908, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fin_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 280605, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + 
"unique_sentence2": 1011 + }, + "fin_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 252205, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "fin_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 269170, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fin_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 269587, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "fin_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 286084, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fin_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 285890, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "fin_Latn-min_Arab": { + 
"num_samples": 1012, + "number_of_characters": 266527, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fin_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 279611, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fin_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 288854, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fin_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274574, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fin_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 279818, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fin_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 268739, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fin_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 279950, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fin_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 274688, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fin_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 270500, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fin_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 281291, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fin_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 283546, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fin_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 276974, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fin_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 274875, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fin_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 249174, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "fin_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 262358, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "fin_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 267576, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + 
"max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fin_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 299808, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "fin_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 256128, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "fin_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277955, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "fin_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 260771, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "fin_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 272301, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fin_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 270047, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fin_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 272778, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fin_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 278310, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fin_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 274402, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fin_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 282446, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fin_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 279100, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fin_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 286416, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "fin_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 281155, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fin_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 278232, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "fin_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 289451, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + 
"unique_sentence2": 1012 + }, + "fin_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 287192, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "fin_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 288279, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "fin_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 295305, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "fin_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 269911, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fin_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 290022, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fin_Latn-mni_Beng": { + 
"num_samples": 1012, + "number_of_characters": 276910, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fin_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 288699, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fin_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 277499, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "fin_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 273365, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fin_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 227260, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "fin_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 272315, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fin_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 296266, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "fin_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 206093, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "fin_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 264785, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "fin_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 287393, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "fin_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 294261, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "fin_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 278723, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "fin_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 248109, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "fin_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 279503, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fin_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 275388, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "fin_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 277056, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + 
"max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fin_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 270188, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fin_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 286655, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fin_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282224, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fin_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 264545, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fin_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 304874, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + 
"min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fin_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 256435, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fin_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277952, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fin_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 297766, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "fin_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 197071, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "fin_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 286122, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fin_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 303297, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fin_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 282749, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "fin_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 264032, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fin_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 267301, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fin_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 293886, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + 
"unique_sentence2": 1012 + }, + "fin_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 285909, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fin_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 284647, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "fin_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 270682, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "fin_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 277814, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "fin_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 287635, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fin_Latn-san_Deva": { + 
"num_samples": 1012, + "number_of_characters": 268385, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "fin_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273410, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fin_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 279251, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "fin_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 256556, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fin_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 298813, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fin_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 262290, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "fin_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 309525, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "fin_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 282325, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fin_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 273571, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fin_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 279919, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fin_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 272598, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fin_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 281786, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fin_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 253691, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "fin_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 267385, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fin_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 296546, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "fin_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 268484, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + 
"max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "fin_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 270457, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "fin_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 272266, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "fin_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 278699, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fin_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285999, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "fin_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 266972, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + 
"min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fin_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 253898, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "fin_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 279389, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fin_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 302725, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "fin_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 278168, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fin_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 281794, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fin_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 267031, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fin_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 328412, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "fin_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 306297, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fin_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 180147, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "fin_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 266064, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + 
"unique_sentence2": 1012 + }, + "fin_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 268713, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fin_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 292609, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "fin_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 266233, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "fin_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 269976, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fin_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 291237, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "fin_Latn-sin_Sinh": { + 
"num_samples": 1012, + "number_of_characters": 271208, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fin_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 266237, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fin_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 183376, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "fin_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 276491, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fin_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 274166, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fin_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 286271, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fin_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 266529, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "fin_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 287673, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "fin_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 282468, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "fin_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 271838, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fin_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 230828, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "fin_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 180823, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "fin_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 268299, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fin_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 281385, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fin_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 272943, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "fin_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 284058, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + 
"max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fin_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 283126, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "fin_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 287300, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fin_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 270983, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fin_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 309139, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "fin_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 286329, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fin_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 278745, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "fin_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 275211, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fin_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 266556, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fin_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275532, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fin_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 275206, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "fin_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 290779, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fin_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 292852, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "fin_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 303163, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fin_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 288490, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "fin_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 257189, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + 
"unique_sentence2": 1012 + }, + "fin_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 293937, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fin_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 261408, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "fin_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 283085, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "fin_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 277674, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fin_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 274967, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fin_Latn-sna_Latn": { + 
"num_samples": 1012, + "number_of_characters": 287731, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "fin_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 298306, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fin_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 283867, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "fin_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 251917, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "fin_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 280195, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "fin_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 270928, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fin_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 286383, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fin_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 270027, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "fin_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 259716, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "fin_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 280390, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fin_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273350, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "fin_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 272983, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "fin_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 242182, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "fin_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277195, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fin_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 274285, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fin_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 273553, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + 
"max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fin_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 291790, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "fin_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 312673, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 138.46640316205534, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ita_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 267759, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ita_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 282525, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ita_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 319496, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + 
"min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ita_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 287196, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ita_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 309350, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ita_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 282833, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ita_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 295292, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ita_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 314870, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ita_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 291899, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ita_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 298071, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ita_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 301708, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ita_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 313677, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ita_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 283071, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 
1011 + }, + "ita_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 309569, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ita_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 285546, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ita_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 278593, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ita_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 313135, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ita_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 284736, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ita_Latn-acm_Arab": { + "num_samples": 1012, + 
"number_of_characters": 269711, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ita_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304129, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ita_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 288104, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ita_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 284865, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ita_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 302000, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ita_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 305474, + "unique_pairs": 1012, + 
"min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ita_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 279965, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ita_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 309485, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ita_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 274098, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ita_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 271251, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ita_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 316889, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 
154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ita_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 287430, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ita_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 294767, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ita_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291670, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ita_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 289189, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ita_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 317134, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ita_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286918, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ita_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 296615, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ita_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 268215, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ita_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 285180, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ita_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 285597, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ita_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 302094, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ita_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 301900, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ita_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 282537, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ita_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 295621, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ita_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 304864, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ita_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290584, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ita_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 295828, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ita_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 284749, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ita_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 295960, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ita_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 290698, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "ita_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 286510, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ita_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 297301, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ita_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 299556, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ita_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 292984, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ita_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 290885, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ita_Latn-ajp_Arab": { + "num_samples": 1012, + 
"number_of_characters": 265184, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ita_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 278368, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ita_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 283586, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ita_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 315818, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ita_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 272138, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ita_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293965, + "unique_pairs": 1012, + 
"min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ita_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 276781, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ita_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 288311, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ita_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 286057, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ita_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 288788, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ita_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 294320, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 
154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ita_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 290412, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ita_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 298456, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ita_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 295110, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ita_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 302426, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ita_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 297165, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ita_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 294242, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ita_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 305461, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ita_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 303202, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ita_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 304289, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ita_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 311315, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + 
"average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ita_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 285921, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ita_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 306032, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ita_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 292920, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ita_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 304709, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ita_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 293509, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + 
"max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ita_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 289375, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ita_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 243270, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ita_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 288325, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ita_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 296266, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ita_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 222103, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"ita_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 280795, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ita_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 303403, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ita_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 310271, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ita_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 294733, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ita_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 264119, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ita_Latn-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 295513, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ita_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 291398, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ita_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 293066, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ita_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 286198, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ita_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 302665, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ita_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298234, + "unique_pairs": 1012, + 
"min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ita_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 280555, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ita_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 320884, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ita_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 272445, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ita_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293962, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ita_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 313776, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 
154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ita_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 213081, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ita_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 302132, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ita_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 319307, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ita_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 298759, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ita_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 280042, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ita_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 283311, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ita_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 309896, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ita_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 301919, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ita_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 300657, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ita_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 286692, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ita_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 293824, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ita_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 303645, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ita_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 284395, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ita_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289420, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ita_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 295261, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ita_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 272566, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ita_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 314823, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ita_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 278300, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ita_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 325535, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ita_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 298335, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, 
+ "ita_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 289581, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ita_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 295929, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ita_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 288608, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ita_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 297796, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ita_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 269701, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ita_Latn-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283395, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ita_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 312556, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ita_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 284494, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ita_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 286467, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ita_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 288276, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ita_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 294709, + "unique_pairs": 1012, + 
"min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ita_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302009, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ita_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 282982, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ita_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 269908, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ita_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 295399, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ita_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 318735, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 
154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ita_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 294178, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ita_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 297804, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ita_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 283041, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ita_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 344422, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ita_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 322307, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + 
"unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ita_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 196157, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ita_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 282074, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ita_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 284723, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ita_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 308619, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ita_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 282243, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ita_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 285986, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ita_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 307247, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ita_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 287218, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ita_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 282247, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ita_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 199386, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + 
"max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ita_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 292501, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ita_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 290176, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ita_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 302281, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ita_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 282539, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ita_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 303683, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, 
+ "ita_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 298478, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ita_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 287848, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ita_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 246838, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ita_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 196833, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ita_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 284309, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ita_Latn-cym_Latn": { + "num_samples": 1012, + 
"number_of_characters": 297395, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ita_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 288953, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ita_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 300068, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ita_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 299136, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ita_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 303310, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ita_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 286993, + "unique_pairs": 1012, + 
"min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ita_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 325149, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ita_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 302339, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ita_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 294755, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ita_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 291221, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ita_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 282566, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 
154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ita_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291542, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ita_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 291216, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ita_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 306789, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ita_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 308862, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ita_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 319173, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + 
"unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ita_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 304500, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ita_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 273199, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ita_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 309947, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ita_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 277418, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ita_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 299095, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ita_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 293684, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ita_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 290977, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ita_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 303741, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ita_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 314316, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ita_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 299877, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + 
"max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ita_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 267927, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ita_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 296205, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ita_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 286938, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ita_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 302393, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ita_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 286037, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, 
+ "ita_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 275726, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ita_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 296400, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ita_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289360, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ita_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 288993, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ita_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 258192, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ita_Latn-khk_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 293205, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ita_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 290295, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ita_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 289563, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ita_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 307800, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ita_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 328683, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 154.28656126482213, + "max_sentence1_length": 455, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kor_Hang-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 177586, + "unique_pairs": 1012, + 
"min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kor_Hang-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 192352, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kor_Hang-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 229323, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kor_Hang-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 197023, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kor_Hang-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 219177, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kor_Hang-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 192660, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 
65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kor_Hang-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 205119, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kor_Hang-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 224697, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kor_Hang-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 201726, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kor_Hang-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 207898, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kor_Hang-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 211535, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kor_Hang-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 223504, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kor_Hang-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 192898, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kor_Hang-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 219396, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kor_Hang-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 195373, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kor_Hang-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 188420, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kor_Hang-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 222962, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kor_Hang-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 194563, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kor_Hang-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 179538, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kor_Hang-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 213956, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kor_Hang-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 197931, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "kor_Hang-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 194692, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kor_Hang-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 211827, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kor_Hang-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 215301, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kor_Hang-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 189792, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kor_Hang-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 219312, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kor_Hang-tzm_Tfng": { + 
"num_samples": 1012, + "number_of_characters": 183925, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kor_Hang-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 181078, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kor_Hang-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 226716, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kor_Hang-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 197257, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kor_Hang-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 204594, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kor_Hang-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 201497, + 
"unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kor_Hang-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 199016, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kor_Hang-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 226961, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kor_Hang-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 196745, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kor_Hang-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 206442, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kor_Hang-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 178042, + "unique_pairs": 1012, + "min_sentence1_length": 20, + 
"average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kor_Hang-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 195007, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kor_Hang-est_Latn": { + "num_samples": 1012, + "number_of_characters": 195424, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kor_Hang-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 211921, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kor_Hang-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 211727, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kor_Hang-min_Arab": { + "num_samples": 1012, + "number_of_characters": 192364, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + 
"max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kor_Hang-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 205448, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kor_Hang-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 214691, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kor_Hang-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 200411, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kor_Hang-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 205655, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kor_Hang-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 194576, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kor_Hang-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 205787, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kor_Hang-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 200525, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kor_Hang-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 196337, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kor_Hang-min_Latn": { + "num_samples": 1012, + "number_of_characters": 207128, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kor_Hang-por_Latn": { + "num_samples": 1012, + "number_of_characters": 209383, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kor_Hang-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 202811, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kor_Hang-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 200712, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kor_Hang-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 175011, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kor_Hang-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 188195, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kor_Hang-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 193413, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + 
"unique_sentence2": 1012 + }, + "kor_Hang-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 225645, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kor_Hang-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 181965, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kor_Hang-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 203792, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kor_Hang-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 186608, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kor_Hang-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 198138, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kor_Hang-urd_Arab": { + 
"num_samples": 1012, + "number_of_characters": 195884, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kor_Hang-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 198615, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kor_Hang-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 204147, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kor_Hang-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 200239, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kor_Hang-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 208283, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kor_Hang-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 204937, + 
"unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kor_Hang-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 212253, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kor_Hang-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 206992, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kor_Hang-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 204069, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kor_Hang-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 215288, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kor_Hang-als_Latn": { + "num_samples": 1012, + "number_of_characters": 213029, + "unique_pairs": 1012, + "min_sentence1_length": 20, + 
"average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kor_Hang-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 214116, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kor_Hang-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 221142, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kor_Hang-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 195748, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kor_Hang-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 215859, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kor_Hang-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 202747, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + 
"max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kor_Hang-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 214536, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kor_Hang-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 203336, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kor_Hang-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 199202, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kor_Hang-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 153097, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kor_Hang-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 198152, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kor_Hang-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 206093, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kor_Hang-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 222103, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kor_Hang-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 190622, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kor_Hang-run_Latn": { + "num_samples": 1012, + "number_of_characters": 213230, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kor_Hang-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 220098, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kor_Hang-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 204560, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kor_Hang-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 173946, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kor_Hang-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 205340, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kor_Hang-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 201225, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kor_Hang-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 202893, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + 
"unique_sentence2": 1012 + }, + "kor_Hang-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 196025, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kor_Hang-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 212492, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kor_Hang-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 208061, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kor_Hang-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 190382, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kor_Hang-war_Latn": { + "num_samples": 1012, + "number_of_characters": 230711, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kor_Hang-arb_Arab": { + 
"num_samples": 1012, + "number_of_characters": 182272, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kor_Hang-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 203789, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kor_Hang-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 223603, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kor_Hang-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 122908, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kor_Hang-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 211959, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kor_Hang-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 229134, + 
"unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kor_Hang-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 208586, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kor_Hang-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 189869, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kor_Hang-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 193138, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kor_Hang-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 219723, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kor_Hang-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 211746, + "unique_pairs": 1012, + "min_sentence1_length": 20, + 
"average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kor_Hang-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 210484, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kor_Hang-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 196519, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kor_Hang-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 203651, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kor_Hang-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 213472, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kor_Hang-san_Deva": { + "num_samples": 1012, + "number_of_characters": 194222, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + 
"max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kor_Hang-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 199247, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kor_Hang-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 205088, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kor_Hang-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 182393, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kor_Hang-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 224650, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kor_Hang-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 188127, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kor_Hang-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 235362, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kor_Hang-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 208162, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kor_Hang-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 199408, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kor_Hang-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 205756, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kor_Hang-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 198435, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kor_Hang-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 207623, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kor_Hang-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 179528, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kor_Hang-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 193222, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kor_Hang-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 222383, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kor_Hang-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 194321, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + 
"unique_sentence2": 1010 + }, + "kor_Hang-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 196294, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kor_Hang-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 198103, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kor_Hang-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 204536, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kor_Hang-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 211836, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kor_Hang-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 192809, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kor_Hang-arz_Arab": { + 
"num_samples": 1012, + "number_of_characters": 179735, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kor_Hang-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 205226, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kor_Hang-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 228562, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kor_Hang-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 204005, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kor_Hang-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 207631, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kor_Hang-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 192868, + 
"unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kor_Hang-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 254249, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kor_Hang-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 232134, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kor_Hang-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 105984, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kor_Hang-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 191901, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kor_Hang-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 194550, + "unique_pairs": 1012, + "min_sentence1_length": 20, + 
"average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kor_Hang-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 218446, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kor_Hang-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 192070, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kor_Hang-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 195813, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kor_Hang-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 217074, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kor_Hang-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 197045, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + 
"max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kor_Hang-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 192074, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kor_Hang-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 109213, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kor_Hang-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 202328, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kor_Hang-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 200003, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kor_Hang-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 212108, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kor_Hang-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 192366, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kor_Hang-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 213510, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kor_Hang-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 208305, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kor_Hang-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 197675, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kor_Hang-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 156665, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 
89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kor_Hang-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 106660, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kor_Hang-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 194136, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kor_Hang-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 207222, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kor_Hang-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 198780, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kor_Hang-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 209895, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + 
"unique_sentence2": 1012 + }, + "kor_Hang-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 208963, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kor_Hang-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 213137, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kor_Hang-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 196820, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kor_Hang-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 234976, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kor_Hang-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 212166, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kor_Hang-ayr_Latn": { + 
"num_samples": 1012, + "number_of_characters": 204582, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kor_Hang-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 201048, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kor_Hang-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 192393, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kor_Hang-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 201369, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kor_Hang-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 201043, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kor_Hang-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 216616, + 
"unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kor_Hang-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 218689, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kor_Hang-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 229000, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kor_Hang-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 214327, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kor_Hang-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 183026, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kor_Hang-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 219774, + "unique_pairs": 1012, + "min_sentence1_length": 20, + 
"average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kor_Hang-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 187245, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kor_Hang-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 208922, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kor_Hang-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 203511, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kor_Hang-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 200804, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kor_Hang-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 213568, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + 
"max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kor_Hang-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 224143, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kor_Hang-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 209704, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kor_Hang-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 177754, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kor_Hang-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 206032, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kor_Hang-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 196765, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kor_Hang-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 212220, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kor_Hang-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 195864, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kor_Hang-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 185553, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kor_Hang-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 206227, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kor_Hang-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 199187, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kor_Hang-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 198820, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kor_Hang-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 168019, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kor_Hang-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 203032, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kor_Hang-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 200122, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kor_Hang-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 199390, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + 
"unique_sentence2": 1012 + }, + "kor_Hang-som_Latn": { + "num_samples": 1012, + "number_of_characters": 217627, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kor_Hang-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 238510, + "unique_pairs": 1012, + "min_sentence1_length": 20, + "average_sentence1_length": 65.18280632411067, + "max_sentence1_length": 177, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "mos_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 236278, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "mos_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 251044, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "mos_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 288015, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mos_Latn-hin_Deva": { + 
"num_samples": 1012, + "number_of_characters": 255715, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "mos_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 277869, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "mos_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 251352, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "mos_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 263811, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mos_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 283389, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "mos_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 260418, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "mos_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 266590, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mos_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 270227, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mos_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 282196, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "mos_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 251590, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "mos_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 278088, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "mos_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 254065, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mos_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 247112, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mos_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 281654, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mos_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 253255, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "mos_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 238230, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + 
"max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "mos_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272648, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mos_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 256623, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mos_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 253384, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mos_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 270519, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mos_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 273993, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mos_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 248484, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "mos_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 278004, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mos_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 242617, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "mos_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 239770, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "mos_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 285408, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 
158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "mos_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 255949, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mos_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 263286, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "mos_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260189, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mos_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 257708, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mos_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 285653, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + 
"unique_sentence2": 1012 + }, + "mos_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255437, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mos_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 265134, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "mos_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 236734, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "mos_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 253699, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mos_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 254116, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "mos_Latn-hye_Armn": { + 
"num_samples": 1012, + "number_of_characters": 270613, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mos_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 270419, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "mos_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 251056, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mos_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 264140, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mos_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 273383, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mos_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259103, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mos_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 264347, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mos_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 253268, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mos_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 264479, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mos_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 259217, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mos_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 255029, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mos_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 265820, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mos_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 268075, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mos_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 261503, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mos_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 259404, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mos_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 233703, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + 
"max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "mos_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 246887, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "mos_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 252105, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mos_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 284337, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "mos_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 240657, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "mos_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262484, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "mos_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 245300, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "mos_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 256830, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mos_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 254576, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mos_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 257307, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mos_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 262839, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 
136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mos_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 258931, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mos_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 266975, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mos_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 263629, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mos_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 270945, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "mos_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 265684, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + 
"unique_sentence2": 1012 + }, + "mos_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 262761, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "mos_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 273980, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mos_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 271721, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mos_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 272808, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "mos_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 279834, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "mos_Latn-isl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 254440, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mos_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 274551, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mos_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 261439, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mos_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 273228, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mos_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 262028, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "mos_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 257894, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mos_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 211789, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "mos_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 256844, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mos_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 264785, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mos_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 280795, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "mos_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 190622, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "mos_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 271922, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "mos_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 278790, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "mos_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 263252, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "mos_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 232638, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "mos_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 264032, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + 
"max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mos_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 259917, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "mos_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 261585, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mos_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 254717, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mos_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 271184, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mos_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266753, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mos_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 249074, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mos_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 289403, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mos_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 240964, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mos_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262481, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mos_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 282295, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 
155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "mos_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 181600, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "mos_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 270651, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mos_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 287826, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mos_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 267278, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "mos_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 248561, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + 
"unique_sentence2": 1012 + }, + "mos_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 251830, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mos_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 278415, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "mos_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 270438, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mos_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 269176, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mos_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 255211, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "mos_Latn-lim_Latn": { + 
"num_samples": 1012, + "number_of_characters": 262343, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "mos_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 272164, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mos_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 252914, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "mos_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257939, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mos_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 263780, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "mos_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 241085, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mos_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 283342, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mos_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 246819, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "mos_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 294054, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "mos_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 266854, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mos_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 258100, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mos_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 264448, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mos_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 257127, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mos_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 266315, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mos_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 238220, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "mos_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 251914, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + 
"max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mos_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 281075, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "mos_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 253013, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "mos_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 254986, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "mos_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 256795, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "mos_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 263228, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mos_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270528, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "mos_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 251501, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mos_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 238427, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "mos_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 263918, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mos_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 287254, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "mos_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 262697, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mos_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 266323, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mos_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 251560, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mos_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 312941, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "mos_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 290826, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + 
"unique_sentence2": 1012 + }, + "mos_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 164676, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "mos_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 250593, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mos_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 253242, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mos_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 277138, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "mos_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 250762, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "mos_Latn-ltg_Latn": { + 
"num_samples": 1012, + "number_of_characters": 254505, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mos_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 275766, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "mos_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 255737, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mos_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 250766, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mos_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 167905, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "mos_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 261020, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mos_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 258695, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mos_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 270800, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mos_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 251058, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "mos_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 272202, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "mos_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 266997, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "mos_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 256367, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mos_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 215357, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "mos_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 165352, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "mos_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 252828, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mos_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 265914, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + 
"max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mos_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 257472, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "mos_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 268587, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mos_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 267655, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "mos_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 271829, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mos_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 255512, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mos_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 293668, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "mos_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 270858, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mos_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 263274, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "mos_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 259740, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mos_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 251085, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mos_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260061, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mos_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 259735, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "mos_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 275308, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mos_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 277381, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "mos_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 287692, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + 
"unique_sentence2": 1012 + }, + "mos_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 273019, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "mos_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 241718, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "mos_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 278466, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mos_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 245937, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "mos_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 267614, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "mos_Latn-luo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 262203, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mos_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 259496, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mos_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 272260, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "mos_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 282835, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mos_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 268396, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "mos_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 236446, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "mos_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 264724, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "mos_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 255457, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mos_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 270912, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mos_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 254556, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "mos_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 244245, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "mos_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 264919, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mos_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257879, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "mos_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 257512, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "mos_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 226711, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "mos_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261724, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + 
"max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mos_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 258814, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mos_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 258082, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mos_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 276319, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "mos_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 297202, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 123.17885375494072, + "max_sentence1_length": 342, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "run_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 258886, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + 
"min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "run_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 273652, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "run_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 310623, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "run_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 278323, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "run_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 300477, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "run_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 273960, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 
125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "run_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 286419, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "run_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 305997, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "run_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 283026, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "run_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 289198, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "run_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 292835, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + 
}, + "run_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 304804, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "run_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 274198, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "run_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 300696, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "run_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 276673, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "run_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 269720, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "run_Latn-spa_Latn": { + "num_samples": 1012, + 
"number_of_characters": 304262, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "run_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 275863, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "run_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 260838, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "run_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295256, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "run_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 279231, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "run_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 275992, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "run_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 293127, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "run_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 296601, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "run_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 271092, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "run_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 300612, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "run_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 265225, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "run_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 262378, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "run_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 308016, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "run_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 278557, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "run_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 285894, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "run_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282797, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + 
"unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "run_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 280316, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "run_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 308261, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "run_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278045, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "run_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 287742, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "run_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 259342, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + 
"average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "run_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 276307, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "run_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 276724, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "run_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 293221, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "run_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 293027, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "run_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 273664, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + 
"max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "run_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 286748, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "run_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 295991, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "run_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281711, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "run_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 286955, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "run_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 275876, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + 
"run_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 287087, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "run_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 281825, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "run_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 277637, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "run_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 288428, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "run_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 290683, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "run_Latn-sun_Latn": { + "num_samples": 1012, + 
"number_of_characters": 284111, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "run_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 282012, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "run_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 256311, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "run_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 269495, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "run_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 274713, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "run_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 306945, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "run_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 263265, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "run_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285092, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "run_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 267908, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "run_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 279438, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "run_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 277184, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "run_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 279915, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "run_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 285447, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "run_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 281539, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "run_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 289583, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "run_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 286237, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + 
"unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "run_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 293553, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "run_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 288292, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "run_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 285369, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "run_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 296588, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "run_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 294329, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + 
"average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "run_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 295416, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "run_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 302442, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "run_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 277048, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "run_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 297159, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "run_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 284047, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + 
"max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "run_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 295836, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "run_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 284636, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "run_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 280502, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "run_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 234397, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "run_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 279452, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + 
"run_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 287393, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "run_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 303403, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "run_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 213230, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "run_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 271922, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "run_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 301398, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "run_Latn-vie_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285860, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "run_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 255246, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "run_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 286640, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "run_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 282525, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "run_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 284193, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "run_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 277325, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "run_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 293792, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "run_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289361, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "run_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 271682, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "run_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 312011, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "run_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 263572, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "run_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285089, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "run_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 304903, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "run_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 204208, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "run_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 293259, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "run_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 310434, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + 
"unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "run_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 289886, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "run_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 271169, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "run_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 274438, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "run_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 301023, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "run_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 293046, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + 
"average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "run_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 291784, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "run_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 277819, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "run_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 284951, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "run_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 294772, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "run_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 275522, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + 
"max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "run_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280547, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "run_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 286388, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "run_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 263693, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "run_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 305950, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "run_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 269427, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + 
"run_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 316662, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "run_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 289462, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "run_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 280708, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "run_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 287056, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "run_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 279735, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "run_Latn-ydd_Hebr": { + "num_samples": 1012, + 
"number_of_characters": 288923, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "run_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 260828, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "run_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 274522, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "run_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 303683, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "run_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 275621, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "run_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 277594, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "run_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 279403, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "run_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 285836, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "run_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293136, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "run_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 274109, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "run_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 261035, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "run_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 286526, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "run_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 309862, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "run_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 285305, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "run_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 288931, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "run_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 274168, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + 
"unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "run_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 335549, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "run_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 313434, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "run_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 187284, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "run_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 273201, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "run_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 275850, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + 
"average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "run_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 299746, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "run_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 273370, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "run_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 277113, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "run_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 298374, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "run_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 278345, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + 
"max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "run_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 273374, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "run_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 190513, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "run_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 283628, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "run_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 281303, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "run_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 293408, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + 
"run_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 273666, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "run_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 294810, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "run_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 289605, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "run_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 278975, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "run_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 237965, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "run_Latn-zho_Hant": { + "num_samples": 1012, + 
"number_of_characters": 187960, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "run_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 275436, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "run_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 288522, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "run_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 280080, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "run_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 291195, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "run_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 290263, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "run_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 294437, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "run_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 278120, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "run_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 316276, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "run_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 293466, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "run_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 285882, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "run_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 282348, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "run_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 273693, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "run_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282669, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "run_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 282343, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "run_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 297916, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + 
"unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "run_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 299989, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "run_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 310300, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "run_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 295627, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "run_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 264326, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "run_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 301074, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + 
"average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "run_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 268545, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "run_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 290222, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "run_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 284811, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "run_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 282104, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "run_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 294868, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + 
"max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "run_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 305443, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "run_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 291004, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "run_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 259054, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "run_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 287332, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "run_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 278065, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + 
"run_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 293520, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "run_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 277164, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "run_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 266853, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "run_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 287527, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "run_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280487, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "run_Latn-dyu_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280120, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "run_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 249319, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "run_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284332, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "run_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 281422, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "run_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 280690, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "run_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 298927, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "run_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 319810, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 145.5187747035573, + "max_sentence1_length": 411, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tam_Taml-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 265754, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tam_Taml-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 280520, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tam_Taml-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 317491, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tam_Taml-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 285191, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tam_Taml-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 307345, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tam_Taml-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 280828, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tam_Taml-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 293287, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tam_Taml-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 312865, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tam_Taml-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 289894, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tam_Taml-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 296066, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tam_Taml-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 299703, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tam_Taml-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 311672, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tam_Taml-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 281066, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tam_Taml-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 307564, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tam_Taml-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 283541, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tam_Taml-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 276588, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tam_Taml-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 311130, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tam_Taml-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 282731, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tam_Taml-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 267706, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + 
"max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tam_Taml-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302124, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tam_Taml-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 286099, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tam_Taml-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 282860, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tam_Taml-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 299995, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tam_Taml-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 303469, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + 
"tam_Taml-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 277960, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tam_Taml-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 307480, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tam_Taml-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 272093, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tam_Taml-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 269246, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tam_Taml-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 314884, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tam_Taml-epo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285425, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tam_Taml-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 292762, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tam_Taml-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289665, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tam_Taml-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 287184, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tam_Taml-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 315129, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tam_Taml-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284913, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tam_Taml-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 294610, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tam_Taml-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 266210, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tam_Taml-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 283175, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tam_Taml-est_Latn": { + "num_samples": 1012, + "number_of_characters": 283592, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tam_Taml-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 300089, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tam_Taml-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 299895, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tam_Taml-min_Arab": { + "num_samples": 1012, + "number_of_characters": 280532, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tam_Taml-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 293616, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tam_Taml-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 302859, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tam_Taml-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288579, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tam_Taml-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 293823, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tam_Taml-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 282744, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tam_Taml-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 293955, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tam_Taml-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 288693, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tam_Taml-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 284505, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tam_Taml-min_Latn": { + "num_samples": 1012, + "number_of_characters": 295296, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tam_Taml-por_Latn": { + "num_samples": 1012, + "number_of_characters": 297551, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tam_Taml-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 290979, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tam_Taml-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 288880, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tam_Taml-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 263179, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + 
"max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tam_Taml-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 276363, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tam_Taml-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 281581, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tam_Taml-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 313813, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tam_Taml-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 270133, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tam_Taml-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291960, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + 
}, + "tam_Taml-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 274776, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tam_Taml-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 286306, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tam_Taml-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 284052, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tam_Taml-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 286783, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tam_Taml-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 292315, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tam_Taml-fao_Latn": { + "num_samples": 1012, + 
"number_of_characters": 288407, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tam_Taml-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 296451, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tam_Taml-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 293105, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tam_Taml-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 300421, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tam_Taml-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 295160, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tam_Taml-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 292237, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tam_Taml-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 303456, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tam_Taml-als_Latn": { + "num_samples": 1012, + "number_of_characters": 301197, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tam_Taml-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 302284, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tam_Taml-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 309310, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tam_Taml-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 283916, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tam_Taml-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 304027, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tam_Taml-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 290915, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tam_Taml-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 302704, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tam_Taml-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 291504, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tam_Taml-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 287370, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tam_Taml-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 241265, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tam_Taml-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 286320, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tam_Taml-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 294261, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tam_Taml-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 310271, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tam_Taml-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 220098, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + 
"average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tam_Taml-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 278790, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tam_Taml-run_Latn": { + "num_samples": 1012, + "number_of_characters": 301398, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tam_Taml-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 292728, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tam_Taml-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 262114, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tam_Taml-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 293508, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tam_Taml-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 289393, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tam_Taml-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 291061, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tam_Taml-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 284193, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tam_Taml-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 300660, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tam_Taml-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296229, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, 
+ "tam_Taml-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 278550, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tam_Taml-war_Latn": { + "num_samples": 1012, + "number_of_characters": 318879, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tam_Taml-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 270440, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tam_Taml-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291957, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tam_Taml-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 311771, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tam_Taml-jpn_Jpan": { + "num_samples": 1012, + 
"number_of_characters": 211076, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tam_Taml-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 300127, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tam_Taml-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 317302, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tam_Taml-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 296754, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tam_Taml-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 278037, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tam_Taml-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 281306, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tam_Taml-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 307891, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tam_Taml-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 299914, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tam_Taml-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 298652, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tam_Taml-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 284687, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tam_Taml-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 291819, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tam_Taml-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 301640, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tam_Taml-san_Deva": { + "num_samples": 1012, + "number_of_characters": 282390, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tam_Taml-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287415, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tam_Taml-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 293256, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tam_Taml-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 270561, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tam_Taml-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 312818, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tam_Taml-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 276295, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tam_Taml-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 323530, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tam_Taml-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 296330, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tam_Taml-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 287576, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tam_Taml-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 293924, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tam_Taml-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 286603, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tam_Taml-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 295791, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tam_Taml-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 267696, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tam_Taml-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 281390, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tam_Taml-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 310551, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tam_Taml-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 282489, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tam_Taml-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 284462, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tam_Taml-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 286271, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tam_Taml-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 292704, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + 
}, + "tam_Taml-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300004, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tam_Taml-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 280977, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tam_Taml-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 267903, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tam_Taml-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 293394, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tam_Taml-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 316730, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tam_Taml-kan_Knda": { + "num_samples": 1012, + 
"number_of_characters": 292173, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tam_Taml-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 295799, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tam_Taml-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 281036, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tam_Taml-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 342417, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tam_Taml-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 320302, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tam_Taml-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 194152, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tam_Taml-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 280069, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tam_Taml-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 282718, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tam_Taml-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 306614, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tam_Taml-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 280238, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tam_Taml-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 283981, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tam_Taml-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 305242, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tam_Taml-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 285213, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tam_Taml-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 280242, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tam_Taml-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 197381, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tam_Taml-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 290496, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tam_Taml-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 288171, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tam_Taml-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 300276, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tam_Taml-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 280534, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tam_Taml-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 301678, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tam_Taml-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 296473, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + 
"average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tam_Taml-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 285843, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tam_Taml-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 244833, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tam_Taml-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 194828, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tam_Taml-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 282304, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tam_Taml-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 295390, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + 
"max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tam_Taml-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 286948, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tam_Taml-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 298063, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tam_Taml-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 297131, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tam_Taml-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 301305, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tam_Taml-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 284988, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, 
+ "tam_Taml-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 323144, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tam_Taml-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 300334, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tam_Taml-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 292750, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tam_Taml-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 289216, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tam_Taml-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 280561, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tam_Taml-kaz_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 289537, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tam_Taml-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 289211, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tam_Taml-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 304784, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tam_Taml-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 306857, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tam_Taml-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 317168, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tam_Taml-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 302495, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tam_Taml-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 271194, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tam_Taml-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 307942, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tam_Taml-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 275413, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tam_Taml-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 297090, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tam_Taml-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 291679, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tam_Taml-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 288972, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tam_Taml-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 301736, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tam_Taml-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 312311, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tam_Taml-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 297872, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tam_Taml-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 265922, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tam_Taml-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 294200, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tam_Taml-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 284933, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tam_Taml-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 300388, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tam_Taml-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 284032, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tam_Taml-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 273721, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tam_Taml-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 294395, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tam_Taml-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287355, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tam_Taml-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 286988, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tam_Taml-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 256187, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tam_Taml-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291200, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + 
"max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tam_Taml-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 288290, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tam_Taml-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 287558, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tam_Taml-som_Latn": { + "num_samples": 1012, + "number_of_characters": 305795, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tam_Taml-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 326678, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "vie_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 250216, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, 
+ "vie_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 264982, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "vie_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 301953, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "vie_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 269653, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "vie_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 291807, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "vie_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 265290, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "vie_Latn-pap_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277749, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "vie_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 297327, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "vie_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 274356, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "vie_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 280528, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "vie_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 284165, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "vie_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 296134, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "vie_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 265528, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "vie_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 292026, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "vie_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 268003, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "vie_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 261050, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "vie_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 295592, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "vie_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 267193, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "vie_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 252168, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "vie_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286586, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "vie_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 270561, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "vie_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 267322, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "vie_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 284457, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "vie_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 287931, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "vie_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 262422, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "vie_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 291942, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "vie_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 256555, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + 
"average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "vie_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 253708, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "vie_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 299346, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "vie_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 269887, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "vie_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 277224, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "vie_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274127, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + 
"max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "vie_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 271646, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "vie_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 299591, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "vie_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269375, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "vie_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 279072, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "vie_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 250672, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + 
}, + "vie_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 267637, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "vie_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 268054, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "vie_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 284551, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "vie_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 284357, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "vie_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 264994, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "vie_Latn-pol_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278078, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "vie_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 287321, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "vie_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273041, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "vie_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 278285, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "vie_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 267206, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "vie_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 278417, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "vie_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 273155, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "vie_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 268967, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "vie_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 279758, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "vie_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 282013, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "vie_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 275441, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "vie_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 273342, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "vie_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 247641, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "vie_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 260825, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "vie_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 266043, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "vie_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 298275, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "vie_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 254595, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "vie_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276422, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "vie_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 259238, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "vie_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 270768, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "vie_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 268514, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "vie_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 271245, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "vie_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 276777, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "vie_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 272869, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "vie_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 280913, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "vie_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 277567, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + 
"max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "vie_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 284883, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "vie_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 279622, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "vie_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 276699, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "vie_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 287918, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "vie_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 285659, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, 
+ "vie_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 286746, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "vie_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 293772, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "vie_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 268378, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "vie_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 288489, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "vie_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 275377, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "vie_Latn-ron_Latn": { + "num_samples": 1012, + 
"number_of_characters": 287166, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "vie_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 275966, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "vie_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 271832, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "vie_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 225727, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "vie_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 270782, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "vie_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 278723, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "vie_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 294733, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "vie_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 204560, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "vie_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 263252, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "vie_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 285860, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "vie_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 292728, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "vie_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 246576, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "vie_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 277970, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "vie_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 273855, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "vie_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 275523, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "vie_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 268655, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "vie_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 285122, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "vie_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280691, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "vie_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 263012, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "vie_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 303341, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "vie_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 254902, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "vie_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276419, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "vie_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 296233, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "vie_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 195538, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "vie_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 284589, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "vie_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 301764, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + 
"max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "vie_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 281216, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "vie_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 262499, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "vie_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 265768, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "vie_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 292353, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "vie_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 284376, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, 
+ "vie_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 283114, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "vie_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 269149, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "vie_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 276281, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "vie_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 286102, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "vie_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 266852, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "vie_Latn-tat_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 271877, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "vie_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 277718, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "vie_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 255023, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "vie_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 297280, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "vie_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 260757, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "vie_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 307992, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "vie_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 280792, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "vie_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 272038, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "vie_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 278386, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "vie_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 271065, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "vie_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 280253, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "vie_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 252158, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "vie_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 265852, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "vie_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 295013, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "vie_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 266951, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "vie_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 268924, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "vie_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 270733, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "vie_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 277166, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "vie_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284466, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "vie_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 265439, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "vie_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 252365, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "vie_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 277856, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "vie_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 301192, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "vie_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 276635, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "vie_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 280261, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "vie_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 265498, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "vie_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 326879, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "vie_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 304764, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "vie_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 178614, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "vie_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 264531, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "vie_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 267180, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, 
+ "vie_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 291076, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "vie_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 264700, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "vie_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 268443, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "vie_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 289704, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "vie_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 269675, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "vie_Latn-tha_Thai": { + "num_samples": 1012, + 
"number_of_characters": 264704, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "vie_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 181843, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "vie_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 274958, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "vie_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 272633, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "vie_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 284738, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "vie_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 264996, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "vie_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 286140, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "vie_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 280935, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "vie_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 270305, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "vie_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 229295, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "vie_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 179290, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "vie_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 266766, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "vie_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 279852, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "vie_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 271410, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "vie_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 282525, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "vie_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 281593, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "vie_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 285767, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "vie_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 269450, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "vie_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 307606, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "vie_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 284796, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "vie_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 277212, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "vie_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 273678, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "vie_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 265023, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "vie_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273999, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "vie_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 273673, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "vie_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 289246, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + 
"max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "vie_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 291319, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "vie_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 301630, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "vie_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 286957, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "vie_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 255656, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "vie_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 292404, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + 
}, + "vie_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 259875, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "vie_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 281552, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "vie_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 276141, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "vie_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 273434, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "vie_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 286198, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "vie_Latn-tso_Latn": { + "num_samples": 1012, + 
"number_of_characters": 296773, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "vie_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 282334, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "vie_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 250384, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "vie_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 278662, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "vie_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 269395, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "vie_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 284850, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "vie_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 268494, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "vie_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 258183, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "vie_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 278857, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "vie_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271817, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "vie_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 271450, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "vie_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 240649, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "vie_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275662, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "vie_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 272752, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "vie_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 272020, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "vie_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 290257, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "vie_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 311140, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 136.95158102766797, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "apc_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 219602, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "apc_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 234368, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "apc_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 271339, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "apc_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 239039, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "apc_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 261193, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "apc_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 234676, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "apc_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 247135, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "apc_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 266713, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "apc_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 243742, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + 
"max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "apc_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 249914, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "apc_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 253551, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "apc_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 265520, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "apc_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 234914, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "apc_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 261412, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + 
"apc_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 237389, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "apc_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 230436, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "apc_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 264978, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "apc_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 236579, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "apc_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 221554, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "apc_Arab-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 255972, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "apc_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 239947, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "apc_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 236708, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "apc_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 253843, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "apc_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 257317, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "apc_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 231808, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "apc_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 261328, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "apc_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 225941, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "apc_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 223094, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "apc_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 268732, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "apc_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 239273, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "apc_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 246610, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "apc_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 243513, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "apc_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 241032, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "apc_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 268977, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "apc_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 238761, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "apc_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 248458, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "apc_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 220058, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "apc_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 237023, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "apc_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 237440, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "apc_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 253937, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "apc_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 253743, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "apc_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 234380, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "apc_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 247464, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "apc_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 256707, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "apc_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 242427, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "apc_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 247671, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "apc_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 236592, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "apc_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 247803, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "apc_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 242541, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "apc_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 238353, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + 
"apc_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 249144, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "apc_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 251399, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "apc_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 244827, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "apc_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 242728, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "apc_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 217027, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "apc_Arab-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 230211, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "apc_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 235429, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "apc_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 267661, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "apc_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 223981, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "apc_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 245808, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "apc_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 228624, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "apc_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 240154, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "apc_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 237900, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "apc_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 240631, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "apc_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 246163, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "apc_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 242255, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "apc_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 250299, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "apc_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 246953, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "apc_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 254269, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "apc_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 249008, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "apc_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 246085, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "apc_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 257304, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "apc_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 255045, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "apc_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 256132, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "apc_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 263158, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "apc_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 237764, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "apc_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 257875, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "apc_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 244763, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "apc_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 256552, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "apc_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 245352, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "apc_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 241218, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "apc_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 195113, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "apc_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 240168, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "apc_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 248109, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "apc_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 264119, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "apc_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 173946, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"apc_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 232638, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "apc_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 255246, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "apc_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 262114, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "apc_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 246576, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "apc_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 247356, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "apc_Arab-fon_Latn": { + "num_samples": 1012, + 
"number_of_characters": 243241, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "apc_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 244909, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "apc_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 238041, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "apc_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 254508, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "apc_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250077, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "apc_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 232398, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "apc_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 272727, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "apc_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 224288, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "apc_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 245805, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "apc_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 265619, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "apc_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 164924, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "apc_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 253975, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "apc_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 271150, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "apc_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 250602, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "apc_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 231885, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "apc_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 235154, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "apc_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 261739, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "apc_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 253762, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "apc_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 252500, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "apc_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 238535, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "apc_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 245667, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "apc_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 255488, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "apc_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 236238, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "apc_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 241263, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "apc_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 247104, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "apc_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 224409, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + 
"max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "apc_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 266666, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "apc_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 230143, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "apc_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 277378, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "apc_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 250178, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "apc_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 241424, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + 
"apc_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 247772, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "apc_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 240451, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "apc_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 249639, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "apc_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 221544, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "apc_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 235238, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "apc_Arab-gaz_Latn": { + "num_samples": 1012, + 
"number_of_characters": 264399, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "apc_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 236337, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "apc_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 238310, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "apc_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 240119, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "apc_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 246552, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "apc_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253852, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "apc_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 234825, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "apc_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 221751, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "apc_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 247242, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "apc_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 270578, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "apc_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 246021, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "apc_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 249647, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "apc_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 234884, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "apc_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 296265, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "apc_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 274150, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "apc_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 148000, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + 
"unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "apc_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 233917, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "apc_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 236566, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "apc_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 260462, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "apc_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 234086, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "apc_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 237829, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "apc_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 259090, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "apc_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 239061, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "apc_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 234090, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "apc_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 151229, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "apc_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 244344, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + 
"max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "apc_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 242019, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "apc_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 254124, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "apc_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 234382, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "apc_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 255526, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "apc_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 250321, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + 
"apc_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 239691, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "apc_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 198681, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "apc_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 148676, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "apc_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 236152, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "apc_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 249238, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "apc_Arab-grn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 240796, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "apc_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 251911, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "apc_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 250979, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "apc_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 255153, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "apc_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 238836, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "apc_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 276992, + "unique_pairs": 1012, + 
"min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "apc_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 254182, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "apc_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 246598, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "apc_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 243064, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "apc_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 234409, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "apc_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 243385, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 
106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "apc_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 243059, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "apc_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 258632, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "apc_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 260705, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "apc_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 271016, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "apc_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 256343, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "apc_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 225042, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "apc_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 261790, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "apc_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 229261, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "apc_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 250938, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "apc_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 245527, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "apc_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 242820, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "apc_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 255584, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "apc_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 266159, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "apc_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 251720, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "apc_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 219770, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "apc_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 248048, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "apc_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 238781, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "apc_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 254236, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "apc_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 237880, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "apc_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 227569, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"apc_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 248243, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "apc_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 241203, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "apc_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 240836, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "apc_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 210035, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "apc_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 245048, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "apc_Arab-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 242138, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "apc_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 241406, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "apc_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 259643, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "apc_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 280526, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 106.7005928853755, + "max_sentence1_length": 306, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bug_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 250996, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bug_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 265762, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "bug_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 302733, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bug_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 270433, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bug_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 292587, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "bug_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 266070, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bug_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 278529, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bug_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 298107, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bug_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 275136, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bug_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 281308, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bug_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 284945, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bug_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 296914, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 
1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bug_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 266308, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "bug_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 292806, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bug_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 268783, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bug_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 261830, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bug_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 296372, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bug_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 267973, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bug_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 252948, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bug_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287366, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bug_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 271341, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bug_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 268102, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bug_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 285237, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bug_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 288711, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bug_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 263202, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "bug_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 292722, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bug_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 257335, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, 
+ "bug_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 254488, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bug_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 300126, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "bug_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 270667, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bug_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 278004, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bug_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274907, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bug_Latn-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 272426, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bug_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 300371, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bug_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270155, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bug_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 279852, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "bug_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 251452, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bug_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 268417, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bug_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 268834, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bug_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 285331, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bug_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 285137, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bug_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 265774, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bug_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 278858, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bug_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 288101, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bug_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273821, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bug_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 279065, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bug_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 267986, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bug_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 279197, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bug_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 273935, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bug_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 269747, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bug_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 280538, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bug_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 282793, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bug_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 276221, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bug_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 274122, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bug_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 248421, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bug_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 261605, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "bug_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 266823, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bug_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 299055, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bug_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 255375, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "bug_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277202, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "bug_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 260018, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "bug_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 271548, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bug_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 269294, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "bug_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 272025, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bug_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 277557, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bug_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 273649, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bug_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 281693, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bug_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 278347, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bug_Latn-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285663, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bug_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 280402, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bug_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 277479, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bug_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 288698, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bug_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 286439, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bug_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 287526, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "bug_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 294552, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bug_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 269158, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bug_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 289269, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bug_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 276157, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bug_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 287946, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bug_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 276746, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bug_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 272612, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bug_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 226507, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bug_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 271562, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bug_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 279503, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bug_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 295513, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "bug_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 205340, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "bug_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 264032, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bug_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 286640, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bug_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 293508, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bug_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 277970, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bug_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 247356, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bug_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 274635, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "bug_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 276303, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bug_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 269435, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bug_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 285902, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bug_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281471, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bug_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 263792, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bug_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 304121, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bug_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 255682, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, 
+ "bug_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277199, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bug_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 297013, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bug_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 196318, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bug_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 285369, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bug_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 302544, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bug_Latn-sag_Latn": { + "num_samples": 1012, + 
"number_of_characters": 281996, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bug_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 263279, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bug_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 266548, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bug_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 293133, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bug_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 285156, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bug_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 283894, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bug_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 269929, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "bug_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 277061, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bug_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 286882, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bug_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 267632, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bug_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272657, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bug_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 278498, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bug_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 255803, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bug_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 298060, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bug_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 261537, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "bug_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 308772, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "bug_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 281572, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bug_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 272818, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bug_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 279166, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bug_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 271845, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bug_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 281033, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bug_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 252938, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bug_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 266632, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bug_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 295793, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bug_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 267731, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "bug_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 269704, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + 
"max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bug_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 271513, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "bug_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 277946, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bug_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285246, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "bug_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 266219, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bug_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 253145, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + 
"bug_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 278636, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bug_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 301972, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bug_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 277415, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bug_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 281041, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bug_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 266278, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bug_Latn-shn_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 327659, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bug_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 305544, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bug_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 179394, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "bug_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 265311, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bug_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 267960, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bug_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 291856, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bug_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 265480, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "bug_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 269223, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bug_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 290484, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bug_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 270455, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bug_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 265484, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bug_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 182623, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bug_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 275738, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bug_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 273413, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bug_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 285518, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bug_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 265776, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "bug_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 286920, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "bug_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 281715, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bug_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 271085, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bug_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 230075, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bug_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 180070, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + 
"average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bug_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 267546, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bug_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 280632, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bug_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 272190, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bug_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 283305, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bug_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 282373, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + 
"max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bug_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 286547, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bug_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 270230, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bug_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 308386, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "bug_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 285576, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bug_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 277992, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, 
+ "bug_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 274458, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bug_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 265803, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bug_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274779, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bug_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 274453, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bug_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 290026, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bug_Latn-smo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 292099, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bug_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 302410, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bug_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 287737, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "bug_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 256436, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bug_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 293184, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bug_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 260655, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bug_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 282332, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "bug_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 276921, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bug_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 274214, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bug_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 286978, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bug_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 297553, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bug_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 283114, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bug_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 251164, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bug_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 279442, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bug_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 270175, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bug_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 285630, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bug_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 269274, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "bug_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 258963, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bug_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 279637, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bug_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272597, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "bug_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 272230, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bug_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 241429, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bug_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276442, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bug_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 273532, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bug_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 272800, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bug_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 291037, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + 
"max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bug_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 311920, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 137.72233201581028, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "fon_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 246881, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "fon_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 261647, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "fon_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 298618, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fon_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 266318, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, 
+ "fon_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 288472, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "fon_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 261955, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "fon_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 274414, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fon_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 293992, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "fon_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 271021, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "fon_Latn-ace_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277193, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fon_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 280830, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fon_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 292799, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "fon_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 262193, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "fon_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 288691, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "fon_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 264668, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fon_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 257715, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fon_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 292257, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fon_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 263858, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "fon_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 248833, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "fon_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283251, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fon_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 267226, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fon_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 263987, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fon_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 281122, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fon_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 284596, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fon_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 259087, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "fon_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 288607, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fon_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 253220, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "fon_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 250373, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "fon_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 296011, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "fon_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 266552, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fon_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 273889, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "fon_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270792, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fon_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 268311, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fon_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 296256, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "fon_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266040, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fon_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 275737, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "fon_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 247337, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "fon_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 264302, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fon_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 264719, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "fon_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 281216, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + 
}, + "fon_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 281022, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "fon_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 261659, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fon_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 274743, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fon_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 283986, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fon_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269706, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fon_Latn-afr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 274950, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fon_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 263871, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fon_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 275082, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fon_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 269820, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fon_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 265632, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fon_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 276423, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fon_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 278678, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fon_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 272106, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fon_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 270007, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fon_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 244306, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "fon_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 257490, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "fon_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 262708, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fon_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 294940, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "fon_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 251260, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "fon_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273087, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "fon_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 255903, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "fon_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 267433, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fon_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 265179, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fon_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 267910, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fon_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 273442, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fon_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 269534, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fon_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 277578, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fon_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 274232, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fon_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 281548, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "fon_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 276287, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fon_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 273364, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "fon_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 284583, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fon_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 282324, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "fon_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 283411, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "fon_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 290437, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "fon_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 265043, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, 
+ "fon_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 285154, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fon_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 272042, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fon_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 283831, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fon_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 272631, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "fon_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 268497, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fon_Latn-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 222392, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "fon_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 267447, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fon_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 275388, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fon_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 291398, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "fon_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 201225, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "fon_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 259917, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "fon_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 282525, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "fon_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 289393, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "fon_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 273855, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "fon_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 243241, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "fon_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 274635, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fon_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 272188, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fon_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 265320, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fon_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 281787, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fon_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277356, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fon_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 259677, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fon_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 300006, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fon_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 251567, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fon_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273084, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fon_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 292898, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "fon_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 192203, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + 
"average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "fon_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 281254, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fon_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 298429, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fon_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 277881, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "fon_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 259164, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fon_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 262433, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + 
"max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fon_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 289018, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "fon_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 281041, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fon_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 279779, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "fon_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 265814, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "fon_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 272946, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + 
}, + "fon_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 282767, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fon_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 263517, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "fon_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268542, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fon_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 274383, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "fon_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 251688, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fon_Latn-ceb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 293945, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fon_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 257422, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "fon_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 304657, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "fon_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 277457, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fon_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 268703, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fon_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 275051, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fon_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 267730, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fon_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 276918, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fon_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 248823, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "fon_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 262517, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fon_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 291678, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "fon_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 263616, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "fon_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 265589, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "fon_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 267398, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "fon_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 273831, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fon_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281131, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "fon_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 262104, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fon_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 249030, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "fon_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 274521, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fon_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 297857, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "fon_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 273300, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fon_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 276926, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fon_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 262163, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fon_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 323544, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "fon_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 301429, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fon_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 175279, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + 
"max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "fon_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 261196, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fon_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 263845, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fon_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 287741, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "fon_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 261365, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "fon_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 265108, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, 
+ "fon_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 286369, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "fon_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 266340, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fon_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 261369, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fon_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 178508, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "fon_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 271623, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fon_Latn-crh_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269298, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fon_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 281403, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fon_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 261661, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "fon_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 282805, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "fon_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 277600, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "fon_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 266970, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fon_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 225960, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "fon_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 175955, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "fon_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 263431, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fon_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 276517, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fon_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 268075, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "fon_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 279190, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fon_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 278258, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "fon_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 282432, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fon_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 266115, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fon_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 304271, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "fon_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 281461, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fon_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 273877, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "fon_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 270343, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fon_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 261688, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fon_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270664, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fon_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 270338, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "fon_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 285911, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fon_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 287984, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "fon_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 298295, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fon_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 283622, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + 
"max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "fon_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 252321, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "fon_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 289069, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fon_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 256540, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "fon_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 278217, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "fon_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 272806, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "fon_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 270099, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fon_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 282863, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "fon_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 293438, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fon_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 278999, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "fon_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 247049, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "fon_Latn-hau_Latn": { + "num_samples": 1012, + 
"number_of_characters": 275327, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "fon_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 266060, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fon_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 281515, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fon_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 265159, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "fon_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 254848, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "fon_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 275522, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fon_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268482, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "fon_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 268115, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "fon_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 237314, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "fon_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272327, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fon_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 269417, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fon_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 268685, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fon_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 286922, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "fon_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 307805, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.65612648221344, + "max_sentence1_length": 481, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "jav_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 248549, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "jav_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 263315, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "jav_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 300286, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "jav_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 267986, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "jav_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 290140, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "jav_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 263623, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "jav_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 276082, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "jav_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 295660, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "jav_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 272689, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "jav_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 278861, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "jav_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 282498, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "jav_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 294467, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 
464, + "unique_sentence2": 1012 + }, + "jav_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 263861, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "jav_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 290359, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "jav_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 266336, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "jav_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 259383, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "jav_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 293925, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "jav_Latn-twi_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 265526, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "jav_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 250501, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "jav_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284919, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "jav_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 268894, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "jav_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 265655, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "jav_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 282790, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "jav_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 286264, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "jav_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 260755, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "jav_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 290275, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "jav_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 254888, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "jav_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 252041, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "jav_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 297679, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "jav_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 268220, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "jav_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 275557, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "jav_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272460, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "jav_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 269979, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "jav_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 297924, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "jav_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267708, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "jav_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 277405, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "jav_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 249005, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "jav_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 265970, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "jav_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 266387, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "jav_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 282884, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "jav_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 282690, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "jav_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 263327, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "jav_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 276411, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "jav_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 285654, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "jav_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271374, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "jav_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 276618, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "jav_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 265539, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "jav_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 276750, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "jav_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 271488, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "jav_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 267300, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "jav_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 278091, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "jav_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 280346, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "jav_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 273774, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "jav_Latn-umb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 271675, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "jav_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 245974, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "jav_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 259158, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "jav_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 264376, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "jav_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 296608, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "jav_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 252928, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "jav_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274755, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "jav_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 257571, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "jav_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 269101, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "jav_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 266847, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "jav_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 269578, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "jav_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 275110, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "jav_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 271202, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "jav_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 279246, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "jav_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 275900, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "jav_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 283216, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "jav_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 277955, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "jav_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 275032, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "jav_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 286251, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "jav_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 283992, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "jav_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 285079, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "jav_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 292105, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "jav_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 266711, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "jav_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 286822, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "jav_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 273710, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "jav_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 285499, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "jav_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 274299, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "jav_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 270165, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "jav_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 224060, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "jav_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 269115, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "jav_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 277056, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + 
"unique_sentence2": 1012 + }, + "jav_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 293066, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "jav_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 202893, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "jav_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 261585, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "jav_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 284193, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "jav_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 291061, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "jav_Latn-vie_Latn": { + 
"num_samples": 1012, + "number_of_characters": 275523, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "jav_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 244909, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "jav_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 276303, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "jav_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 272188, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "jav_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 266988, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "jav_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 283455, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "jav_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279024, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "jav_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 261345, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "jav_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 301674, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "jav_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 253235, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "jav_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274752, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "jav_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 294566, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "jav_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 193871, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "jav_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 282922, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "jav_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 300097, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "jav_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 279549, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "jav_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 260832, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "jav_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 264101, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "jav_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 290686, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "jav_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 282709, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "jav_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 281447, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + 
"min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "jav_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 267482, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "jav_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 274614, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "jav_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 284435, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "jav_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 265185, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "jav_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270210, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "jav_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 276051, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "jav_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 253356, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "jav_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 295613, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "jav_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 259090, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "jav_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 306325, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + 
"unique_sentence2": 1012 + }, + "jav_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 279125, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "jav_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 270371, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "jav_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 276719, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "jav_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 269398, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "jav_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 278586, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "jav_Latn-ary_Arab": { + 
"num_samples": 1012, + "number_of_characters": 250491, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "jav_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 264185, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "jav_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 293346, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "jav_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 265284, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "jav_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 267257, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "jav_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 269066, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "jav_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 275499, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "jav_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282799, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "jav_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 263772, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "jav_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 250698, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "jav_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 276189, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "jav_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 299525, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "jav_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 274968, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "jav_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 278594, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "jav_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 263831, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "jav_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 325212, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "jav_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 303097, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "jav_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 176947, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "jav_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 262864, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "jav_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 265513, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "jav_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 289409, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "jav_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 263033, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "jav_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 266776, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "jav_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 288037, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "jav_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 268008, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "jav_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 263037, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "jav_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 180176, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "jav_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 273291, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "jav_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 270966, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "jav_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 283071, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "jav_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 263329, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + 
"unique_sentence2": 1012 + }, + "jav_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 284473, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "jav_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 279268, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "jav_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 268638, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "jav_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 227628, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "jav_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 177623, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "jav_Latn-awa_Deva": { + 
"num_samples": 1012, + "number_of_characters": 265099, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "jav_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 278185, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "jav_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 269743, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "jav_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 280858, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "jav_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 279926, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "jav_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 284100, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "jav_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 267783, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "jav_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 305939, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "jav_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 283129, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "jav_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 275545, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "jav_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 272011, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "jav_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 263356, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "jav_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272332, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "jav_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 272006, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "jav_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 287579, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "jav_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 289652, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "jav_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 299963, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "jav_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 285290, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "jav_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 253989, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "jav_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 290737, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "jav_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 258208, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "jav_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 279885, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "jav_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 274474, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "jav_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 271767, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "jav_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 284531, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "jav_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 295106, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "jav_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 280667, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "jav_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 248717, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "jav_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 276995, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "jav_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 267728, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "jav_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 283183, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + 
"unique_sentence2": 1012 + }, + "jav_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 266827, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "jav_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 256516, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "jav_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 277190, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "jav_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270150, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "jav_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 269783, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "jav_Latn-heb_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 238982, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "jav_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273995, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "jav_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 271085, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "jav_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 270353, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "jav_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 288590, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "jav_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 309473, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 135.30434782608697, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "lao_Laoo-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 241681, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "lao_Laoo-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 256447, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "lao_Laoo-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 293418, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lao_Laoo-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 261118, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "lao_Laoo-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 283272, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "lao_Laoo-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 256755, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "lao_Laoo-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 269214, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lao_Laoo-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 288792, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "lao_Laoo-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 265821, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "lao_Laoo-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 271993, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lao_Laoo-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 275630, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lao_Laoo-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 287599, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "lao_Laoo-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 256993, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "lao_Laoo-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 283491, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "lao_Laoo-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 259468, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lao_Laoo-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 252515, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lao_Laoo-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 287057, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lao_Laoo-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 258658, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "lao_Laoo-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 243633, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "lao_Laoo-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278051, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lao_Laoo-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 262026, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lao_Laoo-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 258787, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lao_Laoo-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 275922, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lao_Laoo-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 279396, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lao_Laoo-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 253887, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + 
"unique_sentence2": 1012 + }, + "lao_Laoo-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 283407, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lao_Laoo-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 248020, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "lao_Laoo-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 245173, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "lao_Laoo-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 290811, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "lao_Laoo-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 261352, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lao_Laoo-hun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 268689, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "lao_Laoo-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265592, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lao_Laoo-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 263111, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lao_Laoo-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 291056, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "lao_Laoo-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260840, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lao_Laoo-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 270537, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "lao_Laoo-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 242137, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "lao_Laoo-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 259102, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lao_Laoo-est_Latn": { + "num_samples": 1012, + "number_of_characters": 259519, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "lao_Laoo-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 276016, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lao_Laoo-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 275822, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "lao_Laoo-min_Arab": { + "num_samples": 1012, + "number_of_characters": 256459, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lao_Laoo-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 269543, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lao_Laoo-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 278786, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lao_Laoo-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264506, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lao_Laoo-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 269750, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lao_Laoo-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 258671, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lao_Laoo-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 269882, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lao_Laoo-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 264620, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lao_Laoo-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 260432, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lao_Laoo-min_Latn": { + "num_samples": 1012, + "number_of_characters": 271223, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lao_Laoo-por_Latn": { + "num_samples": 1012, + "number_of_characters": 273478, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lao_Laoo-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 266906, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lao_Laoo-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 264807, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lao_Laoo-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 239106, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "lao_Laoo-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 252290, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "lao_Laoo-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 257508, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lao_Laoo-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 289740, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "lao_Laoo-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 246060, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "lao_Laoo-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267887, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "lao_Laoo-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 250703, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + 
"unique_sentence2": 1012 + }, + "lao_Laoo-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 262233, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lao_Laoo-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 259979, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lao_Laoo-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 262710, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lao_Laoo-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 268242, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lao_Laoo-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 264334, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lao_Laoo-ind_Latn": { + 
"num_samples": 1012, + "number_of_characters": 272378, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lao_Laoo-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 269032, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lao_Laoo-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 276348, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "lao_Laoo-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 271087, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lao_Laoo-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 268164, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "lao_Laoo-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 279383, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lao_Laoo-als_Latn": { + "num_samples": 1012, + "number_of_characters": 277124, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lao_Laoo-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 278211, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "lao_Laoo-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 285237, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "lao_Laoo-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 259843, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lao_Laoo-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 279954, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lao_Laoo-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 266842, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lao_Laoo-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 278631, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lao_Laoo-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 267431, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "lao_Laoo-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 263297, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lao_Laoo-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 217192, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "lao_Laoo-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 262247, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lao_Laoo-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 270188, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lao_Laoo-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 286198, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "lao_Laoo-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 196025, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "lao_Laoo-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 254717, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "lao_Laoo-run_Latn": { + "num_samples": 1012, + "number_of_characters": 277325, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "lao_Laoo-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 284193, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "lao_Laoo-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 268655, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "lao_Laoo-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 238041, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "lao_Laoo-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 269435, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lao_Laoo-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 265320, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "lao_Laoo-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 266988, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lao_Laoo-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 276587, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lao_Laoo-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272156, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lao_Laoo-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 254477, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + 
"unique_sentence2": 1012 + }, + "lao_Laoo-war_Latn": { + "num_samples": 1012, + "number_of_characters": 294806, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lao_Laoo-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 246367, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lao_Laoo-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267884, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lao_Laoo-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 287698, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "lao_Laoo-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 187003, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "lao_Laoo-lij_Latn": { + 
"num_samples": 1012, + "number_of_characters": 276054, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lao_Laoo-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 293229, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lao_Laoo-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 272681, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "lao_Laoo-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 253964, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lao_Laoo-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 257233, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lao_Laoo-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 283818, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "lao_Laoo-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 275841, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lao_Laoo-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 274579, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lao_Laoo-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 260614, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "lao_Laoo-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 267746, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "lao_Laoo-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 277567, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lao_Laoo-san_Deva": { + "num_samples": 1012, + "number_of_characters": 258317, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "lao_Laoo-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263342, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lao_Laoo-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 269183, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "lao_Laoo-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 246488, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lao_Laoo-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 288745, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lao_Laoo-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 252222, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "lao_Laoo-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 299457, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "lao_Laoo-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 272257, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lao_Laoo-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 263503, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lao_Laoo-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 269851, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lao_Laoo-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 262530, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lao_Laoo-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 271718, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lao_Laoo-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 243623, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "lao_Laoo-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 257317, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lao_Laoo-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 286478, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "lao_Laoo-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 258416, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "lao_Laoo-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 260389, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "lao_Laoo-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 262198, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "lao_Laoo-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 268631, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lao_Laoo-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275931, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + 
"unique_sentence2": 1011 + }, + "lao_Laoo-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 256904, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lao_Laoo-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 243830, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "lao_Laoo-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 269321, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lao_Laoo-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 292657, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "lao_Laoo-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 268100, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lao_Laoo-lmo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 271726, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lao_Laoo-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 256963, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lao_Laoo-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 318344, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "lao_Laoo-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 296229, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lao_Laoo-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 170079, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "lao_Laoo-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 255996, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lao_Laoo-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 258645, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lao_Laoo-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 282541, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "lao_Laoo-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 256165, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "lao_Laoo-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 259908, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lao_Laoo-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 281169, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "lao_Laoo-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 261140, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lao_Laoo-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 256169, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lao_Laoo-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 173308, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "lao_Laoo-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 266423, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lao_Laoo-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 264098, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lao_Laoo-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 276203, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lao_Laoo-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 256461, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "lao_Laoo-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 277605, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "lao_Laoo-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 272400, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "lao_Laoo-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 261770, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lao_Laoo-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 220760, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "lao_Laoo-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 170755, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "lao_Laoo-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 258231, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lao_Laoo-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 271317, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lao_Laoo-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 262875, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "lao_Laoo-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 273990, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lao_Laoo-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 273058, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "lao_Laoo-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 277232, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lao_Laoo-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 260915, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lao_Laoo-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 299071, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + 
"unique_sentence2": 1012 + }, + "lao_Laoo-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 276261, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lao_Laoo-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 268677, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "lao_Laoo-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 265143, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lao_Laoo-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 256488, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lao_Laoo-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265464, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lao_Laoo-lug_Latn": { + 
"num_samples": 1012, + "number_of_characters": 265138, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "lao_Laoo-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 280711, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lao_Laoo-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 282784, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "lao_Laoo-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 293095, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lao_Laoo-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 278422, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "lao_Laoo-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 247121, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "lao_Laoo-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 283869, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lao_Laoo-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 251340, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "lao_Laoo-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 273017, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "lao_Laoo-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 267606, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lao_Laoo-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 264899, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lao_Laoo-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 277663, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "lao_Laoo-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 288238, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lao_Laoo-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 273799, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "lao_Laoo-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 241849, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "lao_Laoo-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 270127, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "lao_Laoo-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 260860, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lao_Laoo-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 276315, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lao_Laoo-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 259959, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "lao_Laoo-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 249648, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "lao_Laoo-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 270322, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lao_Laoo-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263282, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "lao_Laoo-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 262915, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "lao_Laoo-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 232114, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "lao_Laoo-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267127, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lao_Laoo-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 264217, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lao_Laoo-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 263485, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lao_Laoo-som_Latn": { + "num_samples": 1012, + "number_of_characters": 281722, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "lao_Laoo-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 302605, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 128.5177865612648, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "mri_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 258148, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "mri_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272914, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + 
"unique_sentence2": 1009 + }, + "mri_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309885, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mri_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 277585, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "mri_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 299739, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "mri_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 273222, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "mri_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 285681, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mri_Latn-sot_Latn": { + 
"num_samples": 1012, + "number_of_characters": 305259, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "mri_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 282288, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "mri_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 288460, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mri_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 292097, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mri_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 304066, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "mri_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 273460, + "unique_pairs": 1012, 
+ "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "mri_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299958, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "mri_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275935, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mri_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268982, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mri_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 303524, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mri_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 275125, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "mri_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 260100, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "mri_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294518, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mri_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 278493, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mri_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 275254, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mri_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 292389, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mri_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 295863, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mri_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 270354, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "mri_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299874, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mri_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 264487, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "mri_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 261640, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "mri_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 307278, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "mri_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 277819, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mri_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 285156, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "mri_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282059, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mri_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 279578, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + 
"max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mri_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 307523, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "mri_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277307, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mri_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 287004, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "mri_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 258604, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "mri_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 275569, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + 
"mri_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275986, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "mri_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 292483, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mri_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 292289, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "mri_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 272926, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mri_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 286010, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mri_Latn-ssw_Latn": { + "num_samples": 1012, + 
"number_of_characters": 295253, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mri_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280973, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mri_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 286217, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mri_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 275138, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mri_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 286349, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mri_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 281087, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mri_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276899, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mri_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 287690, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mri_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289945, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mri_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 283373, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mri_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 281274, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mri_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 255573, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "mri_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 268757, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "mri_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273975, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mri_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 306207, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "mri_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 262527, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "mri_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284354, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "mri_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 267170, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "mri_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 278700, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mri_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 276446, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mri_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 279177, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mri_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 284709, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mri_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 280801, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mri_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 288845, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mri_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 285499, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mri_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 292815, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + 
"max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "mri_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 287554, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mri_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 284631, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "mri_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295850, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mri_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 293591, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mri_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 294678, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + 
"mri_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 301704, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "mri_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 276310, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mri_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 296421, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mri_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 283309, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mri_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 295098, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mri_Latn-szl_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283898, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "mri_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 279764, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mri_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 233659, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "mri_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 278714, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mri_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 286655, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mri_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 302665, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "mri_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 212492, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "mri_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 271184, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "mri_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293792, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "mri_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 300660, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "mri_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 285122, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "mri_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 254508, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "mri_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285902, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mri_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281787, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "mri_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 283455, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mri_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 276587, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mri_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288623, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mri_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270944, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mri_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 311273, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mri_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262834, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mri_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284351, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mri_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 304165, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "mri_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 203470, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "mri_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 292521, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mri_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 309696, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mri_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 289148, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + 
"max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "mri_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 270431, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mri_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 273700, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mri_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 300285, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "mri_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 292308, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mri_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 291046, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + 
"mri_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 277081, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "mri_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 284213, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "mri_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 294034, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mri_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 274784, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "mri_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279809, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mri_Latn-xho_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285650, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "mri_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 262955, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mri_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 305212, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mri_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 268689, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "mri_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315924, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "mri_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 288724, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mri_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279970, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mri_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 286318, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mri_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278997, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mri_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 288185, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mri_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 260090, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "mri_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273784, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mri_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302945, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "mri_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 274883, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "mri_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276856, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "mri_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 278665, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "mri_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 285098, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mri_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292398, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "mri_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 273371, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mri_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 260297, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "mri_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285788, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mri_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 309124, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "mri_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 284567, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mri_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 288193, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mri_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 273430, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mri_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334811, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + 
"max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "mri_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 312696, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mri_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 186546, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "mri_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 272463, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mri_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 275112, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mri_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 299008, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + 
"mri_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 272632, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "mri_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 276375, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mri_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 297636, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "mri_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 277607, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mri_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 272636, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mri_Latn-zho_Hans": { + "num_samples": 1012, + 
"number_of_characters": 189775, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "mri_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282890, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mri_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 280565, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mri_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 292670, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mri_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272928, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "mri_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 294072, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "mri_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288867, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "mri_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 278237, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mri_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 237227, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "mri_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 187222, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "mri_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 274698, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mri_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 287784, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mri_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 279342, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "mri_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 290457, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mri_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 289525, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "mri_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 293699, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mri_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 277382, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mri_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 315538, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "mri_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 292728, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mri_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 285144, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "mri_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 281610, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mri_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272955, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mri_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281931, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mri_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 281605, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "mri_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 297178, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mri_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 299251, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + 
"max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "mri_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 309562, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mri_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294889, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "mri_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 263588, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "mri_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 300336, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mri_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267807, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + 
"mri_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 289484, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "mri_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 284073, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mri_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 281366, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mri_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 294130, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "mri_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 304705, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mri_Latn-azj_Latn": { + "num_samples": 1012, + 
"number_of_characters": 290266, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "mri_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 258316, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "mri_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 286594, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "mri_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 277327, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mri_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 292782, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mri_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 276426, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "mri_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 266115, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "mri_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286789, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mri_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279749, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "mri_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 279382, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "mri_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 248581, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "mri_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283594, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mri_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 280684, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mri_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 279952, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mri_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 298189, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "mri_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 319072, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 144.7895256916996, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 253717, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "rus_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 268483, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "rus_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 305454, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "rus_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 273154, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "rus_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 295308, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + 
"average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "rus_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 268791, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "rus_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 281250, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "rus_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 300828, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 277857, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 284029, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 
355, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 287666, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 299635, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "rus_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 269029, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "rus_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 295527, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "rus_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 271504, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "rus_Cyrl-pbt_Arab": 
{ + "num_samples": 1012, + "number_of_characters": 264551, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "rus_Cyrl-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 299093, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "rus_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 270694, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "rus_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 255669, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "rus_Cyrl-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290087, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "rus_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 274062, 
+ "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "rus_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 270823, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 287958, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "rus_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 291432, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "rus_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 265923, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "rus_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 295443, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 260056, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "rus_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 257209, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "rus_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 302847, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "rus_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 273388, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "rus_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 280725, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277628, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "rus_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 275147, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "rus_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 303092, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "rus_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272876, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "rus_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 282573, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "rus_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 254173, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 271138, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "rus_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 271555, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "rus_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 288052, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 287858, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "rus_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 268495, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "rus_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 281579, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 290822, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276542, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "rus_Cyrl-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 281786, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + 
"unique_sentence2": 1012 + }, + "rus_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 270707, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "rus_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 281918, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 276656, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 272468, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "rus_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 283259, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "rus_Cyrl-por_Latn": { + 
"num_samples": 1012, + "number_of_characters": 285514, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "rus_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 278942, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "rus_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 276843, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 251142, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "rus_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 264326, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 269544, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 301776, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "rus_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 258096, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "rus_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279923, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "rus_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 262739, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "rus_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 274269, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "rus_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 272015, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "rus_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 274746, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "rus_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 280278, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "rus_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 276370, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 284414, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "rus_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 281068, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "rus_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 288384, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "rus_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 283123, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "rus_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 280200, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "rus_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 291419, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "rus_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 289160, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "rus_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 290247, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "rus_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 297273, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "rus_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 271879, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 291990, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "rus_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 278878, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 290667, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "rus_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 279467, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "rus_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 275333, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "rus_Cyrl-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 229228, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + 
"unique_sentence2": 1012 + }, + "rus_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 274283, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "rus_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 282224, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 298234, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 208061, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "rus_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 266753, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "rus_Cyrl-run_Latn": { + 
"num_samples": 1012, + "number_of_characters": 289361, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "rus_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 296229, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "rus_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 280691, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "rus_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 250077, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "rus_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 281471, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "rus_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 277356, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "rus_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 279024, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "rus_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 272156, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "rus_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 288623, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "rus_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 266513, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "rus_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 306842, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "rus_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 258403, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "rus_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279920, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "rus_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 299734, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "rus_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 199039, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "rus_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 288090, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "rus_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 305265, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "rus_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 284717, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "rus_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 266000, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "rus_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 269269, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "rus_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 295854, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "rus_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 287877, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "rus_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 286615, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 272650, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "rus_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 279782, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "rus_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 289603, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "rus_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 270353, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "rus_Cyrl-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275378, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "rus_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 281219, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 258524, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 300781, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + 
"unique_sentence2": 1012 + }, + "rus_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 264258, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 311493, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "rus_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 284293, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "rus_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 275539, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "rus_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 281887, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tel_Telu": { + 
"num_samples": 1012, + "number_of_characters": 274566, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 283754, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 255659, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "rus_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 269353, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "rus_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 298514, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 270452, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "rus_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 272425, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "rus_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 274234, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "rus_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 280667, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287967, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "rus_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 268940, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "rus_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 255866, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "rus_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 281357, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "rus_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 304693, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "rus_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 280136, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "rus_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 283762, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "rus_Cyrl-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 268999, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "rus_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 330380, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 308265, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "rus_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 182115, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "rus_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 268032, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 270681, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "rus_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 294577, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 268201, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 271944, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "rus_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 293205, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "rus_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 273176, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 268205, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "rus_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 185344, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 278459, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "rus_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 276134, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + 
"unique_sentence2": 1012 + }, + "rus_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 288239, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 268497, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 289641, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "rus_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 284436, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "rus_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 273806, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tir_Ethi": { + 
"num_samples": 1012, + "number_of_characters": 232796, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "rus_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 182791, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "rus_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 270267, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "rus_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 283353, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "rus_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 274911, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "rus_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 286026, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "rus_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 285094, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "rus_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 289268, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "rus_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 272951, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 311107, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "rus_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 288297, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 280713, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "rus_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 277179, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "rus_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 268524, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "rus_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277500, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "rus_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 277174, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + 
"max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "rus_Cyrl-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 292747, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "rus_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 294820, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "rus_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 305131, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "rus_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 290458, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "rus_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 259157, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "rus_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 295905, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "rus_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 263376, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "rus_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 285053, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "rus_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 279642, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "rus_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 276935, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "rus_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 289699, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 300274, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "rus_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 285835, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "rus_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 253885, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "rus_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 282163, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + 
"unique_sentence2": 1012 + }, + "rus_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 272896, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "rus_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 288351, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "rus_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 271995, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "rus_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 261684, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 282358, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "rus_Cyrl-bak_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 275318, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "rus_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 274951, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "rus_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 244150, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "rus_Cyrl-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279163, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "rus_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 276253, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "rus_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 275521, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "rus_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 293758, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "rus_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 314641, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 140.41106719367588, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "taq_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 236038, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "taq_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 250804, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "taq_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 287775, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "taq_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 255475, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "taq_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 277629, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "taq_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 251112, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "taq_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 263571, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "taq_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 283149, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + 
"max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "taq_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 260178, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "taq_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 266350, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "taq_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 269987, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "taq_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 281956, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "taq_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 251350, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "taq_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 277848, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "taq_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 253825, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "taq_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 246872, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "taq_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 281414, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "taq_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 253015, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "taq_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 237990, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "taq_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272408, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "taq_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 256383, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "taq_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 253144, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "taq_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 270279, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + 
"unique_sentence2": 1012 + }, + "taq_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 273753, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "taq_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 248244, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "taq_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 277764, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "taq_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 242377, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "taq_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 239530, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "taq_Latn-bem_Latn": { + 
"num_samples": 1012, + "number_of_characters": 285168, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "taq_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 255709, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "taq_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 263046, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "taq_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259949, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "taq_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 257468, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "taq_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 285413, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "taq_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255197, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "taq_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 264894, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "taq_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 236494, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "taq_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 253459, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "taq_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 253876, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "taq_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 270373, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "taq_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 270179, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "taq_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 250816, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "taq_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 263900, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "taq_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 273143, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + 
"max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "taq_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258863, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "taq_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 264107, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "taq_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 253028, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "taq_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 264239, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "taq_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 258977, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "taq_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 254789, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "taq_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 265580, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "taq_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 267835, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "taq_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 261263, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "taq_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 259164, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "taq_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 233463, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "taq_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 246647, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "taq_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 251865, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "taq_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 284097, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "taq_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 240417, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + 
"unique_sentence2": 1012 + }, + "taq_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262244, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "taq_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 245060, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "taq_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 256590, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "taq_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 254336, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "taq_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 257067, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "taq_Latn-bjn_Latn": { + 
"num_samples": 1012, + "number_of_characters": 262599, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "taq_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 258691, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "taq_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 266735, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "taq_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 263389, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "taq_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 270705, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "taq_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 265444, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "taq_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 262521, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "taq_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 273740, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "taq_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 271481, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "taq_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 272568, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "taq_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 279594, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "taq_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 254200, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "taq_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 274311, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "taq_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 261199, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "taq_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 272988, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "taq_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 261788, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + 
"max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "taq_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 257654, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "taq_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 211549, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "taq_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 256604, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "taq_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 264545, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "taq_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 280555, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + 
"min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "taq_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 190382, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "taq_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 249074, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "taq_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 271682, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "taq_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 278550, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "taq_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 263012, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "taq_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 232398, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "taq_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 263792, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "taq_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 259677, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "taq_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 261345, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "taq_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 254477, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "taq_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 270944, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "taq_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266513, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "taq_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 289163, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "taq_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 240724, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "taq_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262241, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "taq_Latn-fra_Latn": { + 
"num_samples": 1012, + "number_of_characters": 282055, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "taq_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 181360, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "taq_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 270411, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "taq_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 287586, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "taq_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 267038, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "taq_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 248321, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "taq_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 251590, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "taq_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 278175, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "taq_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 270198, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "taq_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 268936, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "taq_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 254971, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "taq_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 262103, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "taq_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 271924, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "taq_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 252674, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "taq_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257699, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "taq_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 263540, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + 
"max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "taq_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 240845, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "taq_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 283102, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "taq_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 246579, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "taq_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 293814, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "taq_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 266614, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "taq_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 257860, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "taq_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 264208, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "taq_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 256887, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "taq_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 266075, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "taq_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 237980, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "taq_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 251674, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "taq_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 280835, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "taq_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 252773, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "taq_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 254746, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "taq_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 256555, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + 
"unique_sentence2": 1012 + }, + "taq_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 262988, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "taq_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270288, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "taq_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 251261, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "taq_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 238187, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "taq_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 263678, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "taq_Latn-gla_Latn": { + 
"num_samples": 1012, + "number_of_characters": 287014, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "taq_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 262457, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "taq_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 266083, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "taq_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 251320, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "taq_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 312701, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "taq_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 290586, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "taq_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 164436, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "taq_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 250353, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "taq_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 253002, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "taq_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 276898, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "taq_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 250522, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "taq_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 254265, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "taq_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 275526, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "taq_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 255497, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "taq_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 250526, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "taq_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 167665, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + 
"max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "taq_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 260780, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "taq_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 258455, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "taq_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 270560, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "taq_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 250818, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "taq_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 271962, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "taq_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 266757, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "taq_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 256127, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "taq_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 215117, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "taq_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 165112, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "taq_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 252588, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "taq_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 265674, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "taq_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 257232, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "taq_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 268347, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "taq_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 267415, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "taq_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 271589, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "taq_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 255272, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "taq_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 293428, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "taq_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 270618, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "taq_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 263034, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "taq_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 259500, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "taq_Latn-guj_Gujr": { + 
"num_samples": 1012, + "number_of_characters": 250845, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "taq_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259821, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "taq_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 259495, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "taq_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 275068, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "taq_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 277141, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "taq_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 287452, + 
"unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "taq_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 272779, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "taq_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 241478, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "taq_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 278226, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "taq_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 245697, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "taq_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 267374, + "unique_pairs": 1012, + "min_sentence1_length": 32, + 
"average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "taq_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 261963, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "taq_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 259256, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "taq_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 272020, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "taq_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 282595, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "taq_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 268156, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + 
"max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "taq_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 236206, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "taq_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 264484, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "taq_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 255217, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "taq_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 270672, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "taq_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 254316, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "taq_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 244005, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "taq_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 264679, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "taq_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257639, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "taq_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 257272, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "taq_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 226471, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "taq_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261484, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "taq_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 258574, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "taq_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 257842, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "taq_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 276079, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "taq_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 296962, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.94169960474308, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + 
"unique_sentence2": 1012 + }, + "war_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 276367, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "war_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 291133, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "war_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 328104, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "war_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 295804, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "war_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 317958, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "war_Latn-mag_Deva": { + 
"num_samples": 1012, + "number_of_characters": 291441, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "war_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 303900, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "war_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 323478, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "war_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 300507, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "war_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 306679, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "war_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 310316, + "unique_pairs": 
1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "war_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 322285, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "war_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 291679, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "war_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 318177, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "war_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 294154, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "war_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 287201, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "war_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 321743, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "war_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 293344, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "war_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 278319, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "war_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 312737, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "war_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 296712, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + 
"max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "war_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 293473, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "war_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 310608, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "war_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 314082, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "war_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 288573, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "war_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 318093, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "war_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 282706, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "war_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 279859, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "war_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 325497, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "war_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 296038, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "war_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 303375, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "war_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300278, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "war_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 297797, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "war_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 325742, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "war_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295526, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "war_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 305223, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + 
"unique_sentence2": 1011 + }, + "war_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 276823, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "war_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 293788, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "war_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 294205, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "war_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 310702, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "war_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 310508, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "war_Latn-min_Arab": { + 
"num_samples": 1012, + "number_of_characters": 291145, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "war_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 304229, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "war_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 313472, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "war_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299192, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "war_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 304436, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "war_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 293357, + 
"unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "war_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 304568, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "war_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 299306, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "war_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 295118, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "war_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 305909, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "war_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 308164, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "war_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 301592, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "war_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 299493, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "war_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 273792, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "war_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 286976, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "war_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 292194, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + 
"max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "war_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 324426, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "war_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 280746, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "war_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302573, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "war_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 285389, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "war_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 296919, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "war_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 294665, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "war_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 297396, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "war_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 302928, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "war_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 299020, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "war_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 307064, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "war_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 303718, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "war_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 311034, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "war_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 305773, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "war_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 302850, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "war_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 314069, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + 
"unique_sentence2": 1012 + }, + "war_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 311810, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "war_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 312897, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "war_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 319923, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "war_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 294529, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "war_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 314640, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "war_Latn-mni_Beng": { + 
"num_samples": 1012, + "number_of_characters": 301528, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "war_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 313317, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "war_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 302117, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "war_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 297983, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "war_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 251878, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "war_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 296933, + 
"unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "war_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 304874, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "war_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 320884, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "war_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 230711, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "war_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 289403, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "war_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 312011, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "war_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 318879, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "war_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 303341, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "war_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 272727, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "war_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 304121, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "war_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 300006, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + 
"max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "war_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 301674, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "war_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 294806, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "war_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 311273, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "war_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306842, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "war_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 289163, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + 
"min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "war_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 281053, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "war_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302570, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "war_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 322384, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "war_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 221689, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "war_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 310740, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "war_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 327915, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "war_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 307367, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "war_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 288650, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "war_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 291919, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "war_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 318504, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + 
"unique_sentence2": 1012 + }, + "war_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 310527, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "war_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 309265, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "war_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 295300, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "war_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 302432, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "war_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 312253, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "war_Latn-san_Deva": { + 
"num_samples": 1012, + "number_of_characters": 293003, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "war_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298028, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "war_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 303869, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "war_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 281174, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "war_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 323431, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "war_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 286908, + 
"unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "war_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 334143, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "war_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 306943, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "war_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 298189, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "war_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 304537, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "war_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 297216, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "war_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 306404, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "war_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 278309, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "war_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 292003, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "war_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 321164, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "war_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 293102, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + 
"max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "war_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 295075, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "war_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 296884, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "war_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 303317, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "war_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 310617, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "war_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 291590, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + 
"min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "war_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 278516, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "war_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 304007, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "war_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 327343, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "war_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 302786, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "war_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 306412, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "war_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 291649, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "war_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 353030, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "war_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 330915, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "war_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 204765, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "war_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 290682, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + 
"unique_sentence2": 1012 + }, + "war_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 293331, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "war_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 317227, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "war_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 290851, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "war_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 294594, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "war_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 315855, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "war_Latn-sin_Sinh": { + 
"num_samples": 1012, + "number_of_characters": 295826, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "war_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 290855, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "war_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 207994, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "war_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 301109, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "war_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 298784, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "war_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 310889, + 
"unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "war_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 291147, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "war_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 312291, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "war_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 307086, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "war_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 296456, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "war_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 255446, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "war_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 205441, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "war_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 292917, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "war_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 306003, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "war_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 297561, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "war_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 308676, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + 
"max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "war_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 307744, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "war_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 311918, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "war_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 295601, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "war_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 333757, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "war_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 310947, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "war_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 303363, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "war_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 299829, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "war_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 291174, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "war_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300150, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "war_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 299824, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "war_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 315397, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "war_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 317470, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "war_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 327781, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "war_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 313108, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "war_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 281807, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + 
"unique_sentence2": 1012 + }, + "war_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 318555, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "war_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 286026, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "war_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 307703, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "war_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 302292, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "war_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 299585, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "war_Latn-sna_Latn": { + 
"num_samples": 1012, + "number_of_characters": 312349, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "war_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 322924, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "war_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 308485, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "war_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 276535, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "war_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 304813, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "war_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 295546, + 
"unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "war_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 311001, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "war_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 294645, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "war_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 284334, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "war_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 305008, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "war_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297968, + "unique_pairs": 1012, + "min_sentence1_length": 54, + 
"average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "war_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 297601, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "war_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 266800, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "war_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301813, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "war_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 298903, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "war_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 298171, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + 
"max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "war_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 316408, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "war_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 337291, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 162.79249011857706, + "max_sentence1_length": 434, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "arb_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 227928, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "arb_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 242694, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "arb_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 279665, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + 
"min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "arb_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 247365, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "arb_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 269519, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "arb_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 243002, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "arb_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 255461, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arb_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 275039, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "arb_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 252068, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "arb_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 258240, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "arb_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 261877, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "arb_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 273846, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "arb_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 243240, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 
1011 + }, + "arb_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 269738, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "arb_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 245715, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arb_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 238762, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "arb_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 273304, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arb_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 244905, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "arb_Arab-acm_Arab": { + "num_samples": 1012, + 
"number_of_characters": 229880, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "arb_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264298, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "arb_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 248273, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arb_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 245034, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arb_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 262169, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "arb_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 265643, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "arb_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 240134, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "arb_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 269654, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "arb_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 234267, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "arb_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 231420, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "arb_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 277058, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "arb_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 247599, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "arb_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 254936, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "arb_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251839, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "arb_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 249358, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "arb_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 277303, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "arb_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247087, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arb_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 256784, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "arb_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 228384, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "arb_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 245349, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "arb_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 245766, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "arb_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 262263, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "arb_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 262069, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "arb_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 242706, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "arb_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 255790, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "arb_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 265033, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "arb_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250753, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "arb_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 255997, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "arb_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 244918, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "arb_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 256129, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "arb_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 250867, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "arb_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 246679, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "arb_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 257470, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "arb_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 259725, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "arb_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 253153, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "arb_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 251054, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "arb_Arab-ajp_Arab": { + "num_samples": 1012, + 
"number_of_characters": 225353, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "arb_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 238537, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "arb_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 243755, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arb_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 275987, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "arb_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 232307, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "arb_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254134, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "arb_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 236950, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "arb_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 248480, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "arb_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 246226, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "arb_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 248957, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arb_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 254489, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arb_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 250581, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "arb_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 258625, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "arb_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 255279, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "arb_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 262595, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "arb_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 257334, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "arb_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 254411, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "arb_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 265630, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "arb_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 263371, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "arb_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 264458, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "arb_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 271484, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + 
"average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "arb_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 246090, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "arb_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 266201, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "arb_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 253089, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "arb_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 264878, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arb_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 253678, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + 
"max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "arb_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 249544, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "arb_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 203439, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "arb_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 248494, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "arb_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 256435, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "arb_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 272445, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, 
+ "arb_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 182272, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "arb_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 240964, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "arb_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 263572, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "arb_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 270440, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "arb_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 254902, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "arb_Arab-apc_Arab": { + "num_samples": 1012, + 
"number_of_characters": 224288, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "arb_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 255682, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "arb_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 251567, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "arb_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 253235, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arb_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 246367, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arb_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 262834, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "arb_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258403, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arb_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 240724, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "arb_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 281053, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "arb_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254131, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "arb_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 273945, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "arb_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 173250, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "arb_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 262301, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "arb_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 279476, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "arb_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 258928, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "arb_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 240211, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "arb_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 243480, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "arb_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 270065, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "arb_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 262088, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arb_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 260826, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "arb_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 246861, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "arb_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 253993, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "arb_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 263814, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "arb_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 244564, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "arb_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249589, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "arb_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 255430, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "arb_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 232735, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "arb_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 274992, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "arb_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 238469, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "arb_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 285704, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "arb_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 258504, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, 
+ "arb_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 249750, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "arb_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 256098, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "arb_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 248777, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "arb_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 257965, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "arb_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 229870, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "arb_Arab-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 243564, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arb_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 272725, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "arb_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 244663, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "arb_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 246636, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "arb_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 248445, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "arb_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 254878, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "arb_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262178, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "arb_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 243151, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "arb_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 230077, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "arb_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 255568, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "arb_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 278904, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "arb_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 254347, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arb_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 257973, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "arb_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 243210, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arb_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 304591, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "arb_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 282476, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + 
"unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "arb_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 156326, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "arb_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 242243, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "arb_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 244892, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "arb_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 268788, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "arb_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 242412, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "arb_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 246155, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "arb_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 267416, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "arb_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 247387, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "arb_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 242416, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "arb_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 159555, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + 
"max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "arb_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 252670, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arb_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 250345, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "arb_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 262450, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "arb_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 242708, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "arb_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 263852, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, 
+ "arb_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 258647, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "arb_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 248017, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "arb_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 207007, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "arb_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 157002, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "arb_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 244478, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "arb_Arab-cym_Latn": { + "num_samples": 1012, + 
"number_of_characters": 257564, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "arb_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 249122, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "arb_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 260237, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "arb_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 259305, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "arb_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 263479, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "arb_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 247162, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "arb_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 285318, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "arb_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 262508, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arb_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 254924, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "arb_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 251390, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "arb_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 242735, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "arb_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251711, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arb_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 251385, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "arb_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 266958, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "arb_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 269031, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "arb_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 279342, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + 
"unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "arb_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 264669, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "arb_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 233368, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "arb_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 270116, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "arb_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 237587, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "arb_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 259264, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "arb_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 253853, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "arb_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 251146, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "arb_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 263910, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "arb_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 274485, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "arb_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 260046, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + 
"max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "arb_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 228096, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "arb_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 256374, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "arb_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 247107, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "arb_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 262562, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "arb_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 246206, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, 
+ "arb_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 235895, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "arb_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 256569, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arb_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249529, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "arb_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 249162, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "arb_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 218361, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "arb_Arab-khk_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 253374, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "arb_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 250464, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "arb_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 249732, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "arb_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 267969, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "arb_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 288852, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 114.92786561264822, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 249445, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bul_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 264211, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "bul_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 301182, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bul_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 268882, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bul_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 291036, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "bul_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 264519, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bul_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 276978, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bul_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 296556, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 273585, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 279757, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 283394, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 295363, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bul_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 264757, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "bul_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 291255, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bul_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 267232, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bul_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 260279, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bul_Cyrl-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 294821, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bul_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 266422, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bul_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 251397, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bul_Cyrl-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285815, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bul_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 269790, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "bul_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 266551, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 283686, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bul_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 287160, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bul_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 261651, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "bul_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 291171, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tzm_Tfng": { + 
"num_samples": 1012, + "number_of_characters": 255784, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "bul_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 252937, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bul_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 298575, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "bul_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 269116, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bul_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 276453, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273356, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bul_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 270875, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bul_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 298820, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bul_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268604, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bul_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 278301, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "bul_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 249901, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 266866, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bul_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 267283, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bul_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 283780, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 283586, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bul_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 264223, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bul_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 277307, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 286550, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272270, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bul_Cyrl-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 277514, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bul_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 266435, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bul_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 277646, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 272384, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 268196, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bul_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 278987, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bul_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 281242, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bul_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 274670, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bul_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 272571, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 246870, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bul_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 260054, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 265272, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + 
"unique_sentence2": 1012 + }, + "bul_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 297504, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bul_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 253824, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "bul_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275651, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "bul_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 258467, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "bul_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 269997, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bul_Cyrl-urd_Arab": { + 
"num_samples": 1012, + "number_of_characters": 267743, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bul_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 270474, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bul_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 276006, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bul_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 272098, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 280142, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bul_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 276796, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bul_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 284112, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bul_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 278851, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bul_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 275928, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bul_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 287147, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bul_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 284888, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bul_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 285975, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "bul_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 293001, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bul_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 267607, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 287718, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bul_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 274606, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 286395, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bul_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 275195, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bul_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 271061, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bul_Cyrl-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 224956, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bul_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 270011, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bul_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 277952, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 293962, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 203789, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "bul_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 262481, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bul_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 285089, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bul_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 291957, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bul_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 276419, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bul_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 245805, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bul_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 277199, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bul_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 273084, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + 
"unique_sentence2": 1012 + }, + "bul_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 274752, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bul_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 267884, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bul_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 284351, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bul_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279920, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bul_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 262241, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bul_Cyrl-war_Latn": { + 
"num_samples": 1012, + "number_of_characters": 302570, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bul_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 254131, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bul_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 295462, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bul_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 194767, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bul_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 283818, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bul_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 300993, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bul_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 280445, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bul_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 261728, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bul_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 264997, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bul_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 291582, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bul_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 283605, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bul_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 282343, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 268378, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "bul_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 275510, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bul_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 285331, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bul_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 266081, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bul_Cyrl-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271106, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bul_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 276947, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 254252, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 296509, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bul_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 259986, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 307221, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "bul_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 280021, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bul_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 271267, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bul_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 277615, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 270294, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 279482, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 251387, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bul_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 265081, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bul_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 294242, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 266180, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + 
"unique_sentence2": 1010 + }, + "bul_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 268153, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bul_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 269962, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "bul_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 276395, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283695, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "bul_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 264668, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bul_Cyrl-arz_Arab": { + 
"num_samples": 1012, + "number_of_characters": 251594, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "bul_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 277085, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bul_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 300421, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bul_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 275864, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bul_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 279490, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bul_Cyrl-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 264727, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bul_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 326108, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 303993, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bul_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 177843, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "bul_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 263760, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 266409, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bul_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 290305, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 263929, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 267672, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bul_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 288933, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bul_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 268904, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 263933, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bul_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 181072, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 274187, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bul_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 271862, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bul_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 283967, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 264225, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 285369, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "bul_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 280164, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bul_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 269534, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 228524, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 
89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bul_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 178519, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bul_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 265995, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bul_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 279081, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bul_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 270639, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bul_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 281754, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + 
"unique_sentence2": 1012 + }, + "bul_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 280822, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bul_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 284996, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bul_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 268679, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 306835, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "bul_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 284025, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ayr_Latn": { + 
"num_samples": 1012, + "number_of_characters": 276441, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "bul_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 272907, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bul_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 264252, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273228, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bul_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 272902, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bul_Cyrl-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 288475, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bul_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 290548, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bul_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 300859, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bul_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 286186, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "bul_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 254885, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bul_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 291633, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bul_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 259104, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bul_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 280781, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "bul_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 275370, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bul_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 272663, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bul_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 285427, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 296002, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bul_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 281563, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bul_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 249613, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bul_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 277891, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bul_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 268624, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bul_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 284079, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bul_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 267723, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "bul_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 257412, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 278086, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bul_Cyrl-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271046, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "bul_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 270679, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bul_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 239878, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bul_Cyrl-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274891, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bul_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 271981, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bul_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 271249, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + 
"unique_sentence2": 1012 + }, + "bul_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 289486, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bul_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 310369, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 136.1897233201581, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "fra_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 269259, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "fra_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 284025, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "fra_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 320996, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fra_Latn-hin_Deva": { + 
"num_samples": 1012, + "number_of_characters": 288696, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "fra_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 310850, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "fra_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 284333, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "fra_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 296792, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fra_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 316370, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "fra_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 293399, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "fra_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 299571, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fra_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 303208, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fra_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 315177, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "fra_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 284571, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "fra_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 311069, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "fra_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 287046, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fra_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 280093, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fra_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 314635, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fra_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 286236, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "fra_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 271211, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + 
"max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "fra_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 305629, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fra_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 289604, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fra_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 286365, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fra_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 303500, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fra_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 306974, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fra_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 281465, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "fra_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 310985, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fra_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 275598, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "fra_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 272751, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "fra_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 318389, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 
158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "fra_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 288930, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fra_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 296267, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "fra_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293170, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fra_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 290689, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fra_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 318634, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + 
"unique_sentence2": 1012 + }, + "fra_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288418, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fra_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 298115, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "fra_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 269715, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "fra_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 286680, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fra_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 287097, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "fra_Latn-hye_Armn": { + 
"num_samples": 1012, + "number_of_characters": 303594, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fra_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 303400, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "fra_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 284037, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fra_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 297121, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fra_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 306364, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fra_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292084, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fra_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 297328, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fra_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 286249, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fra_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 297460, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fra_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 292198, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fra_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 288010, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fra_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 298801, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fra_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 301056, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fra_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 294484, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fra_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 292385, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fra_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 266684, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + 
"max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "fra_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 279868, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "fra_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 285086, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fra_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 317318, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "fra_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 273638, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "fra_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295465, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "fra_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 278281, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "fra_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 289811, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fra_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 287557, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fra_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 290288, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fra_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 295820, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 
136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fra_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 291912, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fra_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 299956, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fra_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 296610, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fra_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 303926, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "fra_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 298665, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + 
"unique_sentence2": 1012 + }, + "fra_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 295742, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "fra_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 306961, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fra_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 304702, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "fra_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 305789, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "fra_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 312815, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "fra_Latn-isl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 287421, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fra_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 307532, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fra_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 294420, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fra_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 306209, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fra_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 295009, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "fra_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 290875, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fra_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 244770, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "fra_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 289825, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fra_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 297766, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fra_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 313776, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "fra_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 223603, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "fra_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 282295, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "fra_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 304903, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "fra_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 311771, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "fra_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 296233, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "fra_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 265619, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + 
"max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "fra_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 297013, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fra_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 292898, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "fra_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 294566, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fra_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 287698, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fra_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 304165, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fra_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299734, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fra_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 282055, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fra_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 322384, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fra_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 273945, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fra_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295462, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fra_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 214581, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "fra_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 303632, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fra_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 320807, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fra_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 300259, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "fra_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 281542, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + 
"unique_sentence2": 1012 + }, + "fra_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 284811, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fra_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 311396, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "fra_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 303419, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fra_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 302157, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "fra_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 288192, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "fra_Latn-lim_Latn": { + 
"num_samples": 1012, + "number_of_characters": 295324, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "fra_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 305145, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fra_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 285895, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "fra_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290920, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fra_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 296761, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "fra_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 274066, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fra_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 316323, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fra_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 279800, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "fra_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 327035, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "fra_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 299835, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fra_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 291081, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fra_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 297429, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fra_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 290108, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fra_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 299296, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fra_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 271201, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "fra_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 284895, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + 
"max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fra_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 314056, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "fra_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 285994, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "fra_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 287967, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "fra_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 289776, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "fra_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 296209, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fra_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 303509, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "fra_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 284482, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fra_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 271408, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "fra_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 296899, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fra_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 320235, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "fra_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 295678, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fra_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 299304, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fra_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 284541, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fra_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 345922, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "fra_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 323807, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + 
"unique_sentence2": 1012 + }, + "fra_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 197657, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "fra_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 283574, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fra_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 286223, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fra_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 310119, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "fra_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 283743, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "fra_Latn-ltg_Latn": { + 
"num_samples": 1012, + "number_of_characters": 287486, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fra_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 308747, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "fra_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 288718, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fra_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 283747, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fra_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 200886, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "fra_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 294001, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fra_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 291676, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fra_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 303781, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fra_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 284039, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "fra_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 305183, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "fra_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 299978, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "fra_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 289348, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fra_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 248338, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "fra_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 198333, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "fra_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 285809, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fra_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 298895, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + 
"max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fra_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 290453, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "fra_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 301568, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fra_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 300636, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "fra_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 304810, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fra_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 288493, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fra_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 326649, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "fra_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 303839, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fra_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 296255, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "fra_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 292721, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fra_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 284066, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fra_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293042, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fra_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 292716, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "fra_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 308289, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fra_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 310362, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "fra_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 320673, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + 
"unique_sentence2": 1012 + }, + "fra_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 306000, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "fra_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 274699, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "fra_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 311447, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fra_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 278918, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "fra_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 300595, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "fra_Latn-luo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 295184, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fra_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 292477, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fra_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 305241, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "fra_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 315816, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fra_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 301377, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "fra_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 269427, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "fra_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 297705, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "fra_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 288438, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fra_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 303893, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fra_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 287537, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "fra_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 277226, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "fra_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 297900, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fra_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290860, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "fra_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 290493, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "fra_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 259692, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "fra_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294705, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + 
"max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fra_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 291795, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fra_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 291063, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fra_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 309300, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "fra_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 330183, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 155.7687747035573, + "max_sentence1_length": 415, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 168564, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 183330, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "jpn_Jpan-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 220301, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "jpn_Jpan-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 188001, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "jpn_Jpan-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 210155, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "jpn_Jpan-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 183638, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "jpn_Jpan-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 196097, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "jpn_Jpan-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 215675, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 192704, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 198876, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 202513, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + 
}, + "jpn_Jpan-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 214482, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "jpn_Jpan-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 183876, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "jpn_Jpan-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 210374, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "jpn_Jpan-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 186351, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "jpn_Jpan-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 179398, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "jpn_Jpan-spa_Latn": { + "num_samples": 1012, + 
"number_of_characters": 213940, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "jpn_Jpan-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 185541, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "jpn_Jpan-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 170516, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 204934, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "jpn_Jpan-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 188909, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "jpn_Jpan-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 185670, + "unique_pairs": 1012, + 
"min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 202805, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "jpn_Jpan-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 206279, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "jpn_Jpan-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 180770, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "jpn_Jpan-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 210290, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 174903, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 
56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "jpn_Jpan-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 172056, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 217694, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "jpn_Jpan-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 188235, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "jpn_Jpan-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 195572, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 192475, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "jpn_Jpan-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 189994, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "jpn_Jpan-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 217939, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "jpn_Jpan-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 187723, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "jpn_Jpan-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 197420, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "jpn_Jpan-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 169020, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + 
"average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 185985, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "jpn_Jpan-est_Latn": { + "num_samples": 1012, + "number_of_characters": 186402, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "jpn_Jpan-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 202899, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 202705, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "jpn_Jpan-min_Arab": { + "num_samples": 1012, + "number_of_characters": 183342, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + 
"max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "jpn_Jpan-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 196426, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 205669, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 191389, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "jpn_Jpan-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 196633, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 185554, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + 
"jpn_Jpan-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 196765, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 191503, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 187315, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "jpn_Jpan-min_Latn": { + "num_samples": 1012, + "number_of_characters": 198106, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "jpn_Jpan-por_Latn": { + "num_samples": 1012, + "number_of_characters": 200361, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "jpn_Jpan-sun_Latn": { + "num_samples": 1012, + 
"number_of_characters": 193789, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "jpn_Jpan-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 191690, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 165989, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 179173, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 184391, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 216623, + "unique_pairs": 1012, + 
"min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "jpn_Jpan-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 172943, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "jpn_Jpan-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 194770, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "jpn_Jpan-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 177586, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "jpn_Jpan-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 189116, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "jpn_Jpan-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 186862, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 
56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "jpn_Jpan-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 189593, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 195125, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "jpn_Jpan-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 191217, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 199261, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "jpn_Jpan-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 195915, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "jpn_Jpan-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 203231, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "jpn_Jpan-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 197970, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "jpn_Jpan-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 195047, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "jpn_Jpan-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 206266, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "jpn_Jpan-als_Latn": { + "num_samples": 1012, + "number_of_characters": 204007, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + 
"average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 205094, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "jpn_Jpan-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 212120, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "jpn_Jpan-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 186726, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 206837, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "jpn_Jpan-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 193725, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + 
"max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 205514, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "jpn_Jpan-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 194314, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "jpn_Jpan-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 190180, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "jpn_Jpan-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 144075, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 189130, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + 
"jpn_Jpan-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 197071, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 213081, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 122908, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "jpn_Jpan-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 181600, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "jpn_Jpan-run_Latn": { + "num_samples": 1012, + "number_of_characters": 204208, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "jpn_Jpan-tam_Taml": { + "num_samples": 1012, + 
"number_of_characters": 211076, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "jpn_Jpan-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 195538, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "jpn_Jpan-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 164924, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 196318, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "jpn_Jpan-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 192203, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "jpn_Jpan-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 193871, + "unique_pairs": 1012, + 
"min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "jpn_Jpan-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 187003, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "jpn_Jpan-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 203470, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "jpn_Jpan-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 199039, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "jpn_Jpan-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 181360, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "jpn_Jpan-war_Latn": { + "num_samples": 1012, + "number_of_characters": 221689, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 
56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "jpn_Jpan-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 173250, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 194767, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "jpn_Jpan-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 214581, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "jpn_Jpan-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 202937, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "jpn_Jpan-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 220112, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "jpn_Jpan-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 199564, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "jpn_Jpan-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 180847, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "jpn_Jpan-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 184116, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "jpn_Jpan-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 210701, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "jpn_Jpan-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 202724, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "jpn_Jpan-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 201462, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 187497, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "jpn_Jpan-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 194629, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "jpn_Jpan-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 204450, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "jpn_Jpan-san_Deva": { + "num_samples": 1012, + "number_of_characters": 185200, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + 
"max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "jpn_Jpan-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 190225, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "jpn_Jpan-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 196066, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 173371, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 215628, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "jpn_Jpan-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 179105, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + 
"jpn_Jpan-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 226340, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "jpn_Jpan-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 199140, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "jpn_Jpan-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 190386, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "jpn_Jpan-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 196734, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 189413, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ydd_Hebr": { + "num_samples": 1012, + 
"number_of_characters": 198601, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 170506, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "jpn_Jpan-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 184200, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "jpn_Jpan-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 213361, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 185299, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "jpn_Jpan-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 187272, + "unique_pairs": 1012, + 
"min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "jpn_Jpan-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 189081, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "jpn_Jpan-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 195514, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 202814, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "jpn_Jpan-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 183787, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "jpn_Jpan-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 170713, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 
56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "jpn_Jpan-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 196204, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "jpn_Jpan-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 219540, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "jpn_Jpan-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 194983, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "jpn_Jpan-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 198609, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "jpn_Jpan-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 183846, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "jpn_Jpan-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 245227, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 223112, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "jpn_Jpan-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 96962, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "jpn_Jpan-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 182879, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 185528, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "jpn_Jpan-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 209424, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 183048, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 186791, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "jpn_Jpan-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 208052, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "jpn_Jpan-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 188023, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + 
"max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 183052, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "jpn_Jpan-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 100191, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 193306, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "jpn_Jpan-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 190981, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "jpn_Jpan-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 203086, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + 
"jpn_Jpan-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 183344, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 204488, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "jpn_Jpan-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 199283, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "jpn_Jpan-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 188653, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 147643, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "jpn_Jpan-zho_Hant": { + "num_samples": 1012, + 
"number_of_characters": 97638, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "jpn_Jpan-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 185114, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "jpn_Jpan-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 198200, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "jpn_Jpan-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 189758, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "jpn_Jpan-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 200873, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "jpn_Jpan-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 199941, + "unique_pairs": 1012, + 
"min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "jpn_Jpan-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 204115, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "jpn_Jpan-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 187798, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 225954, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "jpn_Jpan-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 203144, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 195560, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 
56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "jpn_Jpan-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 192026, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "jpn_Jpan-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 183371, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 192347, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "jpn_Jpan-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 192021, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "jpn_Jpan-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 207594, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "jpn_Jpan-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 209667, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "jpn_Jpan-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 219978, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "jpn_Jpan-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 205305, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "jpn_Jpan-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 174004, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "jpn_Jpan-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 210752, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "jpn_Jpan-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 178223, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "jpn_Jpan-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 199900, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "jpn_Jpan-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 194489, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "jpn_Jpan-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 191782, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "jpn_Jpan-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 204546, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + 
"max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 215121, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "jpn_Jpan-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 200682, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "jpn_Jpan-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 168732, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "jpn_Jpan-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 197010, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "jpn_Jpan-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 187743, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + 
"jpn_Jpan-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 203198, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "jpn_Jpan-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 186842, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "jpn_Jpan-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 176531, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 197205, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "jpn_Jpan-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 190165, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "jpn_Jpan-dyu_Latn": { + "num_samples": 1012, + 
"number_of_characters": 189798, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "jpn_Jpan-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 158997, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "jpn_Jpan-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 194010, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "jpn_Jpan-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 191100, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "jpn_Jpan-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 190368, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "jpn_Jpan-som_Latn": { + "num_samples": 1012, + "number_of_characters": 208605, + "unique_pairs": 1012, + 
"min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "jpn_Jpan-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 229488, + "unique_pairs": 1012, + "min_sentence1_length": 17, + "average_sentence1_length": 56.26778656126482, + "max_sentence1_length": 139, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "lij_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257615, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "lij_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272381, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "lij_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309352, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lij_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 277052, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "lij_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 299206, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "lij_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 272689, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "lij_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 285148, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lij_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 304726, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "lij_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 281755, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "lij_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 287927, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lij_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 291564, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lij_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303533, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "lij_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 272927, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "lij_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299425, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "lij_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275402, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lij_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268449, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lij_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 302991, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lij_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274592, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "lij_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 259567, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + 
"max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "lij_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293985, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lij_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 277960, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lij_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 274721, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lij_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 291856, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lij_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 295330, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + 
"lij_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 269821, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "lij_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299341, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lij_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 263954, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "lij_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 261107, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "lij_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 306745, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "lij_Latn-epo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277286, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lij_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 284623, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "lij_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281526, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lij_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 279045, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lij_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 306990, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "lij_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276774, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lij_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286471, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "lij_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 258071, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "lij_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 275036, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lij_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275453, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "lij_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 291950, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lij_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 291756, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "lij_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 272393, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lij_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285477, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lij_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 294720, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lij_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280440, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lij_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 285684, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lij_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274605, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lij_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 285816, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lij_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280554, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lij_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276366, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lij_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 287157, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lij_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289412, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lij_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 282840, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lij_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 280741, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lij_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 255040, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + 
"max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "lij_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 268224, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "lij_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273442, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lij_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 305674, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "lij_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 261994, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "lij_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283821, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + 
}, + "lij_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266637, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "lij_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 278167, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lij_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 275913, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lij_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278644, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lij_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 284176, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lij_Latn-fao_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280268, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lij_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 288312, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lij_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 284966, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lij_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 292282, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "lij_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 287021, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lij_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 284098, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "lij_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295317, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lij_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 293058, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lij_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 294145, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "lij_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 301171, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "lij_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 275777, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lij_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 295888, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lij_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 282776, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lij_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 294565, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lij_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 283365, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "lij_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 279231, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lij_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 233126, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "lij_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 278181, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lij_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 286122, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lij_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 302132, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "lij_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 211959, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + 
"average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "lij_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270651, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "lij_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293259, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "lij_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 300127, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "lij_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284589, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "lij_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 253975, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + 
"max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "lij_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285369, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lij_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281254, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "lij_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 282922, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lij_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 276054, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lij_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 292521, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, 
+ "lij_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288090, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lij_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270411, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lij_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 310740, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lij_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262301, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lij_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283818, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lij_Latn-fra_Latn": { + "num_samples": 1012, + 
"number_of_characters": 303632, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "lij_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 202937, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "lij_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 309163, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lij_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288615, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "lij_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 269898, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lij_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 273167, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lij_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 299752, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "lij_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 291775, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lij_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290513, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lij_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276548, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "lij_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283680, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "lij_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293501, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lij_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 274251, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "lij_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279276, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lij_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 285117, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "lij_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 262422, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lij_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304679, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lij_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 268156, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "lij_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315391, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "lij_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 288191, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lij_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279437, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lij_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 285785, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lij_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278464, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lij_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287652, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lij_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 259557, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "lij_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273251, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lij_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302412, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "lij_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 274350, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "lij_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276323, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "lij_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 278132, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "lij_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284565, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + 
}, + "lij_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291865, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "lij_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 272838, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lij_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 259764, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "lij_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285255, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lij_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308591, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "lij_Latn-kan_Knda": { + "num_samples": 1012, + 
"number_of_characters": 284034, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lij_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 287660, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lij_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 272897, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lij_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334278, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "lij_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 312163, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lij_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 186013, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "lij_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 271930, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lij_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274579, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lij_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298475, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "lij_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 272099, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "lij_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 275842, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lij_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 297103, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "lij_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 277074, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lij_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 272103, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lij_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 189242, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "lij_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282357, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lij_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 280032, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lij_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 292137, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lij_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272395, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "lij_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293539, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "lij_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288334, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + 
"average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "lij_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277704, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lij_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236694, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "lij_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186689, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "lij_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 274165, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lij_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 287251, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + 
"max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lij_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 278809, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "lij_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 289924, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lij_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 288992, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "lij_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 293166, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lij_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 276849, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, 
+ "lij_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 315005, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "lij_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 292195, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lij_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284611, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "lij_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 281077, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lij_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272422, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lij_Latn-kaz_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 281398, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lij_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 281072, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "lij_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 296645, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lij_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 298718, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "lij_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 309029, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lij_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294356, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "lij_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 263055, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "lij_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 299803, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lij_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267274, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "lij_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 288951, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "lij_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283540, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lij_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 280833, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lij_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 293597, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "lij_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 304172, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lij_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 289733, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "lij_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 257783, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "lij_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 286061, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "lij_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 276794, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lij_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 292249, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lij_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 275893, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "lij_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265582, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "lij_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286256, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lij_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279216, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "lij_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 278849, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "lij_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 248048, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "lij_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283061, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + 
"max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lij_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 280151, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lij_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 279419, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lij_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297656, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "lij_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 318539, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 144.26284584980237, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "mya_Mymr-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 274790, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, 
+ "mya_Mymr-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 289556, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "mya_Mymr-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 326527, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mya_Mymr-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 294227, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "mya_Mymr-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 316381, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "mya_Mymr-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 289864, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "mya_Mymr-pap_Latn": { + "num_samples": 1012, + 
"number_of_characters": 302323, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mya_Mymr-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 321901, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "mya_Mymr-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 298930, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "mya_Mymr-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 305102, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mya_Mymr-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 308739, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mya_Mymr-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 320708, + "unique_pairs": 1012, + 
"min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "mya_Mymr-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 290102, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "mya_Mymr-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 316600, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "mya_Mymr-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 292577, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mya_Mymr-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 285624, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mya_Mymr-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 320166, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 
161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mya_Mymr-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 291767, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "mya_Mymr-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 276742, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "mya_Mymr-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 311160, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mya_Mymr-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 295135, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mya_Mymr-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 291896, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mya_Mymr-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 309031, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mya_Mymr-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 312505, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mya_Mymr-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 286996, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "mya_Mymr-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 316516, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mya_Mymr-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 281129, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + 
"average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "mya_Mymr-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 278282, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "mya_Mymr-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 323920, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "mya_Mymr-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 294461, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mya_Mymr-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 301798, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "mya_Mymr-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298701, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + 
"max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mya_Mymr-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 296220, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mya_Mymr-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 324165, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "mya_Mymr-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293949, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mya_Mymr-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 303646, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "mya_Mymr-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 275246, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + 
}, + "mya_Mymr-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 292211, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mya_Mymr-est_Latn": { + "num_samples": 1012, + "number_of_characters": 292628, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "mya_Mymr-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 309125, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mya_Mymr-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 308931, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "mya_Mymr-min_Arab": { + "num_samples": 1012, + "number_of_characters": 289568, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mya_Mymr-pol_Latn": { + "num_samples": 1012, + 
"number_of_characters": 302652, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mya_Mymr-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 311895, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mya_Mymr-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297615, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mya_Mymr-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 302859, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mya_Mymr-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 291780, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mya_Mymr-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 302991, + "unique_pairs": 1012, + 
"min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mya_Mymr-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 297729, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mya_Mymr-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 293541, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mya_Mymr-min_Latn": { + "num_samples": 1012, + "number_of_characters": 304332, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "mya_Mymr-por_Latn": { + "num_samples": 1012, + "number_of_characters": 306587, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mya_Mymr-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 300015, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 
161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mya_Mymr-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 297916, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mya_Mymr-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 272215, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "mya_Mymr-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 285399, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "mya_Mymr-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 290617, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mya_Mymr-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 322849, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "mya_Mymr-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 279169, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "mya_Mymr-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300996, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "mya_Mymr-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 283812, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "mya_Mymr-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 295342, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mya_Mymr-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 293088, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mya_Mymr-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 295819, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mya_Mymr-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 301351, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mya_Mymr-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 297443, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mya_Mymr-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 305487, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "mya_Mymr-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 302141, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + 
"max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mya_Mymr-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 309457, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "mya_Mymr-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 304196, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mya_Mymr-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 301273, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "mya_Mymr-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 312492, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "mya_Mymr-als_Latn": { + "num_samples": 1012, + "number_of_characters": 310233, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, 
+ "mya_Mymr-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 311320, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "mya_Mymr-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 318346, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "mya_Mymr-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 292952, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mya_Mymr-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 313063, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "mya_Mymr-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 299951, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "mya_Mymr-ron_Latn": { + "num_samples": 1012, + 
"number_of_characters": 311740, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mya_Mymr-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 300540, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "mya_Mymr-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 296406, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mya_Mymr-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 250301, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "mya_Mymr-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 295356, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mya_Mymr-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 303297, + "unique_pairs": 1012, + 
"min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mya_Mymr-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 319307, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "mya_Mymr-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 229134, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "mya_Mymr-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 287826, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "mya_Mymr-run_Latn": { + "num_samples": 1012, + "number_of_characters": 310434, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "mya_Mymr-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 317302, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 
161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "mya_Mymr-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 301764, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "mya_Mymr-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 271150, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "mya_Mymr-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 302544, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mya_Mymr-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 298429, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "mya_Mymr-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 300097, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mya_Mymr-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 293229, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mya_Mymr-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 309696, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mya_Mymr-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 305265, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "mya_Mymr-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 287586, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mya_Mymr-war_Latn": { + "num_samples": 1012, + "number_of_characters": 327915, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + 
"average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "mya_Mymr-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 279476, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mya_Mymr-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300993, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mya_Mymr-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 320807, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "mya_Mymr-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 220112, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "mya_Mymr-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 309163, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + 
"max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mya_Mymr-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 305790, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "mya_Mymr-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 287073, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "mya_Mymr-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 290342, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "mya_Mymr-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 316927, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "mya_Mymr-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 308950, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, 
+ "mya_Mymr-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 307688, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "mya_Mymr-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 293723, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "mya_Mymr-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 300855, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "mya_Mymr-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 310676, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mya_Mymr-san_Deva": { + "num_samples": 1012, + "number_of_characters": 291426, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "mya_Mymr-tat_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 296451, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mya_Mymr-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 302292, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "mya_Mymr-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 279597, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "mya_Mymr-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 321854, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "mya_Mymr-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 285331, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "mya_Mymr-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 332566, + "unique_pairs": 1012, + 
"min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "mya_Mymr-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 305366, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mya_Mymr-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 296612, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mya_Mymr-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 302960, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mya_Mymr-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 295639, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "mya_Mymr-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 304827, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 
161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "mya_Mymr-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 276732, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "mya_Mymr-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 290426, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mya_Mymr-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 319587, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "mya_Mymr-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 291525, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "mya_Mymr-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 293498, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "mya_Mymr-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 295307, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "mya_Mymr-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 301740, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mya_Mymr-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 309040, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "mya_Mymr-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 290013, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "mya_Mymr-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 276939, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "mya_Mymr-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 302430, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "mya_Mymr-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 325766, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "mya_Mymr-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 301209, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mya_Mymr-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 304835, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "mya_Mymr-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 290072, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mya_Mymr-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 351453, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "mya_Mymr-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 329338, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "mya_Mymr-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 203188, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "mya_Mymr-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 289105, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "mya_Mymr-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 291754, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, 
+ "mya_Mymr-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 315650, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "mya_Mymr-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 289274, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "mya_Mymr-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 293017, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "mya_Mymr-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 314278, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "mya_Mymr-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 294249, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "mya_Mymr-tha_Thai": { + "num_samples": 1012, + 
"number_of_characters": 289278, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "mya_Mymr-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 206417, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "mya_Mymr-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 299532, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mya_Mymr-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 297207, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "mya_Mymr-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 309312, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "mya_Mymr-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 289570, + "unique_pairs": 1012, + 
"min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "mya_Mymr-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 310714, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "mya_Mymr-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 305509, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "mya_Mymr-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 294879, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "mya_Mymr-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 253869, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "mya_Mymr-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 203864, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 
161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "mya_Mymr-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 291340, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "mya_Mymr-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 304426, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mya_Mymr-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 295984, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "mya_Mymr-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 307099, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mya_Mymr-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 306167, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "mya_Mymr-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 310341, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "mya_Mymr-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 294024, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "mya_Mymr-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 332180, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "mya_Mymr-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 309370, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "mya_Mymr-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 301786, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "mya_Mymr-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 298252, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "mya_Mymr-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 289597, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "mya_Mymr-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298573, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mya_Mymr-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 298247, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "mya_Mymr-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 313820, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + 
"max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "mya_Mymr-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 315893, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "mya_Mymr-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 326204, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "mya_Mymr-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 311531, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "mya_Mymr-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 280230, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "mya_Mymr-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 316978, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + 
}, + "mya_Mymr-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 284449, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "mya_Mymr-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 306126, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "mya_Mymr-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 300715, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "mya_Mymr-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 298008, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mya_Mymr-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 310772, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "mya_Mymr-tso_Latn": { + "num_samples": 1012, + 
"number_of_characters": 321347, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "mya_Mymr-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 306908, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "mya_Mymr-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 274958, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "mya_Mymr-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 303236, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "mya_Mymr-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 293969, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "mya_Mymr-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 309424, + "unique_pairs": 1012, + 
"min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "mya_Mymr-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 293068, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "mya_Mymr-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 282757, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "mya_Mymr-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 303431, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "mya_Mymr-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296391, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "mya_Mymr-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 296024, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 
161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "mya_Mymr-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 265223, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "mya_Mymr-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300236, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "mya_Mymr-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 297326, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mya_Mymr-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 296594, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "mya_Mymr-som_Latn": { + "num_samples": 1012, + "number_of_characters": 314831, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "mya_Mymr-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 335714, + "unique_pairs": 1012, + "min_sentence1_length": 54, + "average_sentence1_length": 161.23418972332016, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "sag_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 254242, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "sag_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 269008, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "sag_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 305979, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sag_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 273679, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "sag_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 295833, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "sag_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 269316, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "sag_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 281775, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sag_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 301353, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "sag_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 278382, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + 
"max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "sag_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 284554, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sag_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 288191, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sag_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 300160, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "sag_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 269554, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "sag_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 296052, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + 
"sag_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 272029, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sag_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 265076, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sag_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 299618, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sag_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 271219, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "sag_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 256194, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "sag_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 290612, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sag_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 274587, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sag_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 271348, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sag_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 288483, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sag_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 291957, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sag_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 266448, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "sag_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 295968, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "sag_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 260581, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "sag_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 257734, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "sag_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 303372, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "sag_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 273913, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sag_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 281250, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "sag_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278153, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sag_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 275672, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sag_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 303617, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "sag_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273401, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sag_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 283098, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "sag_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 254698, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "sag_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 271663, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sag_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 272080, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "sag_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 288577, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sag_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 288383, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "sag_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 269020, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sag_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 282104, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sag_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 291347, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sag_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277067, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sag_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 282311, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sag_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 271232, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sag_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 282443, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "sag_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 277181, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sag_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 272993, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + 
"sag_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 283784, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sag_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 286039, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sag_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 279467, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sag_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 277368, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sag_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 251667, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "sag_Latn-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 264851, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "sag_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 270069, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sag_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 302301, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "sag_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 258621, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "sag_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280448, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "sag_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 263264, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "sag_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 274794, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sag_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 272540, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sag_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 275271, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sag_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 280803, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sag_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 276895, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sag_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 284939, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sag_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 281593, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sag_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 288909, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "sag_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 283648, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sag_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 280725, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "sag_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 291944, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sag_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 289685, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sag_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 290772, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "sag_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 297798, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "sag_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 272404, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sag_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 292515, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sag_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 279403, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sag_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 291192, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sag_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 279992, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "sag_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 275858, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sag_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 229753, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "sag_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 274808, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sag_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 282749, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sag_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 298759, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "sag_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 208586, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"sag_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 267278, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "sag_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 289886, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "sag_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 296754, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "sag_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 281216, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "sag_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 250602, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "sag_Latn-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 281996, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sag_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 277881, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "sag_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 279549, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sag_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 272681, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sag_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 289148, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sag_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284717, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sag_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 267038, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sag_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 307367, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sag_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 258928, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sag_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280445, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sag_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 300259, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "sag_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 199564, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "sag_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 288615, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sag_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 305790, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sag_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 266525, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sag_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 269794, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sag_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 296379, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "sag_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 288402, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sag_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 287140, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sag_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 273175, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "sag_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 280307, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "sag_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 290128, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sag_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 270878, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "sag_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275903, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sag_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 281744, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "sag_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 259049, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + 
"max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sag_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 301306, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sag_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 264783, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "sag_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 312018, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "sag_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 284818, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sag_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 276064, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + 
"sag_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 282412, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sag_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 275091, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sag_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 284279, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sag_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 256184, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "sag_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 269878, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sag_Latn-gaz_Latn": { + "num_samples": 1012, + 
"number_of_characters": 299039, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "sag_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 270977, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "sag_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 272950, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "sag_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 274759, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "sag_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 281192, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sag_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288492, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "sag_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 269465, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sag_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 256391, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "sag_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 281882, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sag_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 305218, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "sag_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 280661, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sag_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 284287, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sag_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 269524, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sag_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 330905, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "sag_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 308790, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sag_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 182640, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + 
"unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "sag_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 268557, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sag_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 271206, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sag_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 295102, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "sag_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 268726, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "sag_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 272469, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sag_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 293730, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "sag_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 273701, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sag_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 268730, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sag_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 185869, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "sag_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 278984, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + 
"max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sag_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 276659, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sag_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 288764, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sag_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 269022, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "sag_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 290166, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "sag_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 284961, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + 
"sag_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 274331, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sag_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 233321, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "sag_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 183316, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "sag_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 270792, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sag_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 283878, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sag_Latn-grn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 275436, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "sag_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 286551, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sag_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 285619, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "sag_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 289793, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "sag_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 273476, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sag_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 311632, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "sag_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 288822, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sag_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 281238, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "sag_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 277704, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sag_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 269049, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sag_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278025, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sag_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 277699, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "sag_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 293272, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sag_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 295345, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "sag_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 305656, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sag_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 290983, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "sag_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 259682, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "sag_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 296430, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sag_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 263901, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "sag_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 285578, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "sag_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 280167, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sag_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 277460, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sag_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 290224, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "sag_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 300799, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sag_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 286360, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "sag_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 254410, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "sag_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 282688, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "sag_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 273421, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sag_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 288876, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "sag_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 272520, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "sag_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 262209, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"sag_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 282883, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sag_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275843, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "sag_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 275476, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "sag_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 244675, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "sag_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279688, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sag_Latn-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 276778, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sag_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 276046, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sag_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 294283, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "sag_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 315166, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 140.9298418972332, + "max_sentence1_length": 406, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "taq_Tfng-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 235525, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "taq_Tfng-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 250291, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "taq_Tfng-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 287262, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "taq_Tfng-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 254962, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "taq_Tfng-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 277116, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "taq_Tfng-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 250599, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "taq_Tfng-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 263058, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "taq_Tfng-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 282636, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "taq_Tfng-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 259665, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "taq_Tfng-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 265837, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "taq_Tfng-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 269474, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "taq_Tfng-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 281443, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 
1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "taq_Tfng-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 250837, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "taq_Tfng-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 277335, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "taq_Tfng-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 253312, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "taq_Tfng-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 246359, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "taq_Tfng-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 280901, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "taq_Tfng-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 252502, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "taq_Tfng-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 237477, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "taq_Tfng-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271895, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "taq_Tfng-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 255870, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "taq_Tfng-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 252631, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "taq_Tfng-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 269766, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "taq_Tfng-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 273240, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "taq_Tfng-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 247731, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "taq_Tfng-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 277251, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "taq_Tfng-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 241864, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, 
+ "taq_Tfng-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 239017, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "taq_Tfng-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 284655, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "taq_Tfng-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 255196, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "taq_Tfng-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 262533, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "taq_Tfng-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259436, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "taq_Tfng-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 256955, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "taq_Tfng-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 284900, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "taq_Tfng-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254684, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "taq_Tfng-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 264381, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "taq_Tfng-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 235981, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "taq_Tfng-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 252946, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "taq_Tfng-est_Latn": { + "num_samples": 1012, + "number_of_characters": 253363, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "taq_Tfng-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 269860, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "taq_Tfng-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 269666, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "taq_Tfng-min_Arab": { + "num_samples": 1012, + "number_of_characters": 250303, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "taq_Tfng-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 263387, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "taq_Tfng-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 272630, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "taq_Tfng-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258350, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "taq_Tfng-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 263594, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "taq_Tfng-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 252515, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "taq_Tfng-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 263726, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "taq_Tfng-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 258464, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "taq_Tfng-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 254276, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "taq_Tfng-min_Latn": { + "num_samples": 1012, + "number_of_characters": 265067, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "taq_Tfng-por_Latn": { + "num_samples": 1012, + "number_of_characters": 267322, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "taq_Tfng-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 260750, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "taq_Tfng-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 258651, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "taq_Tfng-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 232950, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "taq_Tfng-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 246134, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "taq_Tfng-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 251352, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "taq_Tfng-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 283584, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "taq_Tfng-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 239904, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "taq_Tfng-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261731, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "taq_Tfng-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 244547, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "taq_Tfng-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 256077, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "taq_Tfng-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 253823, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "taq_Tfng-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 256554, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "taq_Tfng-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 262086, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "taq_Tfng-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 258178, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "taq_Tfng-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 266222, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "taq_Tfng-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 262876, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "taq_Tfng-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 270192, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "taq_Tfng-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 264931, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "taq_Tfng-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 262008, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "taq_Tfng-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 273227, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "taq_Tfng-als_Latn": { + "num_samples": 1012, + "number_of_characters": 270968, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "taq_Tfng-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 272055, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "taq_Tfng-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 279081, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "taq_Tfng-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 253687, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "taq_Tfng-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 273798, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "taq_Tfng-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 260686, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "taq_Tfng-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 272475, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "taq_Tfng-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 261275, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "taq_Tfng-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 257141, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "taq_Tfng-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 211036, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "taq_Tfng-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 256091, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "taq_Tfng-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 264032, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "taq_Tfng-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 280042, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "taq_Tfng-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 189869, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "taq_Tfng-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 248561, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "taq_Tfng-run_Latn": { + "num_samples": 1012, + "number_of_characters": 271169, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "taq_Tfng-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 278037, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "taq_Tfng-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 262499, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "taq_Tfng-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 231885, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "taq_Tfng-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 263279, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "taq_Tfng-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 259164, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "taq_Tfng-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 260832, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "taq_Tfng-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 253964, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "taq_Tfng-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 270431, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "taq_Tfng-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266000, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "taq_Tfng-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 248321, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "taq_Tfng-war_Latn": { + "num_samples": 1012, + "number_of_characters": 288650, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "taq_Tfng-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 240211, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "taq_Tfng-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261728, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "taq_Tfng-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 281542, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "taq_Tfng-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 180847, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "taq_Tfng-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 269898, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "taq_Tfng-mya_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 287073, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "taq_Tfng-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 266525, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "taq_Tfng-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 251077, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "taq_Tfng-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 277662, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "taq_Tfng-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 269685, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "taq_Tfng-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 268423, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "taq_Tfng-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 254458, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "taq_Tfng-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 261590, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "taq_Tfng-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 271411, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "taq_Tfng-san_Deva": { + "num_samples": 1012, + "number_of_characters": 252161, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "taq_Tfng-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257186, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "taq_Tfng-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 263027, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "taq_Tfng-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 240332, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "taq_Tfng-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 282589, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "taq_Tfng-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 246066, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "taq_Tfng-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 293301, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "taq_Tfng-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 266101, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "taq_Tfng-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 257347, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "taq_Tfng-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 263695, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "taq_Tfng-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 256374, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "taq_Tfng-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 265562, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "taq_Tfng-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 237467, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "taq_Tfng-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 251161, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "taq_Tfng-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 280322, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "taq_Tfng-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 252260, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "taq_Tfng-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 254233, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + 
"max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "taq_Tfng-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 256042, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "taq_Tfng-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 262475, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "taq_Tfng-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269775, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "taq_Tfng-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 250748, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "taq_Tfng-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 237674, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + 
"taq_Tfng-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 263165, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "taq_Tfng-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 286501, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "taq_Tfng-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 261944, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "taq_Tfng-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 265570, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "taq_Tfng-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 250807, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "taq_Tfng-shn_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 312188, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "taq_Tfng-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 290073, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "taq_Tfng-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 163923, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "taq_Tfng-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 249840, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "taq_Tfng-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 252489, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "taq_Tfng-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 276385, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "taq_Tfng-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 250009, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "taq_Tfng-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 253752, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "taq_Tfng-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 275013, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "taq_Tfng-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 254984, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "taq_Tfng-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 250013, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "taq_Tfng-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 167152, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "taq_Tfng-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 260267, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "taq_Tfng-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 257942, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "taq_Tfng-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 270047, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "taq_Tfng-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 250305, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "taq_Tfng-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 271449, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "taq_Tfng-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 266244, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "taq_Tfng-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 255614, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "taq_Tfng-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 214604, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "taq_Tfng-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 164599, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + 
"average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "taq_Tfng-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 252075, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "taq_Tfng-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 265161, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "taq_Tfng-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 256719, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "taq_Tfng-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 267834, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "taq_Tfng-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 266902, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + 
"max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "taq_Tfng-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 271076, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "taq_Tfng-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 254759, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "taq_Tfng-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 292915, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "taq_Tfng-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 270105, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "taq_Tfng-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 262521, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, 
+ "taq_Tfng-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 258987, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "taq_Tfng-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 250332, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "taq_Tfng-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259308, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "taq_Tfng-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 258982, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "taq_Tfng-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 274555, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "taq_Tfng-smo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 276628, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "taq_Tfng-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 286939, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "taq_Tfng-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 272266, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "taq_Tfng-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 240965, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "taq_Tfng-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 277713, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "taq_Tfng-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 245184, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "taq_Tfng-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 266861, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "taq_Tfng-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 261450, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "taq_Tfng-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 258743, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "taq_Tfng-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 271507, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "taq_Tfng-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 282082, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "taq_Tfng-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 267643, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "taq_Tfng-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 235693, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "taq_Tfng-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 263971, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "taq_Tfng-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 254704, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "taq_Tfng-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 270159, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "taq_Tfng-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 253803, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "taq_Tfng-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 243492, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "taq_Tfng-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 264166, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "taq_Tfng-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257126, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "taq_Tfng-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 256759, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "taq_Tfng-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 225958, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "taq_Tfng-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260971, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "taq_Tfng-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 258061, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "taq_Tfng-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 257329, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "taq_Tfng-som_Latn": { + "num_samples": 1012, + "number_of_characters": 275566, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + 
"max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "taq_Tfng-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 296449, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 122.43478260869566, + "max_sentence1_length": 312, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "wol_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 238794, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "wol_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 253560, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "wol_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 290531, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "wol_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 258231, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, 
+ "wol_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 280385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "wol_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 253868, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "wol_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 266327, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "wol_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 285905, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "wol_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 262934, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "wol_Latn-ace_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269106, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "wol_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 272743, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "wol_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 284712, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "wol_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 254106, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "wol_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 280604, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "wol_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 256581, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "wol_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 249628, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "wol_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 284170, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "wol_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 255771, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "wol_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 240746, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "wol_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275164, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "wol_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 259139, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "wol_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 255900, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "wol_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 273035, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "wol_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 276509, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "wol_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 251000, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "wol_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 280520, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "wol_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 245133, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "wol_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 242286, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "wol_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 287924, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "wol_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 258465, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "wol_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 265802, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "wol_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262705, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "wol_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 260224, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "wol_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 288169, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "wol_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257953, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "wol_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 267650, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "wol_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 239250, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "wol_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 256215, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "wol_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 256632, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "wol_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 273129, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + 
}, + "wol_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 272935, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "wol_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 253572, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "wol_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 266656, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "wol_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 275899, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "wol_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261619, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "wol_Latn-afr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 266863, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "wol_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 255784, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "wol_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 266995, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "wol_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 261733, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "wol_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 257545, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "wol_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 268336, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "wol_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 270591, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "wol_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 264019, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "wol_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 261920, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "wol_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 236219, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "wol_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 249403, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "wol_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 254621, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "wol_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 286853, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "wol_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 243173, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "wol_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265000, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "wol_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 247816, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "wol_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 259346, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "wol_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 257092, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "wol_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 259823, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "wol_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 265355, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "wol_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 261447, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "wol_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 269491, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "wol_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 266145, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "wol_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 273461, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "wol_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 268200, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "wol_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 265277, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "wol_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 276496, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "wol_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 274237, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "wol_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 275324, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "wol_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 282350, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "wol_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 256956, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, 
+ "wol_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 277067, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "wol_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 263955, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "wol_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 275744, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "wol_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 264544, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "wol_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 260410, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "wol_Latn-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 214305, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "wol_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 259360, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "wol_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 267301, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "wol_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 283311, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "wol_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 193138, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "wol_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 251830, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "wol_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 274438, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "wol_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 281306, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "wol_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 265768, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "wol_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 235154, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "wol_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 266548, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "wol_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 262433, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "wol_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 264101, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "wol_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 257233, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "wol_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 273700, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "wol_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269269, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "wol_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 251590, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "wol_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 291919, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "wol_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 243480, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "wol_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264997, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "wol_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 284811, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + 
"average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "wol_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 184116, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "wol_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 273167, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "wol_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 290342, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "wol_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 269794, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "wol_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 251077, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "wol_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 280931, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "wol_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 272954, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "wol_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 271692, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "wol_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 257727, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "wol_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 264859, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + 
}, + "wol_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 274680, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "wol_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 255430, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "wol_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260455, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "wol_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 266296, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "wol_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 243601, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "wol_Latn-ceb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285858, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "wol_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 249335, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "wol_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 296570, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "wol_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 269370, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "wol_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 260616, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "wol_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 266964, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "wol_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 259643, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "wol_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 268831, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "wol_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 240736, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "wol_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 254430, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "wol_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 283591, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "wol_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 255529, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "wol_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 257502, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "wol_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 259311, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "wol_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 265744, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "wol_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273044, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "wol_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 254017, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "wol_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 240943, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "wol_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 266434, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "wol_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 289770, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "wol_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 265213, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "wol_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 268839, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "wol_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 254076, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "wol_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 315457, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "wol_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 293342, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "wol_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 167192, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + 
"max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "wol_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 253109, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "wol_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 255758, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "wol_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 279654, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "wol_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 253278, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "wol_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 257021, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, 
+ "wol_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 278282, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "wol_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 258253, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "wol_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 253282, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "wol_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 170421, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "wol_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 263536, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "wol_Latn-crh_Latn": { + "num_samples": 1012, + 
"number_of_characters": 261211, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "wol_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 273316, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "wol_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 253574, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "wol_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 274718, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "wol_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 269513, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "wol_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 258883, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "wol_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 217873, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "wol_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 167868, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "wol_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 255344, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "wol_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 268430, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "wol_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 259988, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "wol_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 271103, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "wol_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 270171, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "wol_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 274345, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "wol_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 258028, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "wol_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 296184, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "wol_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 273374, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "wol_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 265790, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "wol_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 262256, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "wol_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 253601, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "wol_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262577, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "wol_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 262251, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "wol_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 277824, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "wol_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 279897, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "wol_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 290208, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "wol_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 275535, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + 
"max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "wol_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 244234, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "wol_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 280982, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "wol_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 248453, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "wol_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 270130, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "wol_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 264719, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "wol_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 262012, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "wol_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 274776, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "wol_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 285351, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "wol_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 270912, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "wol_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 238962, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "wol_Latn-hau_Latn": { + "num_samples": 1012, + 
"number_of_characters": 267240, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "wol_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 257973, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "wol_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 273428, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "wol_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 257072, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "wol_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 246761, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "wol_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 267435, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "wol_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260395, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "wol_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 260028, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "wol_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 229227, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "wol_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264240, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "wol_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 261330, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "wol_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 260598, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "wol_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 278835, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "wol_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 299718, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.66501976284584, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "arb_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 265379, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "arb_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 280145, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "arb_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 317116, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "arb_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 284816, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "arb_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 306970, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "arb_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 280453, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "arb_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 292912, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arb_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 312490, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "arb_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 289519, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "arb_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 295691, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "arb_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 299328, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "arb_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 311297, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 
464, + "unique_sentence2": 1012 + }, + "arb_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 280691, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "arb_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 307189, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "arb_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 283166, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arb_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 276213, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "arb_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 310755, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arb_Latn-twi_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 282356, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "arb_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 267331, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "arb_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301749, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "arb_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 285724, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arb_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 282485, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arb_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 299620, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "arb_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 303094, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "arb_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 277585, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "arb_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 307105, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "arb_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 271718, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "arb_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 268871, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "arb_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 314509, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "arb_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 285050, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "arb_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 292387, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "arb_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289290, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "arb_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 286809, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + 
"max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "arb_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 314754, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "arb_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284538, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arb_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 294235, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "arb_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 265835, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "arb_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 282800, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "arb_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 283217, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "arb_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 299714, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "arb_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 299520, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "arb_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 280157, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "arb_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 293241, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "arb_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 302484, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "arb_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288204, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "arb_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 293448, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "arb_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 282369, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "arb_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 293580, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "arb_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 288318, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "arb_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 284130, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "arb_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 294921, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "arb_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 297176, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "arb_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 290604, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "arb_Latn-umb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 288505, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "arb_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 262804, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "arb_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 275988, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "arb_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 281206, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arb_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 313438, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "arb_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 269758, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "arb_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291585, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "arb_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 274401, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "arb_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 285931, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "arb_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 283677, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "arb_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 286408, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arb_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 291940, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arb_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 288032, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "arb_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 296076, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "arb_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 292730, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "arb_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 300046, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + 
"max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "arb_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 294785, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "arb_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 291862, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "arb_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 303081, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "arb_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 300822, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "arb_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 301909, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "arb_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 308935, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "arb_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 283541, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "arb_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 303652, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "arb_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 290540, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "arb_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 302329, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arb_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 291129, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "arb_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 286995, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "arb_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 240890, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "arb_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 285945, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "arb_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 293886, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + 
"unique_sentence2": 1012 + }, + "arb_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 309896, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "arb_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 219723, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "arb_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 278415, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "arb_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 301023, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "arb_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 307891, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "arb_Latn-vie_Latn": { + 
"num_samples": 1012, + "number_of_characters": 292353, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "arb_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 261739, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "arb_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 293133, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "arb_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 289018, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "arb_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 290686, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arb_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 283818, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arb_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 300285, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "arb_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295854, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arb_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 278175, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "arb_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 318504, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "arb_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 270065, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "arb_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291582, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "arb_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 311396, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "arb_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 210701, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "arb_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 299752, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "arb_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 316927, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + 
"max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "arb_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 296379, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "arb_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 277662, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "arb_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 280931, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "arb_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 299539, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arb_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 298277, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + 
"min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "arb_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 284312, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "arb_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 291444, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "arb_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 301265, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "arb_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 282015, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "arb_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287040, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "arb_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 292881, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "arb_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 270186, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "arb_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 312443, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "arb_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 275920, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "arb_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 323155, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + 
"unique_sentence2": 1012 + }, + "arb_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 295955, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "arb_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 287201, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "arb_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 293549, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "arb_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 286228, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "arb_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 295416, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "arb_Latn-ary_Arab": { + 
"num_samples": 1012, + "number_of_characters": 267321, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "arb_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 281015, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arb_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 310176, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "arb_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 282114, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "arb_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 284087, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "arb_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 285896, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "arb_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 292329, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "arb_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299629, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "arb_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 280602, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "arb_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 267528, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "arb_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 293019, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "arb_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 316355, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "arb_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 291798, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arb_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 295424, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "arb_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 280661, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arb_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 342042, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + 
"max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "arb_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 319927, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "arb_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 193777, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "arb_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 279694, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "arb_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 282343, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "arb_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 306239, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "arb_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 279863, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "arb_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 283606, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "arb_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 304867, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "arb_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 284838, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "arb_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 279867, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "arb_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 197006, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "arb_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 290121, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arb_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 287796, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "arb_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 299901, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "arb_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 280159, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + 
"unique_sentence2": 1012 + }, + "arb_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 301303, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "arb_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 296098, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "arb_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 285468, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "arb_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 244458, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "arb_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 194453, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "arb_Latn-awa_Deva": { + 
"num_samples": 1012, + "number_of_characters": 281929, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "arb_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 295015, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "arb_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 286573, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "arb_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 297688, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "arb_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 296756, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "arb_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 300930, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "arb_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 284613, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "arb_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 322769, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "arb_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 299959, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arb_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 292375, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "arb_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 288841, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "arb_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 280186, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "arb_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289162, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arb_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 288836, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "arb_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 304409, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "arb_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 306482, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + 
"max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "arb_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 316793, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "arb_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 302120, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "arb_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 270819, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "arb_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 307567, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "arb_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 275038, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "arb_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 296715, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "arb_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 291304, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "arb_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 288597, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "arb_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 301361, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "arb_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 311936, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "arb_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 297497, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "arb_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 265547, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "arb_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 293825, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "arb_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 284558, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "arb_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 300013, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + 
"unique_sentence2": 1012 + }, + "arb_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 283657, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "arb_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 273346, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "arb_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 294020, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arb_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286980, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "arb_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 286613, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "arb_Latn-heb_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 255812, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "arb_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290825, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "arb_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 287915, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "arb_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 287183, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "arb_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 305420, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "arb_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 326303, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 151.93478260869566, + "max_sentence1_length": 420, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "cat_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257402, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "cat_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272168, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "cat_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309139, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "cat_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 276839, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "cat_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 298993, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "cat_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 272476, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "cat_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 284935, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cat_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 304513, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "cat_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 281542, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "cat_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 287714, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "cat_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 291351, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "cat_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303320, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "cat_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 272714, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "cat_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299212, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "cat_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275189, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cat_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268236, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "cat_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 302778, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cat_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274379, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "cat_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 259354, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "cat_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293772, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "cat_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 277747, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cat_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 274508, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cat_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 291643, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "cat_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 295117, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "cat_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 269608, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + 
"unique_sentence2": 1012 + }, + "cat_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299128, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "cat_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 263741, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "cat_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 260894, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "cat_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 306532, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "cat_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 277073, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "cat_Latn-hun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 284410, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "cat_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281313, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "cat_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 278832, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "cat_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 306777, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "cat_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276561, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cat_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286258, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "cat_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 257858, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "cat_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 274823, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "cat_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275240, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "cat_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 291737, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "cat_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 291543, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "cat_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 272180, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "cat_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285264, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "cat_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 294507, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "cat_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280227, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "cat_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 285471, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "cat_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274392, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "cat_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 285603, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "cat_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280341, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "cat_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276153, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "cat_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 286944, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "cat_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289199, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "cat_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 282627, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "cat_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 280528, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "cat_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 254827, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "cat_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 268011, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "cat_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273229, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cat_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 305461, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "cat_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 261781, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "cat_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283608, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "cat_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266424, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + 
"unique_sentence2": 1012 + }, + "cat_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 277954, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "cat_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 275700, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "cat_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278431, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cat_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 283963, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cat_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 280055, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "cat_Latn-ind_Latn": { + 
"num_samples": 1012, + "number_of_characters": 288099, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "cat_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 284753, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "cat_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 292069, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "cat_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 286808, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "cat_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 283885, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "cat_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295104, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "cat_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 292845, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "cat_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 293932, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "cat_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 300958, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "cat_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 275564, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "cat_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 295675, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "cat_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 282563, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "cat_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 294352, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cat_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 283152, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "cat_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 279018, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "cat_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 232913, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "cat_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 277968, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "cat_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 285909, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "cat_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 301919, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "cat_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 211746, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "cat_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270438, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "cat_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293046, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "cat_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 299914, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "cat_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284376, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "cat_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 253762, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "cat_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285156, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "cat_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281041, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "cat_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 282709, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cat_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 275841, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cat_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 292308, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "cat_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287877, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "cat_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270198, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "cat_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 310527, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "cat_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262088, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "cat_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283605, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "cat_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 303419, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "cat_Latn-jpn_Jpan": { + 
"num_samples": 1012, + "number_of_characters": 202724, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "cat_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 291775, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "cat_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 308950, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "cat_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288402, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "cat_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 269685, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "cat_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 272954, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "cat_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 299539, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "cat_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290300, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "cat_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276335, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "cat_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283467, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "cat_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293288, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "cat_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 274038, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "cat_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279063, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "cat_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 284904, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "cat_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 262209, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "cat_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304466, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "cat_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 267943, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "cat_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315178, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "cat_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 287978, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "cat_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279224, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "cat_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 285572, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "cat_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278251, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "cat_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287439, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "cat_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 259344, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "cat_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273038, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cat_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302199, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "cat_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 274137, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "cat_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276110, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "cat_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 277919, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "cat_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284352, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "cat_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291652, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + 
"unique_sentence2": 1011 + }, + "cat_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 272625, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "cat_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 259551, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "cat_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285042, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "cat_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308378, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "cat_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 283821, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cat_Latn-lmo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 287447, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "cat_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 272684, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cat_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334065, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "cat_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 311950, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "cat_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 185800, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "cat_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 271717, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "cat_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274366, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "cat_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298262, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "cat_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 271886, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "cat_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 275629, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "cat_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 296890, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "cat_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 276861, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "cat_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 271890, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "cat_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 189029, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "cat_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282144, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cat_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 279819, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "cat_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 291924, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "cat_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272182, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "cat_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293326, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "cat_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288121, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "cat_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277491, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "cat_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236481, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "cat_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186476, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "cat_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 273952, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "cat_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 287038, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "cat_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 278596, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "cat_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 289711, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "cat_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 288779, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "cat_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 292953, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "cat_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 276636, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "cat_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 314792, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + 
"unique_sentence2": 1012 + }, + "cat_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 291982, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cat_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284398, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "cat_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 280864, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "cat_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272209, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "cat_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281185, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cat_Latn-lug_Latn": { + 
"num_samples": 1012, + "number_of_characters": 280859, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "cat_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 296432, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "cat_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 298505, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "cat_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 308816, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "cat_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294143, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "cat_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 262842, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "cat_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 299590, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "cat_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267061, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "cat_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 288738, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "cat_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283327, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "cat_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 280620, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "cat_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 293384, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "cat_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 303959, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "cat_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 289520, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "cat_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 257570, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "cat_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 285848, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "cat_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 276581, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "cat_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 292036, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "cat_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 275680, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "cat_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265369, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "cat_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286043, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cat_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279003, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "cat_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 278636, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "cat_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 247835, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "cat_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282848, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "cat_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 279938, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "cat_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 279206, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "cat_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297443, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "cat_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 318326, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 144.05237154150197, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "fur_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 256140, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "fur_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 270906, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + 
"unique_sentence2": 1009 + }, + "fur_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 307877, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fur_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 275577, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "fur_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 297731, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "fur_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 271214, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "fur_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 283673, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fur_Latn-sot_Latn": { + 
"num_samples": 1012, + "number_of_characters": 303251, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "fur_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 280280, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "fur_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 286452, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fur_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 290089, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fur_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 302058, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "fur_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 271452, + "unique_pairs": 
1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "fur_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 297950, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "fur_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 273927, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fur_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 266974, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fur_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 301516, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fur_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 273117, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "fur_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 258092, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "fur_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292510, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fur_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 276485, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fur_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 273246, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fur_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 290381, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fur_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 293855, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fur_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 268346, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "fur_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 297866, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fur_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 262479, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "fur_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 259632, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + 
"min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "fur_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 305270, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "fur_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 275811, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fur_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 283148, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "fur_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280051, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fur_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 277570, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fur_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 305515, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "fur_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275299, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fur_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 284996, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "fur_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 256596, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "fur_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 273561, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + 
"unique_sentence2": 1012 + }, + "fur_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 273978, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "fur_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 290475, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fur_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 290281, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "fur_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 270918, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fur_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 284002, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fur_Latn-ssw_Latn": { + 
"num_samples": 1012, + "number_of_characters": 293245, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fur_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278965, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fur_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 284209, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fur_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 273130, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fur_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 284341, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fur_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 279079, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fur_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 274891, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fur_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 285682, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fur_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 287937, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fur_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 281365, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fur_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 279266, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fur_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 253565, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "fur_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 266749, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "fur_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 271967, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fur_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 304199, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "fur_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 260519, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "fur_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282346, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "fur_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 265162, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "fur_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 276692, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fur_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 274438, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fur_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 277169, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fur_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 282701, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fur_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 278793, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fur_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 286837, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fur_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 283491, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fur_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 290807, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "fur_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 285546, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fur_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 282623, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "fur_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 293842, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fur_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 291583, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "fur_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 292670, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + 
"unique_sentence2": 1009 + }, + "fur_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 299696, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "fur_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 274302, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fur_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 294413, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fur_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 281301, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fur_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 293090, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fur_Latn-szl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 281890, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "fur_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 277756, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fur_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 231651, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "fur_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 276706, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fur_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 284647, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fur_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 300657, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "fur_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 210484, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "fur_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 269176, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "fur_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 291784, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "fur_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 298652, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "fur_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 283114, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "fur_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 252500, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "fur_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 283894, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fur_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 279779, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "fur_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 281447, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fur_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 274579, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fur_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 291046, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fur_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286615, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fur_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 268936, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fur_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 309265, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fur_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 260826, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fur_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282343, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fur_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 302157, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "fur_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 201462, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "fur_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 290513, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fur_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 307688, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 
161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fur_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 287140, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "fur_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 268423, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fur_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 271692, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fur_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 298277, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "fur_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 290300, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + 
"unique_sentence2": 1012 + }, + "fur_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 275073, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "fur_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 282205, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "fur_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 292026, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fur_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 272776, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "fur_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277801, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fur_Latn-xho_Latn": { + 
"num_samples": 1012, + "number_of_characters": 283642, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "fur_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 260947, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fur_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 303204, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fur_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 266681, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "fur_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 313916, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "fur_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 286716, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fur_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 277962, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fur_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 284310, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fur_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 276989, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fur_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 286177, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fur_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 258082, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "fur_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 271776, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fur_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 300937, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "fur_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 272875, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "fur_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 274848, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "fur_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 276657, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "fur_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 283090, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fur_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290390, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "fur_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 271363, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fur_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 258289, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "fur_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 283780, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fur_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 307116, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "fur_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 282559, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fur_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 286185, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fur_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 271422, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fur_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 332803, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 
186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "fur_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 310688, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fur_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 184538, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "fur_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 270455, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fur_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 273104, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fur_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 297000, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + 
"unique_sentence2": 1012 + }, + "fur_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 270624, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "fur_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 274367, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fur_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 295628, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "fur_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 275599, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fur_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 270628, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fur_Latn-zho_Hans": { + 
"num_samples": 1012, + "number_of_characters": 187767, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "fur_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 280882, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fur_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 278557, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fur_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 290662, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fur_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 270920, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "fur_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 292064, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "fur_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 286859, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "fur_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 276229, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fur_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 235219, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "fur_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 185214, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "fur_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 272690, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fur_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 285776, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fur_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 277334, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "fur_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 288449, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fur_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 287517, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "fur_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 291691, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fur_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 275374, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fur_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 313530, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "fur_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 290720, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fur_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 283136, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "fur_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 279602, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + 
"min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fur_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 270947, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fur_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279923, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fur_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 279597, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "fur_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 295170, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fur_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 297243, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 
150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "fur_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 307554, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fur_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 292881, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "fur_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 261580, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "fur_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 298328, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fur_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 265799, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + 
"unique_sentence2": 1010 + }, + "fur_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 287476, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "fur_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 282065, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fur_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 279358, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fur_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 292122, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "fur_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 302697, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fur_Latn-azj_Latn": { + 
"num_samples": 1012, + "number_of_characters": 288258, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "fur_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 256308, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "fur_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 284586, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "fur_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 275319, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fur_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 290774, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fur_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 274418, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "fur_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 264107, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "fur_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 284781, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fur_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277741, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "fur_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 277374, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "fur_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 246573, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "fur_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281586, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fur_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 278676, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fur_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 277944, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fur_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 296181, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + "max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "fur_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 317064, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 142.80533596837944, + 
"max_sentence1_length": 399, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kab_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 242175, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kab_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 256941, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kab_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 293912, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kab_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 261612, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kab_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 283766, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + 
"min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kab_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 257249, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kab_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 269708, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kab_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 289286, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kab_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 266315, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kab_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 272487, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kab_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 276124, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kab_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 288093, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kab_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 257487, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kab_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 283985, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kab_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 259962, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "kab_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 253009, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kab_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 287551, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kab_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 259152, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kab_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 244127, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kab_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278545, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kab_Latn-eng_Latn": { + 
"num_samples": 1012, + "number_of_characters": 262520, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kab_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 259281, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kab_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 276416, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kab_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 279890, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kab_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 254381, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kab_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 283901, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kab_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 248514, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kab_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 245667, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kab_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 291305, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kab_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 261846, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kab_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 269183, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kab_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266086, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kab_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 263605, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kab_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 291550, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kab_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261334, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kab_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 271031, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + 
"max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kab_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 242631, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kab_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 259596, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kab_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 260013, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kab_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 276510, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kab_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 276316, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kab_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 256953, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kab_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 270037, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kab_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 279280, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kab_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265000, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kab_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 270244, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kab_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 259165, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kab_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 270376, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kab_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 265114, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kab_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 260926, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kab_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 271717, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + 
"unique_sentence2": 1011 + }, + "kab_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 273972, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kab_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 267400, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kab_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 265301, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kab_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 239600, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kab_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 252784, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kab_Latn-ewe_Latn": { + 
"num_samples": 1012, + "number_of_characters": 258002, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kab_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 290234, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kab_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 246554, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kab_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268381, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kab_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 251197, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kab_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 262727, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kab_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 260473, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kab_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 263204, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kab_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 268736, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kab_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 264828, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kab_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 272872, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kab_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 269526, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kab_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 276842, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kab_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 271581, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kab_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 268658, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kab_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 279877, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + 
"max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kab_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 277618, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kab_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 278705, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kab_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 285731, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kab_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 260337, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kab_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 280448, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kab_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 267336, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kab_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 279125, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kab_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 267925, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kab_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 263791, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kab_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 217686, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 
86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kab_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 262741, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kab_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 270682, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kab_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 286692, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kab_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 196519, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kab_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 255211, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + 
"unique_sentence2": 1012 + }, + "kab_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 277819, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kab_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 284687, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kab_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 269149, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kab_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 238535, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kab_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 269929, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kab_Latn-fon_Latn": { + 
"num_samples": 1012, + "number_of_characters": 265814, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kab_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 267482, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kab_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 260614, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kab_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 277081, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kab_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272650, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kab_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 254971, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kab_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 295300, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kab_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 246861, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kab_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268378, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kab_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 288192, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kab_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 187497, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kab_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 276548, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kab_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 293723, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kab_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 273175, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kab_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 254458, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kab_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 257727, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + 
"max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kab_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 284312, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kab_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 276335, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kab_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 275073, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kab_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 268240, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kab_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 278061, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kab_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 258811, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kab_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263836, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kab_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 269677, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kab_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 246982, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kab_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 289239, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 
156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kab_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 252716, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kab_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 299951, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kab_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 272751, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kab_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 263997, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kab_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 270345, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + 
"unique_sentence2": 1012 + }, + "kab_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 263024, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kab_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 272212, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kab_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 244117, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kab_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 257811, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kab_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 286972, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kab_Latn-kam_Latn": { + 
"num_samples": 1012, + "number_of_characters": 258910, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kab_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 260883, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kab_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 262692, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kab_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 269125, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kab_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276425, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kab_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 257398, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kab_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 244324, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kab_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 269815, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kab_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 293151, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kab_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 268594, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kab_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 272220, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kab_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 257457, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kab_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 318838, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kab_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 296723, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kab_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 170573, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kab_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 256490, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + 
"max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kab_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 259139, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kab_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 283035, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kab_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 256659, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kab_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 260402, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kab_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 281663, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kab_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 261634, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kab_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 256663, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kab_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 173802, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kab_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 266917, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kab_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 264592, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 
132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kab_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 276697, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kab_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 256955, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kab_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 278099, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kab_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 272894, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kab_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 262264, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + 
"unique_sentence2": 1012 + }, + "kab_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 221254, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kab_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 171249, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kab_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 258725, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kab_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 271811, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kab_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 263369, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kab_Latn-kat_Geor": { + 
"num_samples": 1012, + "number_of_characters": 274484, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kab_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 273552, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kab_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 277726, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kab_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 261409, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kab_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 299565, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kab_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 276755, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kab_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 269171, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kab_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 265637, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kab_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 256982, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kab_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265958, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kab_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 265632, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kab_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 281205, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kab_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 283278, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kab_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 293589, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kab_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 278916, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kab_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 247615, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + 
"max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kab_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 284363, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kab_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 251834, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kab_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 273511, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kab_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 268100, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kab_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 265393, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kab_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 278157, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kab_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 288732, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kab_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 274293, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kab_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 242343, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kab_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 270621, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kab_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 261354, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kab_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 276809, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kab_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 260453, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kab_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 250142, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kab_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 270816, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + 
"unique_sentence2": 1012 + }, + "kab_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263776, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kab_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 263409, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kab_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 232608, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kab_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267621, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kab_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 264711, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kab_Latn-pan_Guru": { + 
"num_samples": 1012, + "number_of_characters": 263979, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kab_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 282216, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kab_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 303099, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 129.00592885375494, + "max_sentence1_length": 358, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "lim_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 249307, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "lim_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 264073, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "lim_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 301044, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lim_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 268744, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "lim_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 290898, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "lim_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 264381, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "lim_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 276840, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lim_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 296418, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "lim_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 273447, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "lim_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 279619, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lim_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 283256, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lim_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 295225, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "lim_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 264619, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 
416, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "lim_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 291117, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "lim_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 267094, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lim_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 260141, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lim_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 294683, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lim_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 266284, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 
28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "lim_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 251259, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "lim_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285677, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lim_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 269652, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lim_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 266413, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lim_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 283548, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lim_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 287022, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lim_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 261513, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "lim_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 291033, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lim_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 255646, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "lim_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 252799, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, 
+ "lim_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 298437, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "lim_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 268978, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lim_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 276315, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "lim_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273218, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lim_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 270737, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lim_Latn-plt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 298682, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "lim_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268466, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lim_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 278163, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "lim_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 249763, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "lim_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 266728, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lim_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 267145, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "lim_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 283642, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lim_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 283448, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "lim_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 264085, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lim_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 277169, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lim_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 286412, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lim_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272132, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lim_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 277376, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lim_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 266297, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lim_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 277508, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lim_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 272246, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lim_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 268058, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lim_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 278849, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lim_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 281104, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lim_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 274532, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lim_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 272433, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lim_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 246732, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "lim_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 259916, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "lim_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 265134, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lim_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 297366, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "lim_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 253686, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + 
"max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "lim_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275513, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "lim_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 258329, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "lim_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 269859, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lim_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 267605, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lim_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 270336, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + 
}, + "lim_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 275868, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lim_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 271960, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lim_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 280004, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lim_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 276658, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lim_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 283974, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "lim_Latn-quy_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278713, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lim_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 275790, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "lim_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 287009, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lim_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 284750, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lim_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 285837, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "lim_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 292863, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "lim_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 267469, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lim_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 287580, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lim_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 274468, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lim_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 286257, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lim_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 275057, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "lim_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 270923, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lim_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 224818, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "lim_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 269873, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lim_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 277814, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lim_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 293824, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "lim_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 203651, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "lim_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 262343, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "lim_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 284951, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "lim_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 291819, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "lim_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 276281, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "lim_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 245667, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "lim_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 277061, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lim_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 272946, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "lim_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 274614, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lim_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 267746, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lim_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 284213, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lim_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279782, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lim_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 262103, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lim_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 302432, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lim_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 253993, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, 
+ "lim_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275510, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lim_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 295324, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "lim_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 194629, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "lim_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 283680, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lim_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 300855, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lim_Latn-sag_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280307, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "lim_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 261590, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lim_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 264859, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lim_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 291444, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "lim_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 283467, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lim_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 282205, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lim_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 268240, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "lim_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 285193, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lim_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 265943, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "lim_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270968, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lim_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 276809, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "lim_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 254114, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lim_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 296371, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lim_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 259848, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "lim_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 307083, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "lim_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 279883, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lim_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 271129, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lim_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 277477, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lim_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 270156, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lim_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 279344, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lim_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 251249, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "lim_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 264943, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lim_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 294104, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "lim_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 266042, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "lim_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 268015, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "lim_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 269824, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + 
"max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "lim_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 276257, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lim_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283557, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "lim_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 264530, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lim_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 251456, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "lim_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 276947, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + 
"lim_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 300283, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "lim_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 275726, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lim_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 279352, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lim_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 264589, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lim_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 325970, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "lim_Latn-tgl_Latn": { + "num_samples": 1012, + 
"number_of_characters": 303855, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lim_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 177705, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "lim_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 263622, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lim_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 266271, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lim_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 290167, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "lim_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 263791, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "lim_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 267534, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lim_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 288795, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "lim_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 268766, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lim_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 263795, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lim_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 180934, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "lim_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 274049, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lim_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 271724, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lim_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 283829, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lim_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 264087, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "lim_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 285231, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "lim_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 280026, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "lim_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 269396, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lim_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 228386, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "lim_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 178381, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "lim_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 265857, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lim_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 278943, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lim_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 270501, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "lim_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 281616, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lim_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 280684, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "lim_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 284858, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + 
"max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lim_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 268541, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lim_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 306697, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "lim_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 283887, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lim_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 276303, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "lim_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 272769, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, 
+ "lim_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 264114, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lim_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273090, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lim_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 272764, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "lim_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 288337, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lim_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 290410, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "lim_Latn-tsn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 300721, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lim_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 286048, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "lim_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 254747, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "lim_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 291495, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lim_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 258966, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "lim_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 280643, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "lim_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 275232, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lim_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 272525, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lim_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 285289, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "lim_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 295864, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lim_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 281425, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "lim_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 249475, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "lim_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 277753, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "lim_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 268486, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lim_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 283941, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lim_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 267585, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "lim_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 257274, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "lim_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 277948, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lim_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270908, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "lim_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 270541, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "lim_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 239740, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "lim_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274753, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lim_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 271843, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lim_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 271111, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lim_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 289348, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "lim_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 310231, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.05335968379447, + "max_sentence1_length": 416, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + 
"max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "nld_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 259128, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "nld_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 273894, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "nld_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 310865, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nld_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 278565, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "nld_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 300719, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + 
"nld_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 274202, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "nld_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 286661, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nld_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 306239, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "nld_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 283268, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "nld_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 289440, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nld_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 293077, 
+ "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nld_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 305046, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "nld_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 274440, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "nld_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 300938, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "nld_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 276915, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nld_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 269962, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nld_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 304504, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nld_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 276105, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "nld_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 261080, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "nld_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295498, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nld_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 279473, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nld_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 276234, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nld_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 293369, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nld_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 296843, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nld_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 271334, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "nld_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 300854, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nld_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 265467, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "nld_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 262620, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "nld_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 308258, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "nld_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 278799, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nld_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 286136, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "nld_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283039, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nld_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 280558, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nld_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 308503, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "nld_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278287, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nld_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 287984, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + 
"unique_sentence2": 1011 + }, + "nld_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 259584, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "nld_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 276549, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nld_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 276966, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "nld_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 293463, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nld_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 293269, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "nld_Latn-min_Arab": { + 
"num_samples": 1012, + "number_of_characters": 273906, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nld_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 286990, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nld_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 296233, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nld_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281953, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nld_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 287197, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nld_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 276118, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nld_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 287329, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "nld_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 282067, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nld_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 277879, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nld_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 288670, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nld_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 290925, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nld_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 284353, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nld_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 282254, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nld_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 256553, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "nld_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 269737, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "nld_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 274955, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nld_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 307187, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "nld_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 263507, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "nld_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285334, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "nld_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 268150, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "nld_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 279680, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nld_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 277426, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nld_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 280157, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nld_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 285689, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nld_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 281781, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nld_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 289825, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nld_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 286479, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nld_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 293795, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "nld_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 288534, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nld_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 285611, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "nld_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 296830, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + 
"unique_sentence2": 1012 + }, + "nld_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 294571, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nld_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 295658, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "nld_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 302684, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "nld_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 277290, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nld_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 297401, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nld_Latn-mni_Beng": { + 
"num_samples": 1012, + "number_of_characters": 284289, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nld_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 296078, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nld_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 284878, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "nld_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 280744, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nld_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 234639, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "nld_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 279694, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nld_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 287635, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nld_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 303645, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "nld_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 213472, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "nld_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 272164, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "nld_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 294772, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "nld_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 301640, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "nld_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 286102, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "nld_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 255488, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "nld_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 286882, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nld_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 282767, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "nld_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 284435, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nld_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 277567, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nld_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 294034, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nld_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289603, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nld_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 271924, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nld_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 312253, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nld_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 263814, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nld_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285331, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nld_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 305145, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "nld_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 204450, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 
56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "nld_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 293501, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nld_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 310676, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nld_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 290128, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "nld_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 271411, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nld_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 274680, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + 
"unique_sentence2": 1012 + }, + "nld_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 301265, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "nld_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 293288, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nld_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 292026, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nld_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 278061, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "nld_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 285193, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "nld_Latn-san_Deva": { + 
"num_samples": 1012, + "number_of_characters": 275764, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "nld_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280789, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nld_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 286630, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "nld_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 263935, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nld_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 306192, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nld_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 269669, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "nld_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 316904, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "nld_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 289704, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nld_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 280950, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nld_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 287298, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nld_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 279977, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nld_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 289165, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nld_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 261070, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "nld_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 274764, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nld_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 303925, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "nld_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 275863, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "nld_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 277836, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "nld_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 279645, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "nld_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 286078, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nld_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293378, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "nld_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 274351, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nld_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 261277, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "nld_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 286768, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nld_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 310104, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "nld_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 285547, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nld_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 289173, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nld_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 274410, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nld_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 335791, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "nld_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 313676, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nld_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 187526, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "nld_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 273443, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + 
"unique_sentence2": 1012 + }, + "nld_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 276092, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nld_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 299988, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "nld_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 273612, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "nld_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 277355, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nld_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 298616, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "nld_Latn-sin_Sinh": { + 
"num_samples": 1012, + "number_of_characters": 278587, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nld_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 273616, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nld_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 190755, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "nld_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 283870, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nld_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 281545, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nld_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 293650, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nld_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 273908, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "nld_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 295052, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "nld_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 289847, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "nld_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 279217, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nld_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 238207, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "nld_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 188202, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "nld_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 275678, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nld_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 288764, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nld_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 280322, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "nld_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 291437, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nld_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 290505, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "nld_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 294679, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "nld_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 278362, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nld_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 316518, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "nld_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 293708, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nld_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 286124, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "nld_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 282590, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nld_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 273935, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nld_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282911, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nld_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 282585, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "nld_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 298158, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nld_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 300231, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "nld_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 310542, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nld_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 295869, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "nld_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 264568, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + 
"unique_sentence2": 1012 + }, + "nld_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 301316, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nld_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 268787, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "nld_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 290464, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "nld_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 285053, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nld_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 282346, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nld_Latn-sna_Latn": { + 
"num_samples": 1012, + "number_of_characters": 295110, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "nld_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 305685, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nld_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 291246, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "nld_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 259296, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "nld_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 287574, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "nld_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 278307, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nld_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 293762, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nld_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 277406, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "nld_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 267095, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "nld_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 287769, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nld_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280729, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "nld_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 280362, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "nld_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 249561, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "nld_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284574, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nld_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 281664, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nld_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 280932, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nld_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 299169, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "nld_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 320052, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 145.7579051383399, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "san_Deva-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 239878, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "san_Deva-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 254644, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "san_Deva-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 291615, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + 
"min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "san_Deva-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 259315, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "san_Deva-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 281469, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "san_Deva-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 254952, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "san_Deva-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 267411, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "san_Deva-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 286989, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 
156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "san_Deva-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 264018, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "san_Deva-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 270190, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "san_Deva-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 273827, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "san_Deva-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 285796, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "san_Deva-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 255190, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 
1011 + }, + "san_Deva-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 281688, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "san_Deva-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 257665, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "san_Deva-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 250712, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "san_Deva-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 285254, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "san_Deva-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 256855, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "san_Deva-acm_Arab": { + "num_samples": 1012, + 
"number_of_characters": 241830, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "san_Deva-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276248, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "san_Deva-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 260223, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "san_Deva-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 256984, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "san_Deva-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 274119, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "san_Deva-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 277593, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "san_Deva-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 252084, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "san_Deva-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 281604, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "san_Deva-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 246217, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "san_Deva-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 243370, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "san_Deva-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 289008, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "san_Deva-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 259549, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "san_Deva-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 266886, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "san_Deva-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263789, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "san_Deva-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 261308, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "san_Deva-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 289253, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + 
"unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "san_Deva-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259037, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "san_Deva-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 268734, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "san_Deva-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 240334, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "san_Deva-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 257299, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "san_Deva-est_Latn": { + "num_samples": 1012, + "number_of_characters": 257716, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "san_Deva-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 274213, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "san_Deva-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 274019, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "san_Deva-min_Arab": { + "num_samples": 1012, + "number_of_characters": 254656, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "san_Deva-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 267740, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "san_Deva-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 276983, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "san_Deva-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262703, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "san_Deva-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 267947, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "san_Deva-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 256868, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "san_Deva-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 268079, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "san_Deva-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 262817, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "san_Deva-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 258629, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "san_Deva-min_Latn": { + "num_samples": 1012, + "number_of_characters": 269420, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "san_Deva-por_Latn": { + "num_samples": 1012, + "number_of_characters": 271675, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "san_Deva-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 265103, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "san_Deva-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 263004, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "san_Deva-ajp_Arab": { + "num_samples": 1012, + 
"number_of_characters": 237303, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "san_Deva-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 250487, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "san_Deva-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 255705, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "san_Deva-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 287937, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "san_Deva-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 244257, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "san_Deva-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266084, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "san_Deva-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 248900, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "san_Deva-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 260430, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "san_Deva-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 258176, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "san_Deva-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 260907, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "san_Deva-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 266439, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "san_Deva-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 262531, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "san_Deva-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 270575, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "san_Deva-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 267229, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "san_Deva-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 274545, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "san_Deva-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 269284, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + 
"unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "san_Deva-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 266361, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "san_Deva-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 277580, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "san_Deva-als_Latn": { + "num_samples": 1012, + "number_of_characters": 275321, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "san_Deva-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 276408, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "san_Deva-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 283434, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + 
"average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "san_Deva-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 258040, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "san_Deva-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 278151, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "san_Deva-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 265039, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "san_Deva-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 276828, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "san_Deva-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 265628, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + 
"max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "san_Deva-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 261494, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "san_Deva-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 215389, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "san_Deva-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 260444, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "san_Deva-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 268385, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "san_Deva-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 284395, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, 
+ "san_Deva-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 194222, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "san_Deva-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 252914, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "san_Deva-run_Latn": { + "num_samples": 1012, + "number_of_characters": 275522, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "san_Deva-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 282390, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "san_Deva-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 266852, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "san_Deva-apc_Arab": { + "num_samples": 1012, + 
"number_of_characters": 236238, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "san_Deva-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 267632, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "san_Deva-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 263517, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "san_Deva-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 265185, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "san_Deva-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 258317, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "san_Deva-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 274784, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "san_Deva-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270353, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "san_Deva-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 252674, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "san_Deva-war_Latn": { + "num_samples": 1012, + "number_of_characters": 293003, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "san_Deva-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 244564, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "san_Deva-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266081, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "san_Deva-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 285895, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "san_Deva-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 185200, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "san_Deva-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 274251, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "san_Deva-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 291426, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "san_Deva-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 270878, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + 
"unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "san_Deva-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 252161, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "san_Deva-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 255430, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "san_Deva-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 282015, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "san_Deva-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 274038, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "san_Deva-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 272776, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + 
"average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "san_Deva-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 258811, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "san_Deva-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 265943, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "san_Deva-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 275764, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "san_Deva-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261539, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "san_Deva-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 267380, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "san_Deva-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 244685, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "san_Deva-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 286942, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "san_Deva-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 250419, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "san_Deva-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 297654, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "san_Deva-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 270454, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, 
+ "san_Deva-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 261700, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "san_Deva-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 268048, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "san_Deva-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 260727, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "san_Deva-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 269915, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "san_Deva-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 241820, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "san_Deva-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 255514, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "san_Deva-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 284675, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "san_Deva-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 256613, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "san_Deva-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 258586, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "san_Deva-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 260395, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "san_Deva-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 266828, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "san_Deva-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274128, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "san_Deva-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 255101, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "san_Deva-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 242027, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "san_Deva-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 267518, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "san_Deva-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 290854, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "san_Deva-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 266297, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "san_Deva-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 269923, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "san_Deva-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 255160, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "san_Deva-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 316541, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "san_Deva-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 294426, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + 
"unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "san_Deva-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 168276, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "san_Deva-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 254193, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "san_Deva-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 256842, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "san_Deva-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 280738, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "san_Deva-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 254362, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + 
"average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "san_Deva-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 258105, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "san_Deva-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 279366, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "san_Deva-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 259337, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "san_Deva-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 254366, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "san_Deva-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 171505, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + 
"max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "san_Deva-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 264620, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "san_Deva-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 262295, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "san_Deva-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 274400, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "san_Deva-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 254658, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "san_Deva-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 275802, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, 
+ "san_Deva-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 270597, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "san_Deva-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 259967, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "san_Deva-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 218957, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "san_Deva-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 168952, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "san_Deva-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 256428, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "san_Deva-cym_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269514, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "san_Deva-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 261072, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "san_Deva-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 272187, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "san_Deva-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 271255, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "san_Deva-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 275429, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "san_Deva-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 259112, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "san_Deva-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 297268, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "san_Deva-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 274458, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "san_Deva-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 266874, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "san_Deva-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 263340, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "san_Deva-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 254685, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "san_Deva-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263661, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "san_Deva-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 263335, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "san_Deva-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 278908, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "san_Deva-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 280981, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "san_Deva-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 291292, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + 
"unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "san_Deva-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 276619, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "san_Deva-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 245318, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "san_Deva-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 282066, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "san_Deva-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 249537, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "san_Deva-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 271214, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + 
"average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "san_Deva-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 265803, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "san_Deva-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 263096, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "san_Deva-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 275860, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "san_Deva-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 286435, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "san_Deva-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 271996, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + 
"max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "san_Deva-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 240046, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "san_Deva-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 268324, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "san_Deva-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 259057, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "san_Deva-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 274512, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "san_Deva-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 258156, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, 
+ "san_Deva-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 247845, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "san_Deva-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 268519, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "san_Deva-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261479, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "san_Deva-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 261112, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "san_Deva-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 230311, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "san_Deva-khk_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 265324, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "san_Deva-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 262414, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "san_Deva-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 261682, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "san_Deva-som_Latn": { + "num_samples": 1012, + "number_of_characters": 279919, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "san_Deva-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 300802, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.73616600790514, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 244903, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 259669, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tat_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 296640, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tat_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 264340, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tat_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 286494, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tat_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 259977, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tat_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 272436, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tat_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 292014, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tat_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 269043, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 275215, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 278852, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 
1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 290821, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tat_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 260215, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tat_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 286713, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tat_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 262690, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tat_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 255737, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tat_Cyrl-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 290279, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tat_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 261880, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tat_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 246855, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281273, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tat_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 265248, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tat_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 262009, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 279144, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tat_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 282618, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tat_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 257109, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tat_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 286629, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"tat_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 251242, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tat_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 248395, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 294033, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tat_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 264574, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tat_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 271911, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kir_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 268814, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tat_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 266333, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tat_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 294278, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tat_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264062, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tat_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 273759, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tat_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 245359, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 262324, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tat_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 262741, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tat_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 279238, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 279044, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tat_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 259681, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tat_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 272765, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 282008, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267728, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tat_Cyrl-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 272972, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 261893, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tat_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 273104, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 267842, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 263654, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tat_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 274445, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tat_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 276700, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tat_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 270128, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tat_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 268029, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 242328, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 255512, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 260730, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 292962, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tat_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 249282, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tat_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271109, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tat_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 253925, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tat_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 265455, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + 
}, + "tat_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 263201, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tat_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 265932, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 271464, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tat_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 267556, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 275600, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tat_Cyrl-knc_Latn": { + "num_samples": 1012, + 
"number_of_characters": 272254, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tat_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 279570, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tat_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 274309, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tat_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 271386, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tat_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 282605, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tat_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 280346, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 281433, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tat_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 288459, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tat_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 263065, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 283176, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tat_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 270064, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 281853, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tat_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 270653, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tat_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 266519, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tat_Cyrl-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 220414, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 265469, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tat_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 273410, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 289420, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 199247, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tat_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 257939, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tat_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 280547, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tat_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 287415, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tat_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 271877, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tat_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 241263, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 272657, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tat_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 268542, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + 
"max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tat_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 270210, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tat_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 263342, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tat_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 279809, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tat_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275378, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tat_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 257699, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, 
+ "tat_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 298028, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tat_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 249589, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271106, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tat_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 290920, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tat_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 190225, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tat_Cyrl-lij_Latn": { + "num_samples": 1012, + 
"number_of_characters": 279276, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tat_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 296451, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tat_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 275903, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tat_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 257186, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tat_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 260455, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tat_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 287040, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tat_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 279063, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tat_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 277801, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 263836, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tat_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 270968, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tat_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 280789, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tat_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 261539, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tat_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 272405, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 249710, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 291967, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tat_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 255444, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 302679, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tat_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 275479, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tat_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 266725, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tat_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 273073, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tat_Cyrl-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 265752, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 274940, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 246845, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tat_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 260539, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tat_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 289700, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 261638, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tat_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 263611, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tat_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 265420, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tat_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 271853, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tat_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279153, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tat_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 260126, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "tat_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 247052, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tat_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 272543, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tat_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 295879, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tat_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 271322, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tat_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 274948, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tat_Cyrl-npi_Deva": { + "num_samples": 1012, + 
"number_of_characters": 260185, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tat_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 321566, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tat_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 299451, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tat_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 173301, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tat_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 259218, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 261867, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tat_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 285763, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 259387, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 263130, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tat_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 284391, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tat_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 264362, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tat_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 259391, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tat_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 176530, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 269645, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tat_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 267320, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tat_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 279425, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 259683, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 280827, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tat_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 275622, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tat_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 264992, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tat_Cyrl-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 223982, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tat_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 173977, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tat_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 261453, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tat_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 274539, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tat_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 266097, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tat_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 277212, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tat_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 276280, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tat_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 280454, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tat_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 264137, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tat_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 302293, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tat_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 279483, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "tat_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 271899, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tat_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 268365, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tat_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 259710, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268686, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tat_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 268360, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tat_Cyrl-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283933, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tat_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 286006, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tat_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 296317, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tat_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 281644, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tat_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 250343, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tat_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 287091, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tat_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 254562, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tat_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 276239, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tat_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 270828, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tat_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 268121, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tat_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 280885, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tat_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 291460, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tat_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 277021, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tat_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 245071, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tat_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 273349, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tat_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 264082, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tat_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 279537, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tat_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 263181, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tat_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 252870, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tat_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 273544, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tat_Cyrl-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266504, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tat_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 266137, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tat_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 235336, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tat_Cyrl-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270349, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tat_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 267439, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tat_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 266707, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tat_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 284944, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tat_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 305827, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.70158102766797, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "xho_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 250744, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "xho_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 265510, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "xho_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 302481, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, 
+ "xho_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 270181, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "xho_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 292335, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "xho_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 265818, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "xho_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 278277, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "xho_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 297855, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "xho_Latn-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 274884, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "xho_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 281056, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "xho_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 284693, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "xho_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 296662, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "xho_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 266056, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "xho_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 292554, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "xho_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 268531, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "xho_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 261578, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "xho_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 296120, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "xho_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 267721, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "xho_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 252696, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "xho_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287114, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "xho_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 271089, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "xho_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 267850, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "xho_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 284985, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "xho_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 288459, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "xho_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 262950, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "xho_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 292470, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "xho_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 257083, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "xho_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 254236, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "xho_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 299874, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "xho_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 270415, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "xho_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 277752, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "xho_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274655, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "xho_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 272174, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "xho_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 300119, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + 
"max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "xho_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269903, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "xho_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 279600, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "xho_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 251200, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "xho_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 268165, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "xho_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 268582, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + 
}, + "xho_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 285079, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "xho_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 284885, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "xho_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 265522, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "xho_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 278606, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "xho_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 287849, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "xho_Latn-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 273569, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "xho_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 278813, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "xho_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 267734, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "xho_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 278945, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "xho_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 273683, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "xho_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 269495, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "xho_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 280286, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "xho_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 282541, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "xho_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 275969, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "xho_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 273870, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "xho_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 248169, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "xho_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 261353, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "xho_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 266571, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "xho_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 298803, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "xho_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 255123, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "xho_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276950, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "xho_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 259766, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "xho_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 271296, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "xho_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 269042, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "xho_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 271773, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "xho_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 277305, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "xho_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 273397, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "xho_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 281441, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "xho_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 278095, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "xho_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 285411, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "xho_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 280150, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + 
"max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "xho_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 277227, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "xho_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 288446, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "xho_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 286187, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "xho_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 287274, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "xho_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 294300, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, 
+ "xho_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 268906, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "xho_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 289017, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "xho_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 275905, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "xho_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 287694, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "xho_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 276494, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "xho_Latn-vec_Latn": { + "num_samples": 1012, + 
"number_of_characters": 272360, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "xho_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 226255, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "xho_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 271310, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "xho_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 279251, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "xho_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 295261, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "xho_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 205088, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "xho_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 263780, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "xho_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 286388, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "xho_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 293256, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "xho_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 277718, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "xho_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 247104, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "xho_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 278498, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "xho_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 274383, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "xho_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 276051, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "xho_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 269183, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "xho_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 285650, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "xho_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281219, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "xho_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 263540, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "xho_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 303869, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "xho_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 255430, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "xho_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276947, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "xho_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 296761, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "xho_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 196066, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "xho_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 285117, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "xho_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 302292, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "xho_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 281744, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + 
"max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "xho_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 263027, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "xho_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 266296, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "xho_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 292881, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "xho_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 284904, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "xho_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 283642, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + 
}, + "xho_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 269677, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "xho_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 276809, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "xho_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 286630, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "xho_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 267380, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "xho_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272405, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "xho_Latn-ars_Arab": { + "num_samples": 1012, + 
"number_of_characters": 255551, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "xho_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 297808, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "xho_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 261285, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "xho_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 308520, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "xho_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 281320, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "xho_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 272566, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "xho_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 278914, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "xho_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 271593, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "xho_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 280781, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "xho_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 252686, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "xho_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 266380, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "xho_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 295541, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "xho_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 267479, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "xho_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 269452, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "xho_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 271261, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "xho_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 277694, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "xho_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284994, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "xho_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 265967, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "xho_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 252893, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "xho_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 278384, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "xho_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 301720, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "xho_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 277163, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "xho_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 280789, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "xho_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 266026, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "xho_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 327407, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "xho_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 305292, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + 
"max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "xho_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 179142, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "xho_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 265059, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "xho_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 267708, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "xho_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 291604, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "xho_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 265228, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, 
+ "xho_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 268971, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "xho_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 290232, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "xho_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 270203, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "xho_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 265232, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "xho_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 182371, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "xho_Latn-ast_Latn": { + "num_samples": 1012, + 
"number_of_characters": 275486, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "xho_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 273161, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "xho_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 285266, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "xho_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 265524, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "xho_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 286668, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "xho_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 281463, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "xho_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 270833, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "xho_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 229823, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "xho_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 179818, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "xho_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 267294, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "xho_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 280380, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "xho_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 271938, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "xho_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 283053, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "xho_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 282121, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "xho_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 286295, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "xho_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 269978, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "xho_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 308134, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "xho_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 285324, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "xho_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 277740, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "xho_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 274206, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "xho_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 265551, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "xho_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274527, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "xho_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 274201, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "xho_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 289774, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "xho_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 291847, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "xho_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 302158, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "xho_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 287485, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "xho_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 256184, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "xho_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 292932, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "xho_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 260403, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "xho_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 282080, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + 
}, + "xho_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 276669, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "xho_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 273962, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "xho_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 286726, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "xho_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 297301, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "xho_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 282862, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "xho_Latn-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 250912, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "xho_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 279190, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "xho_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 269923, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "xho_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 285378, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "xho_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 269022, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "xho_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 258711, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "xho_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 279385, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "xho_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272345, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "xho_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 271978, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "xho_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 241177, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "xho_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276190, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "xho_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 273280, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "xho_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 272548, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "xho_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 290785, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "xho_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 311668, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 137.47332015810278, + "max_sentence1_length": 395, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ars_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 228049, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ars_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 242815, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ars_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 279786, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ars_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 247486, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ars_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 269640, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ars_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 243123, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ars_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 255582, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ars_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 275160, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ars_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 252189, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ars_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 258361, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ars_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 261998, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 
394, + "unique_sentence2": 1012 + }, + "ars_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 273967, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ars_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 243361, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ars_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 269859, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ars_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 245836, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ars_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 238883, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ars_Arab-spa_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 273425, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ars_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 245026, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ars_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 230001, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ars_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264419, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ars_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 248394, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ars_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 245155, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ars_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 262290, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ars_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 265764, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ars_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 240255, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ars_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 269775, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ars_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 234388, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ars_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 231541, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ars_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 277179, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ars_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 247720, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ars_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 255057, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ars_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251960, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + 
"max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ars_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 249479, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ars_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 277424, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ars_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247208, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ars_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 256905, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ars_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 228505, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + 
"min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ars_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 245470, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ars_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 245887, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ars_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 262384, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ars_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 262190, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ars_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 242827, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 
124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ars_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 255911, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ars_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 265154, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ars_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250874, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ars_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 256118, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ars_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 245039, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + 
"unique_sentence2": 1012 + }, + "ars_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 256250, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ars_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 250988, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ars_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 246800, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ars_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 257591, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ars_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 259846, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ars_Arab-sun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 253274, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ars_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 251175, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ars_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 225474, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ars_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 238658, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ars_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 243876, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ars_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 276108, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ars_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 232428, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ars_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254255, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ars_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 237071, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ars_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 248601, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ars_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 246347, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ars_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 249078, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ars_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 254610, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ars_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 250702, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ars_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 258746, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ars_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 255400, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + 
"max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ars_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 262716, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ars_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 257455, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ars_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 254532, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ars_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 265751, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ars_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 263492, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + 
"min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ars_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 264579, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ars_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 271605, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ars_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 246211, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ars_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 266322, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ars_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 253210, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ars_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 264999, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ars_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 253799, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ars_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 249665, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ars_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 203560, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ars_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 248615, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "ars_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 256556, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ars_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 272566, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ars_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 182393, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ars_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 241085, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ars_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 263693, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ars_Arab-tam_Taml": { + 
"num_samples": 1012, + "number_of_characters": 270561, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ars_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 255023, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ars_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 224409, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ars_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 255803, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ars_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 251688, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ars_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 253356, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ars_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 246488, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ars_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 262955, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ars_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258524, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ars_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 240845, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ars_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 281174, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ars_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 232735, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ars_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254252, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ars_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 274066, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ars_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 173371, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ars_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 262422, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + 
"max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ars_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 279597, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ars_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 259049, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ars_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 240332, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ars_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 243601, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ars_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 270186, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ars_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 262209, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ars_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 260947, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ars_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 246982, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ars_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 254114, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ars_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 263935, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ars_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 244685, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ars_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249710, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ars_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 255551, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ars_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 275113, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ars_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 238590, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + 
"unique_sentence2": 1012 + }, + "ars_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 285825, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ars_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 258625, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ars_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 249871, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ars_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 256219, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ars_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 248898, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ars_Arab-ydd_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 258086, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ars_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 229991, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ars_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 243685, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ars_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 272846, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ars_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 244784, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ars_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 246757, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ars_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 248566, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ars_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 254999, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ars_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262299, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ars_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 243272, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ars_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 230198, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ars_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 255689, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ars_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 279025, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ars_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 254468, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ars_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 258094, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ars_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 243331, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + 
"max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ars_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 304712, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ars_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 282597, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ars_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 156447, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ars_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 242364, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ars_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 245013, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ars_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 268909, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ars_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 242533, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ars_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 246276, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ars_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 267537, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ars_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 247508, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ars_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 242537, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ars_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 159676, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ars_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 252791, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ars_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 250466, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ars_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 262571, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "ars_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 242829, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ars_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 263973, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ars_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 258768, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ars_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 248138, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ars_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 207128, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ars_Arab-zho_Hant": { + 
"num_samples": 1012, + "number_of_characters": 157123, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ars_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 244599, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ars_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 257685, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ars_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 249243, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ars_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 260358, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ars_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 259426, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ars_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 263600, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ars_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 247283, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ars_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 285439, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ars_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 262629, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ars_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 255045, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ars_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 251511, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ars_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 242856, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ars_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251832, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ars_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 251506, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ars_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 267079, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + 
"max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ars_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 269152, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ars_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 279463, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ars_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 264790, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ars_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 233489, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ars_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 270237, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ars_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 237708, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ars_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 259385, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ars_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 253974, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ars_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 251267, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ars_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 264031, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ars_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 274606, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ars_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 260167, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ars_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 228217, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ars_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 256495, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ars_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 247228, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + 
"unique_sentence2": 1012 + }, + "ars_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 262683, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ars_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 246327, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ars_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 236016, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ars_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 256690, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ars_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249650, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ars_Arab-dyu_Latn": { + 
"num_samples": 1012, + "number_of_characters": 249283, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ars_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 218482, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ars_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253495, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ars_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 250585, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ars_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 249853, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ars_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 268090, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ars_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 288973, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 115.04743083003953, + "max_sentence1_length": 316, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ceb_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 270306, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ceb_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 285072, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ceb_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 322043, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ceb_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 289743, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ceb_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 311897, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ceb_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 285380, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ceb_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 297839, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ceb_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 317417, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ceb_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 294446, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ceb_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 300618, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ceb_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 304255, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ceb_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 316224, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ceb_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 285618, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ceb_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 312116, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ceb_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 288093, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ceb_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 281140, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ceb_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 315682, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ceb_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 287283, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ceb_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 272258, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ceb_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306676, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ceb_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 290651, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ceb_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 287412, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ceb_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 304547, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ceb_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 308021, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "ceb_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 282512, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ceb_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 312032, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ceb_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 276645, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ceb_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 273798, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ceb_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 319436, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ceb_Latn-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 289977, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ceb_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 297314, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ceb_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294217, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ceb_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 291736, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ceb_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 319681, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ceb_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289465, + 
"unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ceb_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 299162, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ceb_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 270762, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ceb_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 287727, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ceb_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 288144, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ceb_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 304641, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ceb_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 304447, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ceb_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 285084, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ceb_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 298168, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ceb_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 307411, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ceb_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293131, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ceb_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 298375, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ceb_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 287296, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ceb_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 298507, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ceb_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 293245, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ceb_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 289057, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ceb_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 299848, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ceb_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 302103, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ceb_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 295531, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ceb_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 293432, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ceb_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 267731, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ceb_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 280915, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ceb_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 286133, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ceb_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 318365, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ceb_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 274685, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ceb_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296512, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + 
"unique_sentence2": 1012 + }, + "ceb_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 279328, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ceb_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 290858, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ceb_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 288604, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ceb_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 291335, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ceb_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 296867, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ceb_Latn-fao_Latn": { + 
"num_samples": 1012, + "number_of_characters": 292959, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ceb_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 301003, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ceb_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 297657, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ceb_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 304973, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ceb_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 299712, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ceb_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 296789, + 
"unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ceb_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 308008, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ceb_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 305749, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ceb_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 306836, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ceb_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 313862, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ceb_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 288468, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ceb_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 308579, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ceb_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 295467, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ceb_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 307256, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ceb_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 296056, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ceb_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 291922, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ceb_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 245817, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ceb_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 290872, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ceb_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 298813, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ceb_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 314823, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ceb_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 224650, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ceb_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 283342, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ceb_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 305950, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ceb_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 312818, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ceb_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 297280, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ceb_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 266666, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ceb_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 298060, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ceb_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 293945, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ceb_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 295613, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ceb_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 288745, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ceb_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 305212, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "ceb_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300781, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ceb_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 283102, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ceb_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 323431, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ceb_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 274992, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ceb_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296509, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ceb_Latn-fra_Latn": { + 
"num_samples": 1012, + "number_of_characters": 316323, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ceb_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 215628, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ceb_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 304679, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ceb_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 321854, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ceb_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 301306, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ceb_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 282589, + 
"unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ceb_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 285858, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ceb_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 312443, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ceb_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 304466, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ceb_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 303204, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ceb_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 289239, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ceb_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 296371, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ceb_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 306192, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ceb_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 286942, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ceb_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291967, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ceb_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 297808, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ceb_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 275113, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ceb_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 280847, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ceb_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 328082, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ceb_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 300882, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ceb_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 292128, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ceb_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 298476, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ceb_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 291155, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ceb_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 300343, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ceb_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 272248, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ceb_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 285942, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ceb_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 315103, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ceb_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 287041, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ceb_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 289014, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ceb_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 290823, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ceb_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 297256, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "ceb_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304556, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ceb_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 285529, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ceb_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 272455, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ceb_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 297946, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ceb_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 321282, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ceb_Latn-kan_Knda": { + 
"num_samples": 1012, + "number_of_characters": 296725, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ceb_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 300351, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ceb_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 285588, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ceb_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 346969, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ceb_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 324854, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ceb_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 198704, + 
"unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ceb_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 284621, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ceb_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 287270, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ceb_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 311166, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ceb_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 284790, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ceb_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 288533, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ceb_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 309794, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ceb_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 289765, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ceb_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 284794, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ceb_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 201933, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ceb_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 295048, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ceb_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 292723, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ceb_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 304828, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ceb_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 285086, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ceb_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 306230, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ceb_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 301025, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ceb_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 290395, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ceb_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 249385, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ceb_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 199380, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ceb_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 286856, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ceb_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 299942, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ceb_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 291500, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ceb_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 302615, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ceb_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 301683, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ceb_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 305857, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ceb_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 289540, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + 
"unique_sentence2": 1012 + }, + "ceb_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 327696, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ceb_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 304886, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ceb_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 297302, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ceb_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 293768, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ceb_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 285113, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ceb_Latn-kaz_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 294089, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ceb_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 293763, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ceb_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 309336, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ceb_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 311409, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ceb_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 321720, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ceb_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 307047, + 
"unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ceb_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 275746, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ceb_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 312494, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ceb_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 279965, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ceb_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 301642, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ceb_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 296231, + "unique_pairs": 1012, + "min_sentence1_length": 49, + 
"average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ceb_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 293524, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ceb_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 306288, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ceb_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 316863, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ceb_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 302424, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ceb_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 270474, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + 
"max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ceb_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 298752, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ceb_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 289485, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ceb_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 304940, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ceb_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 288584, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ceb_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 278273, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ceb_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 298947, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ceb_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291907, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ceb_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 291540, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ceb_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 260739, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ceb_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295752, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ceb_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 292842, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ceb_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 292110, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ceb_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 310347, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ceb_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 331230, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 156.80335968379447, + "max_sentence1_length": 427, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "fuv_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 233783, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + 
"unique_sentence2": 1012 + }, + "fuv_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 248549, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "fuv_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 285520, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fuv_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 253220, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "fuv_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 275374, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "fuv_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 248857, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "fuv_Latn-pap_Latn": { + 
"num_samples": 1012, + "number_of_characters": 261316, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fuv_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 280894, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "fuv_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 257923, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "fuv_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 264095, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fuv_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 267732, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fuv_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 279701, + "unique_pairs": 
1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "fuv_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 249095, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "fuv_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 275593, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "fuv_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 251570, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fuv_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 244617, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fuv_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 279159, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fuv_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 250760, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "fuv_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 235735, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "fuv_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270153, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fuv_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 254128, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fuv_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 250889, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + 
"max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fuv_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 268024, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fuv_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 271498, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fuv_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 245989, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "fuv_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 275509, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fuv_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 240122, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "fuv_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 237275, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "fuv_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 282913, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "fuv_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 253454, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fuv_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 260791, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "fuv_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257694, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fuv_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 255213, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fuv_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 283158, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "fuv_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 252942, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fuv_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 262639, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "fuv_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 234239, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + 
"unique_sentence2": 1012 + }, + "fuv_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 251204, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fuv_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 251621, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "fuv_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 268118, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fuv_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 267924, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "fuv_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 248561, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fuv_Latn-pol_Latn": { + 
"num_samples": 1012, + "number_of_characters": 261645, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fuv_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 270888, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fuv_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256608, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fuv_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 261852, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fuv_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 250773, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fuv_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 261984, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fuv_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 256722, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fuv_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 252534, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fuv_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 263325, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "fuv_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 265580, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fuv_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 259008, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fuv_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 256909, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fuv_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 231208, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "fuv_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 244392, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "fuv_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 249610, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fuv_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 281842, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + 
"max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "fuv_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 238162, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "fuv_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259989, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "fuv_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 242805, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "fuv_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 254335, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fuv_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 252081, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fuv_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 254812, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "fuv_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 260344, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fuv_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 256436, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fuv_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 264480, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "fuv_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 261134, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fuv_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 268450, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "fuv_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 263189, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fuv_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 260266, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "fuv_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 271485, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "fuv_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 269226, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + 
"unique_sentence2": 1012 + }, + "fuv_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 270313, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "fuv_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 277339, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "fuv_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 251945, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fuv_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 272056, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "fuv_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 258944, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "fuv_Latn-ron_Latn": { + 
"num_samples": 1012, + "number_of_characters": 270733, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fuv_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 259533, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "fuv_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 255399, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fuv_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 209294, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "fuv_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 254349, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "fuv_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 262290, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fuv_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 278300, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "fuv_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 188127, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "fuv_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 246819, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "fuv_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 269427, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "fuv_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 276295, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "fuv_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 260757, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "fuv_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 230143, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "fuv_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 261537, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fuv_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 257422, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "fuv_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 259090, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + 
"max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fuv_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 252222, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fuv_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 268689, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fuv_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264258, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "fuv_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 246579, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fuv_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 286908, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + 
"min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "fuv_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 238469, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fuv_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259986, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fuv_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 279800, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "fuv_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 179105, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "fuv_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 268156, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fuv_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 285331, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fuv_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 264783, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "fuv_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 246066, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "fuv_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 249335, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "fuv_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 275920, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + 
"unique_sentence2": 1012 + }, + "fuv_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 267943, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fuv_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 266681, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "fuv_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 252716, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "fuv_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 259848, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "fuv_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 269669, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fuv_Latn-san_Deva": { + 
"num_samples": 1012, + "number_of_characters": 250419, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "fuv_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255444, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fuv_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 261285, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "fuv_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 238590, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "fuv_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 280847, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "fuv_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 291559, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "fuv_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 264359, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fuv_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 255605, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fuv_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 261953, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fuv_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 254632, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "fuv_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 263820, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "fuv_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 235725, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "fuv_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 249419, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fuv_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 278580, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "fuv_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 250518, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "fuv_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 252491, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + 
"max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "fuv_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 254300, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "fuv_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 260733, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fuv_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268033, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "fuv_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 249006, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "fuv_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 235932, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "fuv_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 261423, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "fuv_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 284759, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "fuv_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 260202, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fuv_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 263828, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "fuv_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 249065, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fuv_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 310446, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "fuv_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 288331, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "fuv_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 162181, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "fuv_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 248098, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "fuv_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 250747, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + 
"unique_sentence2": 1012 + }, + "fuv_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 274643, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "fuv_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 248267, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "fuv_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 252010, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "fuv_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 273271, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "fuv_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 253242, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "fuv_Latn-tha_Thai": { + 
"num_samples": 1012, + "number_of_characters": 248271, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "fuv_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 165410, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "fuv_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 258525, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fuv_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 256200, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "fuv_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 268305, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "fuv_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 248563, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "fuv_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 269707, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "fuv_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 264502, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "fuv_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 253872, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "fuv_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 212862, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "fuv_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 162857, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "fuv_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 250333, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "fuv_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 263419, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fuv_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 254977, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "fuv_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 266092, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fuv_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 265160, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + 
"max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "fuv_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 269334, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "fuv_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 253017, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "fuv_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 291173, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "fuv_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 268363, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "fuv_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 260779, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "fuv_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 257245, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "fuv_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 248590, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "fuv_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257566, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "fuv_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 257240, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "fuv_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 272813, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "fuv_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 274886, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "fuv_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 285197, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "fuv_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 270524, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "fuv_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 239223, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "fuv_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 275971, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + 
"unique_sentence2": 1012 + }, + "fuv_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 243442, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "fuv_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 265119, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "fuv_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 259708, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "fuv_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 257001, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "fuv_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 269765, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "fuv_Latn-tso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 280340, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "fuv_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 265901, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "fuv_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 233951, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "fuv_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 262229, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "fuv_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 252962, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "fuv_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 268417, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "fuv_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 252061, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "fuv_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 241750, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "fuv_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 262424, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "fuv_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255384, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "fuv_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 255017, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "fuv_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 224216, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "fuv_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259229, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "fuv_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 256319, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fuv_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 255587, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "fuv_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 273824, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + 
"max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "fuv_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 294707, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 120.71343873517786, + "max_sentence1_length": 308, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kac_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 281018, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kac_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 295784, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kac_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 332755, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kac_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 300455, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kac_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 322609, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kac_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 296092, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kac_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 308551, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kac_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 328129, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kac_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 305158, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kac_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 311330, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kac_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 314967, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kac_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 326936, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kac_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 296330, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kac_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 322828, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 
1009 + }, + "kac_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 298805, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kac_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 291852, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kac_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 326394, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kac_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 297995, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kac_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 282970, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kac_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 317388, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kac_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 301363, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kac_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 298124, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kac_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 315259, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kac_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 318733, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kac_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 293224, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kac_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 322744, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kac_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 287357, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kac_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 284510, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kac_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 330148, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kac_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 300689, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 
167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kac_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 308026, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kac_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304929, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kac_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 302448, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kac_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 330393, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kac_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300177, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kac_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 309874, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kac_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 281474, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kac_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 298439, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kac_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 298856, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kac_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 315353, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kac_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 315159, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kac_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 295796, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kac_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 308880, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kac_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 318123, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kac_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 303843, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kac_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 309087, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kac_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 298008, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kac_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 309219, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kac_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 303957, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kac_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 299769, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + 
}, + "kac_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 310560, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kac_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 312815, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kac_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 306243, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kac_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 304144, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kac_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 278443, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kac_Latn-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 291627, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kac_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 296845, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kac_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 329077, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kac_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 285397, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kac_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 307224, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kac_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 290040, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kac_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 301570, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kac_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 299316, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kac_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 302047, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kac_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 307579, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kac_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 303671, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 
167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kac_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 311715, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kac_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 308369, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kac_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 315685, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kac_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 310424, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kac_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 307501, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kac_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 318720, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kac_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 316461, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kac_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 317548, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kac_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 324574, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kac_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 299180, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kac_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 319291, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kac_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 306179, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kac_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 317968, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kac_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 306768, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kac_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 302634, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kac_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 256529, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kac_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 301584, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kac_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 309525, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kac_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 325535, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kac_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 235362, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"kac_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 294054, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kac_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 316662, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kac_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 323530, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kac_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 307992, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kac_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 277378, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kac_Latn-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 308772, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kac_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 304657, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kac_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 306325, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kac_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 299457, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kac_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 315924, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kac_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 311493, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kac_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 293814, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kac_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 334143, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kac_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 285704, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kac_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 307221, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kac_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 327035, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 
167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kac_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 226340, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kac_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 315391, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kac_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 332566, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kac_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 312018, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kac_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 293301, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kac_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 296570, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kac_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 323155, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kac_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 315178, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kac_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 313916, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kac_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 299951, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kac_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 307083, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kac_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 316904, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kac_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 297654, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kac_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302679, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kac_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 308520, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kac_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 285825, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kac_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 328082, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kac_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 291559, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kac_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 311594, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kac_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 302840, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, 
+ "kac_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 309188, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kac_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 301867, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kac_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 311055, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kac_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 282960, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kac_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 296654, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kac_Latn-gaz_Latn": { + "num_samples": 1012, + 
"number_of_characters": 325815, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kac_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 297753, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kac_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 299726, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kac_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 301535, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kac_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 307968, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kac_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 315268, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kac_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 296241, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kac_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 283167, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kac_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 308658, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kac_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 331994, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kac_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 307437, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 
167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kac_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 311063, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kac_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 296300, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kac_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 357681, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kac_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 335566, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kac_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 209416, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + 
"unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kac_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 295333, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kac_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 297982, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kac_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 321878, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kac_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 295502, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kac_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 299245, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kac_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 320506, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kac_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 300477, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kac_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 295506, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kac_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 212645, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kac_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 305760, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + 
"max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kac_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 303435, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kac_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 315540, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kac_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 295798, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kac_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 316942, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kac_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 311737, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, 
+ "kac_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 301107, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kac_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 260097, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kac_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 210092, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kac_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 297568, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kac_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 310654, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kac_Latn-grn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 302212, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kac_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 313327, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kac_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 312395, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kac_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 316569, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kac_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 300252, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kac_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 338408, + "unique_pairs": 1012, + 
"min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kac_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 315598, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kac_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 308014, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kac_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 304480, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kac_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 295825, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kac_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304801, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 
167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kac_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 304475, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kac_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 320048, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kac_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 322121, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kac_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 332432, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kac_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 317759, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kac_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 286458, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kac_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 323206, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kac_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 290677, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kac_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 312354, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kac_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 306943, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kac_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 304236, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kac_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 317000, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kac_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 327575, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kac_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 313136, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kac_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 281186, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kac_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 309464, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kac_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 300197, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kac_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 315652, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kac_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 299296, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kac_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 288985, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"kac_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 309659, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kac_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302619, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kac_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 302252, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kac_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 271451, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kac_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306464, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kac_Latn-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 303554, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kac_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 302822, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kac_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 321059, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kac_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 341942, + "unique_pairs": 1012, + "min_sentence1_length": 52, + "average_sentence1_length": 167.38833992094862, + "max_sentence1_length": 513, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "lin_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 253818, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "lin_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 268584, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "lin_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 305555, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lin_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 273255, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "lin_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 295409, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "lin_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 268892, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "lin_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 281351, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lin_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 300929, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "lin_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 277958, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "lin_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 284130, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lin_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 287767, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lin_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 299736, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "lin_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 269130, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "lin_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 295628, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "lin_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 271605, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lin_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 264652, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lin_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 299194, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lin_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 270795, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "lin_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 255770, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "lin_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290188, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lin_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 274163, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lin_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 270924, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "lin_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 288059, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lin_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 291533, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lin_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 266024, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "lin_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 295544, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lin_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 260157, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "lin_Latn-acq_Arab": { + 
"num_samples": 1012, + "number_of_characters": 257310, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "lin_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 302948, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "lin_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 273489, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lin_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 280826, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "lin_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277729, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lin_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 275248, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lin_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 303193, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "lin_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272977, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lin_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 282674, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "lin_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 254274, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "lin_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 271239, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lin_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 271656, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "lin_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 288153, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lin_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 287959, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "lin_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 268596, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lin_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 281680, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lin_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 290923, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lin_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276643, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lin_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 281887, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lin_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 270808, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lin_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 282019, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lin_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 276757, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lin_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 272569, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lin_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 283360, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lin_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 285615, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lin_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 279043, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lin_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 276944, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lin_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 251243, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "lin_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 264427, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "lin_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 269645, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lin_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 301877, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + 
"unique_sentence2": 1012 + }, + "lin_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 258197, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "lin_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280024, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "lin_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 262840, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "lin_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 274370, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lin_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 272116, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lin_Latn-aka_Latn": { + 
"num_samples": 1012, + "number_of_characters": 274847, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lin_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 280379, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lin_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 276471, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lin_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 284515, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lin_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 281169, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lin_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 288485, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "lin_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 283224, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lin_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 280301, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "lin_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 291520, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lin_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 289261, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lin_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 290348, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "lin_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 297374, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "lin_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 271980, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lin_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 292091, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lin_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 278979, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lin_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 290768, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lin_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 279568, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "lin_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 275434, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lin_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 229329, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "lin_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 274384, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lin_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 282325, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lin_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 298335, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "lin_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 208162, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "lin_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 266854, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "lin_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 289462, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "lin_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 296330, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "lin_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 280792, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "lin_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 250178, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "lin_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 281572, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lin_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 277457, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "lin_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 279125, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + 
"unique_sentence2": 1012 + }, + "lin_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 272257, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lin_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 288724, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lin_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284293, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lin_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 266614, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lin_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 306943, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lin_Latn-arb_Arab": { + 
"num_samples": 1012, + "number_of_characters": 258504, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lin_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280021, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lin_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 299835, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "lin_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 199140, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "lin_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 288191, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lin_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 305366, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lin_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 284818, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "lin_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 266101, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lin_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 269370, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lin_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 295955, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "lin_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 287978, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lin_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 286716, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lin_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 272751, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "lin_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 279883, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "lin_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 289704, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lin_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 270454, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "lin_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275479, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lin_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 281320, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "lin_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 258625, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lin_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 300882, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lin_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 264359, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "lin_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 311594, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "lin_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 275640, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lin_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 281988, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lin_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 274667, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lin_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 283855, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lin_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 255760, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "lin_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 269454, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lin_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 298615, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "lin_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 270553, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "lin_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 272526, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + 
"unique_sentence2": 1012 + }, + "lin_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 274335, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "lin_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 280768, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lin_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288068, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "lin_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 269041, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lin_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 255967, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "lin_Latn-cjk_Latn": { + 
"num_samples": 1012, + "number_of_characters": 281458, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lin_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 304794, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "lin_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 280237, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lin_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 283863, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lin_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 269100, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lin_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 330481, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "lin_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 308366, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lin_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 182216, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "lin_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 268133, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lin_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 270782, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lin_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 294678, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "lin_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 268302, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "lin_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 272045, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lin_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 293306, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "lin_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 273277, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lin_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 268306, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lin_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 185445, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "lin_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 278560, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lin_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 276235, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lin_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 288340, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lin_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 268598, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "lin_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 289742, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "lin_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 284537, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "lin_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 273907, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lin_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 232897, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "lin_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 182892, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 
40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "lin_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 270368, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lin_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 283454, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lin_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 275012, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "lin_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 286127, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lin_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 285195, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + 
"unique_sentence2": 1010 + }, + "lin_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 289369, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lin_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 273052, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lin_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 311208, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "lin_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 288398, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lin_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 280814, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "lin_Latn-dan_Latn": { + 
"num_samples": 1012, + "number_of_characters": 277280, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lin_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 268625, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lin_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277601, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lin_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 277275, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "lin_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 292848, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lin_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 294921, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "lin_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 305232, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lin_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 290559, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "lin_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 259258, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "lin_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 296006, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lin_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 263477, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "lin_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 285154, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "lin_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 279743, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lin_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 277036, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lin_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 289800, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "lin_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 300375, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lin_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 285936, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "lin_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 253986, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "lin_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 282264, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "lin_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 272997, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lin_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 288452, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lin_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 272096, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "lin_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 261785, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "lin_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 282459, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lin_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275419, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "lin_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 275052, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "lin_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 244251, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "lin_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279264, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lin_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 276354, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lin_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 275622, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lin_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 293859, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + 
"unique_sentence2": 1012 + }, + "lin_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 314742, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 140.5108695652174, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "nno_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 245064, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "nno_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 259830, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "nno_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 296801, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nno_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 264501, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "nno_Latn-khm_Khmr": { + 
"num_samples": 1012, + "number_of_characters": 286655, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "nno_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 260138, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "nno_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 272597, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nno_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 292175, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "nno_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 269204, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "nno_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 275376, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nno_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 279013, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nno_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 290982, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "nno_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 260376, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "nno_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 286874, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "nno_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 262851, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nno_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 255898, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nno_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 290440, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nno_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 262041, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "nno_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 247016, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "nno_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281434, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nno_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 265409, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nno_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 262170, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nno_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 279305, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nno_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 282779, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nno_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 257270, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "nno_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 286790, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nno_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 251403, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "nno_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 248556, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "nno_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 294194, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "nno_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 264735, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nno_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 272072, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "nno_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268975, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nno_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 266494, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nno_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 294439, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "nno_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264223, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "nno_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 273920, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "nno_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 245520, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "nno_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 262485, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nno_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 262902, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "nno_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 279399, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nno_Latn-kmb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279205, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "nno_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 259842, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nno_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 272926, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nno_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 282169, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nno_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267889, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nno_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 273133, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nno_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 262054, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nno_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 273265, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "nno_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 268003, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nno_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 263815, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nno_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 274606, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nno_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 276861, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nno_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 270289, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nno_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 268190, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nno_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 242489, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "nno_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 255673, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "nno_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 260891, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nno_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 293123, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "nno_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 249443, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "nno_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271270, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "nno_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 254086, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "nno_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 265616, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nno_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 263362, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nno_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 266093, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nno_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 271625, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nno_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 267717, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nno_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 275761, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nno_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 272415, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nno_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 279731, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "nno_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 274470, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nno_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 271547, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + 
"unique_sentence2": 1012 + }, + "nno_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 282766, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nno_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 280507, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nno_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 281594, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "nno_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 288620, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "nno_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 263226, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nno_Latn-kon_Latn": { + 
"num_samples": 1012, + "number_of_characters": 283337, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nno_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 270225, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nno_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 282014, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nno_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 270814, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "nno_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 266680, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nno_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 220575, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "nno_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 265630, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nno_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 273571, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nno_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 289581, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "nno_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 199408, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "nno_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 258100, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "nno_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 280708, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "nno_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 287576, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "nno_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 272038, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "nno_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 241424, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "nno_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 272818, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nno_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 268703, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "nno_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 270371, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nno_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 263503, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nno_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 279970, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nno_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275539, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nno_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 257860, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nno_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 298189, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nno_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 249750, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nno_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271267, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nno_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 291081, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 
155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "nno_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 190386, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "nno_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 279437, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nno_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 296612, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nno_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 276064, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "nno_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 257347, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + 
"unique_sentence2": 1012 + }, + "nno_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 260616, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nno_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 287201, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "nno_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 279224, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nno_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 277962, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nno_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 263997, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "nno_Latn-lim_Latn": { + 
"num_samples": 1012, + "number_of_characters": 271129, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "nno_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 280950, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nno_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 261700, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "nno_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266725, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nno_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 272566, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "nno_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 249871, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nno_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 292128, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nno_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 255605, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "nno_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 302840, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "nno_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 275640, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nno_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 273234, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nno_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 265913, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nno_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 275101, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nno_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 247006, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "nno_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 260700, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nno_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 289861, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "nno_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 261799, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "nno_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 263772, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "nno_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 265581, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "nno_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 272014, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nno_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279314, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "nno_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 260287, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nno_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 247213, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "nno_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 272704, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nno_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 296040, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "nno_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 271483, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nno_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 275109, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nno_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 260346, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nno_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 321727, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "nno_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 299612, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nno_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 173462, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + 
"unique_sentence2": 1012 + }, + "nno_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 259379, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nno_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 262028, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nno_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 285924, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "nno_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 259548, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "nno_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 263291, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nno_Latn-nso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 284552, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "nno_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 264523, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nno_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 259552, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nno_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 176691, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "nno_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 269806, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nno_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 267481, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nno_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 279586, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nno_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 259844, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "nno_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 280988, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "nno_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 275783, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "nno_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 265153, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nno_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 224143, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "nno_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 174138, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "nno_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 261614, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nno_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 274700, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nno_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 266258, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "nno_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 277373, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nno_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 276441, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "nno_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 280615, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "nno_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 264298, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nno_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 302454, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "nno_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 279644, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nno_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 272060, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "nno_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 268526, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nno_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 259871, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nno_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268847, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nno_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 268521, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "nno_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 284094, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nno_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 286167, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "nno_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 296478, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nno_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 281805, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + 
"unique_sentence2": 1012 + }, + "nno_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 250504, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "nno_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 287252, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nno_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 254723, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "nno_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 276400, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "nno_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 270989, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nno_Latn-ory_Orya": { + 
"num_samples": 1012, + "number_of_characters": 268282, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nno_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 281046, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "nno_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 291621, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nno_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 277182, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "nno_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 245232, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "nno_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 273510, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "nno_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 264243, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nno_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 279698, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nno_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 263342, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "nno_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 253031, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "nno_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 273705, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nno_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266665, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "nno_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 266298, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "nno_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 235497, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "nno_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270510, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nno_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 267600, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nno_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 266868, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nno_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 285105, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "nno_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 305988, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.8606719367589, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "sat_Olck-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 251412, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "sat_Olck-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 266178, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "sat_Olck-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 303149, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sat_Olck-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 270849, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "sat_Olck-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 293003, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "sat_Olck-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 266486, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "sat_Olck-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 278945, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sat_Olck-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 298523, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "sat_Olck-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 275552, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "sat_Olck-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 281724, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sat_Olck-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 285361, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sat_Olck-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 297330, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 
1012 + }, + "sat_Olck-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 266724, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "sat_Olck-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 293222, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "sat_Olck-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 269199, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sat_Olck-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 262246, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sat_Olck-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 296788, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sat_Olck-twi_Latn": { + "num_samples": 1012, + 
"number_of_characters": 268389, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "sat_Olck-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 253364, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "sat_Olck-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287782, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sat_Olck-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 271757, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sat_Olck-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 268518, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sat_Olck-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 285653, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sat_Olck-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 289127, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sat_Olck-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 263618, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "sat_Olck-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 293138, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "sat_Olck-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 257751, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "sat_Olck-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 254904, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "sat_Olck-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 300542, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "sat_Olck-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 271083, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sat_Olck-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 278420, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "sat_Olck-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275323, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sat_Olck-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 272842, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sat_Olck-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 300787, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "sat_Olck-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270571, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sat_Olck-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 280268, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "sat_Olck-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 251868, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "sat_Olck-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 268833, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sat_Olck-est_Latn": { + "num_samples": 1012, + "number_of_characters": 269250, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "sat_Olck-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 285747, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sat_Olck-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 285553, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "sat_Olck-min_Arab": { + "num_samples": 1012, + "number_of_characters": 266190, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sat_Olck-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 279274, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + 
"max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sat_Olck-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 288517, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sat_Olck-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274237, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sat_Olck-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 279481, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sat_Olck-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 268402, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sat_Olck-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 279613, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + 
}, + "sat_Olck-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 274351, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sat_Olck-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 270163, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sat_Olck-min_Latn": { + "num_samples": 1012, + "number_of_characters": 280954, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sat_Olck-por_Latn": { + "num_samples": 1012, + "number_of_characters": 283209, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sat_Olck-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 276637, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sat_Olck-umb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 274538, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sat_Olck-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 248837, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "sat_Olck-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 262021, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "sat_Olck-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 267239, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sat_Olck-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 299471, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "sat_Olck-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 255791, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "sat_Olck-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277618, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "sat_Olck-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 260434, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "sat_Olck-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 271964, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sat_Olck-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 269710, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sat_Olck-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 272441, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sat_Olck-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 277973, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sat_Olck-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 274065, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sat_Olck-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 282109, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sat_Olck-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 278763, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sat_Olck-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 286079, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "sat_Olck-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 280818, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sat_Olck-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 277895, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "sat_Olck-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 289114, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sat_Olck-als_Latn": { + "num_samples": 1012, + "number_of_characters": 286855, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sat_Olck-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 287942, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "sat_Olck-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 294968, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "sat_Olck-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 269574, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sat_Olck-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 289685, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sat_Olck-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 276573, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sat_Olck-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 288362, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sat_Olck-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 277162, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "sat_Olck-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 273028, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sat_Olck-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 226923, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "sat_Olck-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 271978, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sat_Olck-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 279919, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + 
"sat_Olck-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 295929, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "sat_Olck-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 205756, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "sat_Olck-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 264448, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "sat_Olck-run_Latn": { + "num_samples": 1012, + "number_of_characters": 287056, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "sat_Olck-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 293924, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "sat_Olck-vie_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278386, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "sat_Olck-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 247772, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "sat_Olck-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 279166, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sat_Olck-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 275051, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "sat_Olck-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 276719, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sat_Olck-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 269851, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sat_Olck-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 286318, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sat_Olck-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281887, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sat_Olck-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 264208, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sat_Olck-war_Latn": { + "num_samples": 1012, + "number_of_characters": 304537, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sat_Olck-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 256098, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sat_Olck-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277615, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sat_Olck-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 297429, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "sat_Olck-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 196734, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "sat_Olck-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 285785, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sat_Olck-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 302960, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sat_Olck-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 282412, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "sat_Olck-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 263695, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sat_Olck-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 266964, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sat_Olck-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 293549, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "sat_Olck-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 285572, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sat_Olck-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 284310, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sat_Olck-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 270345, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "sat_Olck-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 277477, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "sat_Olck-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 287298, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sat_Olck-san_Deva": { + "num_samples": 1012, + "number_of_characters": 268048, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + 
"max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "sat_Olck-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273073, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sat_Olck-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 278914, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "sat_Olck-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 256219, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sat_Olck-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 298476, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sat_Olck-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 261953, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + 
}, + "sat_Olck-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 309188, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "sat_Olck-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 281988, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sat_Olck-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 273234, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sat_Olck-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 272261, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sat_Olck-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 281449, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sat_Olck-ary_Arab": { + "num_samples": 1012, + 
"number_of_characters": 253354, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "sat_Olck-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 267048, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sat_Olck-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 296209, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "sat_Olck-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 268147, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "sat_Olck-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 270120, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "sat_Olck-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 271929, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "sat_Olck-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 278362, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sat_Olck-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285662, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "sat_Olck-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 266635, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sat_Olck-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 253561, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "sat_Olck-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 279052, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sat_Olck-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 302388, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "sat_Olck-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 277831, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sat_Olck-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 281457, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sat_Olck-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 266694, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sat_Olck-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 328075, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "sat_Olck-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 305960, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sat_Olck-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 179810, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "sat_Olck-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 265727, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sat_Olck-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 268376, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sat_Olck-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 292272, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "sat_Olck-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 265896, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "sat_Olck-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 269639, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sat_Olck-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 290900, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "sat_Olck-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 270871, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sat_Olck-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 265900, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + 
"max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sat_Olck-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 183039, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "sat_Olck-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 276154, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sat_Olck-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 273829, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sat_Olck-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 285934, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sat_Olck-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 266192, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + 
"sat_Olck-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 287336, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "sat_Olck-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 282131, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "sat_Olck-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 271501, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sat_Olck-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 230491, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "sat_Olck-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 180486, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "sat_Olck-awa_Deva": { + "num_samples": 1012, + 
"number_of_characters": 267962, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sat_Olck-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 281048, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sat_Olck-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 272606, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "sat_Olck-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 283721, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sat_Olck-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 282789, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "sat_Olck-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 286963, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "sat_Olck-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 270646, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sat_Olck-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 308802, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "sat_Olck-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 285992, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sat_Olck-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 278408, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "sat_Olck-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 274874, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sat_Olck-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 266219, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sat_Olck-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275195, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sat_Olck-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 274869, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "sat_Olck-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 290442, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sat_Olck-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 292515, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "sat_Olck-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 302826, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sat_Olck-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 288153, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "sat_Olck-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 256852, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "sat_Olck-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 293600, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sat_Olck-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 261071, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "sat_Olck-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 282748, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "sat_Olck-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 277337, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sat_Olck-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 274630, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sat_Olck-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 287394, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "sat_Olck-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 297969, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + 
"max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sat_Olck-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 283530, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "sat_Olck-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 251580, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "sat_Olck-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 279858, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "sat_Olck-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 270591, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sat_Olck-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 286046, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, 
+ "sat_Olck-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 269690, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "sat_Olck-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 259379, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "sat_Olck-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 280053, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sat_Olck-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273013, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "sat_Olck-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 272646, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "sat_Olck-heb_Hebr": { + "num_samples": 1012, + 
"number_of_characters": 241845, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "sat_Olck-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276858, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sat_Olck-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 273948, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sat_Olck-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 273216, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sat_Olck-som_Latn": { + "num_samples": 1012, + "number_of_characters": 291453, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "sat_Olck-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 312336, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tel_Telu-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 244091, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tel_Telu-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 258857, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tel_Telu-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 295828, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tel_Telu-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 263528, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tel_Telu-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 285682, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tel_Telu-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 259165, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tel_Telu-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 271624, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tel_Telu-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 291202, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tel_Telu-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 268231, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tel_Telu-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 274403, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 
1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tel_Telu-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 278040, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tel_Telu-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 290009, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tel_Telu-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 259403, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tel_Telu-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 285901, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tel_Telu-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 261878, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tel_Telu-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 254925, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tel_Telu-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 289467, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tel_Telu-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 261068, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tel_Telu-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 246043, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tel_Telu-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280461, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + 
"unique_sentence2": 1012 + }, + "tel_Telu-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 264436, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tel_Telu-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 261197, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tel_Telu-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 278332, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tel_Telu-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 281806, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tel_Telu-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 256297, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tel_Telu-srd_Latn": { + "num_samples": 
1012, + "number_of_characters": 285817, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tel_Telu-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 250430, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tel_Telu-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 247583, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tel_Telu-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 293221, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tel_Telu-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 263762, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tel_Telu-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 271099, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tel_Telu-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268002, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tel_Telu-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 265521, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tel_Telu-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 293466, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tel_Telu-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263250, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tel_Telu-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 272947, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tel_Telu-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 244547, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tel_Telu-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 261512, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tel_Telu-est_Latn": { + "num_samples": 1012, + "number_of_characters": 261929, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tel_Telu-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 278426, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tel_Telu-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 278232, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + 
"unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tel_Telu-min_Arab": { + "num_samples": 1012, + "number_of_characters": 258869, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tel_Telu-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 271953, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tel_Telu-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 281196, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tel_Telu-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266916, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tel_Telu-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 272160, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tel_Telu-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 261081, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tel_Telu-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 272292, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tel_Telu-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 267030, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tel_Telu-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 262842, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tel_Telu-min_Latn": { + "num_samples": 1012, + "number_of_characters": 273633, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + 
"max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tel_Telu-por_Latn": { + "num_samples": 1012, + "number_of_characters": 275888, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tel_Telu-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 269316, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tel_Telu-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 267217, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tel_Telu-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 241516, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tel_Telu-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 254700, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + 
"tel_Telu-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 259918, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tel_Telu-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 292150, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tel_Telu-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 248470, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tel_Telu-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270297, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tel_Telu-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 253113, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tel_Telu-swe_Latn": { + "num_samples": 1012, + 
"number_of_characters": 264643, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tel_Telu-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 262389, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tel_Telu-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 265120, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tel_Telu-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 270652, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tel_Telu-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 266744, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tel_Telu-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 274788, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tel_Telu-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 271442, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tel_Telu-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 278758, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tel_Telu-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 273497, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tel_Telu-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 270574, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tel_Telu-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 281793, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tel_Telu-als_Latn": { + "num_samples": 1012, + "number_of_characters": 279534, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tel_Telu-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 280621, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tel_Telu-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 287647, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tel_Telu-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 262253, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tel_Telu-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 282364, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 
1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tel_Telu-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 269252, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tel_Telu-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 281041, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tel_Telu-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 269841, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tel_Telu-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 265707, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tel_Telu-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 219602, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 
86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tel_Telu-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 264657, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tel_Telu-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 272598, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tel_Telu-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 288608, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tel_Telu-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 198435, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tel_Telu-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 257127, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 
1012 + }, + "tel_Telu-run_Latn": { + "num_samples": 1012, + "number_of_characters": 279735, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tel_Telu-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 286603, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tel_Telu-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 271065, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tel_Telu-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 240451, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tel_Telu-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 271845, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tel_Telu-fon_Latn": { + "num_samples": 1012, + 
"number_of_characters": 267730, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tel_Telu-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 269398, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tel_Telu-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 262530, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tel_Telu-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 278997, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tel_Telu-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274566, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tel_Telu-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 256887, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tel_Telu-war_Latn": { + "num_samples": 1012, + "number_of_characters": 297216, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tel_Telu-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 248777, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tel_Telu-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270294, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tel_Telu-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 290108, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tel_Telu-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 189413, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tel_Telu-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 278464, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tel_Telu-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 295639, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tel_Telu-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 275091, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tel_Telu-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 256374, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tel_Telu-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 259643, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 
1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tel_Telu-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 286228, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tel_Telu-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 278251, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tel_Telu-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 276989, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tel_Telu-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 263024, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tel_Telu-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 270156, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tel_Telu-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 279977, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tel_Telu-san_Deva": { + "num_samples": 1012, + "number_of_characters": 260727, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tel_Telu-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265752, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tel_Telu-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 271593, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tel_Telu-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 248898, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + 
"unique_sentence2": 1012 + }, + "tel_Telu-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 291155, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tel_Telu-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 254632, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tel_Telu-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 301867, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tel_Telu-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 274667, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tel_Telu-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 265913, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tel_Telu-sat_Olck": { + "num_samples": 
1012, + "number_of_characters": 272261, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tel_Telu-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 274128, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tel_Telu-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 246033, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tel_Telu-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 259727, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tel_Telu-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 288888, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tel_Telu-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 260826, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tel_Telu-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 262799, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tel_Telu-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 264608, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tel_Telu-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 271041, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tel_Telu-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278341, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tel_Telu-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 259314, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tel_Telu-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 246240, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tel_Telu-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 271731, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tel_Telu-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 295067, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tel_Telu-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 270510, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tel_Telu-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 274136, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 
1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tel_Telu-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 259373, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tel_Telu-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 320754, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tel_Telu-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 298639, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tel_Telu-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 172489, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tel_Telu-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 258406, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tel_Telu-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 261055, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tel_Telu-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 284951, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tel_Telu-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 258575, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tel_Telu-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 262318, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tel_Telu-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 283579, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 
1012 + }, + "tel_Telu-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 263550, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tel_Telu-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 258579, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tel_Telu-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 175718, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tel_Telu-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 268833, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tel_Telu-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 266508, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tel_Telu-glg_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278613, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tel_Telu-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 258871, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tel_Telu-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 280015, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tel_Telu-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 274810, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tel_Telu-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 264180, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tel_Telu-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 223170, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tel_Telu-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 173165, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tel_Telu-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 260641, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tel_Telu-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 273727, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tel_Telu-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 265285, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tel_Telu-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 276400, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tel_Telu-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 275468, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tel_Telu-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 279642, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tel_Telu-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 263325, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tel_Telu-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 301481, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tel_Telu-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 278671, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tel_Telu-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 271087, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tel_Telu-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 267553, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tel_Telu-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 258898, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tel_Telu-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267874, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tel_Telu-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 267548, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tel_Telu-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 283121, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tel_Telu-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 285194, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tel_Telu-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 295505, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tel_Telu-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 280832, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tel_Telu-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 249531, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + 
"max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tel_Telu-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 286279, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tel_Telu-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 253750, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tel_Telu-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 275427, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tel_Telu-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 270016, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tel_Telu-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 267309, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + 
"tel_Telu-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 280073, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tel_Telu-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 290648, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tel_Telu-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 276209, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tel_Telu-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 244259, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tel_Telu-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 272537, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tel_Telu-kea_Latn": { + "num_samples": 1012, + 
"number_of_characters": 263270, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tel_Telu-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 278725, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tel_Telu-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 262369, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tel_Telu-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 252058, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tel_Telu-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 272732, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tel_Telu-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265692, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tel_Telu-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 265325, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tel_Telu-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 234524, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tel_Telu-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269537, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tel_Telu-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 266627, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tel_Telu-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 265895, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tel_Telu-som_Latn": { + "num_samples": 1012, + "number_of_characters": 284132, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tel_Telu-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 305015, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.899209486166, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 253279, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 268045, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ydd_Hebr-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 305016, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ydd_Hebr-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 272716, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ydd_Hebr-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 294870, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ydd_Hebr-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 268353, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ydd_Hebr-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 280812, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ydd_Hebr-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 300390, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 277419, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 283591, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 287228, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 299197, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ydd_Hebr-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 268591, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 
326, + "unique_sentence2": 1011 + }, + "ydd_Hebr-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 295089, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ydd_Hebr-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 271066, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ydd_Hebr-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 264113, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ydd_Hebr-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 298655, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ydd_Hebr-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 270256, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ydd_Hebr-acm_Arab": 
{ + "num_samples": 1012, + "number_of_characters": 255231, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289649, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ydd_Hebr-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 273624, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ydd_Hebr-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 270385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 287520, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ydd_Hebr-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 290994, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ydd_Hebr-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 265485, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ydd_Hebr-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 295005, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 259618, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ydd_Hebr-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 256771, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 302409, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ydd_Hebr-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 272950, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ydd_Hebr-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 280287, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277190, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ydd_Hebr-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 274709, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ydd_Hebr-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 302654, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + 
"max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ydd_Hebr-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272438, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ydd_Hebr-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 282135, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ydd_Hebr-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 253735, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 270700, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ydd_Hebr-est_Latn": { + "num_samples": 1012, + "number_of_characters": 271117, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ydd_Hebr-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 287614, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 287420, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ydd_Hebr-min_Arab": { + "num_samples": 1012, + "number_of_characters": 268057, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ydd_Hebr-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 281141, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 290384, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276104, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ydd_Hebr-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 281348, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 270269, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ydd_Hebr-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 281480, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 276218, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "ydd_Hebr-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 272030, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ydd_Hebr-min_Latn": { + "num_samples": 1012, + "number_of_characters": 282821, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ydd_Hebr-por_Latn": { + "num_samples": 1012, + "number_of_characters": 285076, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ydd_Hebr-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 278504, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ydd_Hebr-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 276405, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ajp_Arab": { + 
"num_samples": 1012, + "number_of_characters": 250704, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 263888, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 269106, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 301338, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ydd_Hebr-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 257658, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ydd_Hebr-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279485, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ydd_Hebr-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 262301, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ydd_Hebr-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 273831, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ydd_Hebr-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 271577, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ydd_Hebr-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 274308, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 279840, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ydd_Hebr-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 275932, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 283976, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ydd_Hebr-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 280630, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ydd_Hebr-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 287946, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ydd_Hebr-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 282685, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + 
"max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ydd_Hebr-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 279762, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ydd_Hebr-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 290981, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ydd_Hebr-als_Latn": { + "num_samples": 1012, + "number_of_characters": 288722, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 289809, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ydd_Hebr-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 296835, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ydd_Hebr-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 271441, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 291552, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ydd_Hebr-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 278440, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 290229, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ydd_Hebr-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 279029, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ydd_Hebr-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 274895, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ydd_Hebr-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 228790, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 273845, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ydd_Hebr-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 281786, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 297796, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + 
"unique_sentence2": 1012 + }, + "ydd_Hebr-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 207623, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ydd_Hebr-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 266315, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ydd_Hebr-run_Latn": { + "num_samples": 1012, + "number_of_characters": 288923, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ydd_Hebr-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 295791, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ydd_Hebr-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 280253, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ydd_Hebr-apc_Arab": { + 
"num_samples": 1012, + "number_of_characters": 249639, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 281033, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ydd_Hebr-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 276918, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ydd_Hebr-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 278586, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ydd_Hebr-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 271718, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ydd_Hebr-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 288185, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ydd_Hebr-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283754, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ydd_Hebr-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 266075, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ydd_Hebr-war_Latn": { + "num_samples": 1012, + "number_of_characters": 306404, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ydd_Hebr-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 257965, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279482, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ydd_Hebr-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 299296, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ydd_Hebr-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 198601, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ydd_Hebr-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 287652, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ydd_Hebr-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 304827, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ydd_Hebr-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 284279, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + 
"max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ydd_Hebr-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 265562, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ydd_Hebr-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 268831, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ydd_Hebr-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 295416, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ydd_Hebr-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 287439, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ydd_Hebr-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 286177, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + 
"min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 272212, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ydd_Hebr-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 279344, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ydd_Hebr-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 289165, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ydd_Hebr-san_Deva": { + "num_samples": 1012, + "number_of_characters": 269915, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ydd_Hebr-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274940, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ydd_Hebr-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 280781, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 258086, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 300343, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ydd_Hebr-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 263820, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 311055, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + 
"unique_sentence2": 1012 + }, + "ydd_Hebr-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 283855, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ydd_Hebr-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 275101, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ydd_Hebr-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 281449, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 274128, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 255221, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ydd_Hebr-ces_Latn": { + 
"num_samples": 1012, + "number_of_characters": 268915, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ydd_Hebr-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 298076, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 270014, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ydd_Hebr-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 271987, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ydd_Hebr-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 273796, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ydd_Hebr-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 280229, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287529, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ydd_Hebr-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 268502, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ydd_Hebr-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 255428, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ydd_Hebr-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 280919, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ydd_Hebr-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 304255, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ydd_Hebr-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 279698, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ydd_Hebr-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 283324, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ydd_Hebr-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 268561, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ydd_Hebr-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 329942, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 307827, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + 
"max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ydd_Hebr-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 181677, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ydd_Hebr-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 267594, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 270243, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ydd_Hebr-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 294139, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 267763, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 271506, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ydd_Hebr-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 292767, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ydd_Hebr-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 272738, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 267767, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ydd_Hebr-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 184906, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 
42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 278021, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ydd_Hebr-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 275696, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ydd_Hebr-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 287801, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 268059, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 289203, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + 
"unique_sentence2": 1012 + }, + "ydd_Hebr-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 283998, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ydd_Hebr-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 273368, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 232358, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ydd_Hebr-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 182353, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ydd_Hebr-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 269829, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ydd_Hebr-cym_Latn": { + 
"num_samples": 1012, + "number_of_characters": 282915, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ydd_Hebr-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 274473, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ydd_Hebr-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 285588, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ydd_Hebr-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 284656, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ydd_Hebr-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 288830, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ydd_Hebr-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 272513, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 310669, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ydd_Hebr-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 287859, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 280275, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ydd_Hebr-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 276741, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ydd_Hebr-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 268086, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277062, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ydd_Hebr-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 276736, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ydd_Hebr-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 292309, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ydd_Hebr-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 294382, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ydd_Hebr-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 304693, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + 
"max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ydd_Hebr-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 290020, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ydd_Hebr-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 258719, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ydd_Hebr-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 295467, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ydd_Hebr-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 262938, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ydd_Hebr-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 284615, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ydd_Hebr-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 279204, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ydd_Hebr-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 276497, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ydd_Hebr-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 289261, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 299836, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ydd_Hebr-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 285397, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ydd_Hebr-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 253447, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ydd_Hebr-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 281725, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ydd_Hebr-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 272458, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ydd_Hebr-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 287913, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ydd_Hebr-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 271557, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + 
"unique_sentence2": 1012 + }, + "ydd_Hebr-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 261246, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 281920, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ydd_Hebr-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274880, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ydd_Hebr-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 274513, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ydd_Hebr-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 243712, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ydd_Hebr-khk_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 278725, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ydd_Hebr-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 275815, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ydd_Hebr-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 275083, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ydd_Hebr-som_Latn": { + "num_samples": 1012, + "number_of_characters": 293320, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ydd_Hebr-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 314203, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 139.97826086956522, + "max_sentence1_length": 385, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ary_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 225184, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ary_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 239950, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ary_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 276921, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ary_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 244621, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ary_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 266775, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ary_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 240258, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ary_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 252717, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ary_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 272295, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ary_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 249324, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ary_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 255496, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ary_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 259133, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 
315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ary_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 271102, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ary_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 240496, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ary_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 266994, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ary_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 242971, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ary_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 236018, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 
35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ary_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 270560, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ary_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 242161, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ary_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 227136, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ary_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261554, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ary_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 245529, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ary_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 242290, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ary_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 259425, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ary_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 262899, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ary_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 237390, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ary_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 266910, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"ary_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 231523, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ary_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 228676, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ary_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 274314, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ary_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 244855, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ary_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 252192, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ary_Arab-kir_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 249095, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ary_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 246614, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ary_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 274559, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ary_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 244343, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ary_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 254040, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ary_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 225640, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ary_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 242605, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ary_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 243022, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ary_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 259519, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ary_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 259325, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ary_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 239962, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ary_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 253046, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ary_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 262289, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ary_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 248009, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ary_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 253253, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ary_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 242174, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + 
"unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ary_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 253385, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ary_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 248123, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ary_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 243935, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ary_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 254726, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ary_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 256981, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + 
"average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ary_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 250409, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ary_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 248310, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ary_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 222609, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ary_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 235793, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ary_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 241011, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ary_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 273243, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ary_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 229563, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ary_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251390, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ary_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 234206, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ary_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 245736, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + 
}, + "ary_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 243482, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ary_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 246213, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ary_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 251745, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ary_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 247837, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ary_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 255881, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ary_Arab-knc_Latn": { + "num_samples": 1012, + 
"number_of_characters": 252535, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ary_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 259851, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ary_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 254590, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ary_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 251667, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ary_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 262886, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ary_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 260627, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ary_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 261714, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ary_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 268740, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ary_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 243346, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ary_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 263457, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ary_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 250345, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ary_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 262134, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ary_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 250934, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ary_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 246800, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ary_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 200695, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ary_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 245750, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + 
"unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ary_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 253691, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ary_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 269701, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ary_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 179528, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ary_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 238220, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ary_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 260828, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + 
"average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ary_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 267696, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ary_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 252158, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ary_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 221544, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ary_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 252938, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ary_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 248823, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + 
"max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ary_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 250491, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ary_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 243623, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ary_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 260090, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ary_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255659, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ary_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 237980, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, 
+ "ary_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 278309, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ary_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 229870, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ary_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251387, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ary_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 271201, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ary_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 170506, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ary_Arab-lij_Latn": { + "num_samples": 1012, + 
"number_of_characters": 259557, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ary_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 276732, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ary_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 256184, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ary_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 237467, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ary_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 240736, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ary_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 267321, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ary_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 259344, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ary_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 258082, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ary_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 244117, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ary_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 251249, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ary_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 261070, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ary_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 241820, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ary_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246845, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ary_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 252686, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ary_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 229991, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ary_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 272248, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + 
"unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ary_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 235725, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ary_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 282960, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ary_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 255760, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ary_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 247006, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ary_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 253354, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + 
"average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ary_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 246033, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ary_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 255221, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ary_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 240820, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ary_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 269981, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ary_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 241919, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ary_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 243892, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ary_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 245701, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ary_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 252134, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ary_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259434, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ary_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 240407, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "ary_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 227333, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ary_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 252824, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ary_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 276160, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ary_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 251603, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ary_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 255229, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ary_Arab-npi_Deva": { + "num_samples": 1012, + 
"number_of_characters": 240466, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ary_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 301847, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ary_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 279732, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ary_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 153582, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ary_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 239499, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ary_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 242148, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ary_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 266044, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ary_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 239668, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ary_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 243411, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ary_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 264672, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ary_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 244643, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ary_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 239672, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ary_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 156811, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ary_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 249926, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ary_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 247601, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ary_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 259706, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + 
"unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ary_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 239964, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ary_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 261108, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ary_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 255903, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ary_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 245273, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ary_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 204263, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ary_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 154258, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ary_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 241734, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ary_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 254820, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ary_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 246378, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ary_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 257493, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ary_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 256561, + "unique_pairs": 1011, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ary_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 260735, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ary_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 244418, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ary_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 282574, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ary_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 259764, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "ary_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 252180, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ary_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 248646, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ary_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 239991, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ary_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 248967, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ary_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 248641, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ary_Arab-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 264214, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ary_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 266287, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ary_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 276598, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ary_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 261925, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ary_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 230624, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ary_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 267372, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ary_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 234843, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ary_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 256520, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ary_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 251109, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ary_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 248402, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ary_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 261166, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ary_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 271741, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ary_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 257302, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ary_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 225352, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ary_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 253630, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ary_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 244363, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + 
"unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ary_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 259818, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ary_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 243462, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ary_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 233151, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ary_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 253825, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ary_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246785, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + 
"average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ary_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 246418, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ary_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 215617, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ary_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250630, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ary_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 247720, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ary_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 246988, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ary_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 265225, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ary_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 286108, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.21640316205534, + "max_sentence1_length": 315, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ces_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 238878, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ces_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 253644, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ces_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 290615, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, 
+ "ces_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 258315, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ces_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 280469, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ces_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 253952, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ces_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 266411, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ces_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 285989, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ces_Latn-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 263018, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ces_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 269190, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ces_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 272827, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ces_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 284796, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ces_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 254190, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ces_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 280688, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ces_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 256665, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ces_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 249712, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ces_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 284254, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ces_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 255855, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ces_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 240830, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ces_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275248, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ces_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 259223, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ces_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 255984, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ces_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 273119, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ces_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 276593, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ces_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 251084, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ces_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 280604, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ces_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 245217, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ces_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 242370, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ces_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 288008, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ces_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 258549, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ces_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 265886, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ces_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262789, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ces_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 260308, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ces_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 288253, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + 
"max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ces_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258037, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ces_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 267734, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ces_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 239334, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ces_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 256299, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ces_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 256716, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + 
}, + "ces_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 273213, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ces_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 273019, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ces_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 253656, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ces_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 266740, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ces_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 275983, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ces_Latn-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 261703, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ces_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 266947, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ces_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 255868, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ces_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 267079, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ces_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 261817, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ces_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 257629, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ces_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 268420, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ces_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 270675, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ces_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 264103, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ces_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 262004, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ces_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 236303, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ces_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 249487, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ces_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 254705, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ces_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 286937, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ces_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 243257, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ces_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265084, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ces_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 247900, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ces_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 259430, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ces_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 257176, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ces_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 259907, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ces_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 265439, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ces_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 261531, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ces_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 269575, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ces_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 266229, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ces_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 273545, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ces_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 268284, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + 
"max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ces_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 265361, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ces_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 276580, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ces_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 274321, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ces_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 275408, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ces_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 282434, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, 
+ "ces_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 257040, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ces_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 277151, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ces_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 264039, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ces_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 275828, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ces_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 264628, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ces_Latn-vec_Latn": { + "num_samples": 1012, + 
"number_of_characters": 260494, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ces_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 214389, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ces_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 259444, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ces_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 267385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ces_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 283395, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ces_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 193222, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ces_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 251914, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ces_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 274522, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ces_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 281390, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ces_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 265852, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ces_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 235238, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ces_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 266632, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ces_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 262517, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ces_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 264185, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ces_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 257317, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ces_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 273784, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ces_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269353, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ces_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 251674, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ces_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 292003, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ces_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 243564, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ces_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265081, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ces_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 284895, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ces_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 184200, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ces_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 273251, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ces_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 290426, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ces_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 269878, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + 
"max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ces_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 251161, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ces_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 254430, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ces_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 281015, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ces_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 273038, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ces_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 271776, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + 
}, + "ces_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 257811, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ces_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 264943, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ces_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 274764, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ces_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 255514, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ces_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260539, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ces_Latn-xho_Latn": { + "num_samples": 1012, + 
"number_of_characters": 266380, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ces_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 243685, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ces_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 285942, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ces_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 249419, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ces_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 296654, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ces_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 269454, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ces_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 260700, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ces_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 267048, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ces_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 259727, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ces_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 268915, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ces_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 240820, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ces_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 283675, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ces_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 255613, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ces_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 257586, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ces_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 259395, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ces_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 265828, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ces_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273128, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ces_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 254101, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ces_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 241027, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ces_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 266518, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ces_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 289854, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ces_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 265297, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ces_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 268923, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ces_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 254160, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ces_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 315541, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ces_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 293426, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + 
"max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ces_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 167276, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ces_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 253193, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ces_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 255842, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ces_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 279738, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ces_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 253362, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, 
+ "ces_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 257105, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ces_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 278366, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ces_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 258337, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ces_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 253366, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ces_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 170505, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ces_Latn-ast_Latn": { + "num_samples": 1012, + 
"number_of_characters": 263620, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ces_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 261295, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ces_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 273400, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ces_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 253658, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ces_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 274802, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ces_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 269597, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ces_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 258967, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ces_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 217957, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ces_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 167952, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ces_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 255428, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ces_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 268514, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ces_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 260072, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ces_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 271187, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ces_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 270255, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ces_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 274429, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ces_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 258112, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ces_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 296268, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ces_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 273458, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ces_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 265874, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ces_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 262340, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ces_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 253685, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ces_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262661, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ces_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 262335, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ces_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 277908, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ces_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 279981, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ces_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 290292, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ces_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 275619, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ces_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 244318, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ces_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 281066, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ces_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 248537, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ces_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 270214, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + 
}, + "ces_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 264803, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ces_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 262096, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ces_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 274860, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ces_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 285435, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ces_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 270996, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ces_Latn-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 239046, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ces_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 267324, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ces_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 258057, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ces_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 273512, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ces_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 257156, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ces_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 246845, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ces_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 267519, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ces_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260479, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ces_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 260112, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ces_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 229311, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ces_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264324, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ces_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 261414, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ces_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 260682, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ces_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 278919, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ces_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 299802, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 125.74802371541502, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "gaz_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 268039, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "gaz_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 282805, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "gaz_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 319776, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "gaz_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 287476, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "gaz_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 309630, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "gaz_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 283113, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "gaz_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 295572, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gaz_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 315150, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "gaz_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 292179, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "gaz_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 298351, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "gaz_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 301988, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 
394, + "unique_sentence2": 1012 + }, + "gaz_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 313957, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "gaz_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 283351, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "gaz_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 309849, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "gaz_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 285826, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gaz_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 278873, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "gaz_Latn-spa_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 313415, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gaz_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 285016, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "gaz_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 269991, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "gaz_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304409, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "gaz_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 288384, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gaz_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 285145, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gaz_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 302280, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "gaz_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 305754, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "gaz_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 280245, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "gaz_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 309765, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "gaz_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 274378, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "gaz_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 271531, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "gaz_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 317169, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "gaz_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 287710, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "gaz_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 295047, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "gaz_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291950, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + 
"max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "gaz_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 289469, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "gaz_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 317414, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "gaz_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287198, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gaz_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 296895, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "gaz_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 268495, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + 
"min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "gaz_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 285460, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "gaz_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 285877, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "gaz_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 302374, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "gaz_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 302180, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "gaz_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 282817, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 
124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "gaz_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 295901, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "gaz_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 305144, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "gaz_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290864, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "gaz_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 296108, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "gaz_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 285029, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + 
"unique_sentence2": 1012 + }, + "gaz_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 296240, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "gaz_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 290978, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "gaz_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 286790, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "gaz_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 297581, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "gaz_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 299836, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "gaz_Latn-sun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 293264, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "gaz_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 291165, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "gaz_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 265464, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "gaz_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 278648, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "gaz_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 283866, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gaz_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 316098, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "gaz_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 272418, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "gaz_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294245, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "gaz_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 277061, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "gaz_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 288591, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "gaz_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 286337, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "gaz_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 289068, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gaz_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 294600, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gaz_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 290692, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "gaz_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 298736, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "gaz_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 295390, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + 
"max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "gaz_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 302706, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "gaz_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 297445, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "gaz_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 294522, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "gaz_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 305741, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "gaz_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 303482, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + 
"min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "gaz_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 304569, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "gaz_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 311595, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "gaz_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 286201, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "gaz_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 306312, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "gaz_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 293200, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "gaz_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 304989, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gaz_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 293789, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "gaz_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 289655, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "gaz_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 243550, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "gaz_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 288605, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "gaz_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 296546, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "gaz_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 312556, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "gaz_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 222383, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "gaz_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 281075, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "gaz_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 303683, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "gaz_Latn-tam_Taml": { + 
"num_samples": 1012, + "number_of_characters": 310551, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "gaz_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 295013, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "gaz_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 264399, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "gaz_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 295793, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "gaz_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 291678, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "gaz_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 293346, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gaz_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 286478, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gaz_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 302945, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "gaz_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298514, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gaz_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 280835, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "gaz_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 321164, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "gaz_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 272725, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "gaz_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294242, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "gaz_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 314056, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "gaz_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 213361, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "gaz_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 302412, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + 
"max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "gaz_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 319587, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "gaz_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 299039, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "gaz_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 280322, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "gaz_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 283591, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "gaz_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 310176, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "gaz_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 302199, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gaz_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 300937, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "gaz_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 286972, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "gaz_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 294104, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "gaz_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 303925, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "gaz_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 284675, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "gaz_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289700, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "gaz_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 295541, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "gaz_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 272846, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "gaz_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 315103, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + 
"unique_sentence2": 1012 + }, + "gaz_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 278580, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "gaz_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 325815, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "gaz_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 298615, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "gaz_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 289861, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "gaz_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 296209, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "gaz_Latn-tel_Telu": { + 
"num_samples": 1012, + "number_of_characters": 288888, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "gaz_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 298076, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "gaz_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 269981, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "gaz_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 283675, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gaz_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 284774, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "gaz_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 286747, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "gaz_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 288556, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "gaz_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 294989, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "gaz_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302289, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "gaz_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 283262, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "gaz_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 270188, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "gaz_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 295679, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "gaz_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 319015, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "gaz_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 294458, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gaz_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 298084, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "gaz_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 283321, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + 
"max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gaz_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 344702, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "gaz_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 322587, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "gaz_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 196437, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "gaz_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 282354, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "gaz_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 285003, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "gaz_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 308899, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "gaz_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 282523, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "gaz_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 286266, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "gaz_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 307527, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "gaz_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 287498, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "gaz_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 282527, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "gaz_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 199666, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "gaz_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 292781, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gaz_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 290456, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "gaz_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 302561, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "gaz_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 282819, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "gaz_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 303963, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "gaz_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 298758, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "gaz_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 288128, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "gaz_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 247118, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "gaz_Latn-zho_Hant": { + 
"num_samples": 1012, + "number_of_characters": 197113, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "gaz_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 284589, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "gaz_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 297675, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "gaz_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 289233, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "gaz_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 300348, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "gaz_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 299416, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "gaz_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 303590, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "gaz_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 287273, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "gaz_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 325429, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "gaz_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 302619, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gaz_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 295035, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "gaz_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 291501, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "gaz_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 282846, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "gaz_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291822, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gaz_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 291496, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "gaz_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 307069, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + 
"max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "gaz_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 309142, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "gaz_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 319453, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "gaz_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 304780, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "gaz_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 273479, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "gaz_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 310227, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "gaz_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 277698, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "gaz_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 299375, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "gaz_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 293964, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "gaz_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 291257, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "gaz_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 304021, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "gaz_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 314596, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "gaz_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 300157, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "gaz_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 268207, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "gaz_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 296485, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "gaz_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 287218, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + 
"unique_sentence2": 1012 + }, + "gaz_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 302673, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "gaz_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 286317, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "gaz_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 276006, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "gaz_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 296680, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gaz_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289640, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "gaz_Latn-dyu_Latn": { + 
"num_samples": 1012, + "number_of_characters": 289273, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "gaz_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 258472, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "gaz_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293485, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "gaz_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 290575, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "gaz_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 289843, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "gaz_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 308080, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "gaz_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 328963, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 154.56324110671937, + "max_sentence1_length": 456, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kam_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 239977, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kam_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 254743, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kam_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 291714, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kam_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 259414, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kam_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 281568, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kam_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 255051, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kam_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 267510, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kam_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 287088, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kam_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 264117, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + 
"max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kam_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 270289, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kam_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 273926, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kam_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 285895, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kam_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 255289, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kam_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 281787, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kam_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 257764, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kam_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 250811, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kam_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 285353, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kam_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 256954, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kam_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 241929, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kam_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276347, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kam_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 260322, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kam_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 257083, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kam_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 274218, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kam_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 277692, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "kam_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 252183, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kam_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 281703, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kam_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 246316, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kam_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 243469, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kam_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 289107, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kam_Latn-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 259648, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kam_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 266985, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kam_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263888, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kam_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 261407, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kam_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 289352, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kam_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259136, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kam_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 268833, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kam_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 240433, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kam_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 257398, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kam_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 257815, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kam_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 274312, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kam_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 274118, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kam_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 254755, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kam_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 267839, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kam_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 277082, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kam_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262802, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + 
"max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kam_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 268046, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kam_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 256967, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kam_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 268178, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kam_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 262916, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kam_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 258728, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + 
"min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kam_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 269519, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kam_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 271774, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kam_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 265202, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kam_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 263103, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kam_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 237402, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 
107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kam_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 250586, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kam_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 255804, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kam_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 288036, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kam_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 244356, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kam_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266183, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + 
"unique_sentence2": 1012 + }, + "kam_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 248999, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kam_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 260529, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kam_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 258275, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kam_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 261006, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kam_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 266538, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kam_Latn-fao_Latn": { + 
"num_samples": 1012, + "number_of_characters": 262630, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kam_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 270674, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kam_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 267328, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kam_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 274644, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kam_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 269383, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kam_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 266460, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kam_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 277679, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kam_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 275420, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kam_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 276507, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kam_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 283533, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kam_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 258139, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kam_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 278250, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kam_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 265138, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kam_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 276927, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kam_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 265727, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kam_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 261593, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + 
"max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kam_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 215488, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kam_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 260543, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kam_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 268484, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kam_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 284494, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kam_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 194321, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + 
"min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kam_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 253013, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kam_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 275621, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kam_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 282489, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kam_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 266951, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kam_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 236337, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 
106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kam_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 267731, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kam_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 263616, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kam_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 265284, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kam_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 258416, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kam_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 274883, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "kam_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270452, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kam_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 252773, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kam_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 293102, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kam_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 244663, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kam_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266180, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kam_Latn-fra_Latn": { + 
"num_samples": 1012, + "number_of_characters": 285994, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kam_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 185299, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kam_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 274350, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kam_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 291525, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kam_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 270977, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kam_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 252260, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kam_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 255529, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kam_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 282114, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kam_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 274137, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kam_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 272875, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kam_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 258910, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kam_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 266042, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kam_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 275863, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kam_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 256613, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kam_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261638, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kam_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 267479, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + 
"max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kam_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 244784, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kam_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 287041, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kam_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 250518, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kam_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 297753, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kam_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 270553, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + 
"min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kam_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 261799, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kam_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 268147, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kam_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 260826, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kam_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 270014, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kam_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 241919, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 
112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kam_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 255613, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kam_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 284774, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kam_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 258685, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kam_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 260494, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kam_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 266927, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "kam_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274227, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kam_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 255200, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kam_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 242126, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kam_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 267617, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kam_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 290953, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kam_Latn-kan_Knda": { + 
"num_samples": 1012, + "number_of_characters": 266396, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kam_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 270022, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kam_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 255259, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kam_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 316640, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kam_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 294525, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kam_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 168375, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kam_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 254292, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kam_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 256941, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kam_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 280837, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kam_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 254461, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kam_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 258204, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kam_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 279465, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kam_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 259436, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kam_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 254465, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kam_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 171604, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kam_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 264719, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + 
"max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kam_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 262394, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kam_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 274499, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kam_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 254757, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kam_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 275901, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kam_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 270696, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + 
"min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kam_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 260066, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kam_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 219056, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kam_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 169051, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kam_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 256527, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kam_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 269613, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 
139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kam_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 261171, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kam_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 272286, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kam_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 271354, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kam_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 275528, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kam_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 259211, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + 
"unique_sentence2": 1012 + }, + "kam_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 297367, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kam_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 274557, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kam_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 266973, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kam_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 263439, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kam_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 254784, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kam_Latn-kaz_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 263760, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kam_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 263434, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kam_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 279007, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kam_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 281080, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kam_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 291391, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kam_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 276718, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kam_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 245417, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kam_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 282165, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kam_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 249636, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kam_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 271313, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kam_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 265902, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kam_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 263195, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kam_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 275959, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kam_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 286534, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kam_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 272095, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kam_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 240145, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + 
"max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kam_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 268423, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kam_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 259156, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kam_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 274611, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kam_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 258255, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kam_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 247944, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + 
"min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kam_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 268618, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kam_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261578, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kam_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 261211, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kam_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 230410, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kam_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265423, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 
135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kam_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 262513, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kam_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 261781, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kam_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 280018, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kam_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 300901, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.83399209486166, + "max_sentence1_length": 398, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "lit_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 241950, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + 
"unique_sentence2": 1012 + }, + "lit_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 256716, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "lit_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 293687, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lit_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 261387, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "lit_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 283541, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "lit_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 257024, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "lit_Latn-pap_Latn": { + 
"num_samples": 1012, + "number_of_characters": 269483, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lit_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 289061, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "lit_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 266090, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "lit_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 272262, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lit_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 275899, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lit_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 287868, + "unique_pairs": 
1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "lit_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 257262, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "lit_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 283760, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "lit_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 259737, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lit_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 252784, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lit_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 287326, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lit_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 258927, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "lit_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 243902, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "lit_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278320, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lit_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 262295, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lit_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 259056, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + 
"max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lit_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 276191, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lit_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 279665, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lit_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 254156, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "lit_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 283676, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lit_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 248289, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "lit_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 245442, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "lit_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 291080, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "lit_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 261621, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lit_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 268958, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "lit_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265861, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lit_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 263380, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lit_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 291325, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "lit_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261109, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lit_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 270806, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "lit_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 242406, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + 
"unique_sentence2": 1012 + }, + "lit_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 259371, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lit_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 259788, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "lit_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 276285, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lit_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 276091, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "lit_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 256728, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lit_Latn-pol_Latn": { + 
"num_samples": 1012, + "number_of_characters": 269812, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lit_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 279055, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lit_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264775, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lit_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 270019, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lit_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 258940, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lit_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 270151, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lit_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 264889, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lit_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 260701, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lit_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 271492, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lit_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 273747, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lit_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 267175, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lit_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 265076, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lit_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 239375, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "lit_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 252559, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "lit_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 257777, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lit_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 290009, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + 
"max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "lit_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 246329, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "lit_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268156, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "lit_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 250972, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "lit_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 262502, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lit_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 260248, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lit_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 262979, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lit_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 268511, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lit_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 264603, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lit_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 272647, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lit_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 269301, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lit_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 276617, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "lit_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 271356, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lit_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 268433, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "lit_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 279652, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lit_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 277393, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + 
"unique_sentence2": 1012 + }, + "lit_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 278480, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "lit_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 285506, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "lit_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 260112, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lit_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 280223, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lit_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 267111, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lit_Latn-ron_Latn": { + 
"num_samples": 1012, + "number_of_characters": 278900, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lit_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 267700, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "lit_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 263566, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lit_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 217461, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "lit_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 262516, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lit_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 270457, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lit_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 286467, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "lit_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 196294, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "lit_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 254986, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "lit_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 277594, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "lit_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 284462, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "lit_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 268924, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "lit_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 238310, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "lit_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 269704, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lit_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 265589, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "lit_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 267257, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + 
"max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lit_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 260389, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lit_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 276856, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lit_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272425, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lit_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 254746, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lit_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 295075, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + 
"min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lit_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 246636, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lit_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268153, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lit_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 287967, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "lit_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 187272, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "lit_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 276323, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lit_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 293498, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lit_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 272950, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "lit_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 254233, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lit_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 257502, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lit_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 284087, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + 
"unique_sentence2": 1012 + }, + "lit_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 276110, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lit_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 274848, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lit_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 260883, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "lit_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 268015, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "lit_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 277836, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lit_Latn-san_Deva": { + 
"num_samples": 1012, + "number_of_characters": 258586, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "lit_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263611, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lit_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 269452, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "lit_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 246757, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lit_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 289014, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lit_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 252491, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "lit_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 299726, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "lit_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 272526, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lit_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 263772, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lit_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 270120, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lit_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 262799, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lit_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 271987, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lit_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 243892, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "lit_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 257586, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lit_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 286747, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "lit_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 258685, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + 
"max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "lit_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 262467, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "lit_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 268900, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lit_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276200, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "lit_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 257173, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lit_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 244099, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "lit_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 269590, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lit_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 292926, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "lit_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 268369, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lit_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 271995, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lit_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 257232, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lit_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 318613, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "lit_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 296498, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lit_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 170348, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "lit_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 256265, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lit_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 258914, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + 
"unique_sentence2": 1012 + }, + "lit_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 282810, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "lit_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 256434, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "lit_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 260177, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lit_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 281438, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "lit_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 261409, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lit_Latn-tha_Thai": { + 
"num_samples": 1012, + "number_of_characters": 256438, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lit_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 173577, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "lit_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 266692, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lit_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 264367, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lit_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 276472, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lit_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 256730, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "lit_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 277874, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "lit_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 272669, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "lit_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 262039, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lit_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 221029, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "lit_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 171024, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "lit_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 258500, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lit_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 271586, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lit_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 263144, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "lit_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 274259, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lit_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 273327, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + 
"max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "lit_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 277501, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lit_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 261184, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lit_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 299340, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "lit_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 276530, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lit_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 268946, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "lit_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 265412, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lit_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 256757, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lit_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265733, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lit_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 265407, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "lit_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 280980, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lit_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 283053, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "lit_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 293364, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lit_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 278691, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "lit_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 247390, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "lit_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 284138, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + 
"unique_sentence2": 1012 + }, + "lit_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 251609, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "lit_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 273286, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "lit_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 267875, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lit_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 265168, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lit_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 277932, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "lit_Latn-tso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 288507, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lit_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 274068, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "lit_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 242118, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "lit_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 270396, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "lit_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 261129, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lit_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 276584, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lit_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 260228, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "lit_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 249917, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "lit_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 270591, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lit_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263551, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "lit_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 263184, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "lit_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 232383, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "lit_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267396, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lit_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 264486, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lit_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 263754, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lit_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 281991, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + 
"max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "lit_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 302874, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 128.78359683794466, + "max_sentence1_length": 357, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "nob_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 243759, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "nob_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 258525, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "nob_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 295496, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nob_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 263196, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "nob_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 285350, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "nob_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 258833, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "nob_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 271292, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nob_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 290870, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "nob_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 267899, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "nob_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 274071, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nob_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 277708, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nob_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 289677, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "nob_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 259071, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "nob_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 285569, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 
1009 + }, + "nob_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 261546, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nob_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 254593, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nob_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 289135, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nob_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 260736, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "nob_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 245711, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "nob_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 280129, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nob_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 264104, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nob_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 260865, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nob_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 278000, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nob_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 281474, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nob_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 255965, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "nob_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 285485, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nob_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 250098, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "nob_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 247251, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "nob_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 292889, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "nob_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 263430, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nob_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 270767, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "nob_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267670, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nob_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 265189, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nob_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 293134, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "nob_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262918, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nob_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 272615, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "nob_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 244215, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "nob_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 261180, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nob_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 261597, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "nob_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 278094, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nob_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 277900, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "nob_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 258537, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nob_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 271621, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nob_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 280864, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nob_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266584, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nob_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 271828, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nob_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 260749, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nob_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 271960, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "nob_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 266698, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nob_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 262510, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + 
}, + "nob_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 273301, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nob_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 275556, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nob_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 268984, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nob_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 266885, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nob_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 241184, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "nob_Latn-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 254368, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "nob_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 259586, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nob_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 291818, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "nob_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 248138, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "nob_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269965, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "nob_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 252781, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "nob_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 264311, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nob_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 262057, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nob_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 264788, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nob_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 270320, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nob_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 266412, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nob_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 274456, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nob_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 271110, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nob_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 278426, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "nob_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 273165, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nob_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 270242, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "nob_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 281461, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nob_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 279202, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nob_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 280289, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "nob_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 287315, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "nob_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 261921, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nob_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 282032, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nob_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 268920, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nob_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 280709, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nob_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 269509, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "nob_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 265375, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nob_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 219270, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "nob_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 264325, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nob_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 272266, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nob_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 288276, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "nob_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 198103, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"nob_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 256795, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "nob_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 279403, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "nob_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 286271, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "nob_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 270733, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "nob_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 240119, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "nob_Latn-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 271513, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nob_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 267398, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "nob_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 269066, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nob_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 262198, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nob_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 278665, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nob_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274234, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nob_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 256555, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nob_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 296884, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nob_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 248445, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nob_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269962, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nob_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 289776, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "nob_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 189081, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "nob_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 278132, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nob_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 295307, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nob_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 274759, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "nob_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 256042, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nob_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 259311, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nob_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 285896, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "nob_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 277919, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nob_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 276657, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nob_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 262692, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "nob_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 269824, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "nob_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 279645, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nob_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 260395, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "nob_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265420, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nob_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 271261, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "nob_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 248566, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nob_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 290823, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nob_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 254300, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "nob_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 301535, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "nob_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 274335, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, 
+ "nob_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 265581, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nob_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 271929, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nob_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 264608, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nob_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 273796, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nob_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 245701, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "nob_Latn-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 259395, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nob_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 288556, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "nob_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 260494, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "nob_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 262467, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "nob_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 270709, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nob_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278009, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "nob_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 258982, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nob_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 245908, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "nob_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 271399, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nob_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 294735, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "nob_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 270178, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nob_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 273804, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nob_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 259041, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nob_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 320422, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "nob_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 298307, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nob_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 172157, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + 
"unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "nob_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 258074, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nob_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 260723, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nob_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 284619, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "nob_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 258243, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "nob_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 261986, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nob_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 283247, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "nob_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 263218, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nob_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 258247, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nob_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 175386, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "nob_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 268501, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + 
"max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nob_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 266176, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nob_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 278281, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nob_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 258539, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "nob_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 279683, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "nob_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 274478, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, 
+ "nob_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 263848, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nob_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 222838, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "nob_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 172833, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "nob_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 260309, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nob_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 273395, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nob_Latn-grn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 264953, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "nob_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 276068, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nob_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 275136, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "nob_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 279310, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "nob_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 262993, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nob_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 301149, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "nob_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 278339, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nob_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 270755, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "nob_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 267221, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nob_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 258566, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nob_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267542, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nob_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 267216, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "nob_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 282789, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nob_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 284862, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "nob_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 295173, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nob_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 280500, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "nob_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 249199, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "nob_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 285947, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nob_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 253418, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "nob_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 275095, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "nob_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 269684, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nob_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 266977, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nob_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 279741, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "nob_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 290316, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nob_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 275877, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "nob_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 243927, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "nob_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 272205, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "nob_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 262938, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nob_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 278393, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nob_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 262037, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "nob_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 251726, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"nob_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 272400, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nob_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265360, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "nob_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 264993, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "nob_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 234192, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "nob_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269205, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nob_Latn-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 266295, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nob_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 265563, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nob_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 283800, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "nob_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 304683, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.57114624505928, + "max_sentence1_length": 351, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "scn_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 250192, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "scn_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 264958, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "scn_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 301929, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "scn_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 269629, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "scn_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 291783, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "scn_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 265266, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "scn_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 277725, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "scn_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 297303, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "scn_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 274332, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "scn_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 280504, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "scn_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 284141, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "scn_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 296110, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 
1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "scn_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 265504, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "scn_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 292002, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "scn_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 267979, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "scn_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 261026, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "scn_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 295568, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "scn_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 267169, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "scn_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 252144, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "scn_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286562, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "scn_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 270537, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "scn_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 267298, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "scn_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 284433, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "scn_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 287907, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "scn_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 262398, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "scn_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 291918, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "scn_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 256531, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, 
+ "scn_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 253684, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "scn_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 299322, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "scn_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 269863, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "scn_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 277200, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "scn_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274103, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "scn_Latn-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 271622, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "scn_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 299567, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "scn_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269351, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "scn_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 279048, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "scn_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 250648, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "scn_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 267613, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "scn_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 268030, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "scn_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 284527, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "scn_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 284333, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "scn_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 264970, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "scn_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 278054, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "scn_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 287297, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "scn_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273017, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "scn_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 278261, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "scn_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 267182, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "scn_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 278393, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "scn_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 273131, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "scn_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 268943, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "scn_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 279734, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "scn_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 281989, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "scn_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 275417, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "scn_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 273318, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "scn_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 247617, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "scn_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 260801, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "scn_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 266019, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "scn_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 298251, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "scn_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 254571, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "scn_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276398, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "scn_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 259214, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "scn_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 270744, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "scn_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 268490, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "scn_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 271221, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "scn_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 276753, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "scn_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 272845, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "scn_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 280889, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "scn_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 277543, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "scn_Latn-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 284859, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "scn_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 279598, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "scn_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 276675, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "scn_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 287894, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "scn_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 285635, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "scn_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 286722, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "scn_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 293748, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "scn_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 268354, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "scn_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 288465, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "scn_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 275353, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "scn_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 287142, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "scn_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 275942, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "scn_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 271808, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "scn_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 225703, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "scn_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 270758, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "scn_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 278699, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "scn_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 294709, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "scn_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 204536, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "scn_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 263228, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "scn_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 285836, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "scn_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 292704, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "scn_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 277166, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "scn_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 246552, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "scn_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 277946, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "scn_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 273831, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "scn_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 275499, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "scn_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 268631, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "scn_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 285098, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "scn_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280667, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "scn_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 262988, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "scn_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 303317, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "scn_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 254878, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "scn_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276395, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "scn_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 296209, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "scn_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 195514, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "scn_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 284565, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "scn_Latn-mya_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 301740, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "scn_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 281192, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "scn_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 262475, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "scn_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 265744, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "scn_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 292329, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "scn_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 284352, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "scn_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 283090, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "scn_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 269125, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "scn_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 276257, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "scn_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 286078, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "scn_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 266828, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "scn_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271853, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "scn_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 277694, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "scn_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 254999, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "scn_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 297256, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "scn_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 260733, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "scn_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 307968, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "scn_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 280768, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "scn_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 272014, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "scn_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 278362, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "scn_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 271041, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "scn_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 280229, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "scn_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 252134, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "scn_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 265828, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "scn_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 294989, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "scn_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 266927, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "scn_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 268900, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "scn_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 270709, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "scn_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284442, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "scn_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 265415, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "scn_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 252341, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + 
"scn_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 277832, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "scn_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 301168, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "scn_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 276611, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "scn_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 280237, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "scn_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 265474, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "scn_Latn-shn_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 326855, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "scn_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 304740, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "scn_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 178590, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "scn_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 264507, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "scn_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 267156, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "scn_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 291052, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "scn_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 264676, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "scn_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 268419, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "scn_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 289680, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "scn_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 269651, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "scn_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 264680, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "scn_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 181819, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "scn_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 274934, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "scn_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 272609, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "scn_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 284714, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "scn_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 264972, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "scn_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 286116, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "scn_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 280911, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "scn_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 270281, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "scn_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 229271, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "scn_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 179266, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + 
"average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "scn_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 266742, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "scn_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 279828, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "scn_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 271386, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "scn_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 282501, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "scn_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 281569, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + 
"max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "scn_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 285743, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "scn_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 269426, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "scn_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 307582, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "scn_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 284772, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "scn_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 277188, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, 
+ "scn_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 273654, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "scn_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 264999, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "scn_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273975, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "scn_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 273649, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "scn_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 289222, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "scn_Latn-smo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 291295, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "scn_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 301606, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "scn_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 286933, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "scn_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 255632, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "scn_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 292380, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "scn_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 259851, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "scn_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 281528, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "scn_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 276117, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "scn_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 273410, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "scn_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 286174, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "scn_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 296749, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "scn_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 282310, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "scn_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 250360, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "scn_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 278638, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "scn_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 269371, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "scn_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 284826, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "scn_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 268470, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "scn_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 258159, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "scn_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 278833, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "scn_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271793, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "scn_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 271426, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "scn_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 240625, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "scn_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275638, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "scn_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 272728, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "scn_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 271996, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "scn_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 290233, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + 
"max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "scn_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 311116, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 136.92786561264822, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257492, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272258, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tgk_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309229, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 276929, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + 
"tgk_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 299083, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 272566, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 285025, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 304603, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 281632, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ace_Latn": { + "num_samples": 1012, + 
"number_of_characters": 287804, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 291441, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303410, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 272804, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tgk_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299302, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tgk_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275279, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268326, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 302868, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274469, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 259444, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293862, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 277837, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 274598, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 291733, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 295207, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 269698, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + 
"unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299218, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 263831, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 260984, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 306622, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 277163, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + 
"average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 284500, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281403, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 278922, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 306867, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276651, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286348, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tgk_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 257948, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 274913, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275330, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 291827, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + 
"tgk_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 291633, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 272270, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tgk_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285354, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 294597, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280317, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-afr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285561, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274482, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 285693, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280431, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276243, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 287034, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tgk_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289289, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 282717, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 280618, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 254917, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 268101, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273319, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 305551, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 261871, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283698, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266514, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + 
"unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 278044, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 275790, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278521, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 284053, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 280145, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 288189, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 284843, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 292159, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 286898, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 283975, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295194, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 292935, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 294022, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tgk_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 301048, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 275654, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + 
"tgk_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 295765, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 282653, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 294442, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 283242, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tgk_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 279108, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 233003, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 278058, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 285999, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 302009, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 211836, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270528, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293136, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tgk_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 300004, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284466, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 253852, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285246, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281131, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 282799, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 275931, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 292398, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287967, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + 
"unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270288, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 310617, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262178, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283695, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 303509, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + 
"average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 202814, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 291865, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 309040, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288492, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 269775, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 273044, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 299629, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 291652, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290390, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276425, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + 
"tgk_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283557, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293378, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 274128, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tgk_Cyrl-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279153, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 284994, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ars_Arab": { + "num_samples": 1012, + 
"number_of_characters": 262299, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304556, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 268033, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315268, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 288068, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279314, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 285662, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278341, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287529, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 259434, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tgk_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273128, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302289, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 274227, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tgk_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276200, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 278009, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284442, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + 
"unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 272715, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 259641, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285132, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308468, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tgk_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 283911, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + 
"average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 287537, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 272774, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334155, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 312040, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 185890, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + 
"max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 271807, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274456, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298352, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 271976, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 275719, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + 
"tgk_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 296980, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 276951, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 271980, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 189119, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282234, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-crh_Latn": { + "num_samples": 1012, + 
"number_of_characters": 279909, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 292014, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272272, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293416, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288211, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277581, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236571, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186566, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 274042, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 287128, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 278686, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tgk_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 289801, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 288869, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tgk_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 293043, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 276726, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 314882, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + 
"unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 292072, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284488, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 280954, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272299, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281275, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 280949, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 296522, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 298595, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tgk_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 308906, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294233, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + 
"max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 262932, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 299680, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267151, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tgk_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 288828, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283417, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + 
"tgk_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 280710, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 293474, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 304049, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 289610, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 257660, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-hau_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285938, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 276671, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 292126, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 275770, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265459, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286133, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279093, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 278726, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tgk_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 247925, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282938, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 280028, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 279296, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297533, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tgk_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 318416, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 144.1413043478261, + "max_sentence1_length": 355, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "yor_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 238465, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "yor_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 253231, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "yor_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 290202, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "yor_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 257902, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "yor_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 280056, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "yor_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 253539, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "yor_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 265998, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "yor_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 285576, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "yor_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 262605, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "yor_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 268777, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "yor_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 272414, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "yor_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 284383, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, 
+ "unique_sentence2": 1012 + }, + "yor_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 253777, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "yor_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 280275, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "yor_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 256252, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "yor_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 249299, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "yor_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 283841, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "yor_Latn-twi_Latn": { + 
"num_samples": 1012, + "number_of_characters": 255442, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "yor_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 240417, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "yor_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274835, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "yor_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 258810, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "yor_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 255571, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "yor_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 272706, + 
"unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "yor_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 276180, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "yor_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 250671, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "yor_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 280191, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "yor_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 244804, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "yor_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 241957, + "unique_pairs": 1012, + "min_sentence1_length": 27, + 
"average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "yor_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 287595, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "yor_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 258136, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "yor_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 265473, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "yor_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262376, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "yor_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 259895, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "yor_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 287840, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "yor_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257624, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "yor_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 267321, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "yor_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 238921, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "yor_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 255886, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "yor_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 256303, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "yor_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 272800, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "yor_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 272606, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "yor_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 253243, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "yor_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 266327, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "yor_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 275570, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "yor_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261290, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "yor_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 266534, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "yor_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 255455, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "yor_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 266666, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "yor_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 261404, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "yor_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 257216, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "yor_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 268007, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "yor_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 270262, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "yor_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 263690, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "yor_Latn-umb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 261591, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "yor_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 235890, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "yor_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 249074, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "yor_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 254292, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "yor_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 286524, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "yor_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 242844, + 
"unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "yor_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264671, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "yor_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 247487, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "yor_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 259017, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "yor_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 256763, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "yor_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 259494, + "unique_pairs": 1012, + "min_sentence1_length": 27, + 
"average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "yor_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 265026, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "yor_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 261118, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "yor_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 269162, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "yor_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 265816, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "yor_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 273132, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "yor_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 267871, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "yor_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 264948, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "yor_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 276167, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "yor_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273908, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "yor_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 274995, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "yor_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 282021, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "yor_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 256627, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "yor_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 276738, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "yor_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 263626, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "yor_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 275415, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "yor_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 264215, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "yor_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 260081, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "yor_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 213976, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "yor_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 259031, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "yor_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 266972, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + 
"unique_sentence2": 1012 + }, + "yor_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 282982, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "yor_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 192809, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "yor_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 251501, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "yor_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 274109, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "yor_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 280977, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "yor_Latn-vie_Latn": { + 
"num_samples": 1012, + "number_of_characters": 265439, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "yor_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 234825, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "yor_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 266219, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "yor_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 262104, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "yor_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 263772, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "yor_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 256904, + 
"unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "yor_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 273371, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "yor_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268940, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "yor_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 251261, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "yor_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 291590, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "yor_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 243151, + "unique_pairs": 1012, + "min_sentence1_length": 27, + 
"average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "yor_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264668, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "yor_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 284482, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "yor_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 183787, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "yor_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 272838, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "yor_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 290013, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "yor_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 269465, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "yor_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 250748, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "yor_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 254017, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "yor_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 280602, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "yor_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 272625, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "yor_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 271363, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "yor_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 257398, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "yor_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 264530, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "yor_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 274351, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "yor_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 255101, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "yor_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260126, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "yor_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 265967, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "yor_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 243272, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "yor_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 285529, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "yor_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 249006, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + 
"unique_sentence2": 1012 + }, + "yor_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 296241, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "yor_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 269041, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "yor_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 260287, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "yor_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 266635, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "yor_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 259314, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "yor_Latn-ydd_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 268502, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "yor_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 240407, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "yor_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 254101, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "yor_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 283262, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "yor_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 255200, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "yor_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 257173, + 
"unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "yor_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 258982, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "yor_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 265415, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "yor_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272715, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "yor_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 240614, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "yor_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 266105, + "unique_pairs": 1012, + "min_sentence1_length": 27, + 
"average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "yor_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 289441, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "yor_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 264884, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "yor_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 268510, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "yor_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 253747, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "yor_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 315128, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "yor_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 293013, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "yor_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 166863, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "yor_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 252780, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "yor_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 255429, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "yor_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 279325, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "yor_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 252949, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "yor_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 256692, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "yor_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 277953, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "yor_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 257924, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "yor_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 252953, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "yor_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 170092, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "yor_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 263207, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "yor_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 260882, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "yor_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 272987, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "yor_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 253245, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + 
"unique_sentence2": 1012 + }, + "yor_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 274389, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "yor_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 269184, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "yor_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 258554, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "yor_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 217544, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "yor_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 167539, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "yor_Latn-awa_Deva": { + 
"num_samples": 1012, + "number_of_characters": 255015, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "yor_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 268101, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "yor_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 259659, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "yor_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 270774, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "yor_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 269842, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "yor_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 274016, + 
"unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "yor_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 257699, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "yor_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 295855, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "yor_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 273045, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "yor_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 265461, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "yor_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 261927, + "unique_pairs": 1012, + "min_sentence1_length": 27, + 
"average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "yor_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 253272, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "yor_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262248, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "yor_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 261922, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "yor_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 277495, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "yor_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 279568, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + 
"max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "yor_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 289879, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "yor_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 275206, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "yor_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 243905, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "yor_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 280653, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "yor_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 248124, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "yor_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 269801, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "yor_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 264390, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "yor_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 261683, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "yor_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 274447, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "yor_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 285022, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "yor_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 270583, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "yor_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 238633, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "yor_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 266911, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "yor_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 257644, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "yor_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 273099, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + 
"unique_sentence2": 1012 + }, + "yor_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 256743, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "yor_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 246432, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "yor_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 267106, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "yor_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260066, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "yor_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 259699, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "yor_Latn-heb_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 228898, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "yor_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263911, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "yor_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 261001, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "yor_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 260269, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "yor_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 278506, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "yor_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 299389, + 
"unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 125.3399209486166, + "max_sentence1_length": 391, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "arz_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 225391, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "arz_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 240157, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "arz_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 277128, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "arz_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 244828, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "arz_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 266982, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "arz_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 240465, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "arz_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 252924, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arz_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 272502, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "arz_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 249531, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "arz_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 255703, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + 
"max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "arz_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 259340, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "arz_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 271309, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "arz_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 240703, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "arz_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 267201, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "arz_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 243178, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arz_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 236225, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "arz_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 270767, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arz_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 242368, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "arz_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 227343, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "arz_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261761, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "arz_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 245736, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arz_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 242497, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arz_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 259632, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "arz_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 263106, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "arz_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 237597, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + 
"unique_sentence2": 1012 + }, + "arz_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 267117, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "arz_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 231730, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "arz_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 228883, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "arz_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 274521, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "arz_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 245062, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "arz_Arab-hun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 252399, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "arz_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249302, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "arz_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 246821, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "arz_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 274766, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "arz_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 244550, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arz_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 254247, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "arz_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 225847, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "arz_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 242812, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "arz_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 243229, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "arz_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 259726, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "arz_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 259532, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "arz_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 240169, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "arz_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 253253, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "arz_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 262496, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "arz_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 248216, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "arz_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 253460, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + 
"max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "arz_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 242381, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "arz_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 253592, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "arz_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 248330, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "arz_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 244142, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "arz_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 254933, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "arz_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 257188, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "arz_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 250616, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "arz_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 248517, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "arz_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 222816, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "arz_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 236000, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "arz_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 241218, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arz_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 273450, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "arz_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 229770, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "arz_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251597, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "arz_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 234413, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + 
"unique_sentence2": 1012 + }, + "arz_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 245943, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "arz_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 243689, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "arz_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 246420, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "arz_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 251952, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arz_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 248044, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "arz_Arab-ind_Latn": { + 
"num_samples": 1012, + "number_of_characters": 256088, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "arz_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 252742, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "arz_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 260058, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "arz_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 254797, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "arz_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 251874, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "arz_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 263093, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "arz_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 260834, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "arz_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 261921, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "arz_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 268947, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "arz_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 243553, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "arz_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 263664, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "arz_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 250552, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "arz_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 262341, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arz_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 251141, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "arz_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 247007, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "arz_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 200902, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + 
"max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "arz_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 245957, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "arz_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 253898, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "arz_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 269908, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "arz_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 179735, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "arz_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 238427, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "arz_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 261035, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "arz_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 267903, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "arz_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 252365, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "arz_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 221751, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "arz_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 253145, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "arz_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 249030, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "arz_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 250698, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arz_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 243830, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "arz_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 260297, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "arz_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255866, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "arz_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 238187, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "arz_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 278516, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "arz_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 230077, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "arz_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251594, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "arz_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 271408, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "arz_Arab-jpn_Jpan": { + 
"num_samples": 1012, + "number_of_characters": 170713, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "arz_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 259764, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "arz_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 276939, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "arz_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 256391, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "arz_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 237674, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "arz_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 240943, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "arz_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 267528, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "arz_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 259551, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arz_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 258289, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "arz_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 244324, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "arz_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 251456, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "arz_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 261277, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "arz_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 242027, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "arz_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247052, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "arz_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 252893, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "arz_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 230198, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + 
"max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "arz_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 272455, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "arz_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 235932, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "arz_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 283167, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "arz_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 255967, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "arz_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 247213, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "arz_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 253561, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "arz_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 246240, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "arz_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 255428, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "arz_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 227333, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "arz_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 241027, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arz_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 270188, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "arz_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 242126, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "arz_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 244099, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "arz_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 245908, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "arz_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 252341, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "arz_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259641, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "arz_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 240614, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "arz_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 253031, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "arz_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 276367, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "arz_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 251810, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arz_Arab-lmo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 255436, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "arz_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 240673, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arz_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 302054, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "arz_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 279939, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "arz_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 153789, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "arz_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 239706, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "arz_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 242355, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "arz_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 266251, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "arz_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 239875, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "arz_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 243618, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "arz_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 264879, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "arz_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 244850, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "arz_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 239879, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "arz_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 157018, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "arz_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 250133, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arz_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 247808, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + 
"max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "arz_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 259913, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "arz_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 240171, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "arz_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 261315, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "arz_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 256110, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "arz_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 245480, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "arz_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 204470, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "arz_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 154465, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "arz_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 241941, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "arz_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 255027, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "arz_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 246585, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "arz_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 257700, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "arz_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 256768, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "arz_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 260942, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "arz_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 244625, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "arz_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 282781, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + 
"unique_sentence2": 1012 + }, + "arz_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 259971, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "arz_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 252387, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "arz_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 248853, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "arz_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 240198, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "arz_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249174, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "arz_Arab-lug_Latn": { + 
"num_samples": 1012, + "number_of_characters": 248848, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "arz_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 264421, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "arz_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 266494, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "arz_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 276805, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "arz_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 262132, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "arz_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 230831, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "arz_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 267579, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "arz_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 235050, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "arz_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 256727, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "arz_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 251316, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "arz_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 248609, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "arz_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 261373, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "arz_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 271948, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "arz_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 257509, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "arz_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 225559, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "arz_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 253837, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + 
"max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "arz_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 244570, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "arz_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 260025, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "arz_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 243669, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "arz_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 233358, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "arz_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 254032, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "arz_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246992, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "arz_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 246625, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "arz_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 215824, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "arz_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250837, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "arz_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 247927, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "arz_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 247195, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "arz_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 265432, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "arz_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 286315, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 112.4209486166008, + "max_sentence1_length": 322, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "cjk_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 250882, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "cjk_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 265648, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + 
"unique_sentence2": 1009 + }, + "cjk_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 302619, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "cjk_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 270319, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "cjk_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 292473, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "cjk_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 265956, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "cjk_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 278415, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cjk_Latn-sot_Latn": { + 
"num_samples": 1012, + "number_of_characters": 297993, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "cjk_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 275022, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "cjk_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 281194, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "cjk_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 284831, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "cjk_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 296800, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "cjk_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 266194, + "unique_pairs": 1012, 
+ "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "cjk_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 292692, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "cjk_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 268669, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cjk_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 261716, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "cjk_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 296258, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cjk_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 267859, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "cjk_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 252834, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "cjk_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287252, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "cjk_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 271227, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cjk_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 267988, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cjk_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 285123, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "cjk_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 288597, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "cjk_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 263088, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "cjk_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 292608, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "cjk_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 257221, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "cjk_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 254374, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "cjk_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 300012, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "cjk_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 270553, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "cjk_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 277890, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "cjk_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274793, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "cjk_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 272312, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + 
"max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "cjk_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 300257, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "cjk_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270041, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cjk_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 279738, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "cjk_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 251338, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "cjk_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 268303, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + 
"cjk_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 268720, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "cjk_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 285217, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "cjk_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 285023, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "cjk_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 265660, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "cjk_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 278744, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "cjk_Latn-ssw_Latn": { + "num_samples": 1012, + 
"number_of_characters": 287987, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "cjk_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273707, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "cjk_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 278951, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "cjk_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 267872, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "cjk_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 279083, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "cjk_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 273821, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "cjk_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 269633, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "cjk_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 280424, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "cjk_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 282679, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "cjk_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 276107, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "cjk_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 274008, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "cjk_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 248307, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "cjk_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 261491, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "cjk_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 266709, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cjk_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 298941, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "cjk_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 255261, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "cjk_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277088, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "cjk_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 259904, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "cjk_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 271434, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "cjk_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 269180, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "cjk_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 271911, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cjk_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 277443, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cjk_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 273535, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "cjk_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 281579, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "cjk_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 278233, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "cjk_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 285549, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + 
"max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "cjk_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 280288, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "cjk_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 277365, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "cjk_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 288584, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "cjk_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 286325, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "cjk_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 287412, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + 
"cjk_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 294438, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "cjk_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 269044, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "cjk_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 289155, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "cjk_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 276043, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "cjk_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 287832, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cjk_Latn-szl_Latn": { + "num_samples": 1012, + 
"number_of_characters": 276632, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "cjk_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 272498, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "cjk_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 226393, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "cjk_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 271448, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "cjk_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 279389, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "cjk_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 295399, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "cjk_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 205226, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "cjk_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 263918, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "cjk_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 286526, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "cjk_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 293394, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "cjk_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 277856, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "cjk_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 247242, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "cjk_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 278636, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "cjk_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 274521, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "cjk_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 276189, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cjk_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 269321, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cjk_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 285788, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "cjk_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281357, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cjk_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 263678, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "cjk_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 304007, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "cjk_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 255568, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "cjk_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277085, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "cjk_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 296899, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "cjk_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 196204, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "cjk_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 285255, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "cjk_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 302430, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + 
"max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "cjk_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 281882, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "cjk_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 263165, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "cjk_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 266434, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "cjk_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 293019, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "cjk_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 285042, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + 
"cjk_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 283780, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "cjk_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 269815, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "cjk_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 276947, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "cjk_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 286768, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "cjk_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 267518, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "cjk_Latn-tat_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 272543, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "cjk_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 278384, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "cjk_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 255689, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "cjk_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 297946, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "cjk_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 261423, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "cjk_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 308658, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "cjk_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 281458, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "cjk_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 272704, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "cjk_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 279052, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "cjk_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 271731, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "cjk_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 280919, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "cjk_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 252824, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "cjk_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 266518, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cjk_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 295679, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "cjk_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 267617, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "cjk_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 269590, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "cjk_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 271399, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "cjk_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 277832, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "cjk_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285132, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "cjk_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 266105, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "cjk_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 253031, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "cjk_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 301858, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "cjk_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 277301, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cjk_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 280927, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "cjk_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 266164, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cjk_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 327545, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + 
"max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "cjk_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 305430, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "cjk_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 179280, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "cjk_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 265197, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "cjk_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 267846, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "cjk_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 291742, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + 
"cjk_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 265366, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "cjk_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 269109, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "cjk_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 290370, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "cjk_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 270341, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "cjk_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 265370, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "cjk_Latn-zho_Hans": { + "num_samples": 1012, + 
"number_of_characters": 182509, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "cjk_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 275624, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cjk_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 273299, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "cjk_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 285404, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "cjk_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 265662, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "cjk_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 286806, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "cjk_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 281601, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "cjk_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 270971, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "cjk_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 229961, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "cjk_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 179956, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "cjk_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 267432, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "cjk_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 280518, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "cjk_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 272076, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "cjk_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 283191, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "cjk_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 282259, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "cjk_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 286433, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "cjk_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 270116, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "cjk_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 308272, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "cjk_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 285462, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cjk_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 277878, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "cjk_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 274344, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "cjk_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 265689, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "cjk_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274665, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cjk_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 274339, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "cjk_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 289912, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "cjk_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 291985, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + 
"max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "cjk_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 302296, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "cjk_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 287623, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "cjk_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 256322, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "cjk_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 293070, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "cjk_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 260541, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + 
"cjk_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 282218, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "cjk_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 276807, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "cjk_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 274100, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "cjk_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 286864, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "cjk_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 297439, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "cjk_Latn-azj_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283000, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "cjk_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 251050, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "cjk_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 279328, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "cjk_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 270061, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "cjk_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 285516, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "cjk_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 269160, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "cjk_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 258849, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "cjk_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 279523, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cjk_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272483, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "cjk_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 272116, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "cjk_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 241315, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "cjk_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276328, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "cjk_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 273418, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "cjk_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 272686, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "cjk_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 290923, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "cjk_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 311806, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 137.6096837944664, + "max_sentence1_length": 366, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "gla_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 274218, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "gla_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 288984, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "gla_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 325955, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "gla_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 293655, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "gla_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 315809, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + 
"average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "gla_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 289292, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "gla_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 301751, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gla_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 321329, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "gla_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 298358, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "gla_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 304530, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, 
+ "unique_sentence2": 1012 + }, + "gla_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 308167, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "gla_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 320136, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "gla_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 289530, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "gla_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 316028, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "gla_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 292005, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gla_Latn-pbt_Arab": { + 
"num_samples": 1012, + "number_of_characters": 285052, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "gla_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 319594, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gla_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 291195, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "gla_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 276170, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "gla_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 310588, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "gla_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 294563, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gla_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 291324, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gla_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 308459, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "gla_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 311933, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "gla_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 286424, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "gla_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 315944, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "gla_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 280557, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "gla_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 277710, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "gla_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 323348, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "gla_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 293889, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "gla_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 301226, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + 
"max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "gla_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298129, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "gla_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 295648, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "gla_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 323593, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "gla_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293377, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gla_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 303074, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + 
"min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "gla_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 274674, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "gla_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 291639, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "gla_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 292056, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "gla_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 308553, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "gla_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 308359, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 
144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "gla_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 288996, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "gla_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 302080, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "gla_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 311323, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "gla_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297043, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "gla_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 302287, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + 
"unique_sentence2": 1012 + }, + "gla_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 291208, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "gla_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 302419, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "gla_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 297157, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "gla_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 292969, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "gla_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 303760, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "gla_Latn-por_Latn": { + 
"num_samples": 1012, + "number_of_characters": 306015, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "gla_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 299443, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "gla_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 297344, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "gla_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 271643, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "gla_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 284827, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "gla_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 290045, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gla_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 322277, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "gla_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 278597, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "gla_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300424, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "gla_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 283240, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "gla_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 294770, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "gla_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 292516, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "gla_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 295247, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gla_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 300779, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gla_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 296871, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "gla_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 304915, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + 
"max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "gla_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 301569, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "gla_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 308885, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "gla_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 303624, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "gla_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 300701, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "gla_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 311920, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + 
"min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "gla_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 309661, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "gla_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 310748, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "gla_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 317774, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "gla_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 292380, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "gla_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 312491, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 
148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "gla_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 299379, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "gla_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 311168, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gla_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 299968, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "gla_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 295834, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "gla_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 249729, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + 
"unique_sentence2": 1012 + }, + "gla_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 294784, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "gla_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 302725, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "gla_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 318735, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "gla_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 228562, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "gla_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 287254, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "gla_Latn-run_Latn": { + 
"num_samples": 1012, + "number_of_characters": 309862, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "gla_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 316730, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "gla_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 301192, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "gla_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 270578, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "gla_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 301972, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "gla_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 297857, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "gla_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 299525, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gla_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 292657, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gla_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 309124, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "gla_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304693, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gla_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 287014, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "gla_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 327343, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "gla_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 278904, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "gla_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300421, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "gla_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 320235, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "gla_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 219540, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + 
"max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "gla_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 308591, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "gla_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 325766, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "gla_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 305218, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "gla_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 286501, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "gla_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 289770, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + 
"min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "gla_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 316355, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "gla_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 308378, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gla_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 307116, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "gla_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 293151, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "gla_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 300283, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 
136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "gla_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 310104, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "gla_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 290854, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "gla_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295879, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "gla_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 301720, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "gla_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 279025, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + 
"unique_sentence2": 1012 + }, + "gla_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 321282, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "gla_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 284759, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "gla_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 331994, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "gla_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 304794, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "gla_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 296040, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "gla_Latn-sat_Olck": { + 
"num_samples": 1012, + "number_of_characters": 302388, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "gla_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 295067, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "gla_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 304255, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "gla_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 276160, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "gla_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 289854, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gla_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 319015, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "gla_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 290953, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "gla_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 292926, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "gla_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 294735, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "gla_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 301168, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "gla_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 308468, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "gla_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 289441, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "gla_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 276367, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "gla_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 301858, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "gla_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 300637, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gla_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 304263, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + 
"max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "gla_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 289500, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gla_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 350881, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "gla_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 328766, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "gla_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 202616, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "gla_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 288533, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + 
"min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "gla_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 291182, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "gla_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 315078, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "gla_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 288702, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "gla_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 292445, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "gla_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 313706, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 
149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "gla_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 293677, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "gla_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 288706, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "gla_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 205845, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "gla_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 298960, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gla_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 296635, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + 
"unique_sentence2": 1012 + }, + "gla_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 308740, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "gla_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 288998, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "gla_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 310142, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "gla_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 304937, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "gla_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 294307, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "gla_Latn-tir_Ethi": { + 
"num_samples": 1012, + "number_of_characters": 253297, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "gla_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 203292, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "gla_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 290768, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "gla_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 303854, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "gla_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 295412, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "gla_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 306527, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "gla_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 305595, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "gla_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 309769, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "gla_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 293452, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "gla_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 331608, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "gla_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 308798, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gla_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 301214, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "gla_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 297680, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "gla_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 289025, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "gla_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298001, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gla_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 297675, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + 
"max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "gla_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 313248, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "gla_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 315321, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "gla_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 325632, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "gla_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 310959, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "gla_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 279658, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + 
"min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "gla_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 316406, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "gla_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 283877, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "gla_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 305554, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "gla_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 300143, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "gla_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 297436, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "gla_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 310200, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "gla_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 320775, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "gla_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 306336, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "gla_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 274386, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "gla_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 302664, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + 
"unique_sentence2": 1012 + }, + "gla_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 293397, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "gla_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 308852, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "gla_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 292496, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "gla_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 282185, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "gla_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 302859, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gla_Latn-bak_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 295819, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "gla_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 295452, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "gla_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 264651, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "gla_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299664, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "gla_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 296754, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "gla_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 296022, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "gla_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 314259, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "gla_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 335142, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 160.6689723320158, + "max_sentence1_length": 478, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kan_Knda-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 249661, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kan_Knda-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 264427, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kan_Knda-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 301398, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kan_Knda-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 269098, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kan_Knda-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 291252, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kan_Knda-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 264735, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kan_Knda-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 277194, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kan_Knda-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 296772, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kan_Knda-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 273801, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kan_Knda-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 279973, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kan_Knda-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 283610, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kan_Knda-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 295579, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kan_Knda-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 264973, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kan_Knda-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 291471, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kan_Knda-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 267448, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kan_Knda-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 260495, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kan_Knda-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 295037, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kan_Knda-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 266638, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kan_Knda-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 251613, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kan_Knda-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286031, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kan_Knda-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 270006, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kan_Knda-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 266767, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kan_Knda-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 283902, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + 
"unique_sentence2": 1012 + }, + "kan_Knda-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 287376, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kan_Knda-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 261867, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kan_Knda-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 291387, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kan_Knda-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 256000, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kan_Knda-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 253153, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kan_Knda-bem_Latn": { + 
"num_samples": 1012, + "number_of_characters": 298791, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kan_Knda-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 269332, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kan_Knda-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 276669, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kan_Knda-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273572, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kan_Knda-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 271091, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kan_Knda-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 299036, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kan_Knda-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268820, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kan_Knda-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 278517, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kan_Knda-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 250117, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kan_Knda-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 267082, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kan_Knda-est_Latn": { + "num_samples": 1012, + "number_of_characters": 267499, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kan_Knda-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 283996, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kan_Knda-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 283802, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kan_Knda-min_Arab": { + "num_samples": 1012, + "number_of_characters": 264439, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kan_Knda-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 277523, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kan_Knda-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 286766, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kan_Knda-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272486, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kan_Knda-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 277730, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kan_Knda-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 266651, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kan_Knda-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 277862, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kan_Knda-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 272600, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kan_Knda-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 268412, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kan_Knda-min_Latn": { + "num_samples": 1012, + "number_of_characters": 279203, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kan_Knda-por_Latn": { + "num_samples": 1012, + "number_of_characters": 281458, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kan_Knda-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 274886, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kan_Knda-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 272787, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kan_Knda-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 247086, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kan_Knda-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 260270, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kan_Knda-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 265488, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kan_Knda-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 297720, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kan_Knda-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 254040, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + 
"unique_sentence2": 1012 + }, + "kan_Knda-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275867, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kan_Knda-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 258683, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kan_Knda-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 270213, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kan_Knda-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 267959, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kan_Knda-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 270690, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kan_Knda-bjn_Latn": { + 
"num_samples": 1012, + "number_of_characters": 276222, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kan_Knda-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 272314, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kan_Knda-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 280358, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kan_Knda-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 277012, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kan_Knda-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 284328, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kan_Knda-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 279067, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kan_Knda-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 276144, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kan_Knda-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 287363, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kan_Knda-als_Latn": { + "num_samples": 1012, + "number_of_characters": 285104, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kan_Knda-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 286191, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kan_Knda-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 293217, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kan_Knda-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 267823, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kan_Knda-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 287934, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kan_Knda-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 274822, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kan_Knda-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 286611, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kan_Knda-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 275411, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kan_Knda-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 271277, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kan_Knda-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 225172, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kan_Knda-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 270227, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kan_Knda-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 278168, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kan_Knda-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 294178, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kan_Knda-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 204005, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kan_Knda-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 262697, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kan_Knda-run_Latn": { + "num_samples": 1012, + "number_of_characters": 285305, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kan_Knda-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 292173, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kan_Knda-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 276635, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kan_Knda-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 246021, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kan_Knda-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 277415, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kan_Knda-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 273300, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kan_Knda-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 274968, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kan_Knda-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 268100, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "kan_Knda-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 284567, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kan_Knda-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280136, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kan_Knda-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 262457, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kan_Knda-war_Latn": { + "num_samples": 1012, + "number_of_characters": 302786, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kan_Knda-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 254347, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kan_Knda-bul_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 275864, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kan_Knda-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 295678, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kan_Knda-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 194983, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kan_Knda-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 284034, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kan_Knda-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 301209, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kan_Knda-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 280661, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kan_Knda-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 261944, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kan_Knda-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 265213, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kan_Knda-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 291798, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kan_Knda-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 283821, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kan_Knda-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 282559, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kan_Knda-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 268594, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kan_Knda-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 275726, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kan_Knda-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 285547, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kan_Knda-san_Deva": { + "num_samples": 1012, + "number_of_characters": 266297, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kan_Knda-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271322, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kan_Knda-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 277163, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kan_Knda-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 254468, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kan_Knda-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 296725, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kan_Knda-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 260202, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kan_Knda-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 307437, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kan_Knda-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 280237, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kan_Knda-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 271483, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kan_Knda-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 277831, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kan_Knda-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 270510, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kan_Knda-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 279698, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kan_Knda-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 251603, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kan_Knda-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 265297, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kan_Knda-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 294458, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kan_Knda-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 266396, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kan_Knda-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 268369, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + 
"unique_sentence2": 1012 + }, + "kan_Knda-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 270178, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kan_Knda-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 276611, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kan_Knda-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283911, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kan_Knda-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 264884, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kan_Knda-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 251810, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kan_Knda-cjk_Latn": { + 
"num_samples": 1012, + "number_of_characters": 277301, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kan_Knda-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 300637, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kan_Knda-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 279706, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kan_Knda-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 264943, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kan_Knda-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 326324, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kan_Knda-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 304209, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kan_Knda-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 178059, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kan_Knda-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 263976, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kan_Knda-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 266625, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kan_Knda-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 290521, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kan_Knda-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 264145, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kan_Knda-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 267888, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kan_Knda-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 289149, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kan_Knda-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 269120, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kan_Knda-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 264149, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kan_Knda-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 181288, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kan_Knda-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 274403, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kan_Knda-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 272078, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kan_Knda-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 284183, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kan_Knda-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 264441, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kan_Knda-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 285585, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kan_Knda-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 280380, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kan_Knda-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 269750, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kan_Knda-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 228740, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kan_Knda-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 178735, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kan_Knda-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 266211, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kan_Knda-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 279297, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kan_Knda-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 270855, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kan_Knda-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 281970, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kan_Knda-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 281038, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kan_Knda-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 285212, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "kan_Knda-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 268895, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kan_Knda-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 307051, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kan_Knda-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 284241, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kan_Knda-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 276657, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kan_Knda-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 273123, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kan_Knda-guj_Gujr": { + 
"num_samples": 1012, + "number_of_characters": 264468, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kan_Knda-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273444, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kan_Knda-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 273118, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kan_Knda-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 288691, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kan_Knda-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 290764, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kan_Knda-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 301075, + 
"unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kan_Knda-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 286402, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kan_Knda-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 255101, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kan_Knda-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 291849, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kan_Knda-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 259320, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kan_Knda-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 280997, + "unique_pairs": 1012, + "min_sentence1_length": 43, + 
"average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kan_Knda-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 275586, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kan_Knda-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 272879, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kan_Knda-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 285643, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kan_Knda-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 296218, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kan_Knda-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 281779, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kan_Knda-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 249829, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kan_Knda-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 278107, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kan_Knda-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 268840, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kan_Knda-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 284295, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kan_Knda-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 267939, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kan_Knda-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 257628, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kan_Knda-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 278302, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kan_Knda-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271262, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kan_Knda-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 270895, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kan_Knda-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 240094, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kan_Knda-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275107, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kan_Knda-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 272197, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kan_Knda-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 271465, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kan_Knda-som_Latn": { + "num_samples": 1012, + "number_of_characters": 289702, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kan_Knda-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 310585, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.40316205533597, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + 
"unique_sentence2": 1012 + }, + "lmo_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 253287, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "lmo_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 268053, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "lmo_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 305024, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lmo_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 272724, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "lmo_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 294878, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "lmo_Latn-mag_Deva": { + 
"num_samples": 1012, + "number_of_characters": 268361, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "lmo_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 280820, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lmo_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 300398, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "lmo_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 277427, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "lmo_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 283599, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lmo_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 287236, + "unique_pairs": 
1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lmo_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 299205, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "lmo_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 268599, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "lmo_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 295097, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "lmo_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 271074, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lmo_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 264121, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lmo_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 298663, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lmo_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 270264, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "lmo_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 255239, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "lmo_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289657, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lmo_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 273632, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lmo_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 270393, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lmo_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 287528, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lmo_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 291002, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lmo_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 265493, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "lmo_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 295013, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lmo_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 259626, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "lmo_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 256779, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "lmo_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 302417, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "lmo_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 272958, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lmo_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 280295, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "lmo_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277198, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lmo_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 274717, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lmo_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 302662, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "lmo_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272446, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lmo_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 282143, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + 
"unique_sentence2": 1011 + }, + "lmo_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 253743, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "lmo_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 270708, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lmo_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 271125, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "lmo_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 287622, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lmo_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 287428, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "lmo_Latn-min_Arab": { + 
"num_samples": 1012, + "number_of_characters": 268065, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lmo_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 281149, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lmo_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 290392, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lmo_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276112, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lmo_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 281356, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lmo_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 270277, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lmo_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 281488, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lmo_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 276226, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lmo_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 272038, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lmo_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 282829, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lmo_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 285084, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lmo_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 278512, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lmo_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 276413, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lmo_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 250712, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "lmo_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 263896, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "lmo_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 269114, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lmo_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 301346, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "lmo_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 257666, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "lmo_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279493, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "lmo_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 262309, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "lmo_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 273839, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lmo_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 271585, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lmo_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 274316, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lmo_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 279848, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lmo_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 275940, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lmo_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 283984, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lmo_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 280638, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lmo_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 287954, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "lmo_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 282693, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lmo_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 279770, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "lmo_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 290989, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + 
"unique_sentence2": 1012 + }, + "lmo_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 288730, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lmo_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 289817, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "lmo_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 296843, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "lmo_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 271449, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lmo_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 291560, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lmo_Latn-mni_Beng": { + 
"num_samples": 1012, + "number_of_characters": 278448, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lmo_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 290237, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lmo_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 279037, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "lmo_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 274903, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lmo_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 228798, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "lmo_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 273853, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lmo_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 281794, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lmo_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 297804, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "lmo_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 207631, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "lmo_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 266323, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "lmo_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 288931, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "lmo_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 295799, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "lmo_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 280261, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "lmo_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 249647, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "lmo_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 281041, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lmo_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 276926, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "lmo_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 278594, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lmo_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 271726, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lmo_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 288193, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lmo_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283762, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lmo_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 266083, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lmo_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 306412, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lmo_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 257973, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lmo_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279490, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lmo_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 299304, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "lmo_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 198609, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 
56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "lmo_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 287660, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lmo_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 304835, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lmo_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 284287, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "lmo_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 265570, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lmo_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 268839, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + 
"unique_sentence2": 1012 + }, + "lmo_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 295424, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "lmo_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 287447, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lmo_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 286185, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lmo_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 272220, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "lmo_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 279352, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "lmo_Latn-nld_Latn": { + 
"num_samples": 1012, + "number_of_characters": 289173, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lmo_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 269923, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "lmo_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274948, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lmo_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 280789, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "lmo_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 258094, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lmo_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 300351, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lmo_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 263828, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "lmo_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 311063, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "lmo_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 283863, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lmo_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 275109, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lmo_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 281457, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lmo_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 274136, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lmo_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 283324, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lmo_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 255229, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "lmo_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 268923, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lmo_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 298084, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "lmo_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 270022, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "lmo_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 271995, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "lmo_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 273804, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "lmo_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 280237, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lmo_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287537, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "lmo_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 268510, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lmo_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 255436, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "lmo_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 280927, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lmo_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 304263, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "lmo_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 279706, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lmo_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 268569, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lmo_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 329950, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "lmo_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 307835, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lmo_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 181685, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "lmo_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 267602, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + 
"unique_sentence2": 1012 + }, + "lmo_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 270251, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lmo_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 294147, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "lmo_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 267771, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "lmo_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 271514, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lmo_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 292775, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "lmo_Latn-sin_Sinh": { + 
"num_samples": 1012, + "number_of_characters": 272746, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lmo_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 267775, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lmo_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 184914, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "lmo_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 278029, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lmo_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 275704, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lmo_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 287809, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lmo_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 268067, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "lmo_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 289211, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "lmo_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 284006, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "lmo_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 273376, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lmo_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 232366, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "lmo_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 182361, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "lmo_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 269837, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lmo_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 282923, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lmo_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 274481, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "lmo_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 285596, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lmo_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 284664, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "lmo_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 288838, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lmo_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 272521, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lmo_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 310677, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "lmo_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 287867, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lmo_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 280283, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "lmo_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 276749, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lmo_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 268094, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lmo_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277070, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lmo_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 276744, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "lmo_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 292317, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lmo_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 294390, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "lmo_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 304701, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lmo_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 290028, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "lmo_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 258727, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + 
"unique_sentence2": 1012 + }, + "lmo_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 295475, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lmo_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 262946, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "lmo_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 284623, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "lmo_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 279212, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lmo_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 276505, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lmo_Latn-sna_Latn": { + 
"num_samples": 1012, + "number_of_characters": 289269, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "lmo_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 299844, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lmo_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 285405, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "lmo_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 253455, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "lmo_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 281733, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "lmo_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 272466, + 
"unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lmo_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 287921, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lmo_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 271565, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "lmo_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 261254, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "lmo_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 281928, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lmo_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274888, + "unique_pairs": 1012, + "min_sentence1_length": 45, + 
"average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "lmo_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 274521, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "lmo_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 243720, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "lmo_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278733, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lmo_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 275823, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lmo_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 275091, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + 
"max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lmo_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 293328, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "lmo_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 314211, + "unique_pairs": 1012, + "min_sentence1_length": 45, + "average_sentence1_length": 139.98616600790513, + "max_sentence1_length": 379, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "npi_Deva-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 238524, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "npi_Deva-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 253290, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "npi_Deva-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 290261, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + 
"min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "npi_Deva-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 257961, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "npi_Deva-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 280115, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "npi_Deva-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 253598, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "npi_Deva-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 266057, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "npi_Deva-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 285635, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "npi_Deva-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 262664, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "npi_Deva-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 268836, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "npi_Deva-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 272473, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "npi_Deva-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 284442, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "npi_Deva-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 253836, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 
1011 + }, + "npi_Deva-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 280334, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "npi_Deva-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 256311, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "npi_Deva-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 249358, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "npi_Deva-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 283900, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "npi_Deva-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 255501, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "npi_Deva-acm_Arab": { + "num_samples": 1012, + 
"number_of_characters": 240476, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "npi_Deva-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274894, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "npi_Deva-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 258869, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "npi_Deva-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 255630, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "npi_Deva-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 272765, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "npi_Deva-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 276239, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "npi_Deva-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 250730, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "npi_Deva-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 280250, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "npi_Deva-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 244863, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "npi_Deva-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 242016, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "npi_Deva-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 287654, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "npi_Deva-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 258195, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "npi_Deva-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 265532, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "npi_Deva-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262435, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "npi_Deva-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 259954, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "npi_Deva-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 287899, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "npi_Deva-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257683, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "npi_Deva-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 267380, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "npi_Deva-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 238980, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "npi_Deva-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 255945, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "npi_Deva-est_Latn": { + "num_samples": 1012, + "number_of_characters": 256362, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "npi_Deva-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 272859, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "npi_Deva-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 272665, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "npi_Deva-min_Arab": { + "num_samples": 1012, + "number_of_characters": 253302, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "npi_Deva-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 266386, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "npi_Deva-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 275629, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "npi_Deva-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261349, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "npi_Deva-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 266593, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "npi_Deva-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 255514, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "npi_Deva-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 266725, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "npi_Deva-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 261463, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "npi_Deva-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 257275, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "npi_Deva-min_Latn": { + "num_samples": 1012, + "number_of_characters": 268066, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "npi_Deva-por_Latn": { + "num_samples": 1012, + "number_of_characters": 270321, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "npi_Deva-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 263749, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "npi_Deva-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 261650, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "npi_Deva-ajp_Arab": { + "num_samples": 1012, + 
"number_of_characters": 235949, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "npi_Deva-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 249133, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "npi_Deva-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 254351, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "npi_Deva-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 286583, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "npi_Deva-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 242903, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "npi_Deva-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264730, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "npi_Deva-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 247546, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "npi_Deva-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 259076, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "npi_Deva-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 256822, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "npi_Deva-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 259553, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "npi_Deva-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 265085, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "npi_Deva-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 261177, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "npi_Deva-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 269221, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "npi_Deva-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 265875, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "npi_Deva-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 273191, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "npi_Deva-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 267930, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "npi_Deva-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 265007, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "npi_Deva-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 276226, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "npi_Deva-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273967, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "npi_Deva-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 275054, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "npi_Deva-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 282080, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + 
"average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "npi_Deva-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 256686, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "npi_Deva-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 276797, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "npi_Deva-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 263685, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "npi_Deva-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 275474, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "npi_Deva-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 264274, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + 
"max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "npi_Deva-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 260140, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "npi_Deva-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 214035, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "npi_Deva-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 259090, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "npi_Deva-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 267031, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "npi_Deva-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 283041, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, 
+ "npi_Deva-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 192868, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "npi_Deva-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 251560, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "npi_Deva-run_Latn": { + "num_samples": 1012, + "number_of_characters": 274168, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "npi_Deva-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 281036, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "npi_Deva-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 265498, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "npi_Deva-apc_Arab": { + "num_samples": 1012, + 
"number_of_characters": 234884, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "npi_Deva-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 266278, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "npi_Deva-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 262163, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "npi_Deva-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 263831, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "npi_Deva-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 256963, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "npi_Deva-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 273430, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "npi_Deva-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268999, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "npi_Deva-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 251320, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "npi_Deva-war_Latn": { + "num_samples": 1012, + "number_of_characters": 291649, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "npi_Deva-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 243210, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "npi_Deva-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264727, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "npi_Deva-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 284541, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "npi_Deva-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 183846, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "npi_Deva-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 272897, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "npi_Deva-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 290072, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "npi_Deva-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 269524, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "npi_Deva-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 250807, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "npi_Deva-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 254076, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "npi_Deva-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 280661, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "npi_Deva-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 272684, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "npi_Deva-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 271422, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "npi_Deva-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 257457, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "npi_Deva-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 264589, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "npi_Deva-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 274410, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "npi_Deva-san_Deva": { + "num_samples": 1012, + "number_of_characters": 255160, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "npi_Deva-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260185, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + 
"max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "npi_Deva-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 266026, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "npi_Deva-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 243331, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "npi_Deva-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 285588, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "npi_Deva-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 249065, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "npi_Deva-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 296300, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + 
}, + "npi_Deva-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 269100, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "npi_Deva-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 260346, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "npi_Deva-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 266694, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "npi_Deva-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 259373, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "npi_Deva-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 268561, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "npi_Deva-ary_Arab": { + "num_samples": 1012, + 
"number_of_characters": 240466, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "npi_Deva-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 254160, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "npi_Deva-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 283321, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "npi_Deva-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 255259, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "npi_Deva-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 257232, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "npi_Deva-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 259041, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "npi_Deva-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 265474, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "npi_Deva-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272774, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "npi_Deva-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 253747, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "npi_Deva-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 240673, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "npi_Deva-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 266164, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "npi_Deva-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 289500, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "npi_Deva-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 264943, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "npi_Deva-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 268569, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "npi_Deva-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 315187, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "npi_Deva-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 293072, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "npi_Deva-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 166922, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "npi_Deva-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 252839, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "npi_Deva-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 255488, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "npi_Deva-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 279384, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "npi_Deva-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 253008, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "npi_Deva-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 256751, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "npi_Deva-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 278012, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "npi_Deva-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 257983, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "npi_Deva-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 253012, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "npi_Deva-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 170151, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + 
"max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "npi_Deva-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 263266, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "npi_Deva-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 260941, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "npi_Deva-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 273046, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "npi_Deva-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 253304, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "npi_Deva-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 274448, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, 
+ "npi_Deva-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 269243, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "npi_Deva-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 258613, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "npi_Deva-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 217603, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "npi_Deva-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 167598, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "npi_Deva-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 255074, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "npi_Deva-cym_Latn": { + "num_samples": 1012, + 
"number_of_characters": 268160, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "npi_Deva-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 259718, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "npi_Deva-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 270833, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "npi_Deva-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 269901, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "npi_Deva-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 274075, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "npi_Deva-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 257758, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "npi_Deva-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 295914, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "npi_Deva-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 273104, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "npi_Deva-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 265520, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "npi_Deva-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 261986, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "npi_Deva-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 253331, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "npi_Deva-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262307, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "npi_Deva-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 261981, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "npi_Deva-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 277554, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "npi_Deva-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 279627, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "npi_Deva-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 289938, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "npi_Deva-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 275265, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "npi_Deva-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 243964, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "npi_Deva-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 280712, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "npi_Deva-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 248183, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "npi_Deva-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 269860, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "npi_Deva-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 264449, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "npi_Deva-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 261742, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "npi_Deva-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 274506, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "npi_Deva-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 285081, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "npi_Deva-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 270642, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + 
"max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "npi_Deva-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 238692, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "npi_Deva-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 266970, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "npi_Deva-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 257703, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "npi_Deva-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 273158, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "npi_Deva-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 256802, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, 
+ "npi_Deva-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 246491, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "npi_Deva-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 267165, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "npi_Deva-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260125, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "npi_Deva-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 259758, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "npi_Deva-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 228957, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "npi_Deva-khk_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 263970, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "npi_Deva-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 261060, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "npi_Deva-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 260328, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "npi_Deva-som_Latn": { + "num_samples": 1012, + "number_of_characters": 278565, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "npi_Deva-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 299448, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "shn_Mymr-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 299905, + "unique_pairs": 1012, + 
"min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "shn_Mymr-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 314671, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "shn_Mymr-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 351642, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "shn_Mymr-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 319342, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "shn_Mymr-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 341496, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "shn_Mymr-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 314979, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 
186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "shn_Mymr-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 327438, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "shn_Mymr-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 347016, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "shn_Mymr-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 324045, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "shn_Mymr-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 330217, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "shn_Mymr-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 333854, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "shn_Mymr-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 345823, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "shn_Mymr-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 315217, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "shn_Mymr-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 341715, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "shn_Mymr-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 317692, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "shn_Mymr-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 310739, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "shn_Mymr-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 345281, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "shn_Mymr-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 316882, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "shn_Mymr-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 301857, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "shn_Mymr-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 336275, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "shn_Mymr-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 320250, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "shn_Mymr-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 317011, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "shn_Mymr-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 334146, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "shn_Mymr-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 337620, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "shn_Mymr-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 312111, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "shn_Mymr-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 341631, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "shn_Mymr-tzm_Tfng": { + 
"num_samples": 1012, + "number_of_characters": 306244, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "shn_Mymr-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 303397, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "shn_Mymr-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 349035, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "shn_Mymr-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 319576, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "shn_Mymr-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 326913, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "shn_Mymr-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 323816, + 
"unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "shn_Mymr-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 321335, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "shn_Mymr-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 349280, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "shn_Mymr-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 319064, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "shn_Mymr-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 328761, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "shn_Mymr-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 300361, + "unique_pairs": 1012, + "min_sentence1_length": 57, + 
"average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "shn_Mymr-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 317326, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "shn_Mymr-est_Latn": { + "num_samples": 1012, + "number_of_characters": 317743, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "shn_Mymr-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 334240, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "shn_Mymr-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 334046, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "shn_Mymr-min_Arab": { + "num_samples": 1012, + "number_of_characters": 314683, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + 
"max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "shn_Mymr-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 327767, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "shn_Mymr-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 337010, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "shn_Mymr-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 322730, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "shn_Mymr-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 327974, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "shn_Mymr-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 316895, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "shn_Mymr-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 328106, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "shn_Mymr-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 322844, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "shn_Mymr-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 318656, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "shn_Mymr-min_Latn": { + "num_samples": 1012, + "number_of_characters": 329447, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "shn_Mymr-por_Latn": { + "num_samples": 1012, + "number_of_characters": 331702, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "shn_Mymr-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 325130, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "shn_Mymr-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 323031, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "shn_Mymr-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 297330, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "shn_Mymr-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 310514, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "shn_Mymr-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 315732, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + 
"unique_sentence2": 1012 + }, + "shn_Mymr-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 347964, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "shn_Mymr-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 304284, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "shn_Mymr-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 326111, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "shn_Mymr-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 308927, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "shn_Mymr-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 320457, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "shn_Mymr-urd_Arab": { + 
"num_samples": 1012, + "number_of_characters": 318203, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "shn_Mymr-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 320934, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "shn_Mymr-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 326466, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "shn_Mymr-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 322558, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "shn_Mymr-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 330602, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "shn_Mymr-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 327256, + 
"unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "shn_Mymr-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 334572, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "shn_Mymr-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 329311, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "shn_Mymr-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 326388, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "shn_Mymr-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 337607, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "shn_Mymr-als_Latn": { + "num_samples": 1012, + "number_of_characters": 335348, + "unique_pairs": 1012, + "min_sentence1_length": 57, + 
"average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "shn_Mymr-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 336435, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "shn_Mymr-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 343461, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "shn_Mymr-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 318067, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "shn_Mymr-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 338178, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "shn_Mymr-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 325066, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + 
"max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "shn_Mymr-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 336855, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "shn_Mymr-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 325655, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "shn_Mymr-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 321521, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "shn_Mymr-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 275416, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "shn_Mymr-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 320471, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "shn_Mymr-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 328412, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "shn_Mymr-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 344422, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "shn_Mymr-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 254249, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "shn_Mymr-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 312941, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "shn_Mymr-run_Latn": { + "num_samples": 1012, + "number_of_characters": 335549, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "shn_Mymr-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 342417, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "shn_Mymr-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 326879, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "shn_Mymr-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 296265, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "shn_Mymr-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 327659, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "shn_Mymr-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 323544, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + 
"unique_sentence2": 1012 + }, + "shn_Mymr-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 325212, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "shn_Mymr-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 318344, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "shn_Mymr-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 334811, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "shn_Mymr-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 330380, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "shn_Mymr-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 312701, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "shn_Mymr-war_Latn": { + 
"num_samples": 1012, + "number_of_characters": 353030, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "shn_Mymr-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 304591, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "shn_Mymr-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 326108, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "shn_Mymr-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 345922, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "shn_Mymr-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 245227, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "shn_Mymr-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 334278, + 
"unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "shn_Mymr-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 351453, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "shn_Mymr-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 330905, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "shn_Mymr-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 312188, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "shn_Mymr-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 315457, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "shn_Mymr-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 342042, + "unique_pairs": 1012, + "min_sentence1_length": 57, + 
"average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "shn_Mymr-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 334065, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "shn_Mymr-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 332803, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "shn_Mymr-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 318838, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "shn_Mymr-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 325970, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "shn_Mymr-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 335791, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + 
"max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "shn_Mymr-san_Deva": { + "num_samples": 1012, + "number_of_characters": 316541, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "shn_Mymr-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 321566, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "shn_Mymr-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 327407, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "shn_Mymr-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 304712, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "shn_Mymr-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 346969, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + 
"min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "shn_Mymr-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 310446, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "shn_Mymr-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 357681, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "shn_Mymr-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 330481, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "shn_Mymr-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 321727, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "shn_Mymr-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 328075, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "shn_Mymr-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 320754, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "shn_Mymr-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 329942, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "shn_Mymr-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 301847, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "shn_Mymr-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 315541, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "shn_Mymr-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 344702, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + 
"unique_sentence2": 1012 + }, + "shn_Mymr-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 316640, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "shn_Mymr-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 318613, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "shn_Mymr-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 320422, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "shn_Mymr-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 326855, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "shn_Mymr-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 334155, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "shn_Mymr-yor_Latn": { + 
"num_samples": 1012, + "number_of_characters": 315128, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "shn_Mymr-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 302054, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "shn_Mymr-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 327545, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "shn_Mymr-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 350881, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "shn_Mymr-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 326324, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "shn_Mymr-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 329950, + 
"unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "shn_Mymr-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 315187, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "shn_Mymr-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 354453, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "shn_Mymr-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 228303, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "shn_Mymr-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 314220, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "shn_Mymr-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 316869, + "unique_pairs": 1012, + "min_sentence1_length": 57, + 
"average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "shn_Mymr-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 340765, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "shn_Mymr-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 314389, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "shn_Mymr-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 318132, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "shn_Mymr-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 339393, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "shn_Mymr-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 319364, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + 
"max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "shn_Mymr-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 314393, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "shn_Mymr-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 231532, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "shn_Mymr-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 324647, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "shn_Mymr-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 322322, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "shn_Mymr-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 334427, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "shn_Mymr-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 314685, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "shn_Mymr-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 335829, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "shn_Mymr-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 330624, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "shn_Mymr-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 319994, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "shn_Mymr-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 278984, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 
89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "shn_Mymr-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 228979, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "shn_Mymr-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 316455, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "shn_Mymr-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 329541, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "shn_Mymr-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 321099, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "shn_Mymr-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 332214, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + 
"unique_sentence2": 1012 + }, + "shn_Mymr-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 331282, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "shn_Mymr-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 335456, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "shn_Mymr-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 319139, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "shn_Mymr-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 357295, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "shn_Mymr-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 334485, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "shn_Mymr-ayr_Latn": { + 
"num_samples": 1012, + "number_of_characters": 326901, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "shn_Mymr-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 323367, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "shn_Mymr-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 314712, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "shn_Mymr-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 323688, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "shn_Mymr-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 323362, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "shn_Mymr-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 338935, + 
"unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "shn_Mymr-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 341008, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "shn_Mymr-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 351319, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "shn_Mymr-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 336646, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "shn_Mymr-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 305345, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "shn_Mymr-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 342093, + "unique_pairs": 1012, + "min_sentence1_length": 57, + 
"average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "shn_Mymr-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 309564, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "shn_Mymr-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 331241, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "shn_Mymr-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 325830, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "shn_Mymr-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 323123, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "shn_Mymr-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 335887, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + 
"max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "shn_Mymr-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 346462, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "shn_Mymr-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 332023, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "shn_Mymr-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 300073, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "shn_Mymr-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 328351, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "shn_Mymr-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 319084, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "shn_Mymr-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 334539, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "shn_Mymr-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 318183, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "shn_Mymr-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 307872, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "shn_Mymr-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 328546, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "shn_Mymr-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 321506, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "shn_Mymr-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 321139, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "shn_Mymr-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 290338, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "shn_Mymr-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 325351, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "shn_Mymr-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 322441, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "shn_Mymr-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 321709, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + 
"unique_sentence2": 1012 + }, + "shn_Mymr-som_Latn": { + "num_samples": 1012, + "number_of_characters": 339946, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "shn_Mymr-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 360829, + "unique_pairs": 1012, + "min_sentence1_length": 57, + "average_sentence1_length": 186.0513833992095, + "max_sentence1_length": 597, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tgl_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 277790, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tgl_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 292556, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tgl_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 329527, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tgl_Latn-hin_Deva": { + 
"num_samples": 1012, + "number_of_characters": 297227, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tgl_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 319381, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tgl_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 292864, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tgl_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 305323, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tgl_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 324901, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tgl_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 301930, + 
"unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tgl_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 308102, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tgl_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 311739, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tgl_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 323708, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tgl_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 293102, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tgl_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 319600, + "unique_pairs": 1012, + "min_sentence1_length": 53, + 
"average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tgl_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 295577, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tgl_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 288624, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tgl_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 323166, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tgl_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 294767, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tgl_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 279742, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + 
"max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tgl_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 314160, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tgl_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 298135, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tgl_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 294896, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tgl_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 312031, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tgl_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 315505, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tgl_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 289996, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tgl_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 319516, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tgl_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 284129, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tgl_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 281282, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tgl_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 326920, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 
158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tgl_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 297461, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tgl_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 304798, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tgl_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301701, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tgl_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 299220, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tgl_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 327165, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + 
"unique_sentence2": 1012 + }, + "tgl_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296949, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tgl_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 306646, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tgl_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 278246, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tgl_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 295211, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tgl_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 295628, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tgl_Latn-hye_Armn": { + 
"num_samples": 1012, + "number_of_characters": 312125, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tgl_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 311931, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tgl_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 292568, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tgl_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 305652, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tgl_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 314895, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tgl_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300615, + 
"unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tgl_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 305859, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tgl_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 294780, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tgl_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 305991, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tgl_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 300729, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tgl_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 296541, + "unique_pairs": 1012, + "min_sentence1_length": 53, + 
"average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tgl_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 307332, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tgl_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 309587, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tgl_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 303015, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tgl_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 300916, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tgl_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 275215, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + 
"max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tgl_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 288399, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tgl_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 293617, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tgl_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 325849, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tgl_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 282169, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tgl_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 303996, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tgl_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 286812, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tgl_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 298342, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tgl_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 296088, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tgl_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 298819, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tgl_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 304351, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 
136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tgl_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 300443, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tgl_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 308487, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tgl_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 305141, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tgl_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 312457, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tgl_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 307196, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + 
"unique_sentence2": 1012 + }, + "tgl_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 304273, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tgl_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 315492, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tgl_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 313233, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tgl_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 314320, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tgl_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 321346, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tgl_Latn-isl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 295952, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tgl_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 316063, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tgl_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 302951, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tgl_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 314740, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tgl_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 303540, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tgl_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 299406, + 
"unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tgl_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 253301, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tgl_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 298356, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tgl_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 306297, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tgl_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 322307, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tgl_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 232134, + "unique_pairs": 1012, + "min_sentence1_length": 53, + 
"average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tgl_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 290826, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tgl_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 313434, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tgl_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 320302, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tgl_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 304764, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tgl_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 274150, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + 
"max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tgl_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 305544, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tgl_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 301429, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tgl_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 303097, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tgl_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 296229, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tgl_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 312696, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tgl_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 308265, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tgl_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 290586, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tgl_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 330915, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tgl_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 282476, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tgl_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 303993, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tgl_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 323807, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tgl_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 223112, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tgl_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 312163, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tgl_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 329338, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tgl_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 308790, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + 
"unique_sentence2": 1012 + }, + "tgl_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 290073, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tgl_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 293342, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tgl_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 319927, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tgl_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 311950, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tgl_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 310688, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tgl_Latn-kab_Latn": { + 
"num_samples": 1012, + "number_of_characters": 296723, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tgl_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 303855, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tgl_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 313676, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tgl_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 294426, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tgl_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299451, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tgl_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 305292, + 
"unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tgl_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 282597, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tgl_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 324854, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tgl_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 288331, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tgl_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 335566, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tgl_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 308366, + "unique_pairs": 1012, + "min_sentence1_length": 53, + 
"average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tgl_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 299612, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tgl_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 305960, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tgl_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 298639, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tgl_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 307827, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tgl_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 279732, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + 
"max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tgl_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 293426, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tgl_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 322587, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tgl_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 294525, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tgl_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 296498, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tgl_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 298307, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tgl_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 304740, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tgl_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 312040, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tgl_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 293013, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tgl_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 279939, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tgl_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 305430, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tgl_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 328766, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tgl_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 304209, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tgl_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 307835, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tgl_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 293072, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tgl_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 354453, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + 
"unique_sentence2": 1012 + }, + "tgl_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 206188, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tgl_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 292105, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tgl_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 294754, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tgl_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 318650, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tgl_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 292274, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tgl_Latn-ltg_Latn": { + 
"num_samples": 1012, + "number_of_characters": 296017, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tgl_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 317278, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tgl_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 297249, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tgl_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 292278, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tgl_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 209417, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tgl_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 302532, + 
"unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tgl_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 300207, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tgl_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 312312, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tgl_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 292570, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tgl_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 313714, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tgl_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 308509, + "unique_pairs": 1012, + "min_sentence1_length": 53, + 
"average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tgl_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 297879, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tgl_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 256869, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tgl_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 206864, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tgl_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 294340, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tgl_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 307426, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + 
"max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tgl_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 298984, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tgl_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 310099, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tgl_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 309167, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tgl_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 313341, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tgl_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 297024, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tgl_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 335180, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tgl_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 312370, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tgl_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 304786, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tgl_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 301252, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tgl_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 292597, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tgl_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301573, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tgl_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 301247, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tgl_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 316820, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tgl_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 318893, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tgl_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 329204, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + 
"unique_sentence2": 1012 + }, + "tgl_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 314531, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tgl_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 283230, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tgl_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 319978, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tgl_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 287449, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tgl_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 309126, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tgl_Latn-luo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 303715, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tgl_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 301008, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tgl_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 313772, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tgl_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 324347, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tgl_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 309908, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tgl_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 277958, + 
"unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tgl_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 306236, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tgl_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 296969, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tgl_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 312424, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tgl_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 296068, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tgl_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 285757, + "unique_pairs": 1012, + "min_sentence1_length": 53, + 
"average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tgl_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 306431, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tgl_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299391, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tgl_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 299024, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tgl_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 268223, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tgl_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 303236, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + 
"max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tgl_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 300326, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tgl_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 299594, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tgl_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 317831, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tgl_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 338714, + "unique_pairs": 1012, + "min_sentence1_length": 53, + "average_sentence1_length": 164.1986166007905, + "max_sentence1_length": 435, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "yue_Hant-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 151640, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "yue_Hant-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 166406, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "yue_Hant-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 203377, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "yue_Hant-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 171077, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "yue_Hant-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 193231, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "yue_Hant-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 166714, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "yue_Hant-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 179173, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "yue_Hant-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 198751, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "yue_Hant-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 175780, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "yue_Hant-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 181952, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "yue_Hant-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 185589, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 
1012 + }, + "yue_Hant-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 197558, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "yue_Hant-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 166952, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "yue_Hant-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 193450, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "yue_Hant-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 169427, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "yue_Hant-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 162474, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "yue_Hant-spa_Latn": { + "num_samples": 1012, + 
"number_of_characters": 197016, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "yue_Hant-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 168617, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "yue_Hant-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 153592, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "yue_Hant-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 188010, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "yue_Hant-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 171985, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "yue_Hant-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 168746, + "unique_pairs": 1012, + 
"min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "yue_Hant-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 185881, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "yue_Hant-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 189355, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "yue_Hant-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 163846, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "yue_Hant-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 193366, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "yue_Hant-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 157979, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 
39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "yue_Hant-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 155132, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "yue_Hant-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 200770, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "yue_Hant-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 171311, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "yue_Hant-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 178648, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "yue_Hant-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 175551, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "yue_Hant-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 173070, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "yue_Hant-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 201015, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "yue_Hant-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 170799, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "yue_Hant-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 180496, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "yue_Hant-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 152096, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + 
"average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "yue_Hant-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 169061, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "yue_Hant-est_Latn": { + "num_samples": 1012, + "number_of_characters": 169478, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "yue_Hant-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 185975, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "yue_Hant-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 185781, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "yue_Hant-min_Arab": { + "num_samples": 1012, + "number_of_characters": 166418, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + 
"max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "yue_Hant-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 179502, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "yue_Hant-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 188745, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "yue_Hant-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 174465, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "yue_Hant-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 179709, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "yue_Hant-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 168630, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + 
}, + "yue_Hant-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 179841, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "yue_Hant-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 174579, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "yue_Hant-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 170391, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "yue_Hant-min_Latn": { + "num_samples": 1012, + "number_of_characters": 181182, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "yue_Hant-por_Latn": { + "num_samples": 1012, + "number_of_characters": 183437, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "yue_Hant-sun_Latn": { + "num_samples": 1012, + 
"number_of_characters": 176865, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "yue_Hant-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 174766, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "yue_Hant-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 149065, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "yue_Hant-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 162249, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "yue_Hant-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 167467, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "yue_Hant-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 199699, + "unique_pairs": 1012, + 
"min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "yue_Hant-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 156019, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "yue_Hant-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 177846, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "yue_Hant-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 160662, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "yue_Hant-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 172192, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "yue_Hant-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 169938, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 
39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "yue_Hant-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 172669, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "yue_Hant-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 178201, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "yue_Hant-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 174293, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "yue_Hant-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 182337, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "yue_Hant-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 178991, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "yue_Hant-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 186307, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "yue_Hant-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 181046, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "yue_Hant-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 178123, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "yue_Hant-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 189342, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "yue_Hant-als_Latn": { + "num_samples": 1012, + "number_of_characters": 187083, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + 
"average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "yue_Hant-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 188170, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "yue_Hant-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 195196, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "yue_Hant-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 169802, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "yue_Hant-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 189913, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "yue_Hant-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 176801, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + 
"max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "yue_Hant-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 188590, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "yue_Hant-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 177390, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "yue_Hant-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 173256, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "yue_Hant-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 127151, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "yue_Hant-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 172206, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + 
"yue_Hant-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 180147, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "yue_Hant-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 196157, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "yue_Hant-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 105984, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "yue_Hant-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 164676, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "yue_Hant-run_Latn": { + "num_samples": 1012, + "number_of_characters": 187284, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "yue_Hant-tam_Taml": { + "num_samples": 1012, + 
"number_of_characters": 194152, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "yue_Hant-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 178614, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "yue_Hant-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 148000, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "yue_Hant-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 179394, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "yue_Hant-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 175279, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "yue_Hant-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 176947, + "unique_pairs": 1012, + 
"min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "yue_Hant-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 170079, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "yue_Hant-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 186546, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "yue_Hant-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 182115, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "yue_Hant-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 164436, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "yue_Hant-war_Latn": { + "num_samples": 1012, + "number_of_characters": 204765, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 
39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "yue_Hant-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 156326, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "yue_Hant-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 177843, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "yue_Hant-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 197657, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "yue_Hant-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 96962, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "yue_Hant-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 186013, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "yue_Hant-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 203188, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "yue_Hant-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 182640, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "yue_Hant-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 163923, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "yue_Hant-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 167192, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "yue_Hant-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 193777, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "yue_Hant-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 185800, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "yue_Hant-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 184538, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "yue_Hant-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 170573, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "yue_Hant-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 177705, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "yue_Hant-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 187526, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + 
"max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "yue_Hant-san_Deva": { + "num_samples": 1012, + "number_of_characters": 168276, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "yue_Hant-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 173301, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "yue_Hant-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 179142, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "yue_Hant-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 156447, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "yue_Hant-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 198704, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + 
}, + "yue_Hant-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 162181, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "yue_Hant-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 209416, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "yue_Hant-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 182216, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "yue_Hant-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 173462, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "yue_Hant-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 179810, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "yue_Hant-tel_Telu": { + "num_samples": 1012, + 
"number_of_characters": 172489, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "yue_Hant-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 181677, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "yue_Hant-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 153582, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "yue_Hant-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 167276, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "yue_Hant-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 196437, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "yue_Hant-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 168375, + "unique_pairs": 1012, + 
"min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "yue_Hant-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 170348, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "yue_Hant-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 172157, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "yue_Hant-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 178590, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "yue_Hant-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 185890, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "yue_Hant-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 166863, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 
39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "yue_Hant-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 153789, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "yue_Hant-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 179280, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "yue_Hant-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 202616, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "yue_Hant-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 178059, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "yue_Hant-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 181685, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + 
"unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "yue_Hant-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 166922, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "yue_Hant-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 228303, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "yue_Hant-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 206188, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "yue_Hant-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 165955, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "yue_Hant-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 168604, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "yue_Hant-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 192500, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "yue_Hant-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 166124, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "yue_Hant-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 169867, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "yue_Hant-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 191128, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "yue_Hant-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 171099, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + 
"max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "yue_Hant-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 166128, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "yue_Hant-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 83267, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "yue_Hant-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 176382, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "yue_Hant-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 174057, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "yue_Hant-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 186162, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + 
"yue_Hant-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 166420, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "yue_Hant-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 187564, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "yue_Hant-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 182359, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "yue_Hant-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 171729, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "yue_Hant-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 130719, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "yue_Hant-zho_Hant": { + "num_samples": 1012, + 
"number_of_characters": 80714, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "yue_Hant-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 168190, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "yue_Hant-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 181276, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "yue_Hant-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 172834, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "yue_Hant-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 183949, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "yue_Hant-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 183017, + "unique_pairs": 1012, + 
"min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "yue_Hant-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 187191, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "yue_Hant-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 170874, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "yue_Hant-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 209030, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "yue_Hant-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 186220, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "yue_Hant-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 178636, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 
39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "yue_Hant-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 175102, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "yue_Hant-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 166447, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "yue_Hant-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 175423, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "yue_Hant-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 175097, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "yue_Hant-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 190670, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "yue_Hant-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 192743, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "yue_Hant-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 203054, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "yue_Hant-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 188381, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "yue_Hant-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 157080, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "yue_Hant-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 193828, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "yue_Hant-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 161299, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "yue_Hant-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 182976, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "yue_Hant-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 177565, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "yue_Hant-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 174858, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "yue_Hant-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 187622, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + 
"max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "yue_Hant-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 198197, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "yue_Hant-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 183758, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "yue_Hant-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 151808, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "yue_Hant-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 180086, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "yue_Hant-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 170819, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, 
+ "yue_Hant-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 186274, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "yue_Hant-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 169918, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "yue_Hant-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 159607, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "yue_Hant-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 180281, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "yue_Hant-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 173241, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "yue_Hant-dyu_Latn": { + "num_samples": 1012, + 
"number_of_characters": 172874, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "yue_Hant-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 142073, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "yue_Hant-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 177086, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "yue_Hant-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 174176, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "yue_Hant-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 173444, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "yue_Hant-som_Latn": { + "num_samples": 1012, + "number_of_characters": 191681, + "unique_pairs": 1012, + 
"min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "yue_Hant-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 212564, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 39.544466403162055, + "max_sentence1_length": 118, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "asm_Beng-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 237557, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "asm_Beng-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 252323, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "asm_Beng-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 289294, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "asm_Beng-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 256994, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "asm_Beng-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 279148, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "asm_Beng-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 252631, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "asm_Beng-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 265090, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "asm_Beng-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 284668, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "asm_Beng-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 261697, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "asm_Beng-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 267869, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "asm_Beng-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 271506, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "asm_Beng-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 283475, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "asm_Beng-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 252869, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "asm_Beng-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 279367, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "asm_Beng-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 255344, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "asm_Beng-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 248391, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "asm_Beng-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 282933, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "asm_Beng-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 254534, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "asm_Beng-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 239509, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + 
"max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "asm_Beng-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273927, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "asm_Beng-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 257902, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "asm_Beng-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 254663, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "asm_Beng-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 271798, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "asm_Beng-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 275272, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + 
"asm_Beng-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 249763, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "asm_Beng-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 279283, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "asm_Beng-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 243896, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "asm_Beng-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 241049, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "asm_Beng-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 286687, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "asm_Beng-epo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 257228, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "asm_Beng-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 264565, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "asm_Beng-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261468, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "asm_Beng-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 258987, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "asm_Beng-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 286932, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "asm_Beng-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256716, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "asm_Beng-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 266413, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "asm_Beng-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 238013, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "asm_Beng-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 254978, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "asm_Beng-est_Latn": { + "num_samples": 1012, + "number_of_characters": 255395, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "asm_Beng-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 271892, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "asm_Beng-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 271698, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "asm_Beng-min_Arab": { + "num_samples": 1012, + "number_of_characters": 252335, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "asm_Beng-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 265419, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "asm_Beng-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 274662, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "asm_Beng-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260382, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "asm_Beng-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 265626, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "asm_Beng-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 254547, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "asm_Beng-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 265758, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "asm_Beng-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 260496, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "asm_Beng-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 256308, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "asm_Beng-min_Latn": { + "num_samples": 1012, + "number_of_characters": 267099, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "asm_Beng-por_Latn": { + "num_samples": 1012, + "number_of_characters": 269354, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "asm_Beng-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 262782, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "asm_Beng-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 260683, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "asm_Beng-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 234982, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + 
"max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "asm_Beng-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 248166, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "asm_Beng-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 253384, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "asm_Beng-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 285616, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "asm_Beng-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 241936, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "asm_Beng-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263763, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + 
}, + "asm_Beng-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 246579, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "asm_Beng-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 258109, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "asm_Beng-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 255855, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "asm_Beng-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 258586, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "asm_Beng-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 264118, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "asm_Beng-fao_Latn": { + "num_samples": 1012, + 
"number_of_characters": 260210, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "asm_Beng-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 268254, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "asm_Beng-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 264908, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "asm_Beng-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 272224, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "asm_Beng-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 266963, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "asm_Beng-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 264040, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "asm_Beng-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 275259, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "asm_Beng-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273000, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "asm_Beng-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 274087, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "asm_Beng-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 281113, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "asm_Beng-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 255719, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "asm_Beng-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 275830, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "asm_Beng-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 262718, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "asm_Beng-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 274507, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "asm_Beng-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 263307, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "asm_Beng-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 259173, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "asm_Beng-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 213068, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "asm_Beng-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 258123, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "asm_Beng-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 266064, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "asm_Beng-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 282074, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "asm_Beng-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 191901, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + 
"average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "asm_Beng-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 250593, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "asm_Beng-run_Latn": { + "num_samples": 1012, + "number_of_characters": 273201, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "asm_Beng-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 280069, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "asm_Beng-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 264531, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "asm_Beng-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 233917, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + 
"max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "asm_Beng-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 265311, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "asm_Beng-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 261196, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "asm_Beng-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 262864, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "asm_Beng-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 255996, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "asm_Beng-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 272463, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, 
+ "asm_Beng-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268032, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "asm_Beng-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 250353, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "asm_Beng-war_Latn": { + "num_samples": 1012, + "number_of_characters": 290682, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "asm_Beng-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 242243, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "asm_Beng-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263760, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "asm_Beng-fra_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283574, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "asm_Beng-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 182879, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "asm_Beng-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 271930, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "asm_Beng-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 289105, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "asm_Beng-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 268557, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "asm_Beng-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 249840, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "asm_Beng-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 253109, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "asm_Beng-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 279694, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "asm_Beng-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 271717, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "asm_Beng-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 270455, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "asm_Beng-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 256490, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "asm_Beng-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 263622, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "asm_Beng-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 273443, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "asm_Beng-san_Deva": { + "num_samples": 1012, + "number_of_characters": 254193, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "asm_Beng-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259218, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "asm_Beng-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 265059, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "asm_Beng-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 242364, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "asm_Beng-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 284621, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "asm_Beng-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 248098, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "asm_Beng-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 295333, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "asm_Beng-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 268133, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "asm_Beng-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 259379, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "asm_Beng-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 265727, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "asm_Beng-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 258406, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "asm_Beng-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 267594, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "asm_Beng-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 239499, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + 
"max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "asm_Beng-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 253193, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "asm_Beng-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 282354, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "asm_Beng-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 254292, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "asm_Beng-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 256265, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "asm_Beng-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 258074, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + 
}, + "asm_Beng-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 264507, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "asm_Beng-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271807, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "asm_Beng-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 252780, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "asm_Beng-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 239706, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "asm_Beng-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 265197, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "asm_Beng-gla_Latn": { + "num_samples": 1012, + 
"number_of_characters": 288533, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "asm_Beng-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 263976, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "asm_Beng-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 267602, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "asm_Beng-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 252839, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "asm_Beng-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 314220, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "asm_Beng-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 292105, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "asm_Beng-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 165955, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "asm_Beng-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 254521, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "asm_Beng-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 278417, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "asm_Beng-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 252041, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "asm_Beng-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 255784, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "asm_Beng-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 277045, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "asm_Beng-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 257016, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "asm_Beng-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 252045, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "asm_Beng-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 169184, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "asm_Beng-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 262299, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "asm_Beng-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 259974, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "asm_Beng-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 272079, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "asm_Beng-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 252337, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "asm_Beng-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 273481, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "asm_Beng-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 268276, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + 
"average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "asm_Beng-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 257646, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "asm_Beng-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 216636, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "asm_Beng-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 166631, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "asm_Beng-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 254107, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "asm_Beng-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 267193, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + 
"max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "asm_Beng-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 258751, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "asm_Beng-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 269866, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "asm_Beng-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 268934, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "asm_Beng-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 273108, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "asm_Beng-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 256791, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, 
+ "asm_Beng-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 294947, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "asm_Beng-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 272137, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "asm_Beng-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 264553, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "asm_Beng-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 261019, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "asm_Beng-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 252364, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "asm_Beng-kaz_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 261340, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "asm_Beng-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 261014, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "asm_Beng-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 276587, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "asm_Beng-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 278660, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "asm_Beng-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 288971, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "asm_Beng-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 274298, + "unique_pairs": 1012, + 
"min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "asm_Beng-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 242997, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "asm_Beng-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 279745, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "asm_Beng-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 247216, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "asm_Beng-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 268893, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "asm_Beng-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 263482, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 
124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "asm_Beng-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 260775, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "asm_Beng-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 273539, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "asm_Beng-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 284114, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "asm_Beng-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 269675, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "asm_Beng-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 237725, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "asm_Beng-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 266003, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "asm_Beng-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 256736, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "asm_Beng-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 272191, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "asm_Beng-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 255835, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "asm_Beng-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 245524, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "asm_Beng-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 266198, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "asm_Beng-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259158, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "asm_Beng-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 258791, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "asm_Beng-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 227990, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "asm_Beng-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263003, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + 
"max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "asm_Beng-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 260093, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "asm_Beng-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 259361, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "asm_Beng-som_Latn": { + "num_samples": 1012, + "number_of_characters": 277598, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "asm_Beng-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 298481, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ckb_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 240206, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, 
+ "ckb_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 254972, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ckb_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 291943, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ckb_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 259643, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ckb_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 281797, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ckb_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 255280, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ckb_Arab-pap_Latn": { + "num_samples": 1012, + 
"number_of_characters": 267739, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ckb_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 287317, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ckb_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 264346, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ckb_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 270518, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ckb_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 274155, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ckb_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 286124, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ckb_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 255518, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ckb_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 282016, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ckb_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 257993, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ckb_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 251040, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ckb_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 285582, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ckb_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 257183, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ckb_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 242158, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ckb_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276576, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ckb_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 260551, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ckb_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 257312, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ckb_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 274447, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ckb_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 277921, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ckb_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 252412, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ckb_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 281932, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ckb_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 246545, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + 
"average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ckb_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 243698, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ckb_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 289336, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ckb_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 259877, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ckb_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 267214, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ckb_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264117, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + 
"max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ckb_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 261636, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ckb_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 289581, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ckb_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259365, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ckb_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 269062, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ckb_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 240662, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + 
}, + "ckb_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 257627, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ckb_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 258044, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ckb_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 274541, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ckb_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 274347, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ckb_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 254984, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ckb_Arab-pol_Latn": { + "num_samples": 1012, + 
"number_of_characters": 268068, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ckb_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 277311, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ckb_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263031, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ckb_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 268275, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ckb_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 257196, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ckb_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 268407, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ckb_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 263145, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ckb_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 258957, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ckb_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 269748, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ckb_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 272003, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ckb_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 265431, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ckb_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 263332, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ckb_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 237631, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ckb_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 250815, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ckb_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 256033, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ckb_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 288265, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ckb_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 244585, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ckb_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266412, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ckb_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 249228, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ckb_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 260758, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ckb_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 258504, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ckb_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 261235, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ckb_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 266767, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ckb_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 262859, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ckb_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 270903, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ckb_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 267557, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + 
"max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ckb_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 274873, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ckb_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 269612, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ckb_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 266689, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ckb_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 277908, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ckb_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 275649, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, 
+ "ckb_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 276736, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ckb_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 283762, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ckb_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 258368, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ckb_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 278479, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ckb_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 265367, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ckb_Arab-ron_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277156, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ckb_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 265956, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ckb_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 261822, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ckb_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 215717, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ckb_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 260772, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ckb_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 268713, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ckb_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 284723, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ckb_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 194550, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ckb_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 253242, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ckb_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 275850, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ckb_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 282718, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ckb_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 267180, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ckb_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 236566, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ckb_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 267960, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ckb_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 263845, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ckb_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 265513, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ckb_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 258645, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ckb_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 275112, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ckb_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270681, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ckb_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 253002, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ckb_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 293331, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + 
"average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ckb_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 244892, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ckb_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266409, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ckb_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 286223, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ckb_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 185528, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ckb_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 274579, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + 
"max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ckb_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 291754, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ckb_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 271206, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ckb_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 252489, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ckb_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 255758, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ckb_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 282343, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, 
+ "ckb_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 274366, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ckb_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 273104, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ckb_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 259139, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ckb_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 266271, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ckb_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 276092, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ckb_Arab-san_Deva": { + "num_samples": 1012, + 
"number_of_characters": 256842, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ckb_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261867, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ckb_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 267708, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ckb_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 245013, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ckb_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 287270, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ckb_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 250747, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ckb_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 297982, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ckb_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 270782, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ckb_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 262028, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ckb_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 268376, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ckb_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 261055, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ckb_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 270243, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ckb_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 242148, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ckb_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 255842, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ckb_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 285003, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ckb_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 256941, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ckb_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 258914, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ckb_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 260723, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ckb_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 267156, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ckb_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274456, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ckb_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 255429, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + 
"average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ckb_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 242355, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ckb_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 267846, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ckb_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 291182, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ckb_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 266625, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ckb_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 270251, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + 
"max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ckb_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 255488, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ckb_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 316869, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ckb_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 294754, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ckb_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 168604, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ckb_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 254521, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, 
+ "ckb_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 281066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ckb_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 254690, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ckb_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 258433, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ckb_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 279694, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ckb_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 259665, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ckb_Arab-tha_Thai": { + "num_samples": 1012, + 
"number_of_characters": 254694, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ckb_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 171833, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ckb_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 264948, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ckb_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 262623, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ckb_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 274728, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ckb_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 254986, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ckb_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 276130, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ckb_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 270925, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ckb_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 260295, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ckb_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 219285, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ckb_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 169280, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ckb_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 256756, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ckb_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 269842, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ckb_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 261400, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ckb_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 272515, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ckb_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 271583, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ckb_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 275757, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ckb_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 259440, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ckb_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 297596, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ckb_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 274786, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ckb_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 267202, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ckb_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 263668, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ckb_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 255013, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ckb_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263989, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ckb_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 263663, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ckb_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 279236, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + 
"max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ckb_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 281309, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ckb_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 291620, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ckb_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 276947, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ckb_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 245646, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ckb_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 282394, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + 
}, + "ckb_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 249865, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ckb_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 271542, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ckb_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 266131, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ckb_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 263424, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ckb_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 276188, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ckb_Arab-tso_Latn": { + "num_samples": 1012, + 
"number_of_characters": 286763, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ckb_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 272324, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ckb_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 240374, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ckb_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 268652, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ckb_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 259385, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ckb_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 274840, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ckb_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 258484, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ckb_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 248173, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ckb_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 268847, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ckb_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261807, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ckb_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 261440, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ckb_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 230639, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ckb_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265652, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ckb_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 262742, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ckb_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 262010, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ckb_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 280247, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ckb_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 301130, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.06027667984189, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "gle_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 264102, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "gle_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 278868, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "gle_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 315839, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "gle_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 283539, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "gle_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 305693, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "gle_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 279176, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "gle_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 291635, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gle_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 311213, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "gle_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 288242, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + 
"max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "gle_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 294414, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "gle_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 298051, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "gle_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 310020, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "gle_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 279414, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "gle_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 305912, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + 
"gle_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 281889, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gle_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 274936, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "gle_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 309478, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gle_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 281079, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "gle_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 266054, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "gle_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 300472, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "gle_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 284447, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gle_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 281208, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gle_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 298343, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "gle_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 301817, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "gle_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 276308, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "gle_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 305828, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "gle_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 270441, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "gle_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 267594, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "gle_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 313232, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "gle_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 283773, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "gle_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 291110, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "gle_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288013, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "gle_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 285532, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "gle_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 313477, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "gle_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283261, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gle_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 292958, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "gle_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 264558, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "gle_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 281523, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "gle_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 281940, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "gle_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 298437, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "gle_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 298243, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "gle_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 278880, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "gle_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 291964, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "gle_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 301207, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "gle_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286927, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "gle_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 292171, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "gle_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 281092, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "gle_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 292303, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "gle_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 287041, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "gle_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 282853, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + 
}, + "gle_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 293644, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "gle_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 295899, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "gle_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 289327, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "gle_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 287228, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "gle_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 261527, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "gle_Latn-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 274711, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "gle_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 279929, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gle_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 312161, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "gle_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 268481, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "gle_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290308, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "gle_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 273124, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "gle_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 284654, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "gle_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 282400, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "gle_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 285131, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "gle_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 290663, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gle_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 286755, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "gle_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 294799, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "gle_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 291453, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "gle_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 298769, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "gle_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 293508, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "gle_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 290585, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "gle_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 301804, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "gle_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 299545, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "gle_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 300632, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "gle_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 307658, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "gle_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 282264, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "gle_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 302375, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "gle_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 289263, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "gle_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 301052, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gle_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 289852, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "gle_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 285718, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "gle_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 239613, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "gle_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 284668, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "gle_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 292609, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "gle_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 308619, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "gle_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 218446, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"gle_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 277138, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "gle_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 299746, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "gle_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 306614, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "gle_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 291076, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "gle_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 260462, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "gle_Latn-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 291856, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "gle_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 287741, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "gle_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 289409, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gle_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 282541, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gle_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 299008, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "gle_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294577, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "gle_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 276898, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "gle_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 317227, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "gle_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 268788, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "gle_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290305, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "gle_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 310119, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "gle_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 209424, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "gle_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 298475, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "gle_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 315650, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "gle_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 295102, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "gle_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 276385, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "gle_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 279654, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "gle_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 306239, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "gle_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 298262, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gle_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 297000, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "gle_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 283035, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "gle_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 290167, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "gle_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 299988, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "gle_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 280738, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "gle_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285763, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "gle_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 291604, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "gle_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 268909, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "gle_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 311166, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "gle_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 274643, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "gle_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 321878, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "gle_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 294678, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, 
+ "gle_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 285924, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "gle_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 292272, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "gle_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 284951, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "gle_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 294139, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "gle_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 266044, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "gle_Latn-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 279738, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gle_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 308899, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "gle_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 280837, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "gle_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 282810, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "gle_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 284619, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "gle_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 291052, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "gle_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298352, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "gle_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 279325, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "gle_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 266251, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "gle_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 291742, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "gle_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 315078, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "gle_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 290521, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gle_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 294147, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "gle_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 279384, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gle_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 340765, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "gle_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 318650, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + 
"unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "gle_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 192500, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "gle_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 278417, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "gle_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 281066, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "gle_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 278586, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "gle_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 282329, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "gle_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 303590, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "gle_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 283561, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "gle_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 278590, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "gle_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 195729, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "gle_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 288844, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + 
"max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gle_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 286519, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "gle_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 298624, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "gle_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 278882, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "gle_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 300026, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "gle_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 294821, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, 
+ "gle_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 284191, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "gle_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 243181, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "gle_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 193176, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "gle_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 280652, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "gle_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 293738, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "gle_Latn-grn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285296, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "gle_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 296411, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "gle_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 295479, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "gle_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 299653, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "gle_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 283336, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "gle_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 321492, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "gle_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 298682, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "gle_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 291098, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "gle_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 287564, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "gle_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 278909, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "gle_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287885, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "gle_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 287559, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "gle_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 303132, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "gle_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 305205, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "gle_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 315516, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "gle_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 300843, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "gle_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 269542, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "gle_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 306290, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "gle_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 273761, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "gle_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 295438, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "gle_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 290027, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "gle_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 287320, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "gle_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 300084, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "gle_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 310659, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "gle_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 296220, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "gle_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 264270, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "gle_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 292548, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "gle_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 283281, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "gle_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 298736, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "gle_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 282380, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "gle_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 272069, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"gle_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 292743, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "gle_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285703, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "gle_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 285336, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "gle_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 254535, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "gle_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289548, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "gle_Latn-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 286638, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "gle_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 285906, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "gle_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 304143, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "gle_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 325026, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 150.67292490118578, + "max_sentence1_length": 443, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kas_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 237726, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kas_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 252492, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kas_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 289463, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kas_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 257163, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kas_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 279317, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kas_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 252800, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kas_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 265259, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kas_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 284837, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kas_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 261866, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kas_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 268038, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kas_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 271675, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kas_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 283644, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kas_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 253038, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kas_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 279536, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kas_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 255513, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kas_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 248560, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kas_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 283102, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kas_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 254703, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kas_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 239678, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kas_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274096, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kas_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 258071, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kas_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 254832, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "kas_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 271967, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kas_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 275441, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kas_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 249932, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kas_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 279452, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kas_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 244065, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kas_Arab-acq_Arab": { + 
"num_samples": 1012, + "number_of_characters": 241218, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kas_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 286856, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kas_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 257397, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kas_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 264734, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kas_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261637, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kas_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 259156, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kas_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 287101, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kas_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256885, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kas_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 266582, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kas_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 238182, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kas_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 255147, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kas_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 255564, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kas_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 272061, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kas_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 271867, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kas_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 252504, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kas_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 265588, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + 
"max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kas_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 274831, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kas_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260551, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kas_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 265795, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kas_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 254716, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kas_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 265927, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kas_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 260665, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kas_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 256477, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kas_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 267268, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kas_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 269523, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kas_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 262951, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kas_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 260852, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kas_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 235151, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kas_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 248335, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kas_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 253553, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kas_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 285785, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + 
"unique_sentence2": 1012 + }, + "kas_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 242105, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kas_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263932, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kas_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 246748, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kas_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 258278, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kas_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 256024, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kas_Arab-aka_Latn": { + 
"num_samples": 1012, + "number_of_characters": 258755, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kas_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 264287, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kas_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 260379, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kas_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 268423, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kas_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 265077, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kas_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 272393, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kas_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 267132, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kas_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 264209, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kas_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 275428, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kas_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273169, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kas_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 274256, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kas_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 281282, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kas_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 255888, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kas_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 275999, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kas_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 262887, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kas_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 274676, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + 
"max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kas_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 263476, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kas_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 259342, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kas_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 213237, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kas_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 258292, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kas_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 266233, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kas_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 282243, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kas_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 192070, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kas_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 250762, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kas_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 273370, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kas_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 280238, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kas_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 264700, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kas_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 234086, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kas_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 265480, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kas_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 261365, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kas_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 263033, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + 
"unique_sentence2": 1012 + }, + "kas_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 256165, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kas_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 272632, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kas_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268201, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kas_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 250522, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kas_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 290851, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kas_Arab-arb_Arab": { + 
"num_samples": 1012, + "number_of_characters": 242412, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kas_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263929, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kas_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 283743, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kas_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 183048, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kas_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 272099, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kas_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 289274, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kas_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 268726, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kas_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 250009, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kas_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 253278, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kas_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 279863, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kas_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 271886, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kas_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 270624, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kas_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 256659, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kas_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 263791, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kas_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 273612, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kas_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 254362, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + 
"max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kas_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259387, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kas_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 265228, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kas_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 242533, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kas_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 284790, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kas_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 248267, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kas_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 295502, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kas_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 268302, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kas_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 259548, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kas_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 265896, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kas_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 258575, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kas_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 267763, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kas_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 239668, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kas_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 253362, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kas_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 282523, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kas_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 254461, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + 
"unique_sentence2": 1010 + }, + "kas_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 256434, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kas_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 258243, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kas_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 264676, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kas_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271976, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kas_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 252949, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kas_Arab-arz_Arab": { + 
"num_samples": 1012, + "number_of_characters": 239875, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kas_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 265366, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kas_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 288702, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kas_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 264145, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kas_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 267771, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kas_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 253008, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kas_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 314389, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kas_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 292274, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kas_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 166124, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kas_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 252041, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kas_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 254690, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kas_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 278586, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kas_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 255953, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kas_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 277214, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kas_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 257185, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kas_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 252214, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + 
"max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kas_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 169353, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kas_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 262468, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kas_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 260143, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kas_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 272248, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kas_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 252506, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + 
"min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kas_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 273650, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kas_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 268445, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kas_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 257815, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kas_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 216805, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kas_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 166800, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 
40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kas_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 254276, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kas_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 267362, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kas_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 258920, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kas_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 270035, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kas_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 269103, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + 
"unique_sentence2": 1010 + }, + "kas_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 273277, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kas_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 256960, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kas_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 295116, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kas_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 272306, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kas_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 264722, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kas_Arab-dan_Latn": { + 
"num_samples": 1012, + "number_of_characters": 261188, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kas_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 252533, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kas_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261509, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kas_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 261183, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kas_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 276756, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kas_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 278829, + 
"unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kas_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 289140, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kas_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 274467, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kas_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 243166, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kas_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 279914, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kas_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 247385, + "unique_pairs": 1012, + "min_sentence1_length": 34, + 
"average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kas_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 269062, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kas_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 263651, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kas_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 260944, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kas_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 273708, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kas_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 284283, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + 
"max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kas_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 269844, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kas_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 237894, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kas_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 266172, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kas_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 256905, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kas_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 272360, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kas_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 256004, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kas_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 245693, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kas_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 266367, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kas_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259327, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kas_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 258960, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kas_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 228159, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kas_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263172, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kas_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 260262, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kas_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 259530, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kas_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 277767, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + 
"unique_sentence2": 1012 + }, + "kas_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 298650, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 124.6096837944664, + "max_sentence1_length": 315, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ltg_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 241469, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ltg_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 256235, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ltg_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 293206, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ltg_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 260906, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ltg_Latn-khm_Khmr": { + 
"num_samples": 1012, + "number_of_characters": 283060, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ltg_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 256543, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ltg_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 269002, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ltg_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 288580, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ltg_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 265609, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ltg_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 271781, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ltg_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 275418, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ltg_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 287387, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ltg_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 256781, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ltg_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 283279, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ltg_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 259256, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ltg_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 252303, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ltg_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 286845, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ltg_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 258446, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ltg_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 243421, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ltg_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277839, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + 
"max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ltg_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 261814, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ltg_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 258575, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ltg_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 275710, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ltg_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 279184, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ltg_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 253675, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ltg_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 283195, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ltg_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 247808, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ltg_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 244961, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ltg_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 290599, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ltg_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 261140, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ltg_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 268477, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ltg_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265380, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ltg_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 262899, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ltg_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 290844, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ltg_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260628, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "ltg_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 270325, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ltg_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 241925, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ltg_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 258890, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ltg_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 259307, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ltg_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 275804, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ltg_Latn-kmb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 275610, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ltg_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 256247, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ltg_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 269331, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ltg_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 278574, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ltg_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264294, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ltg_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 269538, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ltg_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 258459, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ltg_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 269670, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ltg_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 264408, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ltg_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 260220, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ltg_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 271011, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ltg_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 273266, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ltg_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 266694, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ltg_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 264595, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ltg_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 238894, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ltg_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 252078, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + 
"max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ltg_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 257296, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ltg_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 289528, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ltg_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 245848, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ltg_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267675, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ltg_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 250491, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ltg_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 262021, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ltg_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 259767, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ltg_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 262498, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ltg_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 268030, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ltg_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 264122, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ltg_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 272166, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ltg_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 268820, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ltg_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 276136, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ltg_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 270875, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ltg_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 267952, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + 
"unique_sentence2": 1012 + }, + "ltg_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 279171, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ltg_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 276912, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ltg_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 277999, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ltg_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 285025, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ltg_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 259631, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ltg_Latn-kon_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279742, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ltg_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 266630, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ltg_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 278419, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ltg_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 267219, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ltg_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 263085, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ltg_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 216980, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ltg_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 262035, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ltg_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 269976, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ltg_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 285986, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ltg_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 195813, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ltg_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 254505, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ltg_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 277113, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ltg_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 283981, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ltg_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 268443, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ltg_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 237829, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ltg_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 269223, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + 
"max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ltg_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 265108, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ltg_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 266776, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ltg_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 259908, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ltg_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 276375, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ltg_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271944, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ltg_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 254265, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ltg_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 294594, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ltg_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 246155, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ltg_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267672, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ltg_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 287486, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 
155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ltg_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 186791, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ltg_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 275842, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ltg_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 293017, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ltg_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 272469, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ltg_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 253752, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + 
"unique_sentence2": 1012 + }, + "ltg_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 257021, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ltg_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 283606, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ltg_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 275629, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ltg_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 274367, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ltg_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 260402, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ltg_Latn-lim_Latn": { + 
"num_samples": 1012, + "number_of_characters": 267534, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ltg_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 277355, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ltg_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 258105, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ltg_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263130, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ltg_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 268971, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ltg_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 246276, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ltg_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 288533, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ltg_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 252010, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ltg_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 299245, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ltg_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 272045, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ltg_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 263291, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ltg_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 269639, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ltg_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 262318, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ltg_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 271506, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ltg_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 243411, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ltg_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 257105, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + 
"max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ltg_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 286266, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ltg_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 258204, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ltg_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 260177, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ltg_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 261986, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ltg_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 268419, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ltg_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275719, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ltg_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 256692, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ltg_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 243618, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ltg_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 269109, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ltg_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 292445, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ltg_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 267888, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ltg_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 271514, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ltg_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 256751, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ltg_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 318132, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ltg_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 296017, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + 
"unique_sentence2": 1012 + }, + "ltg_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 169867, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ltg_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 255784, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ltg_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 258433, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ltg_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 282329, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ltg_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 255953, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ltg_Latn-nso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 280957, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ltg_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 260928, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ltg_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 255957, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ltg_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 173096, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ltg_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 266211, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ltg_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 263886, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ltg_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 275991, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ltg_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 256249, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ltg_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 277393, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ltg_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 272188, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ltg_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 261558, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ltg_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 220548, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ltg_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 170543, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ltg_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 258019, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ltg_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 271105, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ltg_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 262663, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + 
"max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ltg_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 273778, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ltg_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 272846, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ltg_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 277020, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ltg_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 260703, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ltg_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 298859, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + 
"min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ltg_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 276049, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ltg_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 268465, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ltg_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 264931, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ltg_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 256276, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ltg_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265252, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ltg_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 264926, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ltg_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 280499, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ltg_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 282572, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ltg_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 292883, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ltg_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 278210, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + 
"unique_sentence2": 1012 + }, + "ltg_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 246909, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ltg_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 283657, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ltg_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 251128, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ltg_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 272805, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ltg_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 267394, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ltg_Latn-ory_Orya": { + 
"num_samples": 1012, + "number_of_characters": 264687, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ltg_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 277451, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ltg_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 288026, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ltg_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 273587, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ltg_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 241637, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ltg_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 269915, + 
"unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ltg_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 260648, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ltg_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 276103, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ltg_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 259747, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ltg_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 249436, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ltg_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 270110, + "unique_pairs": 1012, + "min_sentence1_length": 35, + 
"average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ltg_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263070, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ltg_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 262703, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ltg_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 231902, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ltg_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266915, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ltg_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 264005, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + 
"max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ltg_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 263273, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ltg_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 281510, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ltg_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 302393, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 128.3083003952569, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "nso_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 262730, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "nso_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 277496, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "nso_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 314467, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nso_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 282167, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "nso_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 304321, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "nso_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 277804, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "nso_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 290263, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nso_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 309841, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "nso_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 286870, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "nso_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 293042, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nso_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 296679, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nso_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 308648, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + 
}, + "nso_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 278042, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "nso_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 304540, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "nso_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 280517, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nso_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 273564, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nso_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 308106, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nso_Latn-twi_Latn": { + "num_samples": 1012, + 
"number_of_characters": 279707, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "nso_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 264682, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "nso_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299100, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nso_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 283075, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nso_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 279836, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nso_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 296971, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nso_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 300445, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nso_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 274936, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "nso_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 304456, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nso_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 269069, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "nso_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 266222, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "nso_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 311860, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "nso_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 282401, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nso_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 289738, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "nso_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286641, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nso_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 284160, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nso_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 312105, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "nso_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281889, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nso_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 291586, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "nso_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 263186, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "nso_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 280151, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nso_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 280568, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "nso_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 297065, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nso_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 296871, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "nso_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 277508, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nso_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 290592, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + 
"max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nso_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 299835, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nso_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285555, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nso_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 290799, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nso_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 279720, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nso_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 290931, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + 
"nso_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 285669, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nso_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 281481, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nso_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 292272, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nso_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 294527, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nso_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 287955, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nso_Latn-umb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285856, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nso_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 260155, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "nso_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 273339, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "nso_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 278557, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nso_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 310789, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "nso_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 267109, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "nso_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288936, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "nso_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 271752, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "nso_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 283282, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nso_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 281028, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nso_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 283759, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nso_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 289291, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nso_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 285383, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nso_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 293427, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nso_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 290081, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nso_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 297397, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "nso_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 292136, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nso_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 289213, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "nso_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 300432, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nso_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 298173, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nso_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 299260, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "nso_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 306286, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "nso_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 280892, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nso_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 301003, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nso_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 287891, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nso_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 299680, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nso_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 288480, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "nso_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 284346, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nso_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 238241, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "nso_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 283296, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nso_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 291237, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + 
"nso_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 307247, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "nso_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 217074, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "nso_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 275766, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "nso_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 298374, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "nso_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 305242, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "nso_Latn-vie_Latn": { + "num_samples": 1012, + 
"number_of_characters": 289704, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "nso_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 259090, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "nso_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 290484, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nso_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 286369, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "nso_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 288037, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nso_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 281169, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nso_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 297636, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nso_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293205, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nso_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 275526, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nso_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 315855, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nso_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 267416, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nso_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288933, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nso_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 308747, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "nso_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 208052, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "nso_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 297103, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nso_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 314278, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nso_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 293730, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "nso_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 275013, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nso_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 278282, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nso_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 304867, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "nso_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 296890, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nso_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 295628, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nso_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 281663, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "nso_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 288795, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "nso_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 298616, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nso_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 279366, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + 
"max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "nso_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284391, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nso_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 290232, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "nso_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 267537, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nso_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 309794, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nso_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 273271, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + 
"nso_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 320506, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "nso_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 293306, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nso_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 284552, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nso_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 290900, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nso_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 283579, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nso_Latn-ydd_Hebr": { + "num_samples": 1012, + 
"number_of_characters": 292767, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nso_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 264672, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "nso_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 278366, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nso_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 307527, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "nso_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 279465, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "nso_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 281438, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "nso_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 283247, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "nso_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 289680, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nso_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296980, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "nso_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 277953, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nso_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 264879, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "nso_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 290370, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nso_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 313706, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "nso_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 289149, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nso_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 292775, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nso_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 278012, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nso_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 339393, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "nso_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 317278, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nso_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 191128, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "nso_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 277045, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nso_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 279694, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nso_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 303590, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "nso_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 277214, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "nso_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 280957, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nso_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 282189, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nso_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 277218, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + 
"max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nso_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 194357, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "nso_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 287472, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nso_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 285147, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nso_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 297252, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nso_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 277510, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + 
"nso_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 298654, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "nso_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 293449, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "nso_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 282819, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nso_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 241809, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "nso_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 191804, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "nso_Latn-awa_Deva": { + "num_samples": 1012, + 
"number_of_characters": 279280, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nso_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 292366, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nso_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 283924, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "nso_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 295039, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nso_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 294107, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "nso_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 298281, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "nso_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 281964, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nso_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 320120, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "nso_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 297310, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nso_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 289726, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "nso_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 286192, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nso_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 277537, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nso_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286513, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nso_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 286187, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "nso_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 301760, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nso_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 303833, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + 
"unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "nso_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 314144, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nso_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 299471, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "nso_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 268170, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "nso_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 304918, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nso_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 272389, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "nso_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 294066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "nso_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 288655, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nso_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 285948, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nso_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 298712, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "nso_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 309287, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + 
"max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nso_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 294848, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "nso_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 262898, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "nso_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 291176, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "nso_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 281909, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nso_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 297364, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"nso_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 281008, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "nso_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 270697, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "nso_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 291371, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nso_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284331, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "nso_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 283964, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "nso_Latn-heb_Hebr": { + "num_samples": 1012, + 
"number_of_characters": 253163, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "nso_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288176, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nso_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 285266, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nso_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 284534, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nso_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 302771, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "nso_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 323654, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 149.3171936758893, + "max_sentence1_length": 423, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "sin_Sinh-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 242701, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "sin_Sinh-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 257467, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "sin_Sinh-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 294438, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sin_Sinh-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 262138, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "sin_Sinh-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 284292, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "sin_Sinh-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 257775, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "sin_Sinh-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 270234, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sin_Sinh-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 289812, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "sin_Sinh-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 266841, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "sin_Sinh-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 273013, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sin_Sinh-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 276650, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sin_Sinh-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 288619, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "sin_Sinh-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 258013, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "sin_Sinh-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 284511, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "sin_Sinh-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 260488, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sin_Sinh-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 253535, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sin_Sinh-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 288077, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sin_Sinh-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 259678, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "sin_Sinh-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 244653, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "sin_Sinh-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279071, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + 
"max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sin_Sinh-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 263046, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sin_Sinh-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 259807, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sin_Sinh-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 276942, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sin_Sinh-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 280416, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sin_Sinh-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 254907, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + 
"sin_Sinh-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 284427, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "sin_Sinh-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 249040, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "sin_Sinh-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 246193, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "sin_Sinh-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 291831, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "sin_Sinh-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 262372, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sin_Sinh-hun_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269709, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "sin_Sinh-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266612, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sin_Sinh-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 264131, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sin_Sinh-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 292076, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "sin_Sinh-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261860, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sin_Sinh-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 271557, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "sin_Sinh-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 243157, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "sin_Sinh-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 260122, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sin_Sinh-est_Latn": { + "num_samples": 1012, + "number_of_characters": 260539, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "sin_Sinh-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 277036, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sin_Sinh-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 276842, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "sin_Sinh-min_Arab": { + "num_samples": 1012, + "number_of_characters": 257479, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sin_Sinh-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 270563, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sin_Sinh-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 279806, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sin_Sinh-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265526, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sin_Sinh-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 270770, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sin_Sinh-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 259691, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sin_Sinh-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 270902, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "sin_Sinh-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 265640, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sin_Sinh-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 261452, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sin_Sinh-min_Latn": { + "num_samples": 1012, + "number_of_characters": 272243, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sin_Sinh-por_Latn": { + "num_samples": 1012, + "number_of_characters": 274498, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sin_Sinh-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 267926, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sin_Sinh-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 265827, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sin_Sinh-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 240126, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "sin_Sinh-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 253310, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + 
"max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "sin_Sinh-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 258528, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sin_Sinh-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 290760, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "sin_Sinh-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 247080, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "sin_Sinh-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268907, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "sin_Sinh-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 251723, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + 
}, + "sin_Sinh-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 263253, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sin_Sinh-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 260999, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sin_Sinh-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 263730, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sin_Sinh-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 269262, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sin_Sinh-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 265354, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sin_Sinh-ind_Latn": { + "num_samples": 1012, + 
"number_of_characters": 273398, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sin_Sinh-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 270052, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sin_Sinh-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 277368, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "sin_Sinh-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 272107, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sin_Sinh-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 269184, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "sin_Sinh-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 280403, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sin_Sinh-als_Latn": { + "num_samples": 1012, + "number_of_characters": 278144, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sin_Sinh-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 279231, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "sin_Sinh-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 286257, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "sin_Sinh-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 260863, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sin_Sinh-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 280974, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sin_Sinh-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 267862, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sin_Sinh-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 279651, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sin_Sinh-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 268451, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "sin_Sinh-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 264317, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sin_Sinh-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 218212, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + 
"unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "sin_Sinh-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 263267, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sin_Sinh-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 271208, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sin_Sinh-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 287218, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "sin_Sinh-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 197045, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "sin_Sinh-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 255737, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "sin_Sinh-run_Latn": { + "num_samples": 1012, + "number_of_characters": 278345, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "sin_Sinh-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 285213, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "sin_Sinh-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 269675, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "sin_Sinh-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 239061, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "sin_Sinh-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 270455, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sin_Sinh-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 266340, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "sin_Sinh-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 268008, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sin_Sinh-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 261140, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sin_Sinh-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 277607, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sin_Sinh-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273176, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, 
+ "sin_Sinh-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 255497, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sin_Sinh-war_Latn": { + "num_samples": 1012, + "number_of_characters": 295826, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sin_Sinh-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 247387, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sin_Sinh-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268904, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sin_Sinh-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 288718, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "sin_Sinh-jpn_Jpan": { + "num_samples": 1012, + 
"number_of_characters": 188023, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "sin_Sinh-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 277074, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sin_Sinh-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 294249, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sin_Sinh-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 273701, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "sin_Sinh-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 254984, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sin_Sinh-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 258253, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sin_Sinh-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 284838, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "sin_Sinh-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 276861, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sin_Sinh-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 275599, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sin_Sinh-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 261634, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "sin_Sinh-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 268766, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "sin_Sinh-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 278587, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sin_Sinh-san_Deva": { + "num_samples": 1012, + "number_of_characters": 259337, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "sin_Sinh-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264362, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sin_Sinh-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 270203, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "sin_Sinh-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 247508, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sin_Sinh-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 289765, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sin_Sinh-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 253242, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "sin_Sinh-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 300477, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "sin_Sinh-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 273277, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sin_Sinh-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 264523, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sin_Sinh-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 270871, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sin_Sinh-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 263550, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sin_Sinh-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 272738, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sin_Sinh-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 244643, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "sin_Sinh-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 258337, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sin_Sinh-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 287498, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "sin_Sinh-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 259436, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "sin_Sinh-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 261409, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "sin_Sinh-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 263218, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "sin_Sinh-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 269651, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + 
}, + "sin_Sinh-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276951, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "sin_Sinh-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 257924, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sin_Sinh-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 244850, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "sin_Sinh-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 270341, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sin_Sinh-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 293677, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "sin_Sinh-kan_Knda": { + "num_samples": 1012, + 
"number_of_characters": 269120, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sin_Sinh-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 272746, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sin_Sinh-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 257983, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sin_Sinh-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 319364, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "sin_Sinh-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 297249, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sin_Sinh-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 171099, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "sin_Sinh-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 257016, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sin_Sinh-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 259665, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sin_Sinh-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 283561, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "sin_Sinh-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 257185, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "sin_Sinh-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 260928, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sin_Sinh-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 282189, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "sin_Sinh-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 257189, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sin_Sinh-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 174328, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "sin_Sinh-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 267443, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sin_Sinh-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 265118, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + 
"unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sin_Sinh-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 277223, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sin_Sinh-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 257481, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "sin_Sinh-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 278625, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "sin_Sinh-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 273420, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "sin_Sinh-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 262790, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sin_Sinh-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 221780, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "sin_Sinh-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 171775, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "sin_Sinh-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 259251, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sin_Sinh-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 272337, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sin_Sinh-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 263895, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + 
"max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "sin_Sinh-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 275010, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sin_Sinh-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 274078, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "sin_Sinh-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 278252, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "sin_Sinh-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 261935, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sin_Sinh-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 300091, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + 
}, + "sin_Sinh-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 277281, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sin_Sinh-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 269697, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "sin_Sinh-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 266163, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sin_Sinh-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 257508, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sin_Sinh-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266484, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sin_Sinh-lug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 266158, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "sin_Sinh-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 281731, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sin_Sinh-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 283804, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "sin_Sinh-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 294115, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sin_Sinh-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 279442, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "sin_Sinh-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 248141, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "sin_Sinh-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 284889, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sin_Sinh-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 252360, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "sin_Sinh-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 274037, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "sin_Sinh-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 268626, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sin_Sinh-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 265919, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sin_Sinh-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 278683, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "sin_Sinh-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 289258, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sin_Sinh-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 274819, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "sin_Sinh-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 242869, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "sin_Sinh-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 271147, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "sin_Sinh-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 261880, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sin_Sinh-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 277335, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "sin_Sinh-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 260979, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "sin_Sinh-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 250668, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "sin_Sinh-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 271342, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + 
"average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sin_Sinh-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264302, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "sin_Sinh-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 263935, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "sin_Sinh-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 233134, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "sin_Sinh-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268147, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sin_Sinh-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 265237, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sin_Sinh-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 264505, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sin_Sinh-som_Latn": { + "num_samples": 1012, + "number_of_characters": 282742, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "sin_Sinh-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 303625, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 129.52569169960475, + "max_sentence1_length": 401, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tha_Thai-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 237730, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tha_Thai-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 252496, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, 
+ "tha_Thai-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 289467, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tha_Thai-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 257167, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tha_Thai-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 279321, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tha_Thai-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 252804, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tha_Thai-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 265263, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tha_Thai-sot_Latn": { + "num_samples": 1012, + 
"number_of_characters": 284841, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tha_Thai-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 261870, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tha_Thai-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 268042, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tha_Thai-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 271679, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tha_Thai-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 283648, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tha_Thai-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 253042, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tha_Thai-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 279540, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tha_Thai-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 255517, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tha_Thai-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 248564, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tha_Thai-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 283106, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tha_Thai-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 254707, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tha_Thai-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 239682, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tha_Thai-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274100, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tha_Thai-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 258075, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tha_Thai-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 254836, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tha_Thai-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 271971, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tha_Thai-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 275445, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tha_Thai-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 249936, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tha_Thai-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 279456, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tha_Thai-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 244069, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tha_Thai-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 241222, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tha_Thai-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 286860, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tha_Thai-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 257401, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tha_Thai-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 264738, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tha_Thai-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261641, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tha_Thai-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 259160, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + 
"max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tha_Thai-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 287105, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tha_Thai-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256889, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tha_Thai-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 266586, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tha_Thai-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 238186, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tha_Thai-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 255151, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + 
}, + "tha_Thai-est_Latn": { + "num_samples": 1012, + "number_of_characters": 255568, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tha_Thai-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 272065, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tha_Thai-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 271871, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tha_Thai-min_Arab": { + "num_samples": 1012, + "number_of_characters": 252508, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tha_Thai-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 265592, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tha_Thai-ssw_Latn": { + "num_samples": 1012, + 
"number_of_characters": 274835, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tha_Thai-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260555, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tha_Thai-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 265799, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tha_Thai-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 254720, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tha_Thai-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 265931, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tha_Thai-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 260669, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tha_Thai-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 256481, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tha_Thai-min_Latn": { + "num_samples": 1012, + "number_of_characters": 267272, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tha_Thai-por_Latn": { + "num_samples": 1012, + "number_of_characters": 269527, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tha_Thai-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 262955, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tha_Thai-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 260856, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tha_Thai-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 235155, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tha_Thai-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 248339, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tha_Thai-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 253557, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tha_Thai-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 285789, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tha_Thai-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 242109, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + 
"unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tha_Thai-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263936, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tha_Thai-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 246752, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tha_Thai-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 258282, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tha_Thai-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 256028, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tha_Thai-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 258759, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tha_Thai-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 264291, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tha_Thai-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 260383, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tha_Thai-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 268427, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tha_Thai-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 265081, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tha_Thai-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 272397, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + 
"max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tha_Thai-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 267136, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tha_Thai-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 264213, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tha_Thai-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 275432, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tha_Thai-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273173, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tha_Thai-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 274260, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, 
+ "tha_Thai-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 281286, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tha_Thai-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 255892, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tha_Thai-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 276003, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tha_Thai-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 262891, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tha_Thai-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 274680, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tha_Thai-szl_Latn": { + "num_samples": 1012, + 
"number_of_characters": 263480, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tha_Thai-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 259346, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tha_Thai-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 213241, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tha_Thai-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 258296, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tha_Thai-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 266237, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tha_Thai-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 282247, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tha_Thai-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 192074, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tha_Thai-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 250766, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tha_Thai-run_Latn": { + "num_samples": 1012, + "number_of_characters": 273374, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tha_Thai-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 280242, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tha_Thai-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 264704, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tha_Thai-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 234090, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tha_Thai-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 265484, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tha_Thai-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 261369, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tha_Thai-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 263037, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tha_Thai-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 256169, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tha_Thai-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 272636, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tha_Thai-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268205, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tha_Thai-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 250526, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tha_Thai-war_Latn": { + "num_samples": 1012, + "number_of_characters": 290855, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tha_Thai-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 242416, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tha_Thai-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263933, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tha_Thai-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 283747, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tha_Thai-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 183052, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tha_Thai-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 272103, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tha_Thai-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 289278, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + 
"max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tha_Thai-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 268730, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tha_Thai-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 250013, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tha_Thai-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 253282, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tha_Thai-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 279867, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tha_Thai-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 271890, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, 
+ "tha_Thai-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 270628, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tha_Thai-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 256663, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tha_Thai-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 263795, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tha_Thai-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 273616, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tha_Thai-san_Deva": { + "num_samples": 1012, + "number_of_characters": 254366, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tha_Thai-tat_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 259391, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tha_Thai-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 265232, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tha_Thai-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 242537, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tha_Thai-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 284794, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tha_Thai-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 248271, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tha_Thai-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 295506, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tha_Thai-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 268306, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tha_Thai-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 259552, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tha_Thai-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 265900, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tha_Thai-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 258579, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tha_Thai-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 267767, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tha_Thai-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 239672, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tha_Thai-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 253366, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tha_Thai-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 282527, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tha_Thai-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 254465, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tha_Thai-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 256438, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tha_Thai-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 258247, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tha_Thai-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 264680, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tha_Thai-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271980, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tha_Thai-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 252953, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tha_Thai-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 239879, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tha_Thai-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 265370, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tha_Thai-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 288706, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tha_Thai-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 264149, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tha_Thai-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 267775, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tha_Thai-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 253012, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tha_Thai-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 314393, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tha_Thai-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 292278, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tha_Thai-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 166128, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tha_Thai-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 252045, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tha_Thai-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 254694, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, 
+ "tha_Thai-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 278590, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tha_Thai-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 252214, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tha_Thai-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 255957, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tha_Thai-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 277218, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tha_Thai-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 257189, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tha_Thai-zho_Hans": { + "num_samples": 1012, + 
"number_of_characters": 169357, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tha_Thai-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 262472, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tha_Thai-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 260147, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tha_Thai-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 272252, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tha_Thai-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 252510, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tha_Thai-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 273654, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tha_Thai-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 268449, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tha_Thai-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 257819, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tha_Thai-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 216809, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tha_Thai-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 166804, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tha_Thai-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 254280, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tha_Thai-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 267366, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tha_Thai-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 258924, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tha_Thai-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 270039, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tha_Thai-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 269107, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tha_Thai-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 273281, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tha_Thai-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 256964, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tha_Thai-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 295120, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tha_Thai-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 272310, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tha_Thai-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 264726, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tha_Thai-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 261192, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tha_Thai-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 252537, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tha_Thai-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261513, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tha_Thai-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 261187, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tha_Thai-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 276760, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tha_Thai-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 278833, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + 
"max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tha_Thai-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 289144, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tha_Thai-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 274471, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tha_Thai-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 243170, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tha_Thai-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 279918, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tha_Thai-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 247389, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + 
}, + "tha_Thai-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 269066, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tha_Thai-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 263655, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tha_Thai-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 260948, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tha_Thai-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 273712, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tha_Thai-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 284287, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tha_Thai-azj_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269848, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tha_Thai-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 237898, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tha_Thai-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 266176, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tha_Thai-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 256909, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tha_Thai-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 272364, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tha_Thai-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 256008, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tha_Thai-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 245697, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tha_Thai-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 266371, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tha_Thai-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259331, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tha_Thai-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 258964, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tha_Thai-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 228163, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tha_Thai-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263176, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tha_Thai-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 260266, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tha_Thai-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 259534, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tha_Thai-som_Latn": { + "num_samples": 1012, + "number_of_characters": 277771, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tha_Thai-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 298654, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 124.61363636363636, + "max_sentence1_length": 333, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "zho_Hans-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 154869, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "zho_Hans-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 169635, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "zho_Hans-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 206606, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "zho_Hans-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 174306, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "zho_Hans-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 196460, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + 
"average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "zho_Hans-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 169943, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "zho_Hans-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 182402, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zho_Hans-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 201980, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "zho_Hans-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 179009, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "zho_Hans-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 185181, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, 
+ "unique_sentence2": 1012 + }, + "zho_Hans-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 188818, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zho_Hans-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 200787, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "zho_Hans-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 170181, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "zho_Hans-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 196679, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "zho_Hans-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 172656, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zho_Hans-pbt_Arab": { + 
"num_samples": 1012, + "number_of_characters": 165703, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "zho_Hans-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 200245, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zho_Hans-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 171846, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "zho_Hans-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 156821, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "zho_Hans-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 191239, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zho_Hans-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 175214, + 
"unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zho_Hans-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 171975, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zho_Hans-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 189110, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "zho_Hans-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 192584, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "zho_Hans-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 167075, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "zho_Hans-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 196595, + "unique_pairs": 1012, + "min_sentence1_length": 10, + 
"average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "zho_Hans-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 161208, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "zho_Hans-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 158361, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "zho_Hans-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 203999, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "zho_Hans-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 174540, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zho_Hans-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 181877, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + 
"max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "zho_Hans-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 178780, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zho_Hans-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 176299, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zho_Hans-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 204244, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "zho_Hans-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 174028, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zho_Hans-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 183725, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "zho_Hans-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 155325, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "zho_Hans-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 172290, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "zho_Hans-est_Latn": { + "num_samples": 1012, + "number_of_characters": 172707, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "zho_Hans-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 189204, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zho_Hans-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 189010, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "zho_Hans-min_Arab": { + "num_samples": 1012, + "number_of_characters": 169647, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "zho_Hans-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 182731, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zho_Hans-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 191974, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zho_Hans-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 177694, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zho_Hans-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 182938, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + 
"unique_sentence2": 1012 + }, + "zho_Hans-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 171859, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zho_Hans-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 183070, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "zho_Hans-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 177808, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zho_Hans-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 173620, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zho_Hans-min_Latn": { + "num_samples": 1012, + "number_of_characters": 184411, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "zho_Hans-por_Latn": { + 
"num_samples": 1012, + "number_of_characters": 186666, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zho_Hans-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 180094, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "zho_Hans-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 177995, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "zho_Hans-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 152294, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "zho_Hans-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 165478, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "zho_Hans-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 170696, + 
"unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zho_Hans-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 202928, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "zho_Hans-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 159248, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "zho_Hans-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 181075, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "zho_Hans-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 163891, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "zho_Hans-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 175421, + "unique_pairs": 1012, + "min_sentence1_length": 10, + 
"average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "zho_Hans-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 173167, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "zho_Hans-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 175898, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zho_Hans-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 181430, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zho_Hans-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 177522, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "zho_Hans-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 185566, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + 
"max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zho_Hans-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 182220, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "zho_Hans-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 189536, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "zho_Hans-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 184275, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "zho_Hans-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 181352, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "zho_Hans-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 192571, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zho_Hans-als_Latn": { + "num_samples": 1012, + "number_of_characters": 190312, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "zho_Hans-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 191399, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "zho_Hans-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 198425, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "zho_Hans-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 173031, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "zho_Hans-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 193142, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "zho_Hans-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 180030, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zho_Hans-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 191819, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zho_Hans-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 180619, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "zho_Hans-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 176485, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zho_Hans-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 130380, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + 
"unique_sentence2": 1012 + }, + "zho_Hans-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 175435, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "zho_Hans-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 183376, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "zho_Hans-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 199386, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "zho_Hans-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 109213, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "zho_Hans-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 167905, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "zho_Hans-run_Latn": { + 
"num_samples": 1012, + "number_of_characters": 190513, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "zho_Hans-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 197381, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "zho_Hans-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 181843, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "zho_Hans-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 151229, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "zho_Hans-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 182623, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zho_Hans-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 178508, + 
"unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "zho_Hans-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 180176, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zho_Hans-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 173308, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zho_Hans-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 189775, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zho_Hans-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 185344, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zho_Hans-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 167665, + "unique_pairs": 1012, + "min_sentence1_length": 10, + 
"average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "zho_Hans-war_Latn": { + "num_samples": 1012, + "number_of_characters": 207994, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "zho_Hans-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 159555, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "zho_Hans-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 181072, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zho_Hans-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 200886, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "zho_Hans-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 100191, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + 
"max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "zho_Hans-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 189242, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zho_Hans-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 206417, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zho_Hans-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 185869, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "zho_Hans-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 167152, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "zho_Hans-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 170421, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "zho_Hans-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 197006, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "zho_Hans-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 189029, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zho_Hans-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 187767, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "zho_Hans-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 173802, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "zho_Hans-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 180934, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "zho_Hans-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 190755, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zho_Hans-san_Deva": { + "num_samples": 1012, + "number_of_characters": 171505, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "zho_Hans-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 176530, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "zho_Hans-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 182371, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "zho_Hans-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 159676, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + 
"unique_sentence2": 1012 + }, + "zho_Hans-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 201933, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "zho_Hans-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 165410, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "zho_Hans-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 212645, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "zho_Hans-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 185445, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "zho_Hans-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 176691, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zho_Hans-sat_Olck": { + 
"num_samples": 1012, + "number_of_characters": 183039, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "zho_Hans-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 175718, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "zho_Hans-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 184906, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zho_Hans-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 156811, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "zho_Hans-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 170505, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zho_Hans-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 199666, + 
"unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "zho_Hans-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 171604, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "zho_Hans-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 173577, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "zho_Hans-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 175386, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "zho_Hans-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 181819, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zho_Hans-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 189119, + "unique_pairs": 1012, + "min_sentence1_length": 10, + 
"average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "zho_Hans-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 170092, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zho_Hans-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 157018, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "zho_Hans-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 182509, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "zho_Hans-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 205845, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "zho_Hans-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 181288, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + 
"max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zho_Hans-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 184914, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "zho_Hans-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 170151, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zho_Hans-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 231532, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "zho_Hans-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 209417, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "zho_Hans-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 83267, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + 
"min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "zho_Hans-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 169184, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "zho_Hans-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 171833, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zho_Hans-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 195729, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "zho_Hans-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 169353, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "zho_Hans-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 173096, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "zho_Hans-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 194357, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "zho_Hans-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 174328, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zho_Hans-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 169357, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "zho_Hans-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 179611, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zho_Hans-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 177286, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + 
"unique_sentence2": 1012 + }, + "zho_Hans-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 189391, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zho_Hans-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 169649, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "zho_Hans-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 190793, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "zho_Hans-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 185588, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "zho_Hans-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 174958, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zho_Hans-tir_Ethi": { + 
"num_samples": 1012, + "number_of_characters": 133948, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "zho_Hans-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 83943, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "zho_Hans-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 171419, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "zho_Hans-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 184505, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "zho_Hans-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 176063, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "zho_Hans-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 187178, + 
"unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zho_Hans-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 186246, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "zho_Hans-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 190420, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "zho_Hans-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 174103, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "zho_Hans-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 212259, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "zho_Hans-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 189449, + "unique_pairs": 1012, + "min_sentence1_length": 10, + 
"average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zho_Hans-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 181865, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "zho_Hans-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 178331, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zho_Hans-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 169676, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zho_Hans-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 178652, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zho_Hans-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 178326, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + 
"max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "zho_Hans-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 193899, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "zho_Hans-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 195972, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "zho_Hans-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 206283, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "zho_Hans-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 191610, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "zho_Hans-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 160309, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + 
"min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "zho_Hans-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 197057, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "zho_Hans-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 164528, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "zho_Hans-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 186205, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "zho_Hans-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 180794, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zho_Hans-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 178087, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "zho_Hans-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 190851, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "zho_Hans-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 201426, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zho_Hans-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 186987, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "zho_Hans-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 155037, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "zho_Hans-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 183315, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + 
"unique_sentence2": 1012 + }, + "zho_Hans-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 174048, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "zho_Hans-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 189503, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "zho_Hans-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 173147, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "zho_Hans-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 162836, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "zho_Hans-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 183510, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zho_Hans-bak_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 176470, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "zho_Hans-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 176103, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "zho_Hans-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 145302, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "zho_Hans-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 180315, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zho_Hans-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 177405, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "zho_Hans-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 176673, + 
"unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "zho_Hans-som_Latn": { + "num_samples": 1012, + "number_of_characters": 194910, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "zho_Hans-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 215793, + "unique_pairs": 1012, + "min_sentence1_length": 10, + "average_sentence1_length": 42.73517786561265, + "max_sentence1_length": 130, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ast_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 247984, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ast_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 262750, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ast_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 299721, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ast_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 267421, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ast_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 289575, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ast_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 263058, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ast_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 275517, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ast_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 295095, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ast_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 272124, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ast_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 278296, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ast_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 281933, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ast_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 293902, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ast_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 263296, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ast_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 289794, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ast_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 265771, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ast_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 258818, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ast_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 293360, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ast_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 264961, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ast_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 249936, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ast_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284354, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ast_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 268329, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ast_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 265090, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ast_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 282225, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + 
"unique_sentence2": 1012 + }, + "ast_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 285699, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ast_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 260190, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ast_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 289710, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ast_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 254323, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ast_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 251476, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ast_Latn-bem_Latn": { + 
"num_samples": 1012, + "number_of_characters": 297114, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ast_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 267655, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ast_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 274992, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ast_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271895, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ast_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 269414, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ast_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 297359, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ast_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267143, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ast_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 276840, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ast_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 248440, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ast_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 265405, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ast_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 265822, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ast_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 282319, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ast_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 282125, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ast_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 262762, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ast_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 275846, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ast_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 285089, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ast_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270809, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ast_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 276053, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ast_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 264974, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ast_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 276185, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ast_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 270923, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ast_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 266735, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ast_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 277526, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ast_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 279781, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ast_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 273209, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ast_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 271110, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ast_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 245409, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ast_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 258593, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ast_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 263811, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ast_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 296043, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ast_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 252363, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + 
"unique_sentence2": 1012 + }, + "ast_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274190, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ast_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 257006, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ast_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 268536, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ast_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 266282, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ast_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 269013, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ast_Latn-bjn_Latn": { + 
"num_samples": 1012, + "number_of_characters": 274545, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ast_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 270637, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ast_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 278681, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ast_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 275335, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ast_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 282651, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ast_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 277390, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ast_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 274467, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ast_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 285686, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ast_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 283427, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ast_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 284514, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ast_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 291540, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ast_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 266146, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ast_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 286257, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ast_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 273145, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ast_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 284934, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ast_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 273734, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ast_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 269600, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ast_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 223495, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ast_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 268550, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ast_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 276491, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ast_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 292501, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ast_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 202328, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ast_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 261020, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ast_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 283628, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ast_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 290496, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ast_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 274958, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ast_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 244344, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ast_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 275738, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ast_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 271623, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ast_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 273291, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ast_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 266423, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "ast_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 282890, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ast_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278459, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ast_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 260780, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ast_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 301109, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ast_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 252670, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ast_Latn-bul_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 274187, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ast_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 294001, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ast_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 193306, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ast_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 282357, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ast_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 299532, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ast_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 278984, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ast_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 260267, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ast_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 263536, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ast_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 290121, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ast_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 282144, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ast_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 280882, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ast_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 266917, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ast_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 274049, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ast_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 283870, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ast_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 264620, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ast_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269645, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ast_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 275486, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ast_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 252791, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ast_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 295048, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ast_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 258525, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ast_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 305760, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ast_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 278560, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ast_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 269806, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ast_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 276154, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ast_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 268833, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ast_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 278021, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ast_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 249926, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ast_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 263620, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ast_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 292781, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ast_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 264719, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ast_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 266692, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + 
"unique_sentence2": 1012 + }, + "ast_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 268501, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ast_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 274934, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ast_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282234, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ast_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 263207, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ast_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 250133, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ast_Latn-cjk_Latn": { + 
"num_samples": 1012, + "number_of_characters": 275624, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ast_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 298960, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ast_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 274403, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ast_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 278029, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ast_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 263266, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ast_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 324647, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ast_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 302532, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ast_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 176382, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ast_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 262299, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ast_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 264948, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ast_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 288844, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ast_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 262468, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ast_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 266211, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ast_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 287472, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ast_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 267443, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ast_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 262472, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ast_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 179611, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ast_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 270401, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ast_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 282506, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ast_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 262764, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ast_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 283908, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ast_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 278703, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ast_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 268073, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ast_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 227063, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ast_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 177058, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ast_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 264534, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ast_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 277620, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ast_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 269178, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ast_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 280293, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ast_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 279361, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ast_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 283535, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "ast_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 267218, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ast_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 305374, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ast_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 282564, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ast_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 274980, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ast_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 271446, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ast_Latn-guj_Gujr": { + 
"num_samples": 1012, + "number_of_characters": 262791, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ast_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271767, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ast_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 271441, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ast_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 287014, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ast_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 289087, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ast_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 299398, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ast_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 284725, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ast_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 253424, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ast_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 290172, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ast_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 257643, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ast_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 279320, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ast_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 273909, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ast_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 271202, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ast_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 283966, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ast_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 294541, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ast_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 280102, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ast_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 248152, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ast_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 276430, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ast_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 267163, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ast_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 282618, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ast_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 266262, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ast_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 255951, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ast_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 276625, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ast_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269585, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ast_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 269218, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ast_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 238417, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ast_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273430, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ast_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 270520, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ast_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 269788, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ast_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 288025, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ast_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 308908, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 134.74604743083003, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + 
"unique_sentence2": 1012 + }, + "crh_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 245659, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "crh_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 260425, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "crh_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 297396, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "crh_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 265096, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "crh_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 287250, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "crh_Latn-mag_Deva": { + 
"num_samples": 1012, + "number_of_characters": 260733, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "crh_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 273192, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "crh_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 292770, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "crh_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 269799, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "crh_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 275971, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "crh_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 279608, + "unique_pairs": 1012, 
+ "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "crh_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 291577, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "crh_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 260971, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "crh_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 287469, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "crh_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 263446, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "crh_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 256493, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 
132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "crh_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 291035, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "crh_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 262636, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "crh_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 247611, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "crh_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282029, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "crh_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 266004, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "crh_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 262765, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "crh_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 279900, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "crh_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 283374, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "crh_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 257865, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "crh_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 287385, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "crh_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 251998, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "crh_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 249151, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "crh_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 294789, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "crh_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 265330, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "crh_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 272667, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + 
"max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "crh_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269570, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "crh_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 267089, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "crh_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 295034, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "crh_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264818, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "crh_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 274515, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + 
"crh_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 246115, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "crh_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 263080, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "crh_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 263497, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "crh_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 279994, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "crh_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 279800, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "crh_Latn-min_Arab": { + "num_samples": 1012, + 
"number_of_characters": 260437, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "crh_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 273521, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "crh_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 282764, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "crh_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268484, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "crh_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 273728, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "crh_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 262649, + "unique_pairs": 1012, + 
"min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "crh_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 273860, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "crh_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 268598, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "crh_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 264410, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "crh_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 275201, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "crh_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 277456, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 
132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "crh_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 270884, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "crh_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 268785, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "crh_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 243084, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "crh_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 256268, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "crh_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 261486, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "crh_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 293718, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "crh_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 250038, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "crh_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271865, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "crh_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 254681, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "crh_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 266211, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "crh_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 263957, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "crh_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 266688, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "crh_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 272220, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "crh_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 268312, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "crh_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 276356, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + 
"max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "crh_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 273010, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "crh_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 280326, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "crh_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 275065, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "crh_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 272142, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "crh_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 283361, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + 
"crh_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 281102, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "crh_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 282189, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "crh_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 289215, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "crh_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 263821, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "crh_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 283932, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "crh_Latn-mni_Beng": { + "num_samples": 1012, + 
"number_of_characters": 270820, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "crh_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 282609, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "crh_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 271409, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "crh_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 267275, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "crh_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 221170, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "crh_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 266225, + "unique_pairs": 1012, + 
"min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "crh_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 274166, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "crh_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 290176, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "crh_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 200003, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "crh_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 258695, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "crh_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 281303, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 
132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "crh_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 288171, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "crh_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 272633, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "crh_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 242019, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "crh_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 273413, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "crh_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 269298, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "crh_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 270966, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "crh_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 264098, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "crh_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 280565, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "crh_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276134, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "crh_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 258455, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "crh_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 298784, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "crh_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 250345, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "crh_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271862, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "crh_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 291676, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "crh_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 190981, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + 
"max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "crh_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 280032, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "crh_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 297207, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "crh_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 276659, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "crh_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 257942, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "crh_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 261211, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + 
"crh_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 287796, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "crh_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 279819, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "crh_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 278557, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "crh_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 264592, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "crh_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 271724, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "crh_Latn-nld_Latn": { + "num_samples": 1012, + 
"number_of_characters": 281545, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "crh_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 262295, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "crh_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267320, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "crh_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 273161, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "crh_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 250466, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "crh_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 292723, + "unique_pairs": 1012, + 
"min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "crh_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 256200, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "crh_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 303435, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "crh_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 276235, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "crh_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 267481, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "crh_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 273829, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 
132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "crh_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 266508, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "crh_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 275696, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "crh_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 247601, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "crh_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 261295, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "crh_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 290456, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "crh_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 262394, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "crh_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 264367, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "crh_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 266176, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "crh_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 272609, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "crh_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279909, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + 
"average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "crh_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 260882, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "crh_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 247808, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "crh_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 273299, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "crh_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 296635, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "crh_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 272078, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + 
"max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "crh_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 275704, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "crh_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 260941, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "crh_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 322322, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "crh_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 300207, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "crh_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 174057, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + 
"crh_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 259974, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "crh_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 262623, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "crh_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 286519, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "crh_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 260143, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "crh_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 263886, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "crh_Latn-nso_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285147, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "crh_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 265118, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "crh_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 260147, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "crh_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 177286, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "crh_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 270401, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "crh_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 280181, + "unique_pairs": 1012, + 
"min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "crh_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 260439, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "crh_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 281583, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "crh_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 276378, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "crh_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 265748, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "crh_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 224738, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 
132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "crh_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 174733, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "crh_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 262209, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "crh_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 275295, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "crh_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 266853, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "crh_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 277968, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "crh_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 277036, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "crh_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 281210, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "crh_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 264893, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "crh_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 303049, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "crh_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 280239, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "crh_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 272655, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "crh_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 269121, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "crh_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 260466, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "crh_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269442, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "crh_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 269116, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + 
"max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "crh_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 284689, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "crh_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 286762, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "crh_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 297073, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "crh_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 282400, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "crh_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 251099, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + 
"crh_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 287847, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "crh_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 255318, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "crh_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 276995, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "crh_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 271584, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "crh_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 268877, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "crh_Latn-sna_Latn": { + "num_samples": 1012, + 
"number_of_characters": 281641, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "crh_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 292216, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "crh_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 277777, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "crh_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 245827, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "crh_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 274105, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "crh_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 264838, + "unique_pairs": 1012, + 
"min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "crh_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 280293, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "crh_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 263937, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "crh_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 253626, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "crh_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 274300, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "crh_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267260, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 
132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "crh_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 266893, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "crh_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 236092, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "crh_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271105, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "crh_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 268195, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "crh_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 267463, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "crh_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 285700, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "crh_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 306583, + "unique_pairs": 1012, + "min_sentence1_length": 27, + "average_sentence1_length": 132.4486166007905, + "max_sentence1_length": 382, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "glg_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257764, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "glg_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272530, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "glg_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309501, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + 
"average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "glg_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 277201, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "glg_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 299355, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "glg_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 272838, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "glg_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 285297, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "glg_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 304875, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + 
"max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "glg_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 281904, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "glg_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 288076, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "glg_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 291713, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "glg_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303682, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "glg_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 273076, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + 
"glg_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299574, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "glg_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275551, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "glg_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268598, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "glg_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 303140, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "glg_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274741, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "glg_Latn-acm_Arab": { + "num_samples": 1012, + 
"number_of_characters": 259716, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "glg_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294134, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "glg_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 278109, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "glg_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 274870, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "glg_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 292005, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "glg_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 295479, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "glg_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 269970, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "glg_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299490, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "glg_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 264103, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "glg_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 261256, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "glg_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 306894, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "glg_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 277435, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "glg_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 284772, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "glg_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281675, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "glg_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 279194, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "glg_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 307139, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "glg_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276923, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "glg_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286620, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "glg_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 258220, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "glg_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 275185, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "glg_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275602, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "glg_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 292099, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "glg_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 291905, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "glg_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 272542, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "glg_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285626, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "glg_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 294869, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "glg_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280589, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "glg_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 285833, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "glg_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274754, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "glg_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 285965, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "glg_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280703, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + 
"glg_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276515, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "glg_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 287306, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "glg_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289561, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "glg_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 282989, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "glg_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 280890, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "glg_Latn-ajp_Arab": { + "num_samples": 1012, + 
"number_of_characters": 255189, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "glg_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 268373, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "glg_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273591, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "glg_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 305823, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "glg_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 262143, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "glg_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283970, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "glg_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266786, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "glg_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 278316, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "glg_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 276062, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "glg_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278793, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "glg_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 284325, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "glg_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 280417, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "glg_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 288461, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "glg_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 285115, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "glg_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 292431, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "glg_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 287170, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "glg_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 284247, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "glg_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295466, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "glg_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 293207, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "glg_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 294294, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "glg_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 301320, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + 
"average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "glg_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 275926, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "glg_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 296037, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "glg_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 282925, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "glg_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 294714, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "glg_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 283514, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + 
"max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "glg_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 279380, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "glg_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 233275, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "glg_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 278330, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "glg_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 286271, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "glg_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 302281, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + 
"glg_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 212108, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "glg_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270800, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "glg_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293408, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "glg_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 300276, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "glg_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284738, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "glg_Latn-apc_Arab": { + "num_samples": 1012, + 
"number_of_characters": 254124, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "glg_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285518, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "glg_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281403, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "glg_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 283071, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "glg_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 276203, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "glg_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 292670, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "glg_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288239, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "glg_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270560, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "glg_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 310889, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "glg_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262450, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "glg_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283967, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "glg_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 303781, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "glg_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 203086, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "glg_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 292137, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "glg_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 309312, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "glg_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288764, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "glg_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 270047, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "glg_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 273316, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "glg_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 299901, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "glg_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 291924, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "glg_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290662, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "glg_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276697, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "glg_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283829, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "glg_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293650, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "glg_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 274400, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "glg_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279425, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + 
"max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "glg_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 285266, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "glg_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 262571, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "glg_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304828, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "glg_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 268305, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "glg_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315540, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + 
"glg_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 288340, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "glg_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279586, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "glg_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 285934, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "glg_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278613, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "glg_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287801, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "glg_Latn-ary_Arab": { + "num_samples": 1012, + 
"number_of_characters": 259706, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "glg_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273400, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "glg_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302561, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "glg_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 274499, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "glg_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276472, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "glg_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 278281, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "glg_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284714, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "glg_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292014, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "glg_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 272987, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "glg_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 259913, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "glg_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285404, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "glg_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308740, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "glg_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 284183, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "glg_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 287809, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "glg_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 273046, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "glg_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334427, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + 
"unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "glg_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 312312, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "glg_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 186162, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "glg_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 272079, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "glg_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274728, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "glg_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298624, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "glg_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 272248, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "glg_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 275991, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "glg_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 297252, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "glg_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 277223, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "glg_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 272252, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + 
"max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "glg_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 189391, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "glg_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282506, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "glg_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 280181, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "glg_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272544, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "glg_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293688, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + 
"glg_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288483, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "glg_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277853, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "glg_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236843, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "glg_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186838, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "glg_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 274314, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "glg_Latn-cym_Latn": { + "num_samples": 1012, + 
"number_of_characters": 287400, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "glg_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 278958, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "glg_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 290073, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "glg_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 289141, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "glg_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 293315, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "glg_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 276998, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "glg_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 315154, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "glg_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 292344, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "glg_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284760, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "glg_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 281226, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "glg_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272571, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "glg_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281547, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "glg_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 281221, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "glg_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 296794, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "glg_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 298867, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "glg_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 309178, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + 
"unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "glg_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294505, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "glg_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 263204, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "glg_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 299952, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "glg_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267423, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "glg_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 289100, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "glg_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283689, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "glg_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 280982, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "glg_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 293746, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "glg_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 304321, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "glg_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 289882, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + 
"max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "glg_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 257932, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "glg_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 286210, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "glg_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 276943, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "glg_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 292398, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "glg_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 276042, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + 
"glg_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265731, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "glg_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286405, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "glg_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279365, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "glg_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 278998, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "glg_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 248197, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "glg_Latn-khk_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 283210, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "glg_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 280300, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "glg_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 279568, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "glg_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297805, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "glg_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 318688, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.4100790513834, + "max_sentence1_length": 374, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kas_Deva-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 238022, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kas_Deva-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 252788, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kas_Deva-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 289759, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kas_Deva-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 257459, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kas_Deva-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 279613, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kas_Deva-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 253096, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kas_Deva-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 265555, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kas_Deva-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 285133, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kas_Deva-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 262162, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kas_Deva-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 268334, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kas_Deva-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 271971, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 
1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kas_Deva-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 283940, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kas_Deva-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 253334, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kas_Deva-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 279832, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kas_Deva-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 255809, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kas_Deva-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 248856, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kas_Deva-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 283398, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kas_Deva-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 254999, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kas_Deva-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 239974, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kas_Deva-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274392, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kas_Deva-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 258367, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kas_Deva-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 255128, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kas_Deva-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 272263, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kas_Deva-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 275737, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kas_Deva-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 250228, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kas_Deva-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 279748, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"kas_Deva-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 244361, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kas_Deva-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 241514, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kas_Deva-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 287152, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kas_Deva-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 257693, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kas_Deva-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 265030, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kas_Deva-kir_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 261933, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kas_Deva-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 259452, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kas_Deva-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 287397, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kas_Deva-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257181, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kas_Deva-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 266878, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kas_Deva-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 238478, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kas_Deva-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 255443, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kas_Deva-est_Latn": { + "num_samples": 1012, + "number_of_characters": 255860, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kas_Deva-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 272357, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kas_Deva-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 272163, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kas_Deva-min_Arab": { + "num_samples": 1012, + "number_of_characters": 252800, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kas_Deva-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 265884, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kas_Deva-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 275127, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kas_Deva-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260847, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kas_Deva-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 266091, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kas_Deva-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 255012, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kas_Deva-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 266223, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kas_Deva-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 260961, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kas_Deva-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 256773, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kas_Deva-min_Latn": { + "num_samples": 1012, + "number_of_characters": 267564, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kas_Deva-por_Latn": { + "num_samples": 1012, + "number_of_characters": 269819, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kas_Deva-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 263247, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kas_Deva-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 261148, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kas_Deva-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 235447, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kas_Deva-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 248631, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kas_Deva-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 253849, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kas_Deva-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 286081, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kas_Deva-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 242401, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kas_Deva-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264228, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kas_Deva-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 247044, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kas_Deva-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 258574, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + 
}, + "kas_Deva-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 256320, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kas_Deva-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 259051, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kas_Deva-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 264583, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kas_Deva-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 260675, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kas_Deva-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 268719, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kas_Deva-knc_Latn": { + "num_samples": 1012, + 
"number_of_characters": 265373, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kas_Deva-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 272689, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kas_Deva-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 267428, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kas_Deva-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 264505, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kas_Deva-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 275724, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kas_Deva-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273465, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kas_Deva-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 274552, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kas_Deva-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 281578, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kas_Deva-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 256184, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kas_Deva-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 276295, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kas_Deva-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 263183, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kas_Deva-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 274972, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kas_Deva-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 263772, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kas_Deva-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 259638, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kas_Deva-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 213533, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kas_Deva-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 258588, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kas_Deva-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 266529, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kas_Deva-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 282539, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kas_Deva-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 192366, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kas_Deva-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 251058, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kas_Deva-run_Latn": { + "num_samples": 1012, + "number_of_characters": 273666, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kas_Deva-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 280534, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kas_Deva-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 264996, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kas_Deva-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 234382, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kas_Deva-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 265776, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kas_Deva-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 261661, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + 
"max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kas_Deva-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 263329, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kas_Deva-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 256461, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kas_Deva-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 272928, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kas_Deva-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268497, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kas_Deva-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 250818, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, 
+ "kas_Deva-war_Latn": { + "num_samples": 1012, + "number_of_characters": 291147, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kas_Deva-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 242708, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kas_Deva-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264225, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kas_Deva-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 284039, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kas_Deva-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 183344, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kas_Deva-lij_Latn": { + "num_samples": 1012, + 
"number_of_characters": 272395, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kas_Deva-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 289570, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kas_Deva-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 269022, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kas_Deva-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 250305, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kas_Deva-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 253574, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kas_Deva-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 280159, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kas_Deva-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 272182, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kas_Deva-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 270920, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kas_Deva-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 256955, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kas_Deva-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 264087, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kas_Deva-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 273908, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kas_Deva-san_Deva": { + "num_samples": 1012, + "number_of_characters": 254658, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kas_Deva-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259683, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kas_Deva-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 265524, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kas_Deva-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 242829, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kas_Deva-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 285086, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kas_Deva-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 248563, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kas_Deva-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 295798, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kas_Deva-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 268598, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kas_Deva-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 259844, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kas_Deva-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 266192, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kas_Deva-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 258871, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kas_Deva-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 268059, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kas_Deva-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 239964, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kas_Deva-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 253658, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kas_Deva-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 282819, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + 
"max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kas_Deva-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 254757, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kas_Deva-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 256730, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kas_Deva-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 258539, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kas_Deva-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 264972, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kas_Deva-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272272, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, 
+ "kas_Deva-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 253245, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kas_Deva-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 240171, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kas_Deva-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 265662, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kas_Deva-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 288998, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kas_Deva-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 264441, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kas_Deva-lmo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 268067, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kas_Deva-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 253304, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kas_Deva-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 314685, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kas_Deva-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 292570, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kas_Deva-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 166420, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kas_Deva-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 252337, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kas_Deva-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 254986, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kas_Deva-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 278882, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kas_Deva-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 252506, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kas_Deva-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 256249, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kas_Deva-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 277510, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kas_Deva-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 257481, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kas_Deva-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 252510, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kas_Deva-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 169649, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kas_Deva-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 262764, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kas_Deva-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 260439, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + 
"unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kas_Deva-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 272544, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kas_Deva-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 273946, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kas_Deva-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 268741, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kas_Deva-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 258111, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kas_Deva-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 217101, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kas_Deva-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 167096, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kas_Deva-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 254572, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kas_Deva-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 267658, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kas_Deva-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 259216, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kas_Deva-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 270331, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kas_Deva-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 269399, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kas_Deva-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 273573, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kas_Deva-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 257256, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kas_Deva-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 295412, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kas_Deva-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 272602, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "kas_Deva-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 265018, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kas_Deva-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 261484, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kas_Deva-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 252829, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kas_Deva-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261805, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kas_Deva-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 261479, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kas_Deva-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277052, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kas_Deva-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 279125, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kas_Deva-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 289436, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kas_Deva-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 274763, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kas_Deva-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 243462, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kas_Deva-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 280210, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kas_Deva-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 247681, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kas_Deva-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 269358, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kas_Deva-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 263947, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kas_Deva-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 261240, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kas_Deva-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 274004, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kas_Deva-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 284579, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kas_Deva-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 270140, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kas_Deva-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 238190, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kas_Deva-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 266468, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kas_Deva-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 257201, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kas_Deva-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 272656, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kas_Deva-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 256300, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kas_Deva-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 245989, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kas_Deva-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 266663, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kas_Deva-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259623, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kas_Deva-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 259256, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kas_Deva-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 228455, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kas_Deva-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263468, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kas_Deva-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 260558, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kas_Deva-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 259826, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kas_Deva-som_Latn": { + "num_samples": 1012, + "number_of_characters": 278063, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kas_Deva-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 298946, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 124.90217391304348, + "max_sentence1_length": 452, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ltz_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 259166, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ltz_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 273932, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ltz_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 310903, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, 
+ "ltz_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 278603, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ltz_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 300757, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ltz_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 274240, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ltz_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 286699, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ltz_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 306277, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ltz_Latn-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283306, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ltz_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 289478, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ltz_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 293115, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ltz_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 305084, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ltz_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 274478, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ltz_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 300976, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ltz_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 276953, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ltz_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 270000, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ltz_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 304542, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ltz_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 276143, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ltz_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 261118, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ltz_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295536, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ltz_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 279511, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ltz_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 276272, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ltz_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 293407, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ltz_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 296881, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ltz_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 271372, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ltz_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 300892, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ltz_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 265505, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ltz_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 262658, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ltz_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 308296, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ltz_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 278837, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ltz_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 286174, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ltz_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283077, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ltz_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 280596, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ltz_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 308541, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + 
"max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ltz_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278325, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ltz_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 288022, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ltz_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 259622, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ltz_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 276587, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ltz_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 277004, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + 
}, + "ltz_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 293501, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ltz_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 293307, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ltz_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 273944, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ltz_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 287028, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ltz_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 296271, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ltz_Latn-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 281991, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ltz_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 287235, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ltz_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 276156, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ltz_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 287367, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ltz_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 282105, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ltz_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 277917, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ltz_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 288708, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ltz_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 290963, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ltz_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 284391, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ltz_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 282292, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ltz_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 256591, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ltz_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 269775, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ltz_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 274993, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ltz_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 307225, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ltz_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 263545, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ltz_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285372, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ltz_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 268188, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ltz_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 279718, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ltz_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 277464, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ltz_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 280195, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ltz_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 285727, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ltz_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 281819, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ltz_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 289863, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ltz_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 286517, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ltz_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 293833, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ltz_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 288572, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + 
"max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ltz_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 285649, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ltz_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 296868, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ltz_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 294609, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ltz_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 295696, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ltz_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 302722, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, 
+ "ltz_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 277328, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ltz_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 297439, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ltz_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 284327, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ltz_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 296116, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ltz_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 284916, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ltz_Latn-vec_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280782, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ltz_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 234677, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ltz_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 279732, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ltz_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 287673, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ltz_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 303683, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ltz_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 213510, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ltz_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 272202, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ltz_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 294810, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ltz_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 301678, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ltz_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 286140, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ltz_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 255526, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ltz_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 286920, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ltz_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 282805, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ltz_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 284473, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ltz_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 277605, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ltz_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 294072, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ltz_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289641, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ltz_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 271962, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ltz_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 312291, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ltz_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 263852, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ltz_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285369, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ltz_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 305183, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ltz_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 204488, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ltz_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 293539, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ltz_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 310714, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ltz_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 290166, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + 
"max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ltz_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 271449, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ltz_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 274718, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ltz_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 301303, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ltz_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 293326, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ltz_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 292064, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + 
}, + "ltz_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 278099, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ltz_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 285231, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ltz_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 295052, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ltz_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 275802, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ltz_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280827, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ltz_Latn-xho_Latn": { + "num_samples": 1012, + 
"number_of_characters": 286668, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ltz_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 263973, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ltz_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 306230, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ltz_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 269707, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ltz_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 316942, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ltz_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 289742, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ltz_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 280988, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ltz_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 287336, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ltz_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 280015, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ltz_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 289203, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ltz_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 261108, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ltz_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 274802, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ltz_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 303963, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ltz_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 275901, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ltz_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 277874, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ltz_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 279683, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ltz_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 286116, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ltz_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293416, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ltz_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 274389, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ltz_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 261315, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ltz_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 286806, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ltz_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 310142, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ltz_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 285585, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ltz_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 289211, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ltz_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 274448, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ltz_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 335829, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + 
"max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ltz_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 313714, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ltz_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 187564, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ltz_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 273481, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ltz_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 276130, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ltz_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 300026, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, 
+ "ltz_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 273650, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ltz_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 277393, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ltz_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 298654, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ltz_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 278625, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ltz_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 273654, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ltz_Latn-zho_Hans": { + "num_samples": 1012, + 
"number_of_characters": 190793, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ltz_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 283908, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ltz_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 281583, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ltz_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 293688, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ltz_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 273946, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ltz_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 289885, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ltz_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 279255, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ltz_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 238245, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ltz_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 188240, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ltz_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 275716, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ltz_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 288802, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ltz_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 280360, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ltz_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 291475, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ltz_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 290543, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ltz_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 294717, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ltz_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 278400, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ltz_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 316556, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ltz_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 293746, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ltz_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 286162, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ltz_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 282628, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ltz_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 273973, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ltz_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282949, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ltz_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 282623, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ltz_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 298196, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ltz_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 300269, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ltz_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 310580, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ltz_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 295907, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ltz_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 264606, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ltz_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 301354, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ltz_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 268825, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ltz_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 290502, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + 
}, + "ltz_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 285091, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ltz_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 282384, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ltz_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 295148, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ltz_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 305723, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ltz_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 291284, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ltz_Latn-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 259334, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ltz_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 287612, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ltz_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 278345, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ltz_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 293800, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ltz_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 277444, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ltz_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 267133, + "unique_pairs": 1012, + 
"min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ltz_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 287807, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ltz_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280767, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ltz_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 280400, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ltz_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 249599, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ltz_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284612, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 
145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ltz_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 281702, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ltz_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 280970, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ltz_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 299207, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ltz_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 320090, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 145.79545454545453, + "max_sentence1_length": 417, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "nus_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 253961, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "nus_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 268727, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "nus_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 305698, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nus_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 273398, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "nus_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 295552, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "nus_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 269035, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "nus_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 281494, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nus_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 301072, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "nus_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 278101, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "nus_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 284273, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nus_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 287910, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 
394, + "unique_sentence2": 1012 + }, + "nus_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 299879, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "nus_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 269273, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "nus_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 295771, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "nus_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 271748, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nus_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 264795, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nus_Latn-spa_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 299337, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nus_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 270938, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "nus_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 255913, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "nus_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290331, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nus_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 274306, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nus_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 271067, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nus_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 288202, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nus_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 291676, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nus_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 266167, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "nus_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 295687, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nus_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 260300, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "nus_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 257453, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "nus_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 303091, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "nus_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 273632, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nus_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 280969, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "nus_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277872, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + 
"max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nus_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 275391, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nus_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 303336, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "nus_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273120, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nus_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 282817, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "nus_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 254417, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + 
"min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "nus_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 271382, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nus_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 271799, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "nus_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 288296, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nus_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 288102, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "nus_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 268739, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 
124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nus_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 281823, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nus_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 291066, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nus_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276786, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nus_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 282030, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nus_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 270951, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + 
"unique_sentence2": 1012 + }, + "nus_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 282162, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "nus_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 276900, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nus_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 272712, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nus_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 283503, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nus_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 285758, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nus_Latn-sun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279186, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nus_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 277087, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nus_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 251386, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "nus_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 264570, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "nus_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 269788, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nus_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 302020, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "nus_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 258340, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "nus_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280167, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "nus_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 262983, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "nus_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 274513, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nus_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 272259, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nus_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 274990, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nus_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 280522, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nus_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 276614, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nus_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 284658, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nus_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 281312, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + 
"max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nus_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 288628, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "nus_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 283367, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nus_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 280444, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "nus_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 291663, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nus_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 289404, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + 
"min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nus_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 290491, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "nus_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 297517, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "nus_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 272123, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nus_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 292234, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nus_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 279122, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nus_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 290911, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nus_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 279711, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "nus_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 275577, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nus_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 229472, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "nus_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 274527, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "nus_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 282468, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nus_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 298478, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "nus_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 208305, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "nus_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 266997, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "nus_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 289605, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "nus_Latn-tam_Taml": { + 
"num_samples": 1012, + "number_of_characters": 296473, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "nus_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 280935, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "nus_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 250321, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "nus_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 281715, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nus_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 277600, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "nus_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 279268, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nus_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 272400, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nus_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 288867, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nus_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284436, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nus_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 266757, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nus_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 307086, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nus_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 258647, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nus_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280164, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nus_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 299978, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "nus_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 199283, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "nus_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 288334, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + 
"max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nus_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 305509, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nus_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 284961, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "nus_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 266244, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nus_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 269513, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nus_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 296098, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "nus_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 288121, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nus_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 286859, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nus_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 272894, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "nus_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 280026, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "nus_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 289847, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nus_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 270597, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "nus_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275622, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nus_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 281463, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "nus_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 258768, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nus_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 301025, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + 
"unique_sentence2": 1012 + }, + "nus_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 264502, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "nus_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 311737, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "nus_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 284537, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nus_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 275783, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nus_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 282131, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nus_Latn-tel_Telu": { + 
"num_samples": 1012, + "number_of_characters": 274810, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nus_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 283998, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nus_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 255903, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "nus_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 269597, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nus_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 298758, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "nus_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 270696, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "nus_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 272669, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "nus_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 274478, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "nus_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 280911, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nus_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288211, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "nus_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 269184, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nus_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 256110, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "nus_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 281601, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nus_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 304937, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "nus_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 280380, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nus_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 284006, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + 
"max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nus_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 269243, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nus_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 330624, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "nus_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 308509, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nus_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 182359, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "nus_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 268276, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nus_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 270925, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nus_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 294821, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "nus_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 268445, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "nus_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 272188, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nus_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 293449, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "nus_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 273420, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nus_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 268449, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nus_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 185588, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "nus_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 278703, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nus_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 276378, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + 
"unique_sentence2": 1012 + }, + "nus_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 288483, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nus_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 268741, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "nus_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 289885, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "nus_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 274050, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nus_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 233040, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "nus_Latn-zho_Hant": { + 
"num_samples": 1012, + "number_of_characters": 183035, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "nus_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 270511, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nus_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 283597, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nus_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 275155, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "nus_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 286270, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nus_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 285338, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "nus_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 289512, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "nus_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 273195, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nus_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 311351, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "nus_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 288541, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nus_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 280957, + "unique_pairs": 1012, + "min_sentence1_length": 28, + 
"average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "nus_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 277423, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nus_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 268768, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nus_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277744, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nus_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 277418, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "nus_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 292991, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + 
"max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nus_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 295064, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "nus_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 305375, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nus_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 290702, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "nus_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 259401, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "nus_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 296149, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nus_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 263620, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "nus_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 285297, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "nus_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 279886, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nus_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 277179, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nus_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 289943, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "nus_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 300518, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nus_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 286079, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "nus_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 254129, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "nus_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 282407, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "nus_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 273140, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + 
"unique_sentence2": 1012 + }, + "nus_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 288595, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nus_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 272239, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "nus_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 261928, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "nus_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 282602, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nus_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275562, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "nus_Latn-dyu_Latn": { + 
"num_samples": 1012, + "number_of_characters": 275195, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "nus_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 244394, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "nus_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279407, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nus_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 276497, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nus_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 275765, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nus_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 294002, + 
"unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "nus_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 314885, + "unique_pairs": 1012, + "min_sentence1_length": 28, + "average_sentence1_length": 140.65217391304347, + "max_sentence1_length": 490, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "slk_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 243331, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "slk_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 258097, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "slk_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 295068, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "slk_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 262768, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "slk_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 284922, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "slk_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 258405, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "slk_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 270864, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "slk_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 290442, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "slk_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 267471, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "slk_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 273643, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "slk_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 277280, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "slk_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 289249, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "slk_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 258643, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "slk_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 285141, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "slk_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 261118, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "slk_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 254165, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "slk_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 288707, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "slk_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 260308, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "slk_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 245283, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "slk_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279701, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "slk_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 263676, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "slk_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 260437, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "slk_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 277572, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "slk_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 281046, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "slk_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 255537, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "slk_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 285057, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "slk_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 249670, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "slk_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 246823, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "slk_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 292461, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "slk_Latn-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 263002, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "slk_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 270339, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "slk_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267242, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "slk_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 264761, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "slk_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 292706, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "slk_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262490, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "slk_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 272187, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "slk_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 243787, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "slk_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 260752, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "slk_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 261169, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "slk_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 277666, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "slk_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 277472, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "slk_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 258109, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "slk_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 271193, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "slk_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 280436, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "slk_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266156, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "slk_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 271400, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "slk_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 260321, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "slk_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 271532, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "slk_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 266270, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "slk_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 262082, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "slk_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 272873, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "slk_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 275128, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "slk_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 268556, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "slk_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 266457, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "slk_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 240756, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "slk_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 253940, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "slk_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 259158, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "slk_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 291390, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "slk_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 247710, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "slk_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269537, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + 
"unique_sentence2": 1012 + }, + "slk_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 252353, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "slk_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 263883, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "slk_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 261629, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "slk_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 264360, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "slk_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 269892, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "slk_Latn-fao_Latn": { + 
"num_samples": 1012, + "number_of_characters": 265984, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "slk_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 274028, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "slk_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 270682, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "slk_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 277998, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "slk_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 272737, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "slk_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 269814, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "slk_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 281033, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "slk_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 278774, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "slk_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 279861, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "slk_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 286887, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "slk_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 261493, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "slk_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 281604, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "slk_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 268492, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "slk_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 280281, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "slk_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 269081, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "slk_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 264947, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "slk_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 218842, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "slk_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 263897, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "slk_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 271838, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "slk_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 287848, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "slk_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 197675, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "slk_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 256367, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "slk_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 278975, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "slk_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 285843, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "slk_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 270305, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "slk_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 239691, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "slk_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 271085, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "slk_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 266970, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "slk_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 268638, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "slk_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 261770, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "slk_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 278237, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "slk_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273806, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "slk_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 256127, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "slk_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 296456, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "slk_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 248017, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "slk_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269534, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "slk_Latn-fra_Latn": { + 
"num_samples": 1012, + "number_of_characters": 289348, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "slk_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 188653, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "slk_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 277704, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "slk_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 294879, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "slk_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 274331, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "slk_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 255614, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "slk_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 258883, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "slk_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 285468, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "slk_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 277491, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "slk_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 276229, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "slk_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 262264, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "slk_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 269396, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "slk_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 279217, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "slk_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 259967, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "slk_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264992, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "slk_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 270833, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "slk_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 248138, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "slk_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 290395, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "slk_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 253872, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "slk_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 301107, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "slk_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 273907, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "slk_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 265153, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "slk_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 271501, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "slk_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 264180, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "slk_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 273368, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "slk_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 245273, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "slk_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 258967, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "slk_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 288128, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "slk_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 260066, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "slk_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 262039, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "slk_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 263848, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + 
"unique_sentence2": 1012 + }, + "slk_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 270281, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "slk_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277581, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "slk_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 258554, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "slk_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 245480, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "slk_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 270971, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "slk_Latn-gla_Latn": { + 
"num_samples": 1012, + "number_of_characters": 294307, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "slk_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 269750, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "slk_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 273376, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "slk_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 258613, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "slk_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 319994, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "slk_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 297879, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "slk_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 171729, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "slk_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 257646, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "slk_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 260295, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "slk_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 284191, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "slk_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 257815, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "slk_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 261558, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "slk_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 282819, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "slk_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 262790, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "slk_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 257819, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "slk_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 174958, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "slk_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 268073, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "slk_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 265748, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "slk_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 277853, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "slk_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 258111, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "slk_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 279255, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "slk_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 274050, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "slk_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 222410, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "slk_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 172405, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "slk_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 259881, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "slk_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 272967, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "slk_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 264525, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "slk_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 275640, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "slk_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 274708, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "slk_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 278882, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "slk_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 262565, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + 
"unique_sentence2": 1012 + }, + "slk_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 300721, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "slk_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 277911, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "slk_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 270327, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "slk_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 266793, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "slk_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 258138, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "slk_Latn-kaz_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 267114, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "slk_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 266788, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "slk_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 282361, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "slk_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 284434, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "slk_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 294745, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "slk_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 280072, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "slk_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 248771, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "slk_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 285519, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "slk_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 252990, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "slk_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 274667, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "slk_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 269256, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "slk_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 266549, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "slk_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 279313, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "slk_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 289888, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "slk_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 275449, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "slk_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 243499, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + 
"max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "slk_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 271777, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "slk_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 262510, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "slk_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 277965, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "slk_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 261609, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "slk_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 251298, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "slk_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 271972, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "slk_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264932, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "slk_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 264565, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "slk_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 233764, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "slk_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268777, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "slk_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 265867, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "slk_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 265135, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "slk_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 283372, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "slk_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 304255, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.14822134387353, + "max_sentence1_length": 370, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tir_Ethi-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 202321, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + 
"unique_sentence2": 1012 + }, + "tir_Ethi-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 217087, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tir_Ethi-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 254058, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tir_Ethi-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 221758, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tir_Ethi-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 243912, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tir_Ethi-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 217395, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tir_Ethi-pap_Latn": { + 
"num_samples": 1012, + "number_of_characters": 229854, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tir_Ethi-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 249432, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tir_Ethi-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 226461, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tir_Ethi-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 232633, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tir_Ethi-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 236270, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tir_Ethi-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 248239, + "unique_pairs": 1012, 
+ "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tir_Ethi-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 217633, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tir_Ethi-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 244131, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tir_Ethi-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 220108, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tir_Ethi-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 213155, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tir_Ethi-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 247697, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 
89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tir_Ethi-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 219298, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tir_Ethi-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 204273, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tir_Ethi-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 238691, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tir_Ethi-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 222666, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tir_Ethi-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 219427, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tir_Ethi-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 236562, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tir_Ethi-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 240036, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tir_Ethi-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 214527, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tir_Ethi-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 244047, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tir_Ethi-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 208660, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + 
"average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tir_Ethi-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 205813, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tir_Ethi-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 251451, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tir_Ethi-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 221992, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tir_Ethi-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 229329, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tir_Ethi-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 226232, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + 
"max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tir_Ethi-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 223751, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tir_Ethi-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 251696, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tir_Ethi-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 221480, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tir_Ethi-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 231177, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tir_Ethi-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 202777, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + 
"tir_Ethi-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 219742, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tir_Ethi-est_Latn": { + "num_samples": 1012, + "number_of_characters": 220159, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tir_Ethi-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 236656, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tir_Ethi-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 236462, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tir_Ethi-min_Arab": { + "num_samples": 1012, + "number_of_characters": 217099, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tir_Ethi-pol_Latn": { + "num_samples": 1012, + 
"number_of_characters": 230183, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tir_Ethi-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 239426, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tir_Ethi-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 225146, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tir_Ethi-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 230390, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tir_Ethi-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 219311, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tir_Ethi-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 230522, + "unique_pairs": 1012, + 
"min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tir_Ethi-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 225260, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tir_Ethi-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 221072, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tir_Ethi-min_Latn": { + "num_samples": 1012, + "number_of_characters": 231863, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tir_Ethi-por_Latn": { + "num_samples": 1012, + "number_of_characters": 234118, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tir_Ethi-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 227546, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 
89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tir_Ethi-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 225447, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tir_Ethi-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 199746, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tir_Ethi-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 212930, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tir_Ethi-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 218148, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tir_Ethi-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 250380, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tir_Ethi-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 206700, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tir_Ethi-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 228527, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tir_Ethi-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 211343, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tir_Ethi-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 222873, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tir_Ethi-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 220619, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tir_Ethi-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 223350, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tir_Ethi-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 228882, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tir_Ethi-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 224974, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tir_Ethi-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 233018, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tir_Ethi-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 229672, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + 
"max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tir_Ethi-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 236988, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tir_Ethi-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 231727, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tir_Ethi-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 228804, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tir_Ethi-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 240023, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tir_Ethi-als_Latn": { + "num_samples": 1012, + "number_of_characters": 237764, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + 
"tir_Ethi-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 238851, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tir_Ethi-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 245877, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tir_Ethi-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 220483, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tir_Ethi-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 240594, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tir_Ethi-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 227482, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tir_Ethi-ron_Latn": { + "num_samples": 1012, + 
"number_of_characters": 239271, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tir_Ethi-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 228071, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tir_Ethi-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 223937, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tir_Ethi-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 177832, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tir_Ethi-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 222887, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tir_Ethi-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 230828, + "unique_pairs": 1012, + 
"min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tir_Ethi-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 246838, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tir_Ethi-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 156665, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tir_Ethi-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 215357, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tir_Ethi-run_Latn": { + "num_samples": 1012, + "number_of_characters": 237965, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tir_Ethi-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 244833, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 
89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tir_Ethi-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 229295, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tir_Ethi-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 198681, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tir_Ethi-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 230075, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tir_Ethi-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 225960, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tir_Ethi-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 227628, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tir_Ethi-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 220760, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tir_Ethi-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 237227, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tir_Ethi-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 232796, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tir_Ethi-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 215117, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tir_Ethi-war_Latn": { + "num_samples": 1012, + "number_of_characters": 255446, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + 
"average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tir_Ethi-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 207007, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tir_Ethi-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 228524, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tir_Ethi-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 248338, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tir_Ethi-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 147643, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tir_Ethi-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 236694, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + 
"max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tir_Ethi-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 253869, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tir_Ethi-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 233321, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tir_Ethi-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 214604, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tir_Ethi-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 217873, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tir_Ethi-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 244458, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + 
"tir_Ethi-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 236481, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tir_Ethi-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 235219, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tir_Ethi-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 221254, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tir_Ethi-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 228386, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tir_Ethi-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 238207, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tir_Ethi-san_Deva": { + "num_samples": 1012, + 
"number_of_characters": 218957, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tir_Ethi-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 223982, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tir_Ethi-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 229823, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tir_Ethi-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 207128, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tir_Ethi-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 249385, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tir_Ethi-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 212862, + "unique_pairs": 1012, + 
"min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tir_Ethi-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 260097, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tir_Ethi-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 232897, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tir_Ethi-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 224143, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tir_Ethi-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 230491, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tir_Ethi-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 223170, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 
89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tir_Ethi-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 232358, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tir_Ethi-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 204263, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tir_Ethi-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 217957, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tir_Ethi-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 247118, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tir_Ethi-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 219056, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tir_Ethi-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 221029, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tir_Ethi-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 222838, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tir_Ethi-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 229271, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tir_Ethi-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 236571, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tir_Ethi-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 217544, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + 
"average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tir_Ethi-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 204470, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tir_Ethi-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 229961, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tir_Ethi-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 253297, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tir_Ethi-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 228740, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tir_Ethi-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 232366, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + 
"max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tir_Ethi-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 217603, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tir_Ethi-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 278984, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tir_Ethi-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 256869, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tir_Ethi-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 130719, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tir_Ethi-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 216636, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + 
"tir_Ethi-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 219285, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tir_Ethi-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 243181, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tir_Ethi-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 216805, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tir_Ethi-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 220548, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tir_Ethi-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 241809, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tir_Ethi-sin_Sinh": { + "num_samples": 1012, + 
"number_of_characters": 221780, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tir_Ethi-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 216809, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tir_Ethi-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 133948, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tir_Ethi-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 227063, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tir_Ethi-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 224738, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tir_Ethi-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 236843, + "unique_pairs": 1012, + 
"min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tir_Ethi-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 217101, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tir_Ethi-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 238245, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tir_Ethi-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 233040, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tir_Ethi-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 222410, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tir_Ethi-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 131395, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 
89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tir_Ethi-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 218871, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tir_Ethi-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 231957, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tir_Ethi-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 223515, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tir_Ethi-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 234630, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tir_Ethi-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 233698, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tir_Ethi-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 237872, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tir_Ethi-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 221555, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tir_Ethi-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 259711, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tir_Ethi-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 236901, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tir_Ethi-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 229317, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tir_Ethi-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 225783, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tir_Ethi-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 217128, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tir_Ethi-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 226104, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tir_Ethi-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 225778, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tir_Ethi-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 241351, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + 
"max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tir_Ethi-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 243424, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tir_Ethi-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 253735, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tir_Ethi-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 239062, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tir_Ethi-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 207761, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tir_Ethi-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 244509, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + 
"tir_Ethi-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 211980, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tir_Ethi-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 233657, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tir_Ethi-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 228246, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tir_Ethi-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 225539, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tir_Ethi-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 238303, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tir_Ethi-tso_Latn": { + "num_samples": 1012, + 
"number_of_characters": 248878, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tir_Ethi-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 234439, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tir_Ethi-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 202489, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tir_Ethi-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 230767, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tir_Ethi-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 221500, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tir_Ethi-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 236955, + "unique_pairs": 1012, + 
"min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tir_Ethi-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 220599, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tir_Ethi-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 210288, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tir_Ethi-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 230962, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tir_Ethi-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 223922, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tir_Ethi-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 223555, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 
89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tir_Ethi-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 192754, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tir_Ethi-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 227767, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tir_Ethi-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 224857, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tir_Ethi-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 224125, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tir_Ethi-som_Latn": { + "num_samples": 1012, + "number_of_characters": 242362, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tir_Ethi-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 263245, + "unique_pairs": 1012, + "min_sentence1_length": 29, + "average_sentence1_length": 89.62450592885375, + "max_sentence1_length": 246, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "zho_Hant-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 152316, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "zho_Hant-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 167082, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "zho_Hant-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 204053, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "zho_Hant-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 171753, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "zho_Hant-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 193907, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "zho_Hant-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 167390, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "zho_Hant-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 179849, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zho_Hant-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 199427, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "zho_Hant-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 176456, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + 
"max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "zho_Hant-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 182628, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zho_Hant-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 186265, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zho_Hant-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 198234, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "zho_Hant-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 167628, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "zho_Hant-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 194126, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + 
"zho_Hant-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 170103, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zho_Hant-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 163150, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "zho_Hant-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 197692, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zho_Hant-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 169293, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "zho_Hant-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 154268, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "zho_Hant-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 188686, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zho_Hant-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 172661, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zho_Hant-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 169422, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zho_Hant-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 186557, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "zho_Hant-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 190031, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "zho_Hant-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 164522, + "unique_pairs": 1012, + 
"min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "zho_Hant-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 194042, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "zho_Hant-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 158655, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "zho_Hant-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 155808, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "zho_Hant-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 201446, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "zho_Hant-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 171987, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 
40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zho_Hant-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 179324, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "zho_Hant-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 176227, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zho_Hant-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 173746, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zho_Hant-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 201691, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "zho_Hant-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 171475, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zho_Hant-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 181172, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "zho_Hant-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 152772, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "zho_Hant-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 169737, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "zho_Hant-est_Latn": { + "num_samples": 1012, + "number_of_characters": 170154, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "zho_Hant-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 186651, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zho_Hant-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 186457, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "zho_Hant-min_Arab": { + "num_samples": 1012, + "number_of_characters": 167094, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "zho_Hant-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 180178, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zho_Hant-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 189421, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zho_Hant-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 175141, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zho_Hant-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 180385, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zho_Hant-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 169306, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zho_Hant-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 180517, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "zho_Hant-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 175255, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zho_Hant-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 171067, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + 
}, + "zho_Hant-min_Latn": { + "num_samples": 1012, + "number_of_characters": 181858, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "zho_Hant-por_Latn": { + "num_samples": 1012, + "number_of_characters": 184113, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zho_Hant-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 177541, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "zho_Hant-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 175442, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "zho_Hant-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 149741, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "zho_Hant-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 162925, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "zho_Hant-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 168143, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zho_Hant-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 200375, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "zho_Hant-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 156695, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "zho_Hant-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 178522, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "zho_Hant-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 161338, + "unique_pairs": 1012, + 
"min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "zho_Hant-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 172868, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "zho_Hant-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 170614, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "zho_Hant-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 173345, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zho_Hant-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 178877, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zho_Hant-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 174969, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 
40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "zho_Hant-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 183013, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zho_Hant-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 179667, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "zho_Hant-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 186983, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "zho_Hant-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 181722, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "zho_Hant-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 178799, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "zho_Hant-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 190018, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zho_Hant-als_Latn": { + "num_samples": 1012, + "number_of_characters": 187759, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "zho_Hant-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 188846, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "zho_Hant-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 195872, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "zho_Hant-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 170478, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "zho_Hant-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 190589, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "zho_Hant-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 177477, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zho_Hant-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 189266, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zho_Hant-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 178066, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "zho_Hant-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 173932, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zho_Hant-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 127827, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "zho_Hant-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 172882, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "zho_Hant-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 180823, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "zho_Hant-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 196833, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "zho_Hant-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 106660, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"zho_Hant-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 165352, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "zho_Hant-run_Latn": { + "num_samples": 1012, + "number_of_characters": 187960, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "zho_Hant-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 194828, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "zho_Hant-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 179290, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "zho_Hant-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 148676, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "zho_Hant-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 180070, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zho_Hant-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 175955, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "zho_Hant-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 177623, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zho_Hant-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 170755, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zho_Hant-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 187222, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zho_Hant-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 182791, + "unique_pairs": 1012, + 
"min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zho_Hant-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 165112, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "zho_Hant-war_Latn": { + "num_samples": 1012, + "number_of_characters": 205441, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "zho_Hant-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 157002, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "zho_Hant-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 178519, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zho_Hant-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 198333, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 
40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "zho_Hant-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 97638, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "zho_Hant-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 186689, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zho_Hant-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 203864, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zho_Hant-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 183316, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "zho_Hant-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 164599, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "zho_Hant-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 167868, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "zho_Hant-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 194453, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "zho_Hant-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 186476, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zho_Hant-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 185214, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "zho_Hant-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 171249, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "zho_Hant-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 178381, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "zho_Hant-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 188202, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zho_Hant-san_Deva": { + "num_samples": 1012, + "number_of_characters": 168952, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "zho_Hant-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 173977, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "zho_Hant-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 179818, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "zho_Hant-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 157123, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "zho_Hant-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 199380, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "zho_Hant-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 162857, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "zho_Hant-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 210092, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "zho_Hant-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 182892, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, 
+ "zho_Hant-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 174138, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zho_Hant-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 180486, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "zho_Hant-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 173165, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "zho_Hant-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 182353, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zho_Hant-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 154258, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "zho_Hant-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 167952, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zho_Hant-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 197113, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "zho_Hant-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 169051, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "zho_Hant-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 171024, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "zho_Hant-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 172833, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "zho_Hant-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 179266, + "unique_pairs": 1012, + 
"min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zho_Hant-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 186566, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "zho_Hant-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 167539, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zho_Hant-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 154465, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "zho_Hant-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 179956, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "zho_Hant-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 203292, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 
40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "zho_Hant-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 178735, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zho_Hant-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 182361, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "zho_Hant-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 167598, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zho_Hant-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 228979, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "zho_Hant-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 206864, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + 
"unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "zho_Hant-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 80714, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "zho_Hant-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 166631, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "zho_Hant-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 169280, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zho_Hant-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 193176, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "zho_Hant-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 166800, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "zho_Hant-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 170543, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "zho_Hant-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 191804, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "zho_Hant-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 171775, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zho_Hant-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 166804, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "zho_Hant-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 83943, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + 
"max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "zho_Hant-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 177058, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zho_Hant-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 174733, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "zho_Hant-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 186838, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zho_Hant-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 167096, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "zho_Hant-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 188240, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, 
+ "zho_Hant-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 183035, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "zho_Hant-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 172405, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zho_Hant-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 131395, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "zho_Hant-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 168866, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "zho_Hant-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 181952, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "zho_Hant-grn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 173510, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "zho_Hant-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 184625, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zho_Hant-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 183693, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "zho_Hant-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 187867, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "zho_Hant-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 171550, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "zho_Hant-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 209706, + "unique_pairs": 1012, + 
"min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "zho_Hant-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 186896, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zho_Hant-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 179312, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "zho_Hant-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 175778, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zho_Hant-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 167123, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zho_Hant-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 176099, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 
40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zho_Hant-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 175773, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "zho_Hant-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 191346, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "zho_Hant-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 193419, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "zho_Hant-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 203730, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "zho_Hant-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 189057, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "zho_Hant-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 157756, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "zho_Hant-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 194504, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "zho_Hant-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 161975, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "zho_Hant-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 183652, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "zho_Hant-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 178241, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zho_Hant-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 175534, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "zho_Hant-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 188298, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "zho_Hant-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 198873, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zho_Hant-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 184434, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "zho_Hant-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 152484, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "zho_Hant-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 180762, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "zho_Hant-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 171495, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "zho_Hant-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 186950, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "zho_Hant-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 170594, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "zho_Hant-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 160283, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"zho_Hant-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 180957, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zho_Hant-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 173917, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "zho_Hant-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 173550, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "zho_Hant-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 142749, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "zho_Hant-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 177762, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zho_Hant-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 174852, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "zho_Hant-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 174120, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "zho_Hant-som_Latn": { + "num_samples": 1012, + "number_of_characters": 192357, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "zho_Hant-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 213240, + "unique_pairs": 1012, + "min_sentence1_length": 12, + "average_sentence1_length": 40.212450592885375, + "max_sentence1_length": 152, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "awa_Deva-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 239792, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "awa_Deva-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 254558, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "awa_Deva-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 291529, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "awa_Deva-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 259229, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "awa_Deva-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 281383, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "awa_Deva-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 254866, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "awa_Deva-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 267325, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "awa_Deva-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 286903, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "awa_Deva-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 263932, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "awa_Deva-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 270104, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "awa_Deva-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 273741, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "awa_Deva-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 285710, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 
1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "awa_Deva-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 255104, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "awa_Deva-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 281602, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "awa_Deva-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 257579, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "awa_Deva-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 250626, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "awa_Deva-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 285168, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "awa_Deva-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 256769, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "awa_Deva-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 241744, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "awa_Deva-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276162, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "awa_Deva-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 260137, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "awa_Deva-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 256898, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "awa_Deva-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 274033, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "awa_Deva-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 277507, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "awa_Deva-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 251998, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "awa_Deva-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 281518, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "awa_Deva-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 246131, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, 
+ "awa_Deva-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 243284, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "awa_Deva-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 288922, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "awa_Deva-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 259463, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "awa_Deva-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 266800, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "awa_Deva-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263703, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "awa_Deva-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 261222, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "awa_Deva-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 289167, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "awa_Deva-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258951, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "awa_Deva-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 268648, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "awa_Deva-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 240248, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "awa_Deva-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 257213, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "awa_Deva-est_Latn": { + "num_samples": 1012, + "number_of_characters": 257630, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "awa_Deva-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 274127, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "awa_Deva-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 273933, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "awa_Deva-min_Arab": { + "num_samples": 1012, + "number_of_characters": 254570, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "awa_Deva-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 267654, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "awa_Deva-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 276897, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "awa_Deva-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262617, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "awa_Deva-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 267861, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "awa_Deva-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 256782, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "awa_Deva-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 267993, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "awa_Deva-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 262731, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "awa_Deva-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 258543, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "awa_Deva-min_Latn": { + "num_samples": 1012, + "number_of_characters": 269334, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "awa_Deva-por_Latn": { + "num_samples": 1012, + "number_of_characters": 271589, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "awa_Deva-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 265017, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "awa_Deva-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 262918, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "awa_Deva-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 237217, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "awa_Deva-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 250401, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "awa_Deva-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 255619, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "awa_Deva-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 287851, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "awa_Deva-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 244171, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "awa_Deva-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265998, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "awa_Deva-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 248814, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "awa_Deva-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 260344, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "awa_Deva-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 258090, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "awa_Deva-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 260821, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "awa_Deva-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 266353, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "awa_Deva-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 262445, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "awa_Deva-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 270489, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "awa_Deva-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 267143, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "awa_Deva-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 274459, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "awa_Deva-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 269198, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "awa_Deva-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 266275, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "awa_Deva-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 277494, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "awa_Deva-als_Latn": { + "num_samples": 1012, + "number_of_characters": 275235, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "awa_Deva-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 276322, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "awa_Deva-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 283348, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "awa_Deva-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 257954, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "awa_Deva-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 278065, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "awa_Deva-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 264953, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "awa_Deva-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 276742, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "awa_Deva-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 265542, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "awa_Deva-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 261408, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "awa_Deva-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 215303, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "awa_Deva-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 260358, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "awa_Deva-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 268299, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "awa_Deva-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 284309, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "awa_Deva-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 194136, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "awa_Deva-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 252828, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "awa_Deva-run_Latn": { + "num_samples": 1012, + "number_of_characters": 275436, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "awa_Deva-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 282304, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "awa_Deva-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 266766, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "awa_Deva-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 236152, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "awa_Deva-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 267546, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "awa_Deva-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 263431, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "awa_Deva-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 265099, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "awa_Deva-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 258231, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "awa_Deva-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 274698, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "awa_Deva-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270267, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "awa_Deva-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 252588, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "awa_Deva-war_Latn": { + "num_samples": 1012, + "number_of_characters": 292917, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "awa_Deva-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 244478, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "awa_Deva-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265995, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "awa_Deva-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 285809, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "awa_Deva-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 185114, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "awa_Deva-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 274165, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "awa_Deva-mya_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 291340, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "awa_Deva-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 270792, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "awa_Deva-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 252075, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "awa_Deva-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 255344, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "awa_Deva-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 281929, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "awa_Deva-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 273952, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "awa_Deva-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 272690, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "awa_Deva-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 258725, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "awa_Deva-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 265857, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "awa_Deva-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 275678, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "awa_Deva-san_Deva": { + "num_samples": 1012, + "number_of_characters": 256428, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "awa_Deva-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261453, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "awa_Deva-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 267294, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "awa_Deva-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 244599, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "awa_Deva-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 286856, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "awa_Deva-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 250333, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "awa_Deva-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 297568, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "awa_Deva-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 270368, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "awa_Deva-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 261614, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "awa_Deva-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 267962, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "awa_Deva-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 260641, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "awa_Deva-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 269829, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "awa_Deva-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 241734, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "awa_Deva-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 255428, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "awa_Deva-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 284589, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "awa_Deva-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 256527, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "awa_Deva-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 258500, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "awa_Deva-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 260309, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "awa_Deva-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 266742, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "awa_Deva-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274042, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "awa_Deva-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 255015, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "awa_Deva-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 241941, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "awa_Deva-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 267432, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "awa_Deva-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 290768, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "awa_Deva-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 266211, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "awa_Deva-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 269837, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "awa_Deva-npi_Deva": { + "num_samples": 1012, + 
"number_of_characters": 255074, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "awa_Deva-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 316455, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "awa_Deva-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 294340, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "awa_Deva-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 168190, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "awa_Deva-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 254107, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "awa_Deva-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 256756, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "awa_Deva-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 280652, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "awa_Deva-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 254276, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "awa_Deva-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 258019, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "awa_Deva-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 279280, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "awa_Deva-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 259251, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "awa_Deva-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 254280, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "awa_Deva-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 171419, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "awa_Deva-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 264534, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "awa_Deva-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 262209, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "awa_Deva-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 274314, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "awa_Deva-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 254572, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "awa_Deva-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 275716, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "awa_Deva-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 270511, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "awa_Deva-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 259881, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "awa_Deva-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 218871, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "awa_Deva-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 168866, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "awa_Deva-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 269428, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "awa_Deva-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 260986, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "awa_Deva-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 272101, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "awa_Deva-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 271169, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + 
"max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "awa_Deva-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 275343, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "awa_Deva-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 259026, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "awa_Deva-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 297182, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "awa_Deva-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 274372, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "awa_Deva-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 266788, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, 
+ "awa_Deva-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 263254, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "awa_Deva-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 254599, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "awa_Deva-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263575, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "awa_Deva-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 263249, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "awa_Deva-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 278822, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "awa_Deva-smo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280895, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "awa_Deva-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 291206, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "awa_Deva-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 276533, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "awa_Deva-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 245232, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "awa_Deva-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 281980, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "awa_Deva-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 249451, + "unique_pairs": 1012, + 
"min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "awa_Deva-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 271128, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "awa_Deva-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 265717, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "awa_Deva-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 263010, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "awa_Deva-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 275774, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "awa_Deva-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 286349, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 
126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "awa_Deva-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 271910, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "awa_Deva-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 239960, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "awa_Deva-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 268238, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "awa_Deva-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 258971, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "awa_Deva-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 274426, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "awa_Deva-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 258070, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "awa_Deva-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 247759, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "awa_Deva-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 268433, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "awa_Deva-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261393, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "awa_Deva-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 261026, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "awa_Deva-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 230225, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "awa_Deva-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265238, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "awa_Deva-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 262328, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "awa_Deva-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 261596, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "awa_Deva-som_Latn": { + "num_samples": 1012, + "number_of_characters": 279833, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + 
"max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "awa_Deva-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 300716, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.65118577075098, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "cym_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 252878, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "cym_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 267644, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "cym_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 304615, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "cym_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 272315, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + 
"cym_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 294469, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "cym_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 267952, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "cym_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 280411, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cym_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 299989, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "cym_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 277018, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "cym_Latn-ace_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283190, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "cym_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 286827, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "cym_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 298796, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "cym_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 268190, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "cym_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 294688, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "cym_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 270665, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cym_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 263712, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "cym_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 298254, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cym_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 269855, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "cym_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 254830, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "cym_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289248, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "cym_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 273223, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cym_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 269984, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cym_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 287119, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "cym_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 290593, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "cym_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 265084, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "cym_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 294604, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "cym_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 259217, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "cym_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 256370, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "cym_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 302008, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "cym_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 272549, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "cym_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 279886, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "cym_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276789, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "cym_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 274308, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "cym_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 302253, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "cym_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272037, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cym_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 281734, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "cym_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 253334, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "cym_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 270299, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "cym_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 270716, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "cym_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 287213, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + 
"cym_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 287019, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "cym_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 267656, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "cym_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 280740, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "cym_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 289983, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "cym_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275703, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "cym_Latn-afr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280947, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "cym_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 269868, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "cym_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 281079, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "cym_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 275817, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "cym_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 271629, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "cym_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 282420, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "cym_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 284675, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "cym_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 278103, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "cym_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 276004, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "cym_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 250303, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "cym_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 263487, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "cym_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 268705, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cym_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 300937, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "cym_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 257257, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "cym_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279084, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "cym_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 261900, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "cym_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 273430, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "cym_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 271176, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "cym_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 273907, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "cym_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 279439, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cym_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 275531, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "cym_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 283575, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "cym_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 280229, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "cym_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 287545, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "cym_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 282284, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "cym_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 279361, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "cym_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 290580, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "cym_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 288321, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "cym_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 289408, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "cym_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 296434, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "cym_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 271040, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + 
"cym_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 291151, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "cym_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 278039, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "cym_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 289828, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cym_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 278628, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "cym_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 274494, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "cym_Latn-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 228389, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "cym_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 273444, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "cym_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 281385, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "cym_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 297395, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "cym_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 207222, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "cym_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 265914, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "cym_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 288522, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "cym_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 295390, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "cym_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 279852, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "cym_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 249238, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "cym_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 280632, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "cym_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 276517, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "cym_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 278185, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cym_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 271317, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cym_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 287784, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "cym_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283353, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "cym_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 265674, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "cym_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 306003, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "cym_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 257564, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "cym_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279081, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "cym_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 298895, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + 
"average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "cym_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 198200, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "cym_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 287251, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "cym_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 304426, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "cym_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 283878, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "cym_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 265161, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "cym_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 268430, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "cym_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 295015, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "cym_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 287038, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cym_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 285776, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "cym_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 271811, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + 
"cym_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 278943, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "cym_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 288764, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "cym_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 269514, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "cym_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274539, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "cym_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 280380, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "cym_Latn-ars_Arab": { + "num_samples": 1012, + 
"number_of_characters": 257685, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "cym_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 299942, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "cym_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 263419, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "cym_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 310654, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "cym_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 283454, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "cym_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 274700, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "cym_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 281048, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "cym_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 273727, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "cym_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 282915, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "cym_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 254820, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "cym_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 268514, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cym_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 297675, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "cym_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 269613, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "cym_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 271586, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "cym_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 273395, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "cym_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 279828, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "cym_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287128, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "cym_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 268101, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "cym_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 255027, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "cym_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 280518, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "cym_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 303854, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "cym_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 279297, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cym_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 282923, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "cym_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 268160, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cym_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 329541, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "cym_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 307426, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + 
"max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "cym_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 181276, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "cym_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 267193, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "cym_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 269842, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "cym_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 293738, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "cym_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 267362, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + 
"cym_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 271105, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "cym_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 292366, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "cym_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 272337, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "cym_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 267366, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "cym_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 184505, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "cym_Latn-ast_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277620, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cym_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 275295, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "cym_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 287400, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "cym_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 267658, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "cym_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 288802, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "cym_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 283597, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "cym_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 272967, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "cym_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 231957, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "cym_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 181952, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "cym_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 269428, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "cym_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 274072, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "cym_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 285187, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "cym_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 284255, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "cym_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 288429, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "cym_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 272112, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "cym_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 310268, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "cym_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 287458, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "cym_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 279874, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "cym_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 276340, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "cym_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 267685, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "cym_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276661, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "cym_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 276335, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "cym_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 291908, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "cym_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 293981, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "cym_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 304292, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "cym_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 289619, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + 
"max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "cym_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 258318, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "cym_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 295066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "cym_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 262537, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "cym_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 284214, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "cym_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 278803, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + 
"cym_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 276096, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "cym_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 288860, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "cym_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 299435, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "cym_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 284996, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "cym_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 253046, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "cym_Latn-hau_Latn": { + "num_samples": 1012, + 
"number_of_characters": 281324, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "cym_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 272057, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "cym_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 287512, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "cym_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 271156, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "cym_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 260845, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "cym_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 281519, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "cym_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274479, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "cym_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 274112, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "cym_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 243311, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "cym_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278324, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "cym_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 275414, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "cym_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 274682, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "cym_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 292919, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "cym_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 313802, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 139.5820158102767, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "grn_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 244436, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "grn_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 259202, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + 
"unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "grn_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 296173, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "grn_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 263873, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "grn_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 286027, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "grn_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 259510, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "grn_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 271969, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + 
"average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "grn_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 291547, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "grn_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 268576, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "grn_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 274748, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "grn_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 278385, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "grn_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 290354, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, 
+ "unique_sentence2": 1012 + }, + "grn_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 259748, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "grn_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 286246, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "grn_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 262223, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "grn_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 255270, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "grn_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 289812, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "grn_Latn-twi_Latn": { + 
"num_samples": 1012, + "number_of_characters": 261413, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "grn_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 246388, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "grn_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280806, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "grn_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 264781, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "grn_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 261542, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "grn_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 278677, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "grn_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 282151, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "grn_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 256642, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "grn_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 286162, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "grn_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 250775, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "grn_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 247928, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "grn_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 293566, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "grn_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 264107, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "grn_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 271444, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "grn_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268347, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "grn_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 265866, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + 
"max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "grn_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 293811, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "grn_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263595, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "grn_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 273292, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "grn_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 244892, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "grn_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 261857, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "grn_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 262274, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "grn_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 278771, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "grn_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 278577, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "grn_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 259214, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "grn_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 272298, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 
137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "grn_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 281541, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "grn_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267261, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "grn_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 272505, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "grn_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 261426, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "grn_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 272637, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "grn_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 267375, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "grn_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 263187, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "grn_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 273978, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "grn_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 276233, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "grn_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 269661, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "grn_Latn-umb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 267562, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "grn_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 241861, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "grn_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 255045, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "grn_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 260263, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "grn_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 292495, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "grn_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 248815, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "grn_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270642, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "grn_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 253458, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "grn_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 264988, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "grn_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 262734, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "grn_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 265465, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "grn_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 270997, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "grn_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 267089, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "grn_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 275133, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "grn_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 271787, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "grn_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 279103, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + 
"max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "grn_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 273842, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "grn_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 270919, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "grn_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 282138, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "grn_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 279879, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "grn_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 280966, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "grn_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 287992, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "grn_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 262598, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "grn_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 282709, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "grn_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 269597, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "grn_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 281386, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 
146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "grn_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 270186, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "grn_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 266052, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "grn_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 219947, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "grn_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 265002, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "grn_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 272943, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + 
"unique_sentence2": 1012 + }, + "grn_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 288953, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "grn_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 198780, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "grn_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 257472, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "grn_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 280080, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "grn_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 286948, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "grn_Latn-vie_Latn": { + 
"num_samples": 1012, + "number_of_characters": 271410, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "grn_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 240796, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "grn_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 272190, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "grn_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 268075, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "grn_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 269743, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "grn_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 262875, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "grn_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 279342, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "grn_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274911, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "grn_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 257232, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "grn_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 297561, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "grn_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 249122, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "grn_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270639, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "grn_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 290453, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "grn_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 189758, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "grn_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 278809, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "grn_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 295984, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + 
"max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "grn_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 275436, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "grn_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 256719, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "grn_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 259988, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "grn_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 286573, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "grn_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 278596, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + 
"min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "grn_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 277334, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "grn_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 263369, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "grn_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 270501, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "grn_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 280322, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "grn_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 261072, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 
126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "grn_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266097, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "grn_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 271938, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "grn_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 249243, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "grn_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 291500, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "grn_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 254977, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + 
"unique_sentence2": 1012 + }, + "grn_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 302212, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "grn_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 275012, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "grn_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 266258, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "grn_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 272606, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "grn_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 265285, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "grn_Latn-ydd_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 274473, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "grn_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 246378, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "grn_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 260072, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "grn_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 289233, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "grn_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 261171, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "grn_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 263144, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "grn_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 264953, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "grn_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 271386, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "grn_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278686, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "grn_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 259659, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "grn_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 246585, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "grn_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 272076, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "grn_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 295412, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "grn_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 270855, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "grn_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 274481, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "grn_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 259718, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + 
"max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "grn_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 321099, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "grn_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 298984, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "grn_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 172834, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "grn_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 258751, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "grn_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 261400, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "grn_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 285296, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "grn_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 258920, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "grn_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 262663, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "grn_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 283924, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "grn_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 263895, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 
129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "grn_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 258924, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "grn_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 176063, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "grn_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 269178, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "grn_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 266853, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "grn_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 278958, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "grn_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 259216, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "grn_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 280360, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "grn_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 275155, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "grn_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 264525, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "grn_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 223515, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "grn_Latn-zho_Hant": { + 
"num_samples": 1012, + "number_of_characters": 173510, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "grn_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 260986, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "grn_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 274072, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "grn_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 276745, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "grn_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 275813, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "grn_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 279987, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "grn_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 263670, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "grn_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 301826, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "grn_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 279016, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "grn_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 271432, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "grn_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 267898, + "unique_pairs": 1012, + "min_sentence1_length": 42, + 
"average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "grn_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 259243, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "grn_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268219, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "grn_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 267893, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "grn_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 283466, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "grn_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 285539, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + 
"max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "grn_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 295850, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "grn_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 281177, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "grn_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 249876, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "grn_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 286624, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "grn_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 254095, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "grn_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 275772, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "grn_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 270361, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "grn_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 267654, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "grn_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 280418, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "grn_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 290993, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 
156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "grn_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 276554, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "grn_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 244604, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "grn_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 272882, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "grn_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 263615, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "grn_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 279070, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + 
"unique_sentence2": 1012 + }, + "grn_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 262714, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "grn_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 252403, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "grn_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 273077, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "grn_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266037, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "grn_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 265670, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "grn_Latn-heb_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 234869, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "grn_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269882, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "grn_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 266972, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "grn_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 266240, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "grn_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 284477, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "grn_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 305360, + 
"unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 131.2401185770751, + "max_sentence1_length": 360, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kat_Geor-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 255551, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kat_Geor-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 270317, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kat_Geor-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 307288, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kat_Geor-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 274988, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kat_Geor-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 297142, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kat_Geor-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 270625, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kat_Geor-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 283084, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kat_Geor-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 302662, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kat_Geor-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 279691, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kat_Geor-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 285863, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kat_Geor-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 289500, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kat_Geor-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 301469, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kat_Geor-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 270863, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kat_Geor-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 297361, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kat_Geor-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 273338, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kat_Geor-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 266385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kat_Geor-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 300927, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kat_Geor-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 272528, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kat_Geor-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 257503, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kat_Geor-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291921, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kat_Geor-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 275896, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kat_Geor-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 272657, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kat_Geor-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 289792, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kat_Geor-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 293266, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kat_Geor-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 267757, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + 
"unique_sentence2": 1012 + }, + "kat_Geor-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 297277, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kat_Geor-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 261890, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kat_Geor-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 259043, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kat_Geor-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 304681, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kat_Geor-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 275222, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kat_Geor-hun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 282559, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kat_Geor-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279462, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kat_Geor-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 276981, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kat_Geor-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 304926, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kat_Geor-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274710, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kat_Geor-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 284407, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kat_Geor-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 256007, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kat_Geor-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 272972, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kat_Geor-est_Latn": { + "num_samples": 1012, + "number_of_characters": 273389, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kat_Geor-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 289886, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kat_Geor-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 289692, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kat_Geor-min_Arab": { + "num_samples": 1012, + "number_of_characters": 270329, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kat_Geor-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 283413, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kat_Geor-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 292656, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kat_Geor-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278376, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kat_Geor-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 283620, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kat_Geor-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 272541, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kat_Geor-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 283752, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kat_Geor-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 278490, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kat_Geor-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 274302, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kat_Geor-min_Latn": { + "num_samples": 1012, + "number_of_characters": 285093, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kat_Geor-por_Latn": { + "num_samples": 1012, + "number_of_characters": 287348, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kat_Geor-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 280776, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kat_Geor-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 278677, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kat_Geor-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 252976, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kat_Geor-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 266160, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kat_Geor-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 271378, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kat_Geor-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 303610, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kat_Geor-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 259930, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kat_Geor-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281757, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kat_Geor-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 264573, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + 
"unique_sentence2": 1012 + }, + "kat_Geor-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 276103, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kat_Geor-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 273849, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kat_Geor-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 276580, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kat_Geor-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 282112, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kat_Geor-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 278204, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kat_Geor-ind_Latn": { + 
"num_samples": 1012, + "number_of_characters": 286248, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kat_Geor-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 282902, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kat_Geor-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 290218, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kat_Geor-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 284957, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kat_Geor-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 282034, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kat_Geor-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 293253, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kat_Geor-als_Latn": { + "num_samples": 1012, + "number_of_characters": 290994, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kat_Geor-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 292081, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kat_Geor-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 299107, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kat_Geor-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 273713, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kat_Geor-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 293824, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kat_Geor-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 280712, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kat_Geor-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 292501, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kat_Geor-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 281301, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kat_Geor-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 277167, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kat_Geor-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 231062, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kat_Geor-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 276117, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kat_Geor-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 284058, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kat_Geor-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 300068, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kat_Geor-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 209895, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kat_Geor-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 268587, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kat_Geor-run_Latn": { + "num_samples": 1012, + "number_of_characters": 291195, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kat_Geor-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 298063, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kat_Geor-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 282525, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kat_Geor-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 251911, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kat_Geor-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 283305, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kat_Geor-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 279190, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kat_Geor-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 280858, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kat_Geor-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 273990, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kat_Geor-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 290457, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kat_Geor-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286026, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "kat_Geor-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 268347, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kat_Geor-war_Latn": { + "num_samples": 1012, + "number_of_characters": 308676, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kat_Geor-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 260237, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kat_Geor-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281754, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kat_Geor-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 301568, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kat_Geor-jpn_Jpan": { + 
"num_samples": 1012, + "number_of_characters": 200873, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kat_Geor-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 289924, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kat_Geor-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 307099, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kat_Geor-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 286551, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kat_Geor-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 267834, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kat_Geor-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 271103, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kat_Geor-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 297688, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kat_Geor-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 289711, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kat_Geor-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 288449, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kat_Geor-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 274484, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kat_Geor-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 281616, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kat_Geor-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 291437, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kat_Geor-san_Deva": { + "num_samples": 1012, + "number_of_characters": 272187, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kat_Geor-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277212, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kat_Geor-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 283053, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kat_Geor-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 260358, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kat_Geor-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 302615, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kat_Geor-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 266092, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kat_Geor-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 313327, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kat_Geor-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 286127, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kat_Geor-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 277373, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kat_Geor-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 283721, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kat_Geor-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 276400, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kat_Geor-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 285588, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kat_Geor-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 257493, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kat_Geor-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 271187, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kat_Geor-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 300348, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kat_Geor-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 272286, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kat_Geor-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 274259, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kat_Geor-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 276068, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kat_Geor-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 282501, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "kat_Geor-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289801, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kat_Geor-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 270774, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kat_Geor-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 257700, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kat_Geor-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 283191, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kat_Geor-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 306527, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kat_Geor-kan_Knda": { + 
"num_samples": 1012, + "number_of_characters": 281970, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kat_Geor-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 285596, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kat_Geor-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 270833, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kat_Geor-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 332214, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kat_Geor-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 310099, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kat_Geor-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 183949, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kat_Geor-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 269866, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kat_Geor-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 272515, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kat_Geor-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 296411, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kat_Geor-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 270035, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kat_Geor-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 273778, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kat_Geor-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 295039, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kat_Geor-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 275010, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kat_Geor-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 270039, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kat_Geor-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 187178, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kat_Geor-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 280293, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kat_Geor-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 277968, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kat_Geor-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 290073, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kat_Geor-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 270331, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kat_Geor-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 291475, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kat_Geor-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 286270, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kat_Geor-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 275640, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kat_Geor-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 234630, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kat_Geor-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 184625, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kat_Geor-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 272101, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kat_Geor-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 285187, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kat_Geor-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 276745, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kat_Geor-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 286928, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kat_Geor-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 291102, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kat_Geor-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 274785, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kat_Geor-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 312941, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + 
"unique_sentence2": 1012 + }, + "kat_Geor-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 290131, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kat_Geor-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 282547, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kat_Geor-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 279013, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kat_Geor-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 270358, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kat_Geor-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279334, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kat_Geor-lug_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279008, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kat_Geor-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 294581, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kat_Geor-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 296654, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kat_Geor-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 306965, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kat_Geor-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 292292, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kat_Geor-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 260991, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kat_Geor-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 297739, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kat_Geor-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 265210, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kat_Geor-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 286887, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kat_Geor-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 281476, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kat_Geor-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 278769, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kat_Geor-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 291533, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kat_Geor-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 302108, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kat_Geor-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 287669, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kat_Geor-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 255719, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kat_Geor-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 283997, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + 
"max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kat_Geor-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 274730, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kat_Geor-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 290185, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kat_Geor-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 273829, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kat_Geor-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 263518, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kat_Geor-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 284192, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kat_Geor-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277152, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kat_Geor-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 276785, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kat_Geor-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 245984, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kat_Geor-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280997, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kat_Geor-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 278087, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kat_Geor-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 277355, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kat_Geor-som_Latn": { + "num_samples": 1012, + "number_of_characters": 295592, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kat_Geor-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 316475, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 142.22332015810278, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "lua_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 254619, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "lua_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 269385, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + 
"unique_sentence2": 1009 + }, + "lua_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 306356, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lua_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 274056, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "lua_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 296210, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "lua_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 269693, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "lua_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 282152, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lua_Latn-sot_Latn": { + 
"num_samples": 1012, + "number_of_characters": 301730, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "lua_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 278759, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "lua_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 284931, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lua_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 288568, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lua_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 300537, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "lua_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 269931, + "unique_pairs": 
1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "lua_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 296429, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "lua_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 272406, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lua_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 265453, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lua_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 299995, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lua_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 271596, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "lua_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 256571, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "lua_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290989, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lua_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 274964, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lua_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 271725, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lua_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 288860, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + 
"max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lua_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 292334, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lua_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 266825, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "lua_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 296345, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lua_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 260958, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "lua_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 258111, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + 
"min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "lua_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 303749, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "lua_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 274290, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lua_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 281627, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "lua_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278530, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lua_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 276049, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lua_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 303994, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "lua_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273778, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lua_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 283475, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "lua_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 255075, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "lua_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 272040, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + 
"unique_sentence2": 1012 + }, + "lua_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 272457, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "lua_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 288954, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lua_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 288760, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "lua_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 269397, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lua_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 282481, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lua_Latn-ssw_Latn": { + 
"num_samples": 1012, + "number_of_characters": 291724, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lua_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277444, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lua_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 282688, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lua_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 271609, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lua_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 282820, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lua_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 277558, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lua_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 273370, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lua_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 284161, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lua_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 286416, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lua_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 279844, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lua_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 277745, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lua_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 252044, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "lua_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 265228, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "lua_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 270446, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lua_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 302678, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "lua_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 258998, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + 
"max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "lua_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280825, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "lua_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 263641, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "lua_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 275171, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lua_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 272917, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lua_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 275648, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + 
"min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lua_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 281180, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lua_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 277272, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lua_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 285316, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lua_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 281970, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lua_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 289286, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 
144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "lua_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 284025, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lua_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 281102, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "lua_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 292321, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lua_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 290062, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lua_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 291149, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + 
"unique_sentence2": 1009 + }, + "lua_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 298175, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "lua_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 272781, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lua_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 292892, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lua_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 279780, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lua_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 291569, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lua_Latn-szl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 280369, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "lua_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 276235, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lua_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 230130, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "lua_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 275185, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lua_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 283126, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lua_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 299136, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "lua_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 208963, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "lua_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 267655, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "lua_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 290263, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "lua_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 297131, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "lua_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 281593, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "lua_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 250979, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "lua_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 282373, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lua_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 278258, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "lua_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 279926, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lua_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 273058, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + 
"max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lua_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 289525, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lua_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285094, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lua_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 267415, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lua_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 307744, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lua_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 259305, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + 
"min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lua_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280822, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lua_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 300636, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "lua_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 199941, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "lua_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 288992, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lua_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 306167, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 
161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lua_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 285619, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "lua_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 266902, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lua_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 270171, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lua_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 296756, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "lua_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 288779, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + 
"unique_sentence2": 1012 + }, + "lua_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 287517, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lua_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 273552, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "lua_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 280684, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "lua_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 290505, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lua_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 271255, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "lua_Latn-tat_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 276280, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lua_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 282121, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "lua_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 259426, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lua_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 301683, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lua_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 265160, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "lua_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 312395, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "lua_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 285195, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lua_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 276441, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lua_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 282789, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lua_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 275468, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lua_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 284656, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lua_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 256561, + "unique_pairs": 1011, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "lua_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 270255, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lua_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 299416, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "lua_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 271354, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "lua_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 273327, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + 
"max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "lua_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 275136, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "lua_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 281569, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lua_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288869, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "lua_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 269842, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lua_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 256768, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + 
"min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "lua_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 282259, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lua_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 305595, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "lua_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 281038, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lua_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 284664, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lua_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 269901, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 
125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lua_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 331282, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "lua_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 309167, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lua_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 183017, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "lua_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 268934, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lua_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 271583, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + 
"unique_sentence2": 1012 + }, + "lua_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 295479, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "lua_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 269103, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "lua_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 272846, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lua_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 294107, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "lua_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 274078, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lua_Latn-tha_Thai": { + 
"num_samples": 1012, + "number_of_characters": 269107, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lua_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 186246, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "lua_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 279361, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lua_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 277036, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lua_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 289141, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lua_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 269399, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "lua_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 290543, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "lua_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 285338, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "lua_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 274708, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lua_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 233698, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "lua_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 183693, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "lua_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 271169, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lua_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 284255, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lua_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 275813, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "lua_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 286928, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lua_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 290170, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + 
"max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lua_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 273853, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lua_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 312009, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "lua_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 289199, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lua_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 281615, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "lua_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 278081, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + 
"min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lua_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 269426, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lua_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278402, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lua_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 278076, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "lua_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 293649, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lua_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 295722, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 51, + "average_sentence2_length": 
150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "lua_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 306033, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lua_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 291360, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "lua_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 260059, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "lua_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 296807, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lua_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 264278, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + 
"unique_sentence2": 1010 + }, + "lua_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 285955, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "lua_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 280544, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lua_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 277837, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lua_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 290601, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "lua_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 301176, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lua_Latn-azj_Latn": { + 
"num_samples": 1012, + "number_of_characters": 286737, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "lua_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 254787, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "lua_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 283065, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "lua_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 273798, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lua_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 289253, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lua_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 272897, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "lua_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 262586, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "lua_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 283260, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lua_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276220, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "lua_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 275853, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "lua_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 245052, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "lua_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280065, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lua_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 277155, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lua_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 276423, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lua_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 294660, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + "max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "lua_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 315543, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 141.30237154150197, + 
"max_sentence1_length": 407, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "nya_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 258793, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "nya_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 273559, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "nya_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 310530, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nya_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 278230, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "nya_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 300384, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "nya_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 273867, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "nya_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 286326, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nya_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 305904, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "nya_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 282933, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "nya_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 289105, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nya_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 292742, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nya_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 304711, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "nya_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 274105, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "nya_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 300603, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "nya_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 276580, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "nya_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 269627, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nya_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 304169, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nya_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 275770, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "nya_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 260745, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "nya_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295163, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nya_Latn-eng_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279138, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nya_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 275899, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nya_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 293034, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nya_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 296508, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nya_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 270999, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "nya_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 300519, + 
"unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nya_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 265132, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "nya_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 262285, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "nya_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 307923, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "nya_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 278464, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nya_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 285801, + "unique_pairs": 1012, + "min_sentence1_length": 31, + 
"average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "nya_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282704, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nya_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 280223, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nya_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 308168, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "nya_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277952, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nya_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 287649, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "nya_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 259249, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "nya_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 276214, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nya_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 276631, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "nya_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 293128, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nya_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 292934, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "nya_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 273571, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "nya_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 286655, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nya_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 295898, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nya_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281618, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nya_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 286862, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nya_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 275783, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nya_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 286994, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "nya_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 281732, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nya_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 277544, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nya_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 288335, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + 
"unique_sentence2": 1011 + }, + "nya_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 290590, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nya_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 284018, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nya_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 281919, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nya_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 256218, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "nya_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 269402, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "nya_Latn-ewe_Latn": { + 
"num_samples": 1012, + "number_of_characters": 274620, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nya_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 306852, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "nya_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 263172, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "nya_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284999, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "nya_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 267815, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "nya_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 279345, + 
"unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nya_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 277091, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nya_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 279822, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "nya_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 285354, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nya_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 281446, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nya_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 289490, + "unique_pairs": 1012, + "min_sentence1_length": 31, + 
"average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nya_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 286144, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nya_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 293460, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "nya_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 288199, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nya_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 285276, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "nya_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 296495, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "nya_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 294236, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nya_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 295323, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "nya_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 302349, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "nya_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 276955, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nya_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 297066, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "nya_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 283954, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nya_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 295743, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nya_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 284543, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "nya_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 280409, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nya_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 234304, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 
86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "nya_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 279359, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "nya_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 287300, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nya_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 303310, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "nya_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 213137, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "nya_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 271829, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + 
"unique_sentence2": 1012 + }, + "nya_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 294437, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "nya_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 301305, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "nya_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 285767, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "nya_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 255153, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "nya_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 286547, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nya_Latn-fon_Latn": { + 
"num_samples": 1012, + "number_of_characters": 282432, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "nya_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 284100, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "nya_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 277232, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nya_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 293699, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nya_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289268, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "nya_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 271589, + 
"unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nya_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 311918, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "nya_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 263479, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nya_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284996, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nya_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 304810, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "nya_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 204115, + "unique_pairs": 1012, + "min_sentence1_length": 31, + 
"average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "nya_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 293166, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nya_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 310341, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nya_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 289793, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "nya_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 271076, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "nya_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 274345, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "nya_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 300930, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "nya_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 292953, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nya_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 291691, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "nya_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 277726, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "nya_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 284858, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "nya_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 294679, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nya_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 275429, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "nya_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280454, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nya_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 286295, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "nya_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 263600, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "nya_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 305857, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "nya_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 269334, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "nya_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 316569, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "nya_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 289369, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nya_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 280615, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + 
"unique_sentence2": 1012 + }, + "nya_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 286963, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nya_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 279642, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "nya_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 288830, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "nya_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 260735, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "nya_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 274429, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nya_Latn-gaz_Latn": { + 
"num_samples": 1012, + "number_of_characters": 303590, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "nya_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 275528, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "nya_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 277501, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "nya_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 279310, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "nya_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 285743, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nya_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293043, + 
"unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "nya_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 274016, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "nya_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 260942, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "nya_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 286433, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "nya_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 309769, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "nya_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 285212, + "unique_pairs": 1012, + "min_sentence1_length": 31, + 
"average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nya_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 288838, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "nya_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 274075, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nya_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 335456, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "nya_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 313341, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "nya_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 187191, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "nya_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 273108, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "nya_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 275757, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "nya_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 299653, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "nya_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 273277, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "nya_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 277020, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "nya_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 298281, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "nya_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 278252, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "nya_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 273281, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "nya_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 190420, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "nya_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 283535, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nya_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 281210, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "nya_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 293315, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "nya_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 273573, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "nya_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 294717, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "nya_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 289512, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + 
"unique_sentence2": 1012 + }, + "nya_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 278882, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "nya_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 237872, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "nya_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 187867, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "nya_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 275343, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "nya_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 288429, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nya_Latn-grn_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279987, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "nya_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 291102, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nya_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 290170, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "nya_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 278027, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "nya_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 316183, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "nya_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 293373, + 
"unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "nya_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 285789, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "nya_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 282255, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "nya_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 273600, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "nya_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282576, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "nya_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 282250, + "unique_pairs": 1012, + "min_sentence1_length": 31, + 
"average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "nya_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 297823, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nya_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 299896, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "nya_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 310207, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "nya_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 295534, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "nya_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 264233, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + 
"max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "nya_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 300981, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "nya_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 268452, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "nya_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 290129, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "nya_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 284718, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "nya_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 282011, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "nya_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 294775, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "nya_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 305350, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "nya_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 290911, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "nya_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 258961, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "nya_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 287239, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "nya_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 277972, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "nya_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 293427, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "nya_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 277071, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "nya_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 266760, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "nya_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 287434, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + 
"unique_sentence2": 1012 + }, + "nya_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280394, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "nya_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 280027, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "nya_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 249226, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "nya_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284239, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "nya_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 281329, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nya_Latn-pan_Guru": { + 
"num_samples": 1012, + "number_of_characters": 280597, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "nya_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 298834, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "nya_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 319717, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 145.42687747035572, + "max_sentence1_length": 421, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "slv_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 242476, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "slv_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 257242, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "slv_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 294213, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "slv_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 261913, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "slv_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 284067, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "slv_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 257550, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "slv_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 270009, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "slv_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 289587, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "slv_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 266616, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "slv_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 272788, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "slv_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 276425, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "slv_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 288394, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "slv_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 257788, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 
361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "slv_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 284286, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "slv_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 260263, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "slv_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 253310, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "slv_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 287852, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "slv_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 259453, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 
28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "slv_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 244428, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "slv_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278846, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "slv_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 262821, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "slv_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 259582, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "slv_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 276717, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "slv_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 280191, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "slv_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 254682, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "slv_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 284202, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "slv_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 248815, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "slv_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 245968, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, 
+ "slv_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 291606, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "slv_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 262147, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "slv_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 269484, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "slv_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266387, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "slv_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 263906, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "slv_Latn-plt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 291851, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "slv_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261635, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "slv_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 271332, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "slv_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 242932, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "slv_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 259897, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "slv_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 260314, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "slv_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 276811, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "slv_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 276617, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "slv_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 257254, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "slv_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 270338, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "slv_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 279581, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "slv_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265301, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "slv_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 270545, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "slv_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 259466, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "slv_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 270677, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "slv_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 265415, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "slv_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 261227, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "slv_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 272018, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "slv_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 274273, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "slv_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 267701, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "slv_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 265602, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "slv_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 239901, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "slv_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 253085, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "slv_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 258303, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "slv_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 290535, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "slv_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 246855, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + 
"max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "slv_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268682, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "slv_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 251498, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "slv_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 263028, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "slv_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 260774, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "slv_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 263505, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + 
}, + "slv_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 269037, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "slv_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 265129, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "slv_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 273173, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "slv_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 269827, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "slv_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 277143, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "slv_Latn-quy_Latn": { + "num_samples": 1012, + 
"number_of_characters": 271882, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "slv_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 268959, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "slv_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 280178, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "slv_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 277919, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "slv_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 279006, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "slv_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 286032, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "slv_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 260638, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "slv_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 280749, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "slv_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 267637, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "slv_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 279426, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "slv_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 268226, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "slv_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 264092, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "slv_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 217987, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "slv_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 263042, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "slv_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 270983, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "slv_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 286993, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "slv_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 196820, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "slv_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 255512, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "slv_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 278120, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "slv_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 284988, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "slv_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 269450, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "slv_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 238836, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "slv_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 270230, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "slv_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 266115, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "slv_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 267783, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "slv_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 260915, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "slv_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 277382, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "slv_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272951, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "slv_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 255272, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "slv_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 295601, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "slv_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 247162, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, 
+ "slv_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268679, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "slv_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 288493, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "slv_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 187798, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "slv_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 276849, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "slv_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 294024, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "slv_Latn-sag_Latn": { + "num_samples": 1012, + 
"number_of_characters": 273476, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "slv_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 254759, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "slv_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 258028, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "slv_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 284613, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "slv_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 276636, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "slv_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 275374, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "slv_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 261409, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "slv_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 268541, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "slv_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 278362, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "slv_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 259112, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "slv_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264137, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "slv_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 269978, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "slv_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 247283, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "slv_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 289540, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "slv_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 253017, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "slv_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 300252, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "slv_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 273052, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "slv_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 264298, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "slv_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 270646, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "slv_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 263325, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "slv_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 272513, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "slv_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 244418, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "slv_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 258112, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "slv_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 287273, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "slv_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 259211, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "slv_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 261184, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + 
"max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "slv_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 262993, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "slv_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 269426, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "slv_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276726, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "slv_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 257699, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "slv_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 244625, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + 
"slv_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 270116, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "slv_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 293452, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "slv_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 268895, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "slv_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 272521, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "slv_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 257758, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "slv_Latn-shn_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 319139, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "slv_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 297024, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "slv_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 170874, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "slv_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 256791, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "slv_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 259440, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "slv_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 283336, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "slv_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 256960, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "slv_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 260703, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "slv_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 281964, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "slv_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 261935, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "slv_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 256964, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "slv_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 174103, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "slv_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 267218, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "slv_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 264893, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "slv_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 276998, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "slv_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 257256, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "slv_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 278400, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "slv_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 273195, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "slv_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 262565, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "slv_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 221555, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "slv_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 171550, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + 
"average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "slv_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 259026, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "slv_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 272112, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "slv_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 263670, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "slv_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 274785, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "slv_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 273853, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + 
"max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "slv_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 278027, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "slv_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 299866, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "slv_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 277056, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "slv_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 269472, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "slv_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 265938, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, 
+ "slv_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 257283, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "slv_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266259, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "slv_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 265933, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "slv_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 281506, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "slv_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 283579, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "slv_Latn-tsn_Latn": { + "num_samples": 1012, + 
"number_of_characters": 293890, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "slv_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 279217, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "slv_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 247916, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "slv_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 284664, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "slv_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 252135, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "slv_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 273812, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "slv_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 268401, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "slv_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 265694, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "slv_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 278458, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "slv_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 289033, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "slv_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 274594, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "slv_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 242644, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "slv_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 270922, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "slv_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 261655, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "slv_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 277110, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "slv_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 260754, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "slv_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 250443, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "slv_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 271117, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "slv_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264077, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "slv_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 263710, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "slv_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 232909, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "slv_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267922, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "slv_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 265012, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "slv_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 264280, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "slv_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 282517, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "slv_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 303400, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 129.30335968379447, + "max_sentence1_length": 361, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + 
"max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tpi_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 280632, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tpi_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 295398, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tpi_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 332369, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tpi_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 300069, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tpi_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 322223, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + 
}, + "tpi_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 295706, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tpi_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 308165, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tpi_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 327743, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tpi_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 304772, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tpi_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 310944, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tpi_Latn-ban_Latn": { + "num_samples": 1012, + 
"number_of_characters": 314581, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tpi_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 326550, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tpi_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 295944, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tpi_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 322442, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tpi_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 298419, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tpi_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 291466, + "unique_pairs": 1012, + 
"min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tpi_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 326008, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tpi_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 297609, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tpi_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 282584, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tpi_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 317002, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tpi_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 300977, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 
167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tpi_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 297738, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tpi_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 314873, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tpi_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 318347, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tpi_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 292838, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tpi_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 322358, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tpi_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 286971, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tpi_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 284124, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tpi_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 329762, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tpi_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 300303, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tpi_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 307640, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tpi_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304543, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tpi_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 302062, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tpi_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 330007, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tpi_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299791, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tpi_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 309488, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + 
"max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tpi_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 281088, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tpi_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 298053, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tpi_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 298470, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tpi_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 314967, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tpi_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 314773, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + 
}, + "tpi_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 295410, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tpi_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 308494, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tpi_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 317737, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tpi_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 303457, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tpi_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 308701, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tpi_Latn-bho_Deva": { + "num_samples": 1012, + 
"number_of_characters": 297622, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tpi_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 308833, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tpi_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 303571, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tpi_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 299383, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tpi_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 310174, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tpi_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 312429, + "unique_pairs": 1012, + 
"min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tpi_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 305857, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tpi_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 303758, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tpi_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 278057, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tpi_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 291241, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tpi_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 296459, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 
167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tpi_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 328691, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tpi_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 285011, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tpi_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306838, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tpi_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 289654, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tpi_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 301184, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tpi_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 298930, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tpi_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 301661, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tpi_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 307193, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tpi_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 303285, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tpi_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 311329, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tpi_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 307983, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tpi_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 315299, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tpi_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 310038, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tpi_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 307115, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tpi_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 318334, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tpi_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 316075, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tpi_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 317162, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tpi_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 324188, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tpi_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 298794, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tpi_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 318905, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, 
+ "tpi_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 305793, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tpi_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 317582, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tpi_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 306382, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tpi_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 302248, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tpi_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 256143, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tpi_Latn-bos_Latn": { + "num_samples": 1012, + 
"number_of_characters": 301198, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tpi_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 309139, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tpi_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 325149, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tpi_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 234976, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tpi_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 293668, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tpi_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 316276, + "unique_pairs": 1012, + 
"min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tpi_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 323144, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tpi_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 307606, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tpi_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 276992, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tpi_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 308386, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tpi_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 304271, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 
167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tpi_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 305939, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tpi_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 299071, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tpi_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 315538, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tpi_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 311107, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tpi_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 293428, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tpi_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 333757, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tpi_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 285318, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tpi_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306835, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tpi_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 326649, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tpi_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 225954, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + 
"average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tpi_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 315005, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tpi_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 332180, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tpi_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 311632, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tpi_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 292915, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tpi_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 296184, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + 
"max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tpi_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 322769, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tpi_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 314792, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tpi_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 313530, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tpi_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 299565, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tpi_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 306697, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + 
}, + "tpi_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 316518, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tpi_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 297268, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tpi_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302293, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tpi_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 308134, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tpi_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 285439, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tpi_Latn-ceb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 327696, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tpi_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 291173, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tpi_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 338408, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tpi_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 311208, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tpi_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 302454, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tpi_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 308802, + "unique_pairs": 1012, + 
"min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tpi_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 301481, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tpi_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 310669, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tpi_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 282574, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tpi_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 296268, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tpi_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 325429, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 
167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tpi_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 297367, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tpi_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 299340, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tpi_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 301149, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tpi_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 307582, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tpi_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 314882, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tpi_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 295855, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tpi_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 282781, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tpi_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 308272, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tpi_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 331608, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tpi_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 307051, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tpi_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 310677, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tpi_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 295914, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tpi_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 357295, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tpi_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 335180, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tpi_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 209030, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + 
"max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tpi_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 294947, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tpi_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 297596, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tpi_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 321492, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tpi_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 295116, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tpi_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 298859, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, 
+ "tpi_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 320120, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tpi_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 300091, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tpi_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 295120, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tpi_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 212259, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tpi_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 305374, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tpi_Latn-crh_Latn": { + "num_samples": 1012, + 
"number_of_characters": 303049, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tpi_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 315154, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tpi_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 295412, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tpi_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 316556, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tpi_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 311351, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tpi_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 300721, + "unique_pairs": 1012, + 
"min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tpi_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 259711, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tpi_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 209706, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tpi_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 297182, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tpi_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 310268, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tpi_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 301826, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 
167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tpi_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 312941, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tpi_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 312009, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tpi_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 316183, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tpi_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 299866, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tpi_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 315212, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + 
"unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tpi_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 307628, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tpi_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 304094, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tpi_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 295439, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tpi_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304415, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tpi_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 304089, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tpi_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 319662, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tpi_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 321735, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tpi_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 332046, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tpi_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 317373, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tpi_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 286072, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + 
"max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tpi_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 322820, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tpi_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 290291, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tpi_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 311968, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tpi_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 306557, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tpi_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 303850, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, 
+ "tpi_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 316614, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tpi_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 327189, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tpi_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 312750, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tpi_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 280800, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tpi_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 309078, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tpi_Latn-kea_Latn": { + "num_samples": 1012, + 
"number_of_characters": 299811, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tpi_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 315266, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tpi_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 298910, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tpi_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 288599, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tpi_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 309273, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tpi_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 302233, + "unique_pairs": 1012, + 
"min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tpi_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 301866, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tpi_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 271065, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tpi_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306078, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tpi_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 303168, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tpi_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 302436, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 
167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tpi_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 320673, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tpi_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 341556, + "unique_pairs": 1012, + "min_sentence1_length": 49, + "average_sentence1_length": 167.00691699604744, + "max_sentence1_length": 500, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "zsm_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257822, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "zsm_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272588, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "zsm_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309559, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "zsm_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 277259, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "zsm_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 299413, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "zsm_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 272896, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "zsm_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 285355, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zsm_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 304933, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "zsm_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 281962, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "zsm_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 288134, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zsm_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 291771, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zsm_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303740, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "zsm_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 273134, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, 
+ "unique_sentence2": 1011 + }, + "zsm_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299632, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "zsm_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275609, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zsm_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268656, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "zsm_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 303198, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zsm_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274799, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "zsm_Latn-acm_Arab": { + 
"num_samples": 1012, + "number_of_characters": 259774, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "zsm_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294192, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zsm_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 278167, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zsm_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 274928, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zsm_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 292063, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "zsm_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 295537, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "zsm_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 270028, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "zsm_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299548, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "zsm_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 264161, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "zsm_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 261314, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "zsm_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 306952, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "zsm_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 277493, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zsm_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 284830, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "zsm_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281733, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zsm_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 279252, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zsm_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 307197, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + 
"max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "zsm_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276981, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zsm_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286678, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "zsm_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 258278, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "zsm_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 275243, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "zsm_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275660, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "zsm_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 292157, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zsm_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 291963, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "zsm_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 272600, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "zsm_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285684, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zsm_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 294927, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zsm_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280647, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zsm_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 285891, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zsm_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274812, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zsm_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 286023, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "zsm_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280761, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "zsm_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276573, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zsm_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 287364, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "zsm_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289619, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zsm_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 283047, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "zsm_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 280948, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "zsm_Latn-ajp_Arab": { + 
"num_samples": 1012, + "number_of_characters": 255247, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "zsm_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 268431, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "zsm_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273649, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zsm_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 305881, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "zsm_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 262201, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "zsm_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284028, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "zsm_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266844, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "zsm_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 278374, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "zsm_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 276120, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "zsm_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278851, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zsm_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 284383, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zsm_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 280475, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "zsm_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 288519, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zsm_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 285173, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "zsm_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 292489, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "zsm_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 287228, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + 
"max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "zsm_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 284305, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "zsm_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295524, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zsm_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 293265, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "zsm_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 294352, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "zsm_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 301378, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "zsm_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 275984, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "zsm_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 296095, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "zsm_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 282983, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zsm_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 294772, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zsm_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 283572, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "zsm_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 279438, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zsm_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 233333, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "zsm_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 278388, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "zsm_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 286329, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "zsm_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 302339, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + 
"unique_sentence2": 1012 + }, + "zsm_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 212166, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "zsm_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270858, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "zsm_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293466, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "zsm_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 300334, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "zsm_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284796, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "zsm_Latn-apc_Arab": { + 
"num_samples": 1012, + "number_of_characters": 254182, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "zsm_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285576, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zsm_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281461, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "zsm_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 283129, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zsm_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 276261, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zsm_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 292728, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zsm_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288297, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zsm_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270618, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "zsm_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 310947, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "zsm_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262508, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "zsm_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284025, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zsm_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 303839, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "zsm_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 203144, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "zsm_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 292195, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zsm_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 309370, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zsm_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288822, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + 
"max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "zsm_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 270105, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "zsm_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 273374, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "zsm_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 299959, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "zsm_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 291982, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zsm_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290720, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + 
"min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "zsm_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276755, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "zsm_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283887, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "zsm_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293708, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zsm_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 274458, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "zsm_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279483, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "zsm_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 285324, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "zsm_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 262629, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "zsm_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304886, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "zsm_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 268363, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "zsm_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315598, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + 
"unique_sentence2": 1012 + }, + "zsm_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 288398, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "zsm_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279644, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zsm_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 285992, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "zsm_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278671, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "zsm_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287859, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zsm_Latn-ary_Arab": { + 
"num_samples": 1012, + "number_of_characters": 259764, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "zsm_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273458, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zsm_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302619, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "zsm_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 274557, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "zsm_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276530, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "zsm_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 278339, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "zsm_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284772, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zsm_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292072, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "zsm_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 273045, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zsm_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 259971, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "zsm_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285462, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "zsm_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308798, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "zsm_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 284241, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zsm_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 287867, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "zsm_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 273104, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zsm_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334485, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + 
"max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "zsm_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 312370, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "zsm_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 186220, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "zsm_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 272137, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "zsm_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274786, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zsm_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298682, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "zsm_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 272306, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "zsm_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 276049, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "zsm_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 297310, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "zsm_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 277281, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zsm_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 272310, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "zsm_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 189449, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "zsm_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282564, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zsm_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 280239, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "zsm_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 292344, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zsm_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272602, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + 
"unique_sentence2": 1012 + }, + "zsm_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293746, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "zsm_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288541, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "zsm_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277911, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zsm_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236901, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "zsm_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186896, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "zsm_Latn-awa_Deva": { + 
"num_samples": 1012, + "number_of_characters": 274372, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "zsm_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 287458, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "zsm_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 279016, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "zsm_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 290131, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zsm_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 289199, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "zsm_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 293373, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "zsm_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 277056, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "zsm_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 315212, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "zsm_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284818, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "zsm_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 281284, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zsm_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272629, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zsm_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281605, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zsm_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 281279, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "zsm_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 296852, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "zsm_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 298925, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "zsm_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 309236, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + 
"max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "zsm_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294563, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "zsm_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 263262, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "zsm_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 300010, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "zsm_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267481, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "zsm_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 289158, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + 
"min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "zsm_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283747, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zsm_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 281040, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "zsm_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 293804, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "zsm_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 304379, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zsm_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 289940, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "zsm_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 257990, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "zsm_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 286268, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "zsm_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 277001, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "zsm_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 292456, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "zsm_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 276100, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + 
"unique_sentence2": 1012 + }, + "zsm_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265789, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "zsm_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286463, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zsm_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279423, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "zsm_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 279056, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "zsm_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 248255, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "zsm_Latn-khk_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 283268, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zsm_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 280358, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "zsm_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 279626, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "zsm_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297863, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "zsm_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 318746, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 144.4673913043478, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ayr_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 250238, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ayr_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 265004, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ayr_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 301975, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ayr_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 269675, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ayr_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 291829, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ayr_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 265312, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ayr_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 277771, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ayr_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 297349, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ayr_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 274378, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ayr_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 280550, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ayr_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 284187, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 
510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ayr_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 296156, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ayr_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 265550, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "ayr_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 292048, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ayr_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 268025, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ayr_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 261072, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 
35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ayr_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 295614, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ayr_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 267215, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ayr_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 252190, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ayr_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286608, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ayr_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 270583, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ayr_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 267344, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ayr_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 284479, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ayr_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 287953, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ayr_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 262444, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ayr_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 291964, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"ayr_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 256577, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ayr_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 253730, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ayr_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 299368, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ayr_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 269909, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ayr_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 277246, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ayr_Latn-kir_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 274149, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ayr_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 271668, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ayr_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 299613, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ayr_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269397, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ayr_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 279094, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ayr_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 250694, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ayr_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 267659, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ayr_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 268076, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ayr_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 284573, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ayr_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 284379, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ayr_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 265016, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ayr_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 278100, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ayr_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 287343, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ayr_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273063, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ayr_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 278307, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ayr_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 267228, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ayr_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 278439, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ayr_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 273177, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ayr_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 268989, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ayr_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 279780, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ayr_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 282035, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ayr_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 275463, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ayr_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 273364, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ayr_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 247663, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ayr_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 260847, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ayr_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 266065, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ayr_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 298297, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ayr_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 254617, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ayr_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276444, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ayr_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 259260, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ayr_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 270790, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + 
}, + "ayr_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 268536, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ayr_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 271267, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ayr_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 276799, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ayr_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 272891, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ayr_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 280935, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ayr_Latn-knc_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277589, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ayr_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 284905, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ayr_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 279644, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ayr_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 276721, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ayr_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 287940, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ayr_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 285681, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ayr_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 286768, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ayr_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 293794, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ayr_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 268400, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ayr_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 288511, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ayr_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 275399, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ayr_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 287188, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ayr_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 275988, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ayr_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 271854, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ayr_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 225749, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ayr_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 270804, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ayr_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 278745, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ayr_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 294755, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "ayr_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 204582, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ayr_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 263274, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ayr_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 285882, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ayr_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 292750, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ayr_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 277212, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ayr_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 246598, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ayr_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 277992, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ayr_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 273877, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + 
"max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ayr_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 275545, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ayr_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 268677, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ayr_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 285144, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ayr_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280713, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ayr_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 263034, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, 
+ "ayr_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 303363, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ayr_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 254924, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ayr_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276441, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ayr_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 296255, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ayr_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 195560, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ayr_Latn-lij_Latn": { + "num_samples": 1012, + 
"number_of_characters": 284611, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ayr_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 301786, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ayr_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 281238, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ayr_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 262521, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ayr_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 265790, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ayr_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 292375, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ayr_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 284398, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ayr_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 283136, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ayr_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 269171, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ayr_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 276303, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ayr_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 286124, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ayr_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 266874, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ayr_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271899, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ayr_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 277740, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ayr_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 255045, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ayr_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 297302, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ayr_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 260779, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ayr_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 308014, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "ayr_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 280814, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ayr_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 272060, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ayr_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 278408, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ayr_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 271087, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ayr_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 280275, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ayr_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 252180, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ayr_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 265874, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ayr_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 295035, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + 
"max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ayr_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 266973, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ayr_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 268946, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ayr_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 270755, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ayr_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 277188, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ayr_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284488, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, 
+ "ayr_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 265461, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ayr_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 252387, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ayr_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 277878, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ayr_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 301214, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ayr_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 276657, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ayr_Latn-lmo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 280283, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ayr_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 265520, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ayr_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 326901, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ayr_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 304786, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ayr_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 178636, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ayr_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 264553, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ayr_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 267202, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ayr_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 291098, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ayr_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 264722, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ayr_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 268465, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ayr_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 289726, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ayr_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 269697, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ayr_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 264726, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ayr_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 181865, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ayr_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 274980, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ayr_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 272655, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + 
"unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ayr_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 284760, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ayr_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 265018, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "ayr_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 286162, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ayr_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 280957, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ayr_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 270327, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ayr_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 229317, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ayr_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 179312, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ayr_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 266788, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ayr_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 279874, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ayr_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 271432, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + 
"max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ayr_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 282547, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ayr_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 281615, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ayr_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 285789, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ayr_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 269472, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ayr_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 307628, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + 
}, + "ayr_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 284818, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ayr_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 273700, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ayr_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 265045, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ayr_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274021, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ayr_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 273695, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ayr_Latn-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 289268, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ayr_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 291341, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ayr_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 301652, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ayr_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 286979, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ayr_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 255678, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ayr_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 292426, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ayr_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 259897, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ayr_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 281574, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ayr_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 276163, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ayr_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 273456, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ayr_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 286220, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ayr_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 296795, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ayr_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 282356, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ayr_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 250406, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ayr_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 278684, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ayr_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 269417, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ayr_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 284872, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ayr_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 268516, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "ayr_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 258205, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ayr_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 278879, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ayr_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271839, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ayr_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 271472, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ayr_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 240671, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ayr_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275684, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ayr_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 272774, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ayr_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 272042, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ayr_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 290279, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ayr_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 311162, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 136.97332015810278, + "max_sentence1_length": 510, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "dan_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 246704, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "dan_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 261470, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "dan_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 298441, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + 
"dan_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 266141, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "dan_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 288295, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "dan_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 261778, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "dan_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 274237, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dan_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 293815, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "dan_Latn-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 270844, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "dan_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 277016, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dan_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 280653, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dan_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 292622, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "dan_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 262016, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "dan_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 288514, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "dan_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 264491, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dan_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 257538, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "dan_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 292080, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dan_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 263681, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "dan_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 248656, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "dan_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283074, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dan_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 267049, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dan_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 263810, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dan_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 280945, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "dan_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 284419, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "dan_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 258910, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "dan_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 288430, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "dan_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 253043, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "dan_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 250196, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "dan_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 295834, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "dan_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 266375, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dan_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 273712, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "dan_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270615, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dan_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 268134, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dan_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 296079, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + 
"max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "dan_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265863, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dan_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 275560, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "dan_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 247160, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "dan_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 264125, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "dan_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 264542, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + 
"dan_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 281039, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dan_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 280845, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "dan_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 261482, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "dan_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 274566, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dan_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 283809, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dan_Latn-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 269529, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dan_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 274773, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dan_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 263694, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dan_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 274905, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "dan_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 269643, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "dan_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 265455, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dan_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 276246, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "dan_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 278501, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dan_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 271929, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "dan_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 269830, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "dan_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 244129, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "dan_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 257313, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "dan_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 262531, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dan_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 294763, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "dan_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 251083, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "dan_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272910, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "dan_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 255726, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "dan_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 267256, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "dan_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 265002, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "dan_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 267733, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dan_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 273265, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dan_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 269357, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "dan_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 277401, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dan_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 274055, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "dan_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 281371, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "dan_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 276110, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + 
"max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "dan_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 273187, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "dan_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 284406, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dan_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 282147, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "dan_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 283234, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "dan_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 290260, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + 
"dan_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 264866, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "dan_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 284977, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "dan_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 271865, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dan_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 283654, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dan_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 272454, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "dan_Latn-vec_Latn": { + "num_samples": 1012, + 
"number_of_characters": 268320, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dan_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 222215, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "dan_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 267270, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "dan_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 275211, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "dan_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 291221, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "dan_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 201048, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "dan_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 259740, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "dan_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 282348, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "dan_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 289216, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "dan_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 273678, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "dan_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 243064, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "dan_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 274458, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dan_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 270343, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "dan_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 272011, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dan_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 265143, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dan_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 281610, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "dan_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277179, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dan_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 259500, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "dan_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 299829, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "dan_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 251390, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "dan_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272907, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dan_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 292721, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "dan_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 192026, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "dan_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 281077, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dan_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 298252, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dan_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 277704, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + 
"max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "dan_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 258987, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "dan_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 262256, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "dan_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 288841, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "dan_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 280864, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dan_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 279602, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + 
"dan_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 265637, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "dan_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 272769, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "dan_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 282590, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dan_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 263340, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "dan_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268365, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "dan_Latn-xho_Latn": { + "num_samples": 1012, + 
"number_of_characters": 274206, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "dan_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 251511, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "dan_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 293768, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "dan_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 257245, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "dan_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 304480, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "dan_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 277280, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "dan_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 268526, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dan_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 274874, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "dan_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 267553, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "dan_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 276741, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dan_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 248646, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "dan_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 262340, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dan_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 291501, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "dan_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 263439, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "dan_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 265412, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "dan_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 267221, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "dan_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 273654, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dan_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280954, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "dan_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 261927, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dan_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 248853, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "dan_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 274344, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "dan_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 297680, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "dan_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 273123, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dan_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 276749, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "dan_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 261986, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dan_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 323367, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + 
"max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "dan_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 301252, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "dan_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 175102, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "dan_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 261019, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "dan_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 263668, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dan_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 287564, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + 
"dan_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 261188, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "dan_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 264931, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "dan_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 286192, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "dan_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 266163, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dan_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 261192, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "dan_Latn-zho_Hans": { + "num_samples": 1012, + 
"number_of_characters": 178331, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "dan_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 271446, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dan_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 269121, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "dan_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 281226, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dan_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 261484, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "dan_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 282628, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "dan_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 277423, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "dan_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 266793, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dan_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 225783, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "dan_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 175778, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "dan_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 263254, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "dan_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 276340, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "dan_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 267898, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "dan_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 279013, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dan_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 278081, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "dan_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 282255, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "dan_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 265938, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "dan_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 304094, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "dan_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 281284, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dan_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 273700, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "dan_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 261511, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dan_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270487, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dan_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 270161, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "dan_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 285734, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "dan_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 287807, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "dan_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 298118, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "dan_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 283445, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "dan_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 252144, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "dan_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 288892, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "dan_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 256363, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "dan_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 278040, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + 
"dan_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 272629, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "dan_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 269922, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "dan_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 282686, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "dan_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 293261, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dan_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 278822, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "dan_Latn-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 246872, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "dan_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 275150, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "dan_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 265883, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "dan_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 281338, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "dan_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 264982, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "dan_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 254671, + "unique_pairs": 1012, + 
"min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "dan_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 275345, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dan_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268305, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "dan_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 267938, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "dan_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 237137, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "dan_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272150, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 
133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dan_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 269240, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "dan_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 268508, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "dan_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 286745, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "dan_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 307628, + "unique_pairs": 1012, + "min_sentence1_length": 32, + "average_sentence1_length": 133.4812252964427, + "max_sentence1_length": 369, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "guj_Gujr-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 238049, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "guj_Gujr-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 252815, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "guj_Gujr-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 289786, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "guj_Gujr-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 257486, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "guj_Gujr-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 279640, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "guj_Gujr-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 253123, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "guj_Gujr-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 265582, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "guj_Gujr-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 285160, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "guj_Gujr-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 262189, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "guj_Gujr-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 268361, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "guj_Gujr-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 271998, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 
394, + "unique_sentence2": 1012 + }, + "guj_Gujr-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 283967, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "guj_Gujr-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 253361, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "guj_Gujr-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 279859, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "guj_Gujr-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 255836, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "guj_Gujr-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 248883, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "guj_Gujr-spa_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 283425, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "guj_Gujr-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 255026, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "guj_Gujr-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 240001, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "guj_Gujr-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274419, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "guj_Gujr-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 258394, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "guj_Gujr-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 255155, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "guj_Gujr-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 272290, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "guj_Gujr-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 275764, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "guj_Gujr-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 250255, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "guj_Gujr-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 279775, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "guj_Gujr-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 244388, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "guj_Gujr-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 241541, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "guj_Gujr-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 287179, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "guj_Gujr-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 257720, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "guj_Gujr-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 265057, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "guj_Gujr-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261960, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + 
"max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "guj_Gujr-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 259479, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "guj_Gujr-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 287424, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "guj_Gujr-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257208, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "guj_Gujr-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 266905, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "guj_Gujr-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 238505, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + 
"min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "guj_Gujr-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 255470, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "guj_Gujr-est_Latn": { + "num_samples": 1012, + "number_of_characters": 255887, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "guj_Gujr-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 272384, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "guj_Gujr-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 272190, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "guj_Gujr-min_Arab": { + "num_samples": 1012, + "number_of_characters": 252827, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 
124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "guj_Gujr-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 265911, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "guj_Gujr-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 275154, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "guj_Gujr-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260874, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "guj_Gujr-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 266118, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "guj_Gujr-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 255039, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + 
"unique_sentence2": 1012 + }, + "guj_Gujr-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 266250, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "guj_Gujr-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 260988, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "guj_Gujr-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 256800, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "guj_Gujr-min_Latn": { + "num_samples": 1012, + "number_of_characters": 267591, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "guj_Gujr-por_Latn": { + "num_samples": 1012, + "number_of_characters": 269846, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "guj_Gujr-sun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 263274, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "guj_Gujr-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 261175, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "guj_Gujr-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 235474, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "guj_Gujr-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 248658, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "guj_Gujr-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 253876, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "guj_Gujr-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 286108, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "guj_Gujr-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 242428, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "guj_Gujr-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264255, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "guj_Gujr-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 247071, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "guj_Gujr-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 258601, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "guj_Gujr-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 256347, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "guj_Gujr-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 259078, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "guj_Gujr-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 264610, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "guj_Gujr-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 260702, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "guj_Gujr-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 268746, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "guj_Gujr-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 265400, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + 
"max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "guj_Gujr-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 272716, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "guj_Gujr-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 267455, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "guj_Gujr-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 264532, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "guj_Gujr-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 275751, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "guj_Gujr-als_Latn": { + "num_samples": 1012, + "number_of_characters": 273492, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + 
"min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "guj_Gujr-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 274579, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "guj_Gujr-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 281605, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "guj_Gujr-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 256211, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "guj_Gujr-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 276322, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "guj_Gujr-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 263210, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "guj_Gujr-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 274999, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "guj_Gujr-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 263799, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "guj_Gujr-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 259665, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "guj_Gujr-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 213560, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "guj_Gujr-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 258615, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "guj_Gujr-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 266556, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "guj_Gujr-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 282566, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "guj_Gujr-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 192393, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "guj_Gujr-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 251085, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "guj_Gujr-run_Latn": { + "num_samples": 1012, + "number_of_characters": 273693, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "guj_Gujr-tam_Taml": { + 
"num_samples": 1012, + "number_of_characters": 280561, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "guj_Gujr-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 265023, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "guj_Gujr-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 234409, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "guj_Gujr-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 265803, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "guj_Gujr-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 261688, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "guj_Gujr-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 263356, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "guj_Gujr-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 256488, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "guj_Gujr-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 272955, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "guj_Gujr-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268524, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "guj_Gujr-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 250845, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "guj_Gujr-war_Latn": { + "num_samples": 1012, + "number_of_characters": 291174, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "guj_Gujr-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 242735, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "guj_Gujr-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264252, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "guj_Gujr-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 284066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "guj_Gujr-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 183371, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "guj_Gujr-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 272422, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + 
"max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "guj_Gujr-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 289597, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "guj_Gujr-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 269049, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "guj_Gujr-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 250332, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "guj_Gujr-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 253601, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "guj_Gujr-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 280186, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "guj_Gujr-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 272209, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "guj_Gujr-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 270947, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "guj_Gujr-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 256982, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "guj_Gujr-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 264114, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "guj_Gujr-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 273935, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "guj_Gujr-san_Deva": { + "num_samples": 1012, + "number_of_characters": 254685, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "guj_Gujr-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259710, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "guj_Gujr-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 265551, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "guj_Gujr-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 242856, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "guj_Gujr-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 285113, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + 
"unique_sentence2": 1012 + }, + "guj_Gujr-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 248590, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "guj_Gujr-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 295825, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "guj_Gujr-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 268625, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "guj_Gujr-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 259871, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "guj_Gujr-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 266219, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "guj_Gujr-tel_Telu": { + 
"num_samples": 1012, + "number_of_characters": 258898, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "guj_Gujr-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 268086, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "guj_Gujr-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 239991, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "guj_Gujr-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 253685, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "guj_Gujr-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 282846, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "guj_Gujr-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 254784, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "guj_Gujr-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 256757, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "guj_Gujr-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 258566, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "guj_Gujr-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 264999, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "guj_Gujr-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272299, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "guj_Gujr-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 253272, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "guj_Gujr-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 240198, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "guj_Gujr-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 265689, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "guj_Gujr-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 289025, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "guj_Gujr-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 264468, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "guj_Gujr-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 268094, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + 
"max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "guj_Gujr-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 253331, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "guj_Gujr-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 314712, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "guj_Gujr-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 292597, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "guj_Gujr-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 166447, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "guj_Gujr-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 252364, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "guj_Gujr-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 255013, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "guj_Gujr-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 278909, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "guj_Gujr-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 252533, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "guj_Gujr-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 256276, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "guj_Gujr-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 277537, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "guj_Gujr-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 257508, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "guj_Gujr-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 252537, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "guj_Gujr-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 169676, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "guj_Gujr-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 262791, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "guj_Gujr-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 260466, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + 
"unique_sentence2": 1012 + }, + "guj_Gujr-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 272571, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "guj_Gujr-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 252829, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "guj_Gujr-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 273973, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "guj_Gujr-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 268768, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "guj_Gujr-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 258138, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "guj_Gujr-tir_Ethi": { + 
"num_samples": 1012, + "number_of_characters": 217128, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "guj_Gujr-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 167123, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "guj_Gujr-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 254599, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "guj_Gujr-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 267685, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "guj_Gujr-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 259243, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "guj_Gujr-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 270358, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "guj_Gujr-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 269426, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "guj_Gujr-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 273600, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "guj_Gujr-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 257283, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "guj_Gujr-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 295439, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "guj_Gujr-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 272629, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "guj_Gujr-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 265045, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "guj_Gujr-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 261511, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "guj_Gujr-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261832, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "guj_Gujr-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 261506, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "guj_Gujr-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 277079, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + 
"max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "guj_Gujr-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 279152, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "guj_Gujr-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 289463, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "guj_Gujr-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 274790, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "guj_Gujr-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 243489, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "guj_Gujr-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 280237, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "guj_Gujr-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 247708, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "guj_Gujr-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 269385, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "guj_Gujr-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 263974, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "guj_Gujr-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 261267, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "guj_Gujr-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 274031, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "guj_Gujr-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 284606, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "guj_Gujr-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 270167, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "guj_Gujr-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 238217, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "guj_Gujr-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 266495, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "guj_Gujr-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 257228, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + 
"unique_sentence2": 1012 + }, + "guj_Gujr-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 272683, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "guj_Gujr-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 256327, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "guj_Gujr-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 246016, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "guj_Gujr-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 266690, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "guj_Gujr-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259650, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "guj_Gujr-dyu_Latn": { + 
"num_samples": 1012, + "number_of_characters": 259283, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "guj_Gujr-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 228482, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "guj_Gujr-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263495, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "guj_Gujr-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 260585, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "guj_Gujr-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 259853, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "guj_Gujr-som_Latn": { + "num_samples": 1012, + "number_of_characters": 278090, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "guj_Gujr-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 298973, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 247025, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 261791, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kaz_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 298762, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 266462, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 288616, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 262099, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 274558, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 294136, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 271165, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 277337, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 280974, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 292943, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 262337, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 288835, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kaz_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 264812, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 257859, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 292401, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 264002, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 248977, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283395, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 267370, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 264131, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 281266, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 284740, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "kaz_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 259231, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 288751, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 253364, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 250517, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 296155, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 266696, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 274033, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270936, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 268455, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 296400, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266184, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 275881, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 247481, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 264446, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 264863, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 281360, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 281166, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 261803, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 274887, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 284130, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269850, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 275094, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 264015, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 275226, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 269964, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 265776, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 276567, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 278822, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 272250, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 270151, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 244450, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 257634, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 262852, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 295084, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 251404, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273231, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + 
"unique_sentence2": 1012 + }, + "kaz_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 256047, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 267577, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 265323, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 268054, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 273586, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-fao_Latn": { + 
"num_samples": 1012, + "number_of_characters": 269678, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 277722, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 274376, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 281692, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 276431, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 273508, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 284727, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 282468, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 283555, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kaz_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 290581, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 265187, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 285298, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 272186, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 283975, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 272775, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 268641, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 222536, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 267591, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 275532, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 291542, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 201369, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 260061, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 282669, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 289537, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 273999, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 243385, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 274779, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 270664, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 272332, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 265464, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 281931, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "kaz_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277500, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 259821, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 300150, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 251711, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273228, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-fra_Latn": { + 
"num_samples": 1012, + "number_of_characters": 293042, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 192347, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 281398, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 298573, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 278025, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 259308, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 262577, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 289162, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 281185, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 279923, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 265958, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 273090, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 282911, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 263661, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268686, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 274527, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 251832, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 294089, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 257566, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 304801, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 277601, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 268847, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 275195, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 267874, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 277062, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 248967, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 262661, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 291822, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 263760, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kaz_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 265733, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 267542, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + 
"unique_sentence2": 1012 + }, + "kaz_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 273975, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281275, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 262248, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 249174, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 274665, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-gla_Latn": { + 
"num_samples": 1012, + "number_of_characters": 298001, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 273444, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 277070, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 262307, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 323688, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 301573, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 175423, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 261340, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 263989, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 287885, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 261509, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 265252, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 286513, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 266484, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 261513, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 178652, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 271767, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 269442, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 281547, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 261805, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 282949, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 277744, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 267114, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 226104, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 176099, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 263575, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 276661, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 268219, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kaz_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 279334, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 278402, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kaz_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 282576, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "kaz_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 266259, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 304415, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 281605, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 274021, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 270487, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-guj_Gujr": { + 
"num_samples": 1012, + "number_of_characters": 261832, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 270482, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 286055, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 288128, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kaz_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 298439, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 283766, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 252465, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 289213, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 256684, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kaz_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 278361, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 272950, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 270243, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 283007, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 293582, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 279143, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 247193, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + 
"max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 275471, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 266204, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 281659, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 265303, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 254992, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 275666, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268626, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 268259, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kaz_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 237458, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272471, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 269561, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 268829, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 287066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kaz_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 307949, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.79841897233203, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "lug_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 246699, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + 
"unique_sentence2": 1012 + }, + "lug_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 261465, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "lug_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 298436, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lug_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 266136, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "lug_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 288290, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "lug_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 261773, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "lug_Latn-pap_Latn": { + 
"num_samples": 1012, + "number_of_characters": 274232, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lug_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 293810, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "lug_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 270839, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "lug_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 277011, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lug_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 280648, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lug_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 292617, + "unique_pairs": 
1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "lug_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 262011, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "lug_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 288509, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "lug_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 264486, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lug_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 257533, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lug_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 292075, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lug_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 263676, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "lug_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 248651, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "lug_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283069, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lug_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 267044, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lug_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 263805, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + 
"max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lug_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 280940, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lug_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 284414, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lug_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 258905, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "lug_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 288425, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lug_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 253038, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "lug_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 250191, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "lug_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 295829, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "lug_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 266370, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lug_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 273707, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "lug_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270610, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lug_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 268129, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lug_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 296074, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "lug_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265858, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lug_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 275555, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "lug_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 247155, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + 
"unique_sentence2": 1012 + }, + "lug_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 264120, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lug_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 264537, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "lug_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 281034, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lug_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 280840, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "lug_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 261477, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lug_Latn-pol_Latn": { + 
"num_samples": 1012, + "number_of_characters": 274561, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lug_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 283804, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lug_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269524, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lug_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 274768, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lug_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 263689, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lug_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 274900, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lug_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 269638, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lug_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 265450, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lug_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 276241, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lug_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 278496, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lug_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 271924, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lug_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 269825, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lug_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 244124, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "lug_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 257308, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "lug_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 262526, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lug_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 294758, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + 
"max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "lug_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 251078, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "lug_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272905, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "lug_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 255721, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "lug_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 267251, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lug_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 264997, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lug_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 267728, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lug_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 273260, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lug_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 269352, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lug_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 277396, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lug_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 274050, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lug_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 281366, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "lug_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 276105, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lug_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 273182, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "lug_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 284401, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lug_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 282142, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + 
"unique_sentence2": 1012 + }, + "lug_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 283229, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "lug_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 290255, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "lug_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 264861, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lug_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 284972, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lug_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 271860, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lug_Latn-ron_Latn": { + 
"num_samples": 1012, + "number_of_characters": 283649, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lug_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 272449, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "lug_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 268315, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lug_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 222210, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "lug_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 267265, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lug_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 275206, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lug_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 291216, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "lug_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 201043, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "lug_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 259735, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "lug_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 282343, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "lug_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 289211, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "lug_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 273673, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "lug_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 243059, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "lug_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 274453, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lug_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 270338, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "lug_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 272006, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + 
"max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lug_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 265138, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lug_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 281605, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lug_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277174, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lug_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 259495, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lug_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 299824, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + 
"min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lug_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 251385, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lug_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272902, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lug_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 292716, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "lug_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 192021, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "lug_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 281072, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lug_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 298247, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lug_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 277699, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "lug_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 258982, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lug_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 262251, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lug_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 288836, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + 
"unique_sentence2": 1012 + }, + "lug_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 280859, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lug_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 279597, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lug_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 265632, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "lug_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 272764, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "lug_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 282585, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lug_Latn-san_Deva": { + 
"num_samples": 1012, + "number_of_characters": 263335, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "lug_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268360, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lug_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 274201, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "lug_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 251506, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lug_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 293763, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lug_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 257240, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "lug_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 304475, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "lug_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 277275, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lug_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 268521, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lug_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 274869, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lug_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 267548, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lug_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 276736, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lug_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 248641, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "lug_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 262335, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lug_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 291496, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "lug_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 263434, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + 
"max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "lug_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 265407, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "lug_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 267216, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "lug_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 273649, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lug_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280949, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "lug_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 261922, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + 
"min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lug_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 248848, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "lug_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 274339, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lug_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 297675, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "lug_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 273118, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lug_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 276744, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lug_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 261981, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lug_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 323362, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "lug_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 301247, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lug_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 175097, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "lug_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 261014, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + 
"unique_sentence2": 1012 + }, + "lug_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 263663, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lug_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 287559, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "lug_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 261183, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "lug_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 264926, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lug_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 286187, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "lug_Latn-sin_Sinh": { + 
"num_samples": 1012, + "number_of_characters": 266158, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lug_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 261187, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lug_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 178326, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "lug_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 271441, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lug_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 269116, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lug_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 281221, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lug_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 261479, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "lug_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 282623, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "lug_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 277418, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "lug_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 266788, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lug_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 225778, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "lug_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 175773, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "lug_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 263249, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lug_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 276335, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lug_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 267893, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "lug_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 279008, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + 
"max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lug_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 278076, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "lug_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 282250, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lug_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 265933, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lug_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 304089, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "lug_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 281279, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lug_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 273695, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "lug_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 270161, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lug_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 261506, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lug_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270482, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lug_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 285729, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lug_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 287802, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "lug_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 298113, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lug_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 283440, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "lug_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 252139, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "lug_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 288887, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + 
"unique_sentence2": 1012 + }, + "lug_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 256358, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "lug_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 278035, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "lug_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 272624, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lug_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 269917, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lug_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 282681, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "lug_Latn-tso_Latn": { + 
"num_samples": 1012, + "number_of_characters": 293256, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lug_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 278817, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "lug_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 246867, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "lug_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 275145, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "lug_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 265878, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lug_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 281333, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lug_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 264977, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "lug_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 254666, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "lug_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 275340, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lug_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268300, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "lug_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 267933, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "lug_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 237132, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "lug_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272145, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lug_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 269235, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lug_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 268503, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lug_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 286740, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + 
"max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "lug_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 307623, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.47628458498025, + "max_sentence1_length": 396, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "oci_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 262272, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "oci_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 277038, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "oci_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 314009, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "oci_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 281709, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "oci_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 303863, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "oci_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 277346, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "oci_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 289805, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "oci_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 309383, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "oci_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 286412, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "oci_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 292584, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "oci_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 296221, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "oci_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 308190, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "oci_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 277584, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "oci_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 304082, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 
1009 + }, + "oci_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 280059, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "oci_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 273106, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "oci_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 307648, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "oci_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 279249, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "oci_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 264224, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "oci_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 298642, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "oci_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 282617, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "oci_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 279378, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "oci_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 296513, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "oci_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 299987, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "oci_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 274478, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "oci_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 303998, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "oci_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 268611, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "oci_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 265764, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "oci_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 311402, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "oci_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 281943, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "oci_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 289280, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "oci_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286183, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "oci_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 283702, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "oci_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 311647, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "oci_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281431, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "oci_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 291128, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "oci_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 262728, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "oci_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 279693, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "oci_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 280110, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "oci_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 296607, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "oci_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 296413, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "oci_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 277050, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "oci_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 290134, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "oci_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 299377, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "oci_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285097, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "oci_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 290341, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "oci_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 279262, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "oci_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 290473, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "oci_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 285211, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "oci_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 281023, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + 
}, + "oci_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 291814, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "oci_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 294069, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "oci_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 287497, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "oci_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 285398, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "oci_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 259697, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "oci_Latn-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 272881, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "oci_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 278099, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "oci_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 310331, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "oci_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 266651, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "oci_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288478, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "oci_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 271294, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "oci_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 282824, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "oci_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 280570, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "oci_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 283301, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "oci_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 288833, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "oci_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 284925, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "oci_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 292969, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "oci_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 289623, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "oci_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 296939, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "oci_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 291678, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "oci_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 288755, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "oci_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 299974, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "oci_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 297715, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "oci_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 298802, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "oci_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 305828, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "oci_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 280434, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "oci_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 300545, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "oci_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 287433, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "oci_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 299222, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "oci_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 288022, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "oci_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 283888, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "oci_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 237783, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "oci_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 282838, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "oci_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 290779, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "oci_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 306789, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "oci_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 216616, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"oci_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 275308, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "oci_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 297916, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "oci_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 304784, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "oci_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 289246, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "oci_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 258632, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "oci_Latn-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 290026, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "oci_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 285911, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "oci_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 287579, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "oci_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 280711, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "oci_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 297178, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "oci_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292747, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "oci_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 275068, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "oci_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 315397, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "oci_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 266958, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "oci_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288475, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "oci_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 308289, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "oci_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 207594, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "oci_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 296645, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "oci_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 313820, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "oci_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 293272, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "oci_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 274555, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "oci_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 277824, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "oci_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 304409, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "oci_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 296432, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "oci_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 295170, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "oci_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 281205, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "oci_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 288337, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "oci_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 298158, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "oci_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 278908, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "oci_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283933, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "oci_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 289774, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "oci_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 267079, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "oci_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 309336, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "oci_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 272813, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "oci_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 320048, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "oci_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 292848, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, 
+ "oci_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 284094, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "oci_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 290442, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "oci_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 283121, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "oci_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 292309, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "oci_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 264214, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "oci_Latn-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 277908, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "oci_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 307069, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "oci_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 279007, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "oci_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 280980, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "oci_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 282789, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "oci_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 289222, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "oci_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296522, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "oci_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 277495, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "oci_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 264421, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "oci_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 289912, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "oci_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 313248, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "oci_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 288691, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "oci_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 292317, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "oci_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 277554, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "oci_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 338935, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "oci_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 316820, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + 
"unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "oci_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 190670, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "oci_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 276587, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "oci_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 279236, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "oci_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 303132, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "oci_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 276756, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "oci_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 280499, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "oci_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 301760, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "oci_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 281731, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "oci_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 276760, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "oci_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 193899, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + 
"max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "oci_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 287014, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "oci_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 284689, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "oci_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 296794, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "oci_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 277052, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "oci_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 298196, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, 
+ "oci_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 292991, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "oci_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 282361, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "oci_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 241351, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "oci_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 191346, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "oci_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 278822, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "oci_Latn-cym_Latn": { + "num_samples": 1012, + 
"number_of_characters": 291908, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "oci_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 283466, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "oci_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 294581, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "oci_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 293649, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "oci_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 297823, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "oci_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 281506, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "oci_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 319662, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "oci_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 296852, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "oci_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 289268, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "oci_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 285734, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "oci_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 277079, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "oci_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286055, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "oci_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 285729, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "oci_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 303375, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "oci_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 313686, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "oci_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 299013, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "oci_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 267712, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "oci_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 304460, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "oci_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 271931, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "oci_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 293608, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "oci_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 288197, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "oci_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 285490, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "oci_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 298254, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "oci_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 308829, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "oci_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 294390, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "oci_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 262440, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + 
"max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "oci_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 290718, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "oci_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 281451, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "oci_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 296906, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "oci_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 280550, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "oci_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 270239, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"oci_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 290913, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "oci_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283873, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "oci_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 283506, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "oci_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 252705, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "oci_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287718, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "oci_Latn-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 284808, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "oci_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 284076, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "oci_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 302313, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "oci_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 323196, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 148.86462450592884, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "smo_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 264345, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "smo_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 279111, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "smo_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 316082, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "smo_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 283782, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "smo_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 305936, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "smo_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 279419, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "smo_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 291878, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "smo_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 311456, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "smo_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 288485, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "smo_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 294657, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "smo_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 298294, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "smo_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 310263, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 
1011, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "smo_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 279657, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "smo_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 306155, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "smo_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 282132, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "smo_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 275179, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "smo_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 309721, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + 
"average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "smo_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 281322, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "smo_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 266297, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "smo_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300715, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "smo_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 284690, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "smo_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 281451, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "smo_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 298586, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "smo_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 302060, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "smo_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 276551, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "smo_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 306071, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "smo_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 270684, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, 
+ "smo_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 267837, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "smo_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 313475, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "smo_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 284016, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "smo_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 291353, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "smo_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288256, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "smo_Latn-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 285775, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "smo_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 313720, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "smo_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283504, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "smo_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 293201, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "smo_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 264801, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "smo_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 281766, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "smo_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 282183, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "smo_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 298680, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "smo_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 298486, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "smo_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 279123, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "smo_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 292207, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "smo_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 301450, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "smo_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287170, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "smo_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 292414, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "smo_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 281335, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "smo_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 292546, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + 
"unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "smo_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 287284, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "smo_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 283096, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "smo_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 293887, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "smo_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 296142, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "smo_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 289570, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "smo_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 287471, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "smo_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 261770, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "smo_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 274954, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "smo_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 280172, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "smo_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 312404, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "smo_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 268724, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "smo_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290551, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "smo_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 273367, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "smo_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 284897, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "smo_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 282643, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "smo_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 285374, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "smo_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 290906, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "smo_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 286998, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "smo_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 295042, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "smo_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 291696, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "smo_Latn-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 299012, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "smo_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 293751, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "smo_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 290828, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "smo_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 302047, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "smo_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 299788, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "smo_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 300875, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "smo_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 307901, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "smo_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 282507, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "smo_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 302618, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "smo_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 289506, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "smo_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 301295, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "smo_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 290095, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "smo_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 285961, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "smo_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 239856, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "smo_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 284911, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "smo_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 292852, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + 
"unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "smo_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 308862, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "smo_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 218689, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "smo_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 277381, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "smo_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 299989, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "smo_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 306857, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "smo_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 291319, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "smo_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 260705, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "smo_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 292099, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "smo_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 287984, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "smo_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 289652, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "smo_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 282784, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "smo_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 299251, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "smo_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294820, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "smo_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 277141, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "smo_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 317470, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "smo_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 269031, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "smo_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290548, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "smo_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 310362, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "smo_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 209667, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "smo_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 298718, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "smo_Latn-mya_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 315893, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "smo_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 295345, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "smo_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 276628, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "smo_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 279897, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "smo_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 306482, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "smo_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 298505, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "smo_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 297243, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "smo_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 283278, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "smo_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 290410, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "smo_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 300231, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "smo_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 280981, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "smo_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286006, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "smo_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 291847, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "smo_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 269152, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "smo_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 311409, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "smo_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 274886, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + 
"unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "smo_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 322121, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "smo_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 294921, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "smo_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 286167, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "smo_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 292515, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "smo_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 285194, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "smo_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 294382, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "smo_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 266287, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "smo_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 279981, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "smo_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 309142, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "smo_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 281080, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "smo_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 283053, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "smo_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 284862, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "smo_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 291295, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "smo_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298595, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "smo_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 279568, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "smo_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 266494, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "smo_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 291985, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "smo_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 315321, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "smo_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 290764, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "smo_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 294390, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "smo_Latn-npi_Deva": { + "num_samples": 1012, + 
"number_of_characters": 279627, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "smo_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 341008, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "smo_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 318893, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "smo_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 192743, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "smo_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 278660, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "smo_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 281309, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "smo_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 305205, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "smo_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 278829, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "smo_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 282572, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "smo_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 303833, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "smo_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 283804, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "smo_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 278833, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "smo_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 195972, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "smo_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 289087, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "smo_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 286762, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "smo_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 298867, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + 
"unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "smo_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 279125, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "smo_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 300269, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "smo_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 295064, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "smo_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 284434, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "smo_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 243424, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "smo_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 193419, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "smo_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 280895, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "smo_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 293981, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "smo_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 285539, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "smo_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 296654, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "smo_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 295722, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "smo_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 299896, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "smo_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 283579, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "smo_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 321735, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "smo_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 298925, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "smo_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 291341, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "smo_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 287807, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "smo_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 279152, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "smo_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288128, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "smo_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 287802, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "smo_Latn-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 303375, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "smo_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 315759, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "smo_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 301086, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "smo_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 269785, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "smo_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 306533, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "smo_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 274004, + "unique_pairs": 1012, + 
"min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "smo_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 295681, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "smo_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 290270, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "smo_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 287563, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "smo_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 300327, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "smo_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 310902, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 
150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "smo_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 296463, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "smo_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 264513, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "smo_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 292791, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "smo_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 283524, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "smo_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 298979, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + 
"unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "smo_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 282623, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "smo_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 272312, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "smo_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 292986, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "smo_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285946, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "smo_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 285579, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "smo_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 254778, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "smo_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289791, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "smo_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 286881, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "smo_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 286149, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "smo_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 304386, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + 
"max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "smo_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 325269, + "unique_pairs": 1012, + "min_sentence1_length": 51, + "average_sentence1_length": 150.91304347826087, + "max_sentence1_length": 412, + "unique_sentence1": 1011, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tsn_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 274656, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tsn_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 289422, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tsn_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 326393, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tsn_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 294093, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, 
+ "tsn_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 316247, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tsn_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 289730, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tsn_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 302189, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tsn_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 321767, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tsn_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 298796, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tsn_Latn-ace_Latn": { + "num_samples": 1012, + 
"number_of_characters": 304968, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tsn_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 308605, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tsn_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 320574, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tsn_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 289968, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tsn_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 316466, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tsn_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 292443, + "unique_pairs": 1012, + 
"min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tsn_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 285490, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tsn_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 320032, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tsn_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 291633, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tsn_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 276608, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tsn_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 311026, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 
161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tsn_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 295001, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tsn_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 291762, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tsn_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 308897, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tsn_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 312371, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tsn_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 286862, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tsn_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 316382, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tsn_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 280995, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tsn_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 278148, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tsn_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 323786, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tsn_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 294327, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tsn_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 301664, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tsn_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298567, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tsn_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 296086, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tsn_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 324031, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tsn_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293815, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tsn_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 303512, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tsn_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 275112, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tsn_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 292077, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tsn_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 292494, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tsn_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 308991, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + 
}, + "tsn_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 308797, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tsn_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 289434, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tsn_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 302518, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tsn_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 311761, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tsn_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297481, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tsn_Latn-afr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 302725, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tsn_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 291646, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tsn_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 302857, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tsn_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 297595, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tsn_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 293407, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tsn_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 304198, + "unique_pairs": 1012, + 
"min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tsn_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 306453, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tsn_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 299881, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tsn_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 297782, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tsn_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 272081, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tsn_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 285265, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 
161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tsn_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 290483, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tsn_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 322715, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tsn_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 279035, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tsn_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300862, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tsn_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 283678, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tsn_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 295208, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tsn_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 292954, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tsn_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 295685, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tsn_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 301217, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tsn_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 297309, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tsn_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 305353, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tsn_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 302007, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tsn_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 309323, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tsn_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 304062, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tsn_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 301139, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tsn_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 312358, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tsn_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 310099, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tsn_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 311186, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tsn_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 318212, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tsn_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 292818, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, 
+ "tsn_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 312929, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tsn_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 299817, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tsn_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 311606, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tsn_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 300406, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tsn_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 296272, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tsn_Latn-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 250167, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tsn_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 295222, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tsn_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 303163, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tsn_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 319173, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tsn_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 229000, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tsn_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 287692, + "unique_pairs": 1012, + 
"min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tsn_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 310300, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tsn_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 317168, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tsn_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 301630, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tsn_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 271016, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tsn_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 302410, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 
161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tsn_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 298295, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tsn_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 299963, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tsn_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 293095, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tsn_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 309562, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tsn_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 305131, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tsn_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 287452, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tsn_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 327781, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tsn_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 279342, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tsn_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300859, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tsn_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 320673, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + 
"average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tsn_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 219978, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tsn_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 309029, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tsn_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 326204, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tsn_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 305656, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tsn_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 286939, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tsn_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 290208, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tsn_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 316793, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tsn_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 308816, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tsn_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 307554, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tsn_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 293589, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + 
}, + "tsn_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 300721, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tsn_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 310542, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tsn_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 291292, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tsn_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296317, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tsn_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 302158, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tsn_Latn-ars_Arab": { + "num_samples": 1012, + 
"number_of_characters": 279463, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tsn_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 321720, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tsn_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 285197, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tsn_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 332432, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tsn_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 305232, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tsn_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 296478, + "unique_pairs": 1012, + 
"min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tsn_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 302826, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tsn_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 295505, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tsn_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 304693, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tsn_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 276598, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tsn_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 290292, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 
161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tsn_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 319453, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tsn_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 291391, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tsn_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 293364, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tsn_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 295173, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tsn_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 301606, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tsn_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 308906, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tsn_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 289879, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tsn_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 276805, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tsn_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 302296, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tsn_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 325632, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tsn_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 301075, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tsn_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 304701, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tsn_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 289938, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tsn_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 351319, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tsn_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 329204, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + 
"max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tsn_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 203054, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tsn_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 288971, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tsn_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 291620, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tsn_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 315516, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tsn_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 289140, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, 
+ "tsn_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 292883, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tsn_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 314144, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tsn_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 294115, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tsn_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 289144, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tsn_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 206283, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tsn_Latn-ast_Latn": { + "num_samples": 1012, + 
"number_of_characters": 299398, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tsn_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 297073, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tsn_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 309178, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tsn_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 289436, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tsn_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 310580, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tsn_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 305375, + "unique_pairs": 1012, + 
"min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tsn_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 294745, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tsn_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 253735, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tsn_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 203730, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tsn_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 291206, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tsn_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 304292, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 
161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tsn_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 295850, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tsn_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 306965, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tsn_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 306033, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tsn_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 310207, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tsn_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 293890, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tsn_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 332046, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tsn_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 309236, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tsn_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 301652, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tsn_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 298118, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tsn_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 289463, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tsn_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 298439, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tsn_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 298113, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tsn_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 313686, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tsn_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 315759, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tsn_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 311397, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + 
"max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tsn_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 280096, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tsn_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 316844, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tsn_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 284315, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tsn_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 305992, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tsn_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 300581, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "tsn_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 297874, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tsn_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 310638, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tsn_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 321213, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tsn_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 306774, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tsn_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 274824, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tsn_Latn-hau_Latn": { + "num_samples": 1012, + 
"number_of_characters": 303102, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tsn_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 293835, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tsn_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 309290, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tsn_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 292934, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tsn_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 282623, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tsn_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 303297, + "unique_pairs": 1012, + 
"min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tsn_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296257, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tsn_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 295890, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tsn_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 265089, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tsn_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300102, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tsn_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 297192, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 
161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tsn_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 296460, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tsn_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 314697, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tsn_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 335580, + "unique_pairs": 1012, + "min_sentence1_length": 46, + "average_sentence1_length": 161.10177865612647, + "max_sentence1_length": 440, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "zul_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 259983, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "zul_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 274749, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "zul_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 311720, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "zul_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 279420, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "zul_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 301574, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "zul_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 275057, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "zul_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 287516, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zul_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 307094, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "zul_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 284123, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "zul_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 290295, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zul_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 293932, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zul_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 305901, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 
464, + "unique_sentence2": 1012 + }, + "zul_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 275295, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "zul_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 301793, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "zul_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 277770, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zul_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 270817, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "zul_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 305359, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zul_Latn-twi_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 276960, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "zul_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 261935, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "zul_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296353, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zul_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 280328, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zul_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 277089, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zul_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 294224, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "zul_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 297698, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "zul_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 272189, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "zul_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 301709, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "zul_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 266322, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "zul_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 263475, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "zul_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 309113, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "zul_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 279654, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zul_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 286991, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "zul_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283894, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zul_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 281413, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + 
"max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zul_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 309358, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "zul_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279142, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zul_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 288839, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "zul_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 260439, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "zul_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 277404, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "zul_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 277821, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "zul_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 294318, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zul_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 294124, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "zul_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 274761, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "zul_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 287845, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zul_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 297088, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zul_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282808, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zul_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 288052, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zul_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 276973, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zul_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 288184, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "zul_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 282922, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zul_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 278734, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zul_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 289525, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "zul_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 291780, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zul_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 285208, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "zul_Latn-umb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 283109, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "zul_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 257408, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "zul_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 270592, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "zul_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 275810, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zul_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 308042, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "zul_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 264362, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "zul_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286189, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "zul_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 269005, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "zul_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 280535, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "zul_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 278281, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "zul_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 281012, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "zul_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 286544, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zul_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 282636, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "zul_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 290680, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "zul_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 287334, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "zul_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 294650, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + 
"max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "zul_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 289389, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "zul_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 286466, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "zul_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 297685, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "zul_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 295426, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "zul_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 296513, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "zul_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 303539, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "zul_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 278145, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "zul_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 298256, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "zul_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 285144, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zul_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 296933, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zul_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 285733, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "zul_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 281599, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zul_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 235494, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "zul_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 280549, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "zul_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 288490, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + 
"unique_sentence2": 1012 + }, + "zul_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 304500, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "zul_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 214327, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "zul_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 273019, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "zul_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 295627, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "zul_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 302495, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "zul_Latn-vie_Latn": { + 
"num_samples": 1012, + "number_of_characters": 286957, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "zul_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 256343, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "zul_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 287737, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zul_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 283622, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "zul_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 285290, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zul_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 278422, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zul_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 294889, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zul_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290458, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "zul_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 272779, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "zul_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 313108, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "zul_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 264669, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "zul_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286186, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zul_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 306000, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "zul_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 205305, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "zul_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 294356, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zul_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 311531, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + 
"max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zul_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 290983, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "zul_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 272266, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "zul_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 275535, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "zul_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 302120, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "zul_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 294143, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zul_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 292881, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "zul_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 278916, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "zul_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 286048, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "zul_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 295869, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zul_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 276619, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "zul_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281644, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "zul_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 287485, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "zul_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 264790, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "zul_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 307047, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "zul_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 270524, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + 
"unique_sentence2": 1012 + }, + "zul_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 317759, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "zul_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 290559, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "zul_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 281805, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zul_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 288153, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "zul_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 280832, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "zul_Latn-ydd_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 290020, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "zul_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 261925, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "zul_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 275619, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zul_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 304780, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "zul_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 276718, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "zul_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 278691, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "zul_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 280500, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "zul_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 286933, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "zul_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294233, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "zul_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 275206, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "zul_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 262132, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "zul_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 287623, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "zul_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 310959, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "zul_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 286402, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zul_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 290028, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "zul_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 275265, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + 
"max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zul_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 336646, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "zul_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 314531, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "zul_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 188381, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "zul_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 274298, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "zul_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 276947, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "zul_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 300843, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "zul_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 274467, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "zul_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 278210, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "zul_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 299471, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "zul_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 279442, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "zul_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 274471, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "zul_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 191610, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "zul_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 284725, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zul_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 282400, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "zul_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 294505, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "zul_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 274763, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "zul_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 295907, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "zul_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 290702, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "zul_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 280072, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "zul_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 239062, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "zul_Latn-zho_Hant": { + 
"num_samples": 1012, + "number_of_characters": 189057, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "zul_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 276533, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "zul_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 289619, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "zul_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 281177, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "zul_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 292292, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zul_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 291360, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "zul_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 295534, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "zul_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 279217, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "zul_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 317373, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "zul_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 294563, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "zul_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 286979, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "zul_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 283445, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "zul_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 274790, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "zul_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283766, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "zul_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 283440, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "zul_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 299013, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + 
"max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "zul_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 301086, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "zul_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 311397, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "zul_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 265423, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "zul_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 302171, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "zul_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 269642, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "zul_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 291319, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "zul_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 285908, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "zul_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 283201, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "zul_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 295965, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "zul_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 306540, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 
156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "zul_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 292101, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "zul_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 260151, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "zul_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 288429, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "zul_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 279162, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "zul_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 294617, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + 
"unique_sentence2": 1012 + }, + "zul_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 278261, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "zul_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 267950, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "zul_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 288624, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "zul_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281584, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "zul_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 281217, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "zul_Latn-heb_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 250416, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "zul_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285429, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "zul_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 282519, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "zul_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 281787, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "zul_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 300024, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "zul_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 320907, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 146.60276679841897, + "max_sentence1_length": 425, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "azb_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 228682, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "azb_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 243448, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "azb_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 280419, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "azb_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 248119, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "azb_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 270273, + "unique_pairs": 1012, + "min_sentence1_length": 30, + 
"average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "azb_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 243756, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "azb_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 256215, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "azb_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 275793, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "azb_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 252822, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "azb_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 258994, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + 
"max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "azb_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 262631, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "azb_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 274600, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "azb_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 243994, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "azb_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 270492, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "azb_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 246469, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "azb_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 239516, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "azb_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 274058, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "azb_Arab-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 245659, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "azb_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 230634, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "azb_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265052, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "azb_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 249027, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "azb_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 245788, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "azb_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 262923, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "azb_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 266397, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "azb_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 240888, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + 
"unique_sentence2": 1012 + }, + "azb_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 270408, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "azb_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 235021, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "azb_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 232174, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "azb_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 277812, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "azb_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 248353, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "azb_Arab-hun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 255690, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "azb_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 252593, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "azb_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 250112, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "azb_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 278057, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "azb_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247841, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "azb_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 257538, + 
"unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "azb_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 229138, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "azb_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 246103, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "azb_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 246520, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "azb_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 263017, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "azb_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 262823, + "unique_pairs": 1012, + "min_sentence1_length": 30, + 
"average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "azb_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 243460, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "azb_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 256544, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "azb_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 265787, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "azb_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 251507, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "azb_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 256751, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + 
"max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "azb_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 245672, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "azb_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 256883, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "azb_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 251621, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "azb_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 247433, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "azb_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 258224, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "azb_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 260479, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "azb_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 253907, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "azb_Arab-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 251808, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "azb_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 226107, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "azb_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 239291, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "azb_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 244509, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "azb_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 276741, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "azb_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 233061, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "azb_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254888, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "azb_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 237704, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + 
"unique_sentence2": 1012 + }, + "azb_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 249234, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "azb_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 246980, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "azb_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 249711, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "azb_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 255243, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "azb_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 251335, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "azb_Arab-ind_Latn": { + 
"num_samples": 1012, + "number_of_characters": 259379, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "azb_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 256033, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "azb_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 263349, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "azb_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 258088, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "azb_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 255165, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "azb_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 266384, + 
"unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "azb_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 264125, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "azb_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 265212, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "azb_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 272238, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "azb_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 246844, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "azb_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 266955, + "unique_pairs": 1012, + "min_sentence1_length": 30, + 
"average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "azb_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 253843, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "azb_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 265632, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "azb_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 254432, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "azb_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 250298, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "azb_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 204193, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + 
"max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "azb_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 249248, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "azb_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 257189, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "azb_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 273199, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "azb_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 183026, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "azb_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 241718, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "azb_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 264326, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "azb_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 271194, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "azb_Arab-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 255656, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "azb_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 225042, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "azb_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 256436, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "azb_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 252321, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "azb_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 253989, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "azb_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 247121, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "azb_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 263588, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "azb_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259157, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "azb_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 241478, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "azb_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 281807, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "azb_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 233368, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "azb_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254885, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "azb_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 274699, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "azb_Arab-jpn_Jpan": { + 
"num_samples": 1012, + "number_of_characters": 174004, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "azb_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 263055, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "azb_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 280230, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "azb_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 259682, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "azb_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 240965, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "azb_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 244234, + 
"unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "azb_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 270819, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "azb_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 262842, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "azb_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 261580, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "azb_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 247615, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "azb_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 254747, + "unique_pairs": 1012, + "min_sentence1_length": 30, + 
"average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "azb_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 264568, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "azb_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 245318, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "azb_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250343, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "azb_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 256184, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "azb_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 233489, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + 
"max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "azb_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 275746, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "azb_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 239223, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "azb_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 286458, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "azb_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 259258, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "azb_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 250504, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "azb_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 256852, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "azb_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 249531, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "azb_Arab-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 258719, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "azb_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 230624, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "azb_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 244318, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "azb_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 273479, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "azb_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 245417, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "azb_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 247390, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "azb_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 249199, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "azb_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 255632, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "azb_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 262932, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "azb_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 243905, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "azb_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 230831, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "azb_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 256322, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "azb_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 279658, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "azb_Arab-kan_Knda": { + 
"num_samples": 1012, + "number_of_characters": 255101, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "azb_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 258727, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "azb_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 243964, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "azb_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 305345, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "azb_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 283230, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "azb_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 157080, + 
"unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "azb_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 242997, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "azb_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 245646, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "azb_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 269542, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "azb_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 243166, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "azb_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 246909, + "unique_pairs": 1012, + "min_sentence1_length": 30, + 
"average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "azb_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 268170, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "azb_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 248141, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "azb_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 243170, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "azb_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 160309, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "azb_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 253424, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + 
"max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "azb_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 251099, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "azb_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 263204, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "azb_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 243462, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "azb_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 264606, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "azb_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 259401, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + 
"min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "azb_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 248771, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "azb_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 207761, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "azb_Arab-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 157756, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "azb_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 245232, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "azb_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 258318, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "azb_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 249876, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "azb_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 260991, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "azb_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 260059, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "azb_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 264233, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "azb_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 247916, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + 
"unique_sentence2": 1012 + }, + "azb_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 286072, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "azb_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 263262, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "azb_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 255678, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "azb_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 252144, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "azb_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 243489, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "azb_Arab-kaz_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 252465, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "azb_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 252139, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "azb_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 267712, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "azb_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 269785, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "azb_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 280096, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "azb_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 265423, + 
"unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "azb_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 270870, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "azb_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 238341, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "azb_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 260018, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "azb_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 254607, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "azb_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 251900, + "unique_pairs": 1012, + "min_sentence1_length": 30, + 
"average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "azb_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 264664, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "azb_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 275239, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "azb_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 260800, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "azb_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 228850, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "azb_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 257128, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + 
"max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "azb_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 247861, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "azb_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 263316, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "azb_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 246960, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "azb_Arab-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 236649, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "azb_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 257323, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "azb_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250283, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "azb_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 249916, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "azb_Arab-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 219115, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "azb_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254128, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "azb_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 251218, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "azb_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 250486, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "azb_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 268723, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "azb_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 289606, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 115.67292490118577, + "max_sentence1_length": 327, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "deu_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 265430, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "deu_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 280196, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + 
"unique_sentence2": 1009 + }, + "deu_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 317167, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "deu_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 284867, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "deu_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 307021, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "deu_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 280504, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "deu_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 292963, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "deu_Latn-sot_Latn": { + 
"num_samples": 1012, + "number_of_characters": 312541, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "deu_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 289570, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "deu_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 295742, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "deu_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 299379, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "deu_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 311348, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "deu_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 280742, + "unique_pairs": 
1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "deu_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 307240, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "deu_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 283217, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "deu_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 276264, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "deu_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 310806, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "deu_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 282407, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "deu_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 267382, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "deu_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 301800, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "deu_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 285775, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "deu_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 282536, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "deu_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 299671, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + 
"max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "deu_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 303145, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "deu_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 277636, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "deu_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 307156, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "deu_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 271769, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "deu_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 268922, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + 
"min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "deu_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 314560, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "deu_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 285101, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "deu_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 292438, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "deu_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289341, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "deu_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 286860, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "deu_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 314805, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "deu_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284589, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "deu_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 294286, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "deu_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 265886, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "deu_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 282851, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + 
"unique_sentence2": 1012 + }, + "deu_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 283268, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "deu_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 299765, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "deu_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 299571, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "deu_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 280208, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "deu_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 293292, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "deu_Latn-ssw_Latn": { + 
"num_samples": 1012, + "number_of_characters": 302535, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "deu_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288255, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "deu_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 293499, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "deu_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 282420, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "deu_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 293631, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "deu_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 288369, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "deu_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 284181, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "deu_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 294972, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "deu_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 297227, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "deu_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 290655, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "deu_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 288556, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "deu_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 262855, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "deu_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 276039, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "deu_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 281257, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "deu_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 313489, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "deu_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 269809, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + 
"max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "deu_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291636, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "deu_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 274452, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "deu_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 285982, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "deu_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 283728, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "deu_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 286459, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "deu_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 291991, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "deu_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 288083, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "deu_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 296127, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "deu_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 292781, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "deu_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 300097, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "deu_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 294836, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "deu_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 291913, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "deu_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 303132, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "deu_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 300873, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "deu_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 301960, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + 
"unique_sentence2": 1009 + }, + "deu_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 308986, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "deu_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 283592, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "deu_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 303703, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "deu_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 290591, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "deu_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 302380, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "deu_Latn-szl_Latn": { + 
"num_samples": 1012, + "number_of_characters": 291180, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "deu_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 287046, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "deu_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 240941, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "deu_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 285996, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "deu_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 293937, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "deu_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 309947, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "deu_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 219774, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "deu_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 278466, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "deu_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 301074, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "deu_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 307942, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "deu_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 292404, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "deu_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 261790, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "deu_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 293184, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "deu_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 289069, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "deu_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 290737, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "deu_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 283869, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + 
"max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "deu_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 300336, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "deu_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295905, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "deu_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 278226, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "deu_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 318555, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "deu_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 270116, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "deu_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291633, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "deu_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 311447, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "deu_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 210752, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "deu_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 299803, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "deu_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 316978, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 
161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "deu_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 296430, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "deu_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 277713, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "deu_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 280982, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "deu_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 307567, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "deu_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 299590, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + 
"unique_sentence2": 1012 + }, + "deu_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 298328, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "deu_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 284363, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "deu_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 291495, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "deu_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 301316, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "deu_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 282066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "deu_Latn-tat_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 287091, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "deu_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 292932, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "deu_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 270237, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "deu_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 312494, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "deu_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 275971, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "deu_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 323206, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "deu_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 296006, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "deu_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 287252, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "deu_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 293600, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "deu_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 286279, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "deu_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 295467, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "deu_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 267372, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "deu_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 281066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "deu_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 310227, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "deu_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 282165, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "deu_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 284138, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + 
"max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "deu_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 285947, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "deu_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 292380, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "deu_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 299680, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "deu_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 280653, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "deu_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 267579, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "deu_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 293070, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "deu_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 316406, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "deu_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 291849, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "deu_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 295475, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "deu_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 280712, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "deu_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 342093, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "deu_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 319978, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "deu_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 193828, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "deu_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 279745, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "deu_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 282394, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + 
"unique_sentence2": 1012 + }, + "deu_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 306290, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "deu_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 279914, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "deu_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 283657, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "deu_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 304918, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "deu_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 284889, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "deu_Latn-tha_Thai": { + 
"num_samples": 1012, + "number_of_characters": 279918, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "deu_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 197057, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "deu_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 290172, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "deu_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 287847, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "deu_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 299952, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "deu_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 280210, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "deu_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 301354, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "deu_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 296149, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "deu_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 285519, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "deu_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 244509, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "deu_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 194504, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "deu_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 281980, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "deu_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 295066, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "deu_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 286624, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "deu_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 297739, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "deu_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 296807, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + 
"max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "deu_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 300981, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "deu_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 284664, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "deu_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 322820, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "deu_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 300010, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "deu_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 292426, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "deu_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 288892, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "deu_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 280237, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "deu_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289213, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "deu_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 288887, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "deu_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 304460, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "deu_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 306533, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "deu_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 316844, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "deu_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 302171, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "deu_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 270870, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "deu_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 275089, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + 
"unique_sentence2": 1010 + }, + "deu_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 296766, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "deu_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 291355, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "deu_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 288648, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "deu_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 301412, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "deu_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 311987, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "deu_Latn-azj_Latn": { + 
"num_samples": 1012, + "number_of_characters": 297548, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "deu_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 265598, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "deu_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 293876, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "deu_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 284609, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "deu_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 300064, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "deu_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 283708, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "deu_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 273397, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "deu_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 294071, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "deu_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287031, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "deu_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 286664, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "deu_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 255863, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "deu_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290876, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "deu_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 287966, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "deu_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 287234, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "deu_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 305471, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + "max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "deu_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 326354, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 151.98517786561266, + 
"max_sentence1_length": 408, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "hat_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 232901, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "hat_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 247667, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "hat_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 284638, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hat_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 252338, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "hat_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 274492, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + 
"min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "hat_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 247975, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "hat_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 260434, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hat_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 280012, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "hat_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 257041, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "hat_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 263213, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 
140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hat_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 266850, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hat_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 278819, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "hat_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 248213, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "hat_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 274711, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "hat_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 250688, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "hat_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 243735, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hat_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 278277, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hat_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 249878, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "hat_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 234853, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "hat_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269271, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hat_Latn-eng_Latn": { + 
"num_samples": 1012, + "number_of_characters": 253246, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hat_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 250007, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hat_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 267142, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hat_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 270616, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hat_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 245107, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "hat_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 274627, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hat_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 239240, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "hat_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 236393, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "hat_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 282031, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "hat_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 252572, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hat_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 259909, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "hat_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256812, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hat_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 254331, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hat_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 282276, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "hat_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 252060, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hat_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 261757, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + 
"max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "hat_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 233357, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "hat_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 250322, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hat_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 250739, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "hat_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 267236, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hat_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 267042, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + 
"min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "hat_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 247679, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hat_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 260763, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hat_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 270006, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hat_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255726, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hat_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 260970, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 
138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hat_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 249891, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hat_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 261102, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hat_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 255840, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hat_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 251652, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hat_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 262443, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + 
"unique_sentence2": 1011 + }, + "hat_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 264698, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hat_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 258126, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hat_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 256027, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hat_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 230326, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "hat_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 243510, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "hat_Latn-ewe_Latn": { + 
"num_samples": 1012, + "number_of_characters": 248728, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hat_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 280960, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "hat_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 237280, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "hat_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259107, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "hat_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 241923, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "hat_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 253453, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hat_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 251199, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hat_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 253930, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hat_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 259462, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hat_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 255554, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hat_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 263598, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hat_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 260252, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hat_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 267568, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "hat_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 262307, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hat_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 259384, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "hat_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 270603, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + 
"max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hat_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 268344, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hat_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 269431, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "hat_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 276457, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "hat_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 251063, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hat_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 271174, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + 
"min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hat_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 258062, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hat_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 269851, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hat_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 258651, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "hat_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 254517, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hat_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 208412, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 26, + "average_sentence2_length": 
86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "hat_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 253467, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hat_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 261408, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hat_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 277418, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "hat_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 187245, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "hat_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 245937, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + 
"unique_sentence2": 1012 + }, + "hat_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 268545, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "hat_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 275413, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "hat_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 259875, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "hat_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 229261, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "hat_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 260655, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hat_Latn-fon_Latn": { + 
"num_samples": 1012, + "number_of_characters": 256540, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "hat_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 258208, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hat_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 251340, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hat_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 267807, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hat_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263376, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hat_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 245697, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hat_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 286026, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hat_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 237587, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hat_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259104, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hat_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 278918, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "hat_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 178223, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "hat_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 267274, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hat_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 284449, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hat_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 263901, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "hat_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 245184, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hat_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 248453, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + 
"max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hat_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 275038, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "hat_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 267061, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hat_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 265799, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hat_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 251834, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "hat_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 258966, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + 
"min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "hat_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 268787, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hat_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 249537, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "hat_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254562, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hat_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 260403, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "hat_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 237708, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 
115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hat_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 279965, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hat_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 243442, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "hat_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 290677, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "hat_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 263477, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hat_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 254723, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + 
"unique_sentence2": 1012 + }, + "hat_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 261071, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hat_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 253750, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hat_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 262938, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hat_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 234843, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "hat_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 248537, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hat_Latn-gaz_Latn": { + 
"num_samples": 1012, + "number_of_characters": 277698, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "hat_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 249636, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "hat_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 251609, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "hat_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 253418, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "hat_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 259851, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hat_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267151, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "hat_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 248124, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hat_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 235050, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "hat_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 260541, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hat_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 283877, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "hat_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 259320, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hat_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 262946, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hat_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 248183, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hat_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 309564, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "hat_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 287449, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hat_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 161299, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + 
"max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "hat_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 247216, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hat_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 249865, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hat_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 273761, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "hat_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 247385, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "hat_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 251128, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + 
"min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hat_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 272389, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "hat_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 252360, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hat_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 247389, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hat_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 164528, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "hat_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 257643, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 
134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hat_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 255318, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hat_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 267423, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hat_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 247681, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "hat_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 268825, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "hat_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 263620, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + 
"unique_sentence2": 1012 + }, + "hat_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 252990, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hat_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 211980, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "hat_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 161975, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "hat_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 249451, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hat_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 262537, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hat_Latn-grn_Latn": { + 
"num_samples": 1012, + "number_of_characters": 254095, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "hat_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 265210, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hat_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 264278, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "hat_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 268452, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hat_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 252135, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hat_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 290291, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "hat_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 267481, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hat_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 259897, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "hat_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 256363, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hat_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 247708, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hat_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256684, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hat_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 256358, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "hat_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 271931, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hat_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 274004, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "hat_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 284315, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hat_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 269642, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + 
"max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "hat_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 238341, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "hat_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 275089, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hat_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 264237, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "hat_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 258826, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hat_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 256119, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + 
"min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hat_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 268883, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "hat_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 279458, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hat_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 265019, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "hat_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 233069, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "hat_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 261347, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 
138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "hat_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 252080, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hat_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 267535, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hat_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 251179, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "hat_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 240868, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "hat_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 261542, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + 
"unique_sentence2": 1012 + }, + "hat_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254502, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "hat_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 254135, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "hat_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 223334, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "hat_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 258347, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hat_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 255437, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hat_Latn-pan_Guru": { + 
"num_samples": 1012, + "number_of_characters": 254705, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hat_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 272942, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "hat_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 293825, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 119.84189723320158, + "max_sentence1_length": 333, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kbp_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 254578, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kbp_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 269344, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kbp_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 306315, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kbp_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 274015, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kbp_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 296169, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kbp_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 269652, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kbp_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 282111, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kbp_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 301689, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kbp_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 278718, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kbp_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 284890, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kbp_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 288527, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kbp_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 300496, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kbp_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 269890, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 
377, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kbp_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 296388, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "kbp_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 272365, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kbp_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 265412, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kbp_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 299954, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kbp_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 271555, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 
28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kbp_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 256530, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kbp_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 290948, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kbp_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 274923, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kbp_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 271684, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kbp_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 288819, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kbp_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 292293, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kbp_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 266784, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kbp_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 296304, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kbp_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 260917, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kbp_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 258070, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, 
+ "kbp_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 303708, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kbp_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 274249, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kbp_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 281586, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kbp_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278489, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kbp_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 276008, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kbp_Latn-plt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 303953, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kbp_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273737, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kbp_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 283434, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kbp_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 255034, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kbp_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 271999, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kbp_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 272416, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kbp_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 288913, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kbp_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 288719, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kbp_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 269356, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kbp_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 282440, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kbp_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 291683, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kbp_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277403, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kbp_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 282647, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kbp_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 271568, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kbp_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 282779, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kbp_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 277517, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kbp_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 273329, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kbp_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 284120, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kbp_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 286375, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kbp_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 279803, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kbp_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 277704, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kbp_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 252003, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kbp_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 265187, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kbp_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 270405, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kbp_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 302637, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kbp_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 258957, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + 
"max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kbp_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280784, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kbp_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 263600, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kbp_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 275130, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kbp_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 272876, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kbp_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 275607, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + 
}, + "kbp_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 281139, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kbp_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 277231, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kbp_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 285275, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kbp_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 281929, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kbp_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 289245, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kbp_Latn-quy_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283984, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kbp_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 281061, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kbp_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 292280, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kbp_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 290021, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kbp_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 291108, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kbp_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 298134, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kbp_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 272740, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kbp_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 292851, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kbp_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 279739, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kbp_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 291528, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kbp_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 280328, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kbp_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 276194, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kbp_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 230089, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kbp_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 275144, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kbp_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 283085, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kbp_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 299095, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + 
"unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kbp_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 208922, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "kbp_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 267614, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kbp_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 290222, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kbp_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 297090, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kbp_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 281552, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kbp_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 250938, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kbp_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 282332, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kbp_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 278217, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kbp_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 279885, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kbp_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 273017, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kbp_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 289484, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kbp_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285053, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kbp_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 267374, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kbp_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 307703, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kbp_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 259264, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, 
+ "kbp_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280781, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kbp_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 300595, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kbp_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 199900, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kbp_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 288951, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kbp_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 306126, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kbp_Latn-sag_Latn": { + "num_samples": 1012, + 
"number_of_characters": 285578, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kbp_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 266861, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kbp_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 270130, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kbp_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 296715, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kbp_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 288738, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kbp_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 287476, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kbp_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 273511, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kbp_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 280643, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kbp_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 290464, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kbp_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 271214, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kbp_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276239, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kbp_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 282080, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kbp_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 259385, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kbp_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 301642, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kbp_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 265119, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kbp_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 312354, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kbp_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 285154, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kbp_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 276400, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kbp_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 282748, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kbp_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 275427, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kbp_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 284615, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kbp_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 256520, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kbp_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 270214, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kbp_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 299375, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kbp_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 271313, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kbp_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 273286, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + 
"max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kbp_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 275095, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kbp_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 281528, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kbp_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288828, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kbp_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 269801, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kbp_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 256727, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + 
"kbp_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 282218, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kbp_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 305554, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kbp_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 280997, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kbp_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 284623, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kbp_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 269860, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kbp_Latn-shn_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 331241, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kbp_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 309126, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kbp_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 182976, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kbp_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 268893, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kbp_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 271542, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kbp_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 295438, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kbp_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 269062, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kbp_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 272805, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kbp_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 294066, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kbp_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 274037, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kbp_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 269066, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kbp_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 186205, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kbp_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 279320, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kbp_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 276995, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kbp_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 289100, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kbp_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 269358, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kbp_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 290502, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "kbp_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 285297, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kbp_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 274667, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kbp_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 233657, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kbp_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 183652, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + 
"average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kbp_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 271128, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kbp_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 284214, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kbp_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 275772, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kbp_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 286887, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kbp_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 285955, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + 
"max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kbp_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 290129, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kbp_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 273812, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kbp_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 311968, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kbp_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 289158, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kbp_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 281574, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, 
+ "kbp_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 278040, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kbp_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 269385, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kbp_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278361, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kbp_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 278035, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kbp_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 293608, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kbp_Latn-smo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 295681, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kbp_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 305992, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kbp_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 291319, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kbp_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 260018, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kbp_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 296766, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kbp_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 264237, + "unique_pairs": 1012, + 
"min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kbp_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 280503, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kbp_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 277796, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kbp_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 290560, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kbp_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 301135, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kbp_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 286696, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 
141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kbp_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 254746, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kbp_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 283024, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kbp_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 273757, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kbp_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 289212, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kbp_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 272856, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kbp_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 262545, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "kbp_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 283219, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kbp_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276179, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kbp_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 275812, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kbp_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 245011, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + 
"average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kbp_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280024, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kbp_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 277114, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kbp_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 276382, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kbp_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 294619, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kbp_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 315502, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 141.26185770750988, + "max_sentence1_length": 377, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + 
"max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "luo_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 249167, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "luo_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 263933, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "luo_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 300904, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "luo_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 268604, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "luo_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 290758, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + 
}, + "luo_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 264241, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "luo_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 276700, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "luo_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 296278, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "luo_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 273307, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "luo_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 279479, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "luo_Latn-ban_Latn": { + "num_samples": 1012, + 
"number_of_characters": 283116, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "luo_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 295085, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "luo_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 264479, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "luo_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 290977, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "luo_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 266954, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "luo_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 260001, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "luo_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 294543, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "luo_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 266144, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "luo_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 251119, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "luo_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285537, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "luo_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 269512, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "luo_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 266273, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "luo_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 283408, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "luo_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 286882, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "luo_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 261373, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "luo_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 290893, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "luo_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 255506, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "luo_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 252659, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "luo_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 298297, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "luo_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 268838, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "luo_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 276175, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + 
"average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "luo_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273078, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "luo_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 270597, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "luo_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 298542, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "luo_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268326, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "luo_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 278023, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + 
"max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "luo_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 249623, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "luo_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 266588, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "luo_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 267005, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "luo_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 283502, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "luo_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 283308, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + 
}, + "luo_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 263945, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "luo_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 277029, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "luo_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 286272, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "luo_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271992, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "luo_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 277236, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "luo_Latn-bho_Deva": { + "num_samples": 1012, + 
"number_of_characters": 266157, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "luo_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 277368, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "luo_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 272106, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "luo_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 267918, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "luo_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 278709, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "luo_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 280964, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "luo_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 274392, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "luo_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 272293, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "luo_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 246592, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "luo_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 259776, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "luo_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 264994, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "luo_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 297226, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "luo_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 253546, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "luo_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275373, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "luo_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 258189, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "luo_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 269719, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "luo_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 267465, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "luo_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 270196, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "luo_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 275728, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "luo_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 271820, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "luo_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 279864, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "luo_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 276518, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "luo_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 283834, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "luo_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 278573, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "luo_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 275650, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "luo_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 286869, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "luo_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 284610, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "luo_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 285697, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "luo_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 292723, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "luo_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 267329, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "luo_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 287440, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, 
+ "luo_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 274328, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "luo_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 286117, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "luo_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 274917, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "luo_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 270783, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "luo_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 224678, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "luo_Latn-bos_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269733, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "luo_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 277674, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "luo_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 293684, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "luo_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 203511, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "luo_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 262203, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "luo_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 284811, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "luo_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 291679, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "luo_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 276141, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "luo_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 245527, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "luo_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 276921, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "luo_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 272806, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "luo_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 274474, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "luo_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 267606, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "luo_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 284073, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "luo_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279642, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "luo_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 261963, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "luo_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 302292, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "luo_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 253853, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "luo_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275370, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "luo_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 295184, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "luo_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 194489, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + 
"average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "luo_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 283540, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "luo_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 300715, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "luo_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 280167, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "luo_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 261450, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "luo_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 264719, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + 
"max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "luo_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 291304, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "luo_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 283327, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "luo_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 282065, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "luo_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 268100, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "luo_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 275232, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + 
}, + "luo_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 285053, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "luo_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 265803, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "luo_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270828, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "luo_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 276669, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "luo_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 253974, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "luo_Latn-ceb_Latn": { + "num_samples": 1012, + 
"number_of_characters": 296231, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "luo_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 259708, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "luo_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 306943, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "luo_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 279743, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "luo_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 270989, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "luo_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 277337, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "luo_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 270016, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "luo_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 279204, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "luo_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 251109, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "luo_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 264803, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "luo_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 293964, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "luo_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 265902, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "luo_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 267875, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "luo_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 269684, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "luo_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 276117, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "luo_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283417, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "luo_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 264390, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "luo_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 251316, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "luo_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 276807, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "luo_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 300143, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "luo_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 275586, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "luo_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 279212, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "luo_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 264449, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "luo_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 325830, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "luo_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 303715, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "luo_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 177565, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + 
"max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "luo_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 263482, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "luo_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 266131, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "luo_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 290027, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "luo_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 263651, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "luo_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 267394, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, 
+ "luo_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 288655, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "luo_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 268626, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "luo_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 263655, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "luo_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 180794, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "luo_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 273909, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "luo_Latn-crh_Latn": { + "num_samples": 1012, + 
"number_of_characters": 271584, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "luo_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 283689, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "luo_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 263947, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "luo_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 285091, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "luo_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 279886, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "luo_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 269256, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "luo_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 228246, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "luo_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 178241, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "luo_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 265717, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "luo_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 278803, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "luo_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 270361, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "luo_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 281476, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "luo_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 280544, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "luo_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 284718, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "luo_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 268401, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "luo_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 306557, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "luo_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 283747, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "luo_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 276163, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "luo_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 272629, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "luo_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 263974, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "luo_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272950, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "luo_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 272624, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "luo_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 288197, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "luo_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 290270, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "luo_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 300581, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "luo_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 285908, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + 
"max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "luo_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 254607, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "luo_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 291355, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "luo_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 258826, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "luo_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 280503, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "luo_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 272385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, 
+ "luo_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 285149, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "luo_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 295724, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "luo_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 281285, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "luo_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 249335, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "luo_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 277613, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "luo_Latn-kea_Latn": { + "num_samples": 1012, + 
"number_of_characters": 268346, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "luo_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 283801, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "luo_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 267445, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "luo_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 257134, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "luo_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 277808, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "luo_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270768, + "unique_pairs": 1012, + 
"min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "luo_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 270401, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "luo_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 239600, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "luo_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274613, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "luo_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 271703, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "luo_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 270971, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 
135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "luo_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 289208, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "luo_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 310091, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 135.91501976284584, + "max_sentence1_length": 392, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "ory_Orya-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 246460, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "ory_Orya-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 261226, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "ory_Orya-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 298197, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + 
"unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ory_Orya-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 265897, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "ory_Orya-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 288051, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "ory_Orya-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 261534, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "ory_Orya-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 273993, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ory_Orya-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 293571, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "ory_Orya-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 270600, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "ory_Orya-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 276772, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ory_Orya-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 280409, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ory_Orya-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 292378, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "ory_Orya-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 261772, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, 
+ "unique_sentence2": 1011 + }, + "ory_Orya-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 288270, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "ory_Orya-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 264247, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ory_Orya-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 257294, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ory_Orya-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 291836, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ory_Orya-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 263437, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "ory_Orya-acm_Arab": { + 
"num_samples": 1012, + "number_of_characters": 248412, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "ory_Orya-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282830, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ory_Orya-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 266805, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ory_Orya-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 263566, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ory_Orya-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 280701, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ory_Orya-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 284175, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ory_Orya-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 258666, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "ory_Orya-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 288186, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ory_Orya-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 252799, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "ory_Orya-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 249952, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "ory_Orya-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 295590, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "ory_Orya-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 266131, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ory_Orya-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 273468, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "ory_Orya-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270371, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ory_Orya-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 267890, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ory_Orya-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 295835, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + 
"max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "ory_Orya-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265619, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ory_Orya-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 275316, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "ory_Orya-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 246916, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "ory_Orya-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 263881, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ory_Orya-est_Latn": { + "num_samples": 1012, + "number_of_characters": 264298, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "ory_Orya-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 280795, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ory_Orya-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 280601, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "ory_Orya-min_Arab": { + "num_samples": 1012, + "number_of_characters": 261238, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ory_Orya-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 274322, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ory_Orya-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 283565, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ory_Orya-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269285, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ory_Orya-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 274529, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ory_Orya-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 263450, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ory_Orya-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 274661, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ory_Orya-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 269399, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "ory_Orya-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 265211, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ory_Orya-min_Latn": { + "num_samples": 1012, + "number_of_characters": 276002, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "ory_Orya-por_Latn": { + "num_samples": 1012, + "number_of_characters": 278257, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ory_Orya-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 271685, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "ory_Orya-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 269586, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ory_Orya-ajp_Arab": { + 
"num_samples": 1012, + "number_of_characters": 243885, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "ory_Orya-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 257069, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "ory_Orya-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 262287, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ory_Orya-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 294519, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "ory_Orya-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 250839, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "ory_Orya-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272666, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "ory_Orya-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 255482, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "ory_Orya-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 267012, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ory_Orya-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 264758, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ory_Orya-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 267489, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "ory_Orya-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 273021, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ory_Orya-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 269113, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ory_Orya-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 277157, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "ory_Orya-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 273811, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ory_Orya-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 281127, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "ory_Orya-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 275866, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + 
"max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ory_Orya-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 272943, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "ory_Orya-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 284162, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "ory_Orya-als_Latn": { + "num_samples": 1012, + "number_of_characters": 281903, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ory_Orya-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 282990, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "ory_Orya-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 290016, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + 
"min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "ory_Orya-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 264622, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ory_Orya-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 284733, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "ory_Orya-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 271621, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ory_Orya-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 283410, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ory_Orya-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 272210, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "ory_Orya-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 268076, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ory_Orya-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 221971, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "ory_Orya-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 267026, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "ory_Orya-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 274967, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ory_Orya-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 290977, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + 
"unique_sentence2": 1012 + }, + "ory_Orya-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 200804, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "ory_Orya-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 259496, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "ory_Orya-run_Latn": { + "num_samples": 1012, + "number_of_characters": 282104, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "ory_Orya-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 288972, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "ory_Orya-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 273434, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "ory_Orya-apc_Arab": { + 
"num_samples": 1012, + "number_of_characters": 242820, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "ory_Orya-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 274214, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ory_Orya-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 270099, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "ory_Orya-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 271767, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ory_Orya-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 264899, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ory_Orya-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 281366, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ory_Orya-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276935, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "ory_Orya-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 259256, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ory_Orya-war_Latn": { + "num_samples": 1012, + "number_of_characters": 299585, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "ory_Orya-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 251146, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ory_Orya-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272663, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ory_Orya-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 292477, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "ory_Orya-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 191782, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "ory_Orya-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 280833, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ory_Orya-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 298008, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ory_Orya-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 277460, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + 
"max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "ory_Orya-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 258743, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "ory_Orya-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 262012, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "ory_Orya-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 288597, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "ory_Orya-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 280620, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ory_Orya-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 279358, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + 
"min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "ory_Orya-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 265393, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "ory_Orya-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 272525, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "ory_Orya-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 282346, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ory_Orya-san_Deva": { + "num_samples": 1012, + "number_of_characters": 263096, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "ory_Orya-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268121, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ory_Orya-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 273962, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "ory_Orya-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 251267, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "ory_Orya-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 293524, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "ory_Orya-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 257001, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "ory_Orya-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 304236, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + 
"unique_sentence2": 1012 + }, + "ory_Orya-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 277036, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ory_Orya-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 268282, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ory_Orya-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 274630, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ory_Orya-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 267309, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "ory_Orya-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 276497, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "ory_Orya-ary_Arab": { + 
"num_samples": 1012, + "number_of_characters": 248402, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "ory_Orya-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 262096, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ory_Orya-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 291257, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "ory_Orya-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 263195, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "ory_Orya-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 265168, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "ory_Orya-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 266977, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "ory_Orya-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 273410, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ory_Orya-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280710, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "ory_Orya-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 261683, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "ory_Orya-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 248609, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "ory_Orya-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 274100, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "ory_Orya-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 297436, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "ory_Orya-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 272879, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ory_Orya-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 276505, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "ory_Orya-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 261742, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ory_Orya-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 323123, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + 
"max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "ory_Orya-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 301008, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "ory_Orya-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 174858, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "ory_Orya-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 260775, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "ory_Orya-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 263424, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "ory_Orya-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 287320, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "ory_Orya-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 260944, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "ory_Orya-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 264687, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "ory_Orya-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 285948, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "ory_Orya-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 265919, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "ory_Orya-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 260948, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "ory_Orya-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 178087, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "ory_Orya-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 271202, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ory_Orya-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 268877, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "ory_Orya-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 280982, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "ory_Orya-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 261240, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + 
"unique_sentence2": 1012 + }, + "ory_Orya-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 282384, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "ory_Orya-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 277179, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "ory_Orya-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 266549, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "ory_Orya-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 225539, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "ory_Orya-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 175534, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "ory_Orya-awa_Deva": { + 
"num_samples": 1012, + "number_of_characters": 263010, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "ory_Orya-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 276096, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ory_Orya-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 267654, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "ory_Orya-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 278769, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ory_Orya-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 277837, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "ory_Orya-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 282011, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "ory_Orya-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 265694, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "ory_Orya-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 303850, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "ory_Orya-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 281040, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ory_Orya-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 273456, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "ory_Orya-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 269922, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "ory_Orya-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 261267, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "ory_Orya-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270243, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "ory_Orya-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 269917, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "ory_Orya-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 285490, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ory_Orya-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 287563, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + 
"max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "ory_Orya-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 297874, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "ory_Orya-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 283201, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "ory_Orya-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 251900, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "ory_Orya-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 288648, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "ory_Orya-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 256119, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "ory_Orya-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 277796, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "ory_Orya-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 272385, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "ory_Orya-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 282442, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "ory_Orya-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 293017, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "ory_Orya-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 278578, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "ory_Orya-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 246628, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "ory_Orya-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 274906, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "ory_Orya-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 265639, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "ory_Orya-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 281094, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "ory_Orya-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 264738, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + 
"unique_sentence2": 1012 + }, + "ory_Orya-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 254427, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "ory_Orya-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 275101, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "ory_Orya-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268061, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "ory_Orya-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 267694, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "ory_Orya-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 236893, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "ory_Orya-khk_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 271906, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "ory_Orya-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 268996, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ory_Orya-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 268264, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "ory_Orya-som_Latn": { + "num_samples": 1012, + "number_of_characters": 286501, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "ory_Orya-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 307384, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "sna_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 259224, + 
"unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "sna_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 273990, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "sna_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 310961, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sna_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 278661, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "sna_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 300815, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "sna_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 274298, + "unique_pairs": 1012, + "min_sentence1_length": 40, + 
"average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "sna_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 286757, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sna_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 306335, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "sna_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 283364, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "sna_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 289536, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sna_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 293173, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 
424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sna_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 305142, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "sna_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 274536, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "sna_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 301034, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "sna_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 277011, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sna_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 270058, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 
35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sna_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 304600, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sna_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 276201, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "sna_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 261176, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "sna_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295594, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sna_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 279569, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sna_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 276330, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sna_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 293465, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sna_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 296939, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sna_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 271430, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "sna_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 300950, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"sna_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 265563, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "sna_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 262716, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "sna_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 308354, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "sna_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 278895, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sna_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 286232, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "sna_Latn-kir_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 283135, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sna_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 280654, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sna_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 308599, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "sna_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278383, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sna_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 288080, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "sna_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 259680, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "sna_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 276645, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sna_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 277062, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "sna_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 293559, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sna_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 293365, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "sna_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 274002, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sna_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 287086, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sna_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 296329, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sna_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282049, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sna_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 287293, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sna_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 276214, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sna_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 287425, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "sna_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 282163, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sna_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 277975, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sna_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 288766, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "sna_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 291021, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sna_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 284449, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sna_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 282350, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sna_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 256649, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "sna_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 269833, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "sna_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 275051, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sna_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 307283, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "sna_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 263603, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "sna_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285430, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "sna_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 268246, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "sna_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 279776, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + 
}, + "sna_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 277522, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sna_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 280253, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "sna_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 285785, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sna_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 281877, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sna_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 289921, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "sna_Latn-knc_Latn": { + "num_samples": 1012, + 
"number_of_characters": 286575, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sna_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 293891, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "sna_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 288630, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sna_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 285707, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "sna_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 296926, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "sna_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 294667, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sna_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 295754, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "sna_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 302780, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "sna_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 277386, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sna_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 297497, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "sna_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 284385, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sna_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 296174, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sna_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 284974, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "sna_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 280840, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sna_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 234735, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "sna_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 279790, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + 
"unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "sna_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 287731, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sna_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 303741, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "sna_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 213568, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "sna_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 272260, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "sna_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 294868, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "sna_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 301736, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "sna_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 286198, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "sna_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 255584, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "sna_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 286978, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sna_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 282863, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + 
"max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "sna_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 284531, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sna_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 277663, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sna_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 294130, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sna_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289699, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "sna_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 272020, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, 
+ "sna_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 312349, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "sna_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 263910, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sna_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285427, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sna_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 305241, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "sna_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 204546, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "sna_Latn-lij_Latn": { + "num_samples": 1012, + 
"number_of_characters": 293597, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sna_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 310772, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sna_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 290224, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "sna_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 271507, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "sna_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 274776, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "sna_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 301361, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "sna_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 293384, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sna_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 292122, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "sna_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 278157, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "sna_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 285289, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "sna_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 295110, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sna_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 275860, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "sna_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280885, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sna_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 286726, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "sna_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 264031, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "sna_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 306288, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + 
"unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "sna_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 269765, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "sna_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 317000, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "sna_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 289800, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sna_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 281046, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sna_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 287394, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sna_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 280073, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "sna_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 289261, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "sna_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 261166, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "sna_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 274860, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sna_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 304021, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + 
"max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "sna_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 275959, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "sna_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 277932, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "sna_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 279741, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "sna_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 286174, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sna_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293474, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, 
+ "sna_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 274447, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "sna_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 261373, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "sna_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 286864, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "sna_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 310200, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "sna_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 285643, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sna_Latn-lmo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 289269, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "sna_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 274506, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sna_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 335887, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "sna_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 313772, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "sna_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 187622, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "sna_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 273539, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "sna_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 276188, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "sna_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 300084, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "sna_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 273708, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "sna_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 277451, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "sna_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 298712, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "sna_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 278683, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "sna_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 273712, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "sna_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 190851, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "sna_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 283966, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sna_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 281641, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + 
"unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "sna_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 293746, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "sna_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 274004, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "sna_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 295148, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "sna_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 289943, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "sna_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 279313, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "sna_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 238303, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "sna_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 188298, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "sna_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 275774, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "sna_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 288860, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sna_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 280418, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + 
"max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "sna_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 291533, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sna_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 290601, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "sna_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 294775, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "sna_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 278458, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "sna_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 316614, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + 
}, + "sna_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 293804, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "sna_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 286220, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "sna_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 282686, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "sna_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 274031, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "sna_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283007, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "sna_Latn-lug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 282681, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "sna_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 298254, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sna_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 300327, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "sna_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 310638, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "sna_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 295965, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "sna_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 264664, + "unique_pairs": 1012, + 
"min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "sna_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 301412, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "sna_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 268883, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "sna_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 290560, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "sna_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 285149, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "sna_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 282442, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 
145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "sna_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 305781, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "sna_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 291342, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "sna_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 259392, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "sna_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 287670, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "sna_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 278403, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "sna_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 293858, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "sna_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 277502, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "sna_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 267191, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "sna_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 287865, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "sna_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280825, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "sna_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 280458, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "sna_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 249657, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "sna_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284670, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "sna_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 281760, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sna_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 281028, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "sna_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 299265, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "sna_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 320148, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 145.85276679841897, + "max_sentence1_length": 424, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tso_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 269799, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tso_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 284565, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tso_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 321536, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, 
+ "tso_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 289236, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tso_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 311390, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tso_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 284873, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tso_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 297332, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tso_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 316910, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tso_Latn-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 293939, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tso_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 300111, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tso_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 303748, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tso_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 315717, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tso_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 285111, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tso_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 311609, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tso_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 287586, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tso_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 280633, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tso_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 315175, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tso_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 286776, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tso_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 271751, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tso_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306169, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tso_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 290144, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tso_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 286905, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tso_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 304040, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tso_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 307514, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tso_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 282005, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tso_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 311525, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tso_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 276138, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tso_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 273291, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tso_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 318929, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tso_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 289470, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tso_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 296807, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tso_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293710, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tso_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 291229, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tso_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 319174, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + 
"max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tso_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288958, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tso_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 298655, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tso_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 270255, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tso_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 287220, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tso_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 287637, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + 
}, + "tso_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 304134, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tso_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 303940, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tso_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 284577, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tso_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 297661, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tso_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 306904, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tso_Latn-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 292624, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tso_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 297868, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tso_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 286789, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tso_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 298000, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tso_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 292738, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tso_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 288550, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tso_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 299341, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tso_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 301596, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tso_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 295024, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tso_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 292925, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tso_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 267224, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tso_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 280408, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tso_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 285626, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tso_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 317858, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tso_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 274178, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tso_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296005, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tso_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 278821, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tso_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 290351, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tso_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 288097, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tso_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 290828, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tso_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 296360, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tso_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 292452, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tso_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 300496, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tso_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 297150, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tso_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 304466, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tso_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 299205, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + 
"max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tso_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 296282, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tso_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 307501, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tso_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 305242, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tso_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 306329, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tso_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 313355, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, 
+ "tso_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 287961, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tso_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 308072, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tso_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 294960, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tso_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 306749, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tso_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 295549, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tso_Latn-vec_Latn": { + "num_samples": 1012, + 
"number_of_characters": 291415, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tso_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 245310, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tso_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 290365, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tso_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 298306, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tso_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 314316, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tso_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 224143, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tso_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 282835, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tso_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 305443, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tso_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 312311, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tso_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 296773, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tso_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 266159, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tso_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 297553, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tso_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 293438, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tso_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 295106, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tso_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 288238, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tso_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 304705, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tso_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 300274, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tso_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 282595, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tso_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 322924, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tso_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 274485, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tso_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 296002, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tso_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 315816, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tso_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 215121, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tso_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 304172, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tso_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 321347, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tso_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 300799, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + 
"max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tso_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 282082, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tso_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 285351, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tso_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 311936, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tso_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 303959, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tso_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 302697, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + 
}, + "tso_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 288732, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tso_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 295864, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tso_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 305685, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tso_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 286435, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tso_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291460, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tso_Latn-xho_Latn": { + "num_samples": 1012, + 
"number_of_characters": 297301, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tso_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 274606, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tso_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 316863, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tso_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 280340, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tso_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 327575, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tso_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 300375, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tso_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 291621, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tso_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 297969, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tso_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 290648, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tso_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 299836, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tso_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 271741, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tso_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 285435, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tso_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 314596, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tso_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 286534, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tso_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 288507, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tso_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 290316, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tso_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 296749, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tso_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 304049, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tso_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 285022, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tso_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 271948, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tso_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 297439, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tso_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 320775, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tso_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 296218, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tso_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 299844, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tso_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 285081, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tso_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 346462, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + 
"max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tso_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 324347, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tso_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 198197, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tso_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 284114, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tso_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 286763, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tso_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 310659, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, 
+ "tso_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 284283, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tso_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 288026, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tso_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 309287, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tso_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 289258, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tso_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 284287, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tso_Latn-zho_Hans": { + "num_samples": 1012, + 
"number_of_characters": 201426, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tso_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 294541, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tso_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 292216, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tso_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 304321, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tso_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 284579, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tso_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 305723, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tso_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 300518, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tso_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 289888, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tso_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 248878, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tso_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 198873, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tso_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 286349, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tso_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 299435, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tso_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 290993, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tso_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 302108, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tso_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 301176, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tso_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 305350, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tso_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 289033, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tso_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 327189, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tso_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 304379, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tso_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 296795, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tso_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 293261, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tso_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 284606, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tso_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293582, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tso_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 293256, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tso_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 308829, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tso_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 310902, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + 
"max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tso_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 321213, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tso_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 306540, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tso_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 275239, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tso_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 311987, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tso_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 279458, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + 
}, + "tso_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 301135, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tso_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 295724, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tso_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 293017, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tso_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 305781, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tso_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 301917, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tso_Latn-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269967, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tso_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 298245, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tso_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 288978, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tso_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 304433, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tso_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 288077, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tso_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 277766, + "unique_pairs": 1012, + 
"min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tso_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 298440, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tso_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291400, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tso_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 291033, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tso_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 260232, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tso_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 295245, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 
156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tso_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 292335, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tso_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 291603, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tso_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 309840, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tso_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 330723, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 156.30237154150197, + "max_sentence1_length": 429, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "azj_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 255360, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "azj_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 270126, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "azj_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 307097, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "azj_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 274797, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "azj_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 296951, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "azj_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 270434, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "azj_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 282893, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "azj_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 302471, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "azj_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 279500, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "azj_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 285672, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "azj_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 289309, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 
394, + "unique_sentence2": 1012 + }, + "azj_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 301278, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "azj_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 270672, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "azj_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 297170, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "azj_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 273147, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "azj_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 266194, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "azj_Latn-spa_Latn": 
{ + "num_samples": 1012, + "number_of_characters": 300736, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "azj_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 272337, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "azj_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 257312, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "azj_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 291730, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "azj_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 275705, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "azj_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 272466, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "azj_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 289601, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "azj_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 293075, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "azj_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 267566, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "azj_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 297086, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "azj_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 261699, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "azj_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 258852, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "azj_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 304490, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "azj_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 275031, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "azj_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 282368, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "azj_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279271, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + 
"max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "azj_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 276790, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "azj_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 304735, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "azj_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274519, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "azj_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 284216, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "azj_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 255816, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + 
"min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "azj_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 272781, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "azj_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 273198, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "azj_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 289695, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "azj_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 289501, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "azj_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 270138, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 
124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "azj_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 283222, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "azj_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 292465, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "azj_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278185, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "azj_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 283429, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "azj_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 272350, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + 
"unique_sentence2": 1012 + }, + "azj_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 283561, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "azj_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 278299, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "azj_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 274111, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "azj_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 284902, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "azj_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 287157, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "azj_Latn-sun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 280585, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "azj_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 278486, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "azj_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 252785, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "azj_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 265969, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "azj_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 271187, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "azj_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 303419, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "azj_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 259739, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "azj_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281566, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "azj_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 264382, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "azj_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 275912, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "azj_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 273658, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "azj_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 276389, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "azj_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 281921, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "azj_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 278013, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "azj_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 286057, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "azj_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 282711, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + 
"max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "azj_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 290027, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "azj_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 284766, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "azj_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 281843, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "azj_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 293062, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "azj_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 290803, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + 
"min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "azj_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 291890, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "azj_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 298916, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "azj_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 273522, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "azj_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 293633, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "azj_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 280521, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "azj_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 292310, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "azj_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 281110, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "azj_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 276976, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "azj_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 230871, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "azj_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 275926, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "azj_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 283867, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "azj_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 299877, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "azj_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 209704, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "azj_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 268396, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "azj_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 291004, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "azj_Latn-tam_Taml": { + 
"num_samples": 1012, + "number_of_characters": 297872, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "azj_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 282334, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "azj_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 251720, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "azj_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 283114, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "azj_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 278999, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "azj_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 280667, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "azj_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 273799, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "azj_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 290266, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "azj_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285835, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "azj_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 268156, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "azj_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 308485, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "azj_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 260046, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "azj_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281563, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "azj_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 301377, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "azj_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 200682, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "azj_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 289733, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + 
"max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "azj_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 306908, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "azj_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 286360, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "azj_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 267643, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "azj_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 270912, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "azj_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 297497, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "azj_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 289520, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "azj_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 288258, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "azj_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 274293, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "azj_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 281425, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "azj_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 291246, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "azj_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 271996, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "azj_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277021, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "azj_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 282862, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "azj_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 260167, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "azj_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 302424, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + 
"unique_sentence2": 1012 + }, + "azj_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 265901, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "azj_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 313136, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "azj_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 285936, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "azj_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 277182, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "azj_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 283530, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "azj_Latn-tel_Telu": { + 
"num_samples": 1012, + "number_of_characters": 276209, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "azj_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 285397, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "azj_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 257302, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "azj_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 270996, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "azj_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 300157, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "azj_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 272095, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "azj_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 274068, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "azj_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 275877, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "azj_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 282310, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "azj_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289610, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "azj_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 270583, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "azj_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 257509, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "azj_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 283000, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "azj_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 306336, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "azj_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 281779, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "azj_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 285405, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + 
"max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "azj_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 270642, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "azj_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 332023, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "azj_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 309908, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "azj_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 183758, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "azj_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 269675, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "azj_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 272324, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "azj_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 296220, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "azj_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 269844, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "azj_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 273587, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "azj_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 294848, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "azj_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 274819, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "azj_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 269848, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "azj_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 186987, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "azj_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 280102, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "azj_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 277777, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + 
"unique_sentence2": 1012 + }, + "azj_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 289882, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "azj_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 270140, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "azj_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 291284, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "azj_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 286079, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "azj_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 275449, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "azj_Latn-tir_Ethi": { + 
"num_samples": 1012, + "number_of_characters": 234439, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "azj_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 184434, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "azj_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 271910, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "azj_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 284996, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "azj_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 276554, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "azj_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 287669, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "azj_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 286737, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "azj_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 290911, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "azj_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 274594, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "azj_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 312750, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "azj_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 289940, + "unique_pairs": 1012, + "min_sentence1_length": 41, + 
"average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "azj_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 282356, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "azj_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 278822, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "azj_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 270167, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "azj_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279143, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "azj_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 278817, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + 
"max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "azj_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 294390, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "azj_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 296463, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "azj_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 306774, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "azj_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 292101, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "azj_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 260800, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + 
"min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "azj_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 297548, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "azj_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 265019, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "azj_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 286696, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "azj_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 281285, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "azj_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 278578, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "azj_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 291342, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "azj_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 301917, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "azj_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 255528, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "azj_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 283806, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "azj_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 274539, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + 
"unique_sentence2": 1012 + }, + "azj_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 289994, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "azj_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 273638, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "azj_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 263327, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "azj_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 284001, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "azj_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276961, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "azj_Latn-dyu_Latn": { + 
"num_samples": 1012, + "number_of_characters": 276594, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "azj_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 245793, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "azj_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280806, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "azj_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 277896, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "azj_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 277164, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "azj_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 295401, + 
"unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "azj_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 316284, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 142.03458498023716, + "max_sentence1_length": 383, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "dik_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 223410, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "dik_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 238176, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "dik_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 275147, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "dik_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 242847, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "dik_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 265001, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "dik_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 238484, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "dik_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 250943, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dik_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 270521, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "dik_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 247550, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + 
"max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "dik_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 253722, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dik_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 257359, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dik_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 269328, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "dik_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 238722, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "dik_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 265220, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "dik_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 241197, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dik_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 234244, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "dik_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 268786, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dik_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 240387, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "dik_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 225362, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "dik_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 259780, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dik_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 243755, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dik_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 240516, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dik_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 257651, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "dik_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 261125, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "dik_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 235616, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "dik_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 265136, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "dik_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 229749, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "dik_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 226902, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "dik_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 272540, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "dik_Latn-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 243081, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dik_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 250418, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "dik_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247321, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dik_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 244840, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dik_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 272785, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "dik_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 242569, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dik_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 252266, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "dik_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 223866, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "dik_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 240831, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "dik_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 241248, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "dik_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 257745, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dik_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 257551, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "dik_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 238188, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "dik_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 251272, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dik_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 260515, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dik_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 246235, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + 
"max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dik_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 251479, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dik_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 240400, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dik_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 251611, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "dik_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 246349, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "dik_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 242161, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dik_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 252952, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "dik_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 255207, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dik_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 248635, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "dik_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 246536, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "dik_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 220835, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "dik_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 234019, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "dik_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 239237, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dik_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 271469, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "dik_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 227789, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "dik_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249616, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + 
"unique_sentence2": 1012 + }, + "dik_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 232432, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "dik_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 243962, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "dik_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 241708, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "dik_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 244439, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dik_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 249971, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dik_Latn-fao_Latn": { + 
"num_samples": 1012, + "number_of_characters": 246063, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "dik_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 254107, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dik_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 250761, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "dik_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 258077, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "dik_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 252816, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "dik_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 249893, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "dik_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 261112, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dik_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 258853, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "dik_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 259940, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "dik_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 266966, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "dik_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 241572, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "dik_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 261683, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "dik_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 248571, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dik_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 260360, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dik_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 249160, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "dik_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 245026, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + 
"max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dik_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 198921, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "dik_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 243976, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "dik_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 251917, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "dik_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 267927, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "dik_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 177754, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + 
"min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "dik_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 236446, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "dik_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 259054, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "dik_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 265922, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "dik_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 250384, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "dik_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 219770, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "dik_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 251164, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dik_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 247049, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "dik_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 248717, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dik_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 241849, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dik_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 258316, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "dik_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 253885, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dik_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 236206, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "dik_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 276535, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "dik_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 228096, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "dik_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 249613, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dik_Latn-fra_Latn": { + 
"num_samples": 1012, + "number_of_characters": 269427, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "dik_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 168732, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "dik_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 257783, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dik_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 274958, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dik_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 254410, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "dik_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 235693, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "dik_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 238962, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "dik_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 265547, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "dik_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 257570, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dik_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 256308, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "dik_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 242343, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "dik_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 249475, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "dik_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 259296, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dik_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 240046, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "dik_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 245071, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "dik_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 250912, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + 
"max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "dik_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 228217, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "dik_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 270474, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "dik_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 233951, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "dik_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 281186, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "dik_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 253986, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "dik_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 245232, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dik_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 251580, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "dik_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 244259, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "dik_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 253447, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dik_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 225352, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "dik_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 239046, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dik_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 268207, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "dik_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 240145, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "dik_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 242118, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "dik_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 243927, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + 
"unique_sentence2": 1012 + }, + "dik_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 250360, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dik_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257660, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "dik_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 238633, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dik_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 225559, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "dik_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 251050, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "dik_Latn-gla_Latn": { + 
"num_samples": 1012, + "number_of_characters": 274386, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "dik_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 249829, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dik_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 253455, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "dik_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 238692, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dik_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 300073, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "dik_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 277958, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "dik_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 151808, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "dik_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 237725, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "dik_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 240374, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dik_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 264270, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "dik_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 237894, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "dik_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 241637, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "dik_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 262898, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "dik_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 242869, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dik_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 237898, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "dik_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 155037, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + 
"max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "dik_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 248152, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dik_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 245827, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "dik_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 257932, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dik_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 238190, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "dik_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 259334, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "dik_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 254129, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "dik_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 243499, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dik_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 202489, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "dik_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 152484, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "dik_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 239960, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "dik_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 253046, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "dik_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 244604, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "dik_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 255719, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dik_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 254787, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "dik_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 258961, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "dik_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 242644, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "dik_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 280800, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "dik_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 257990, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dik_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 250406, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "dik_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 246872, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dik_Latn-guj_Gujr": { + 
"num_samples": 1012, + "number_of_characters": 238217, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dik_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247193, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dik_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 246867, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "dik_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 262440, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "dik_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 264513, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "dik_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 274824, + 
"unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "dik_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 260151, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "dik_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 228850, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "dik_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 265598, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "dik_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 233069, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "dik_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 254746, + "unique_pairs": 1012, + "min_sentence1_length": 33, + 
"average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "dik_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 249335, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "dik_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 246628, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "dik_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 259392, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "dik_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 269967, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dik_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 255528, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + 
"max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "dik_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 251856, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "dik_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 242589, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "dik_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 258044, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "dik_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 241688, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "dik_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 231377, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "dik_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 252051, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dik_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 245011, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "dik_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 244644, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "dik_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 213843, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "dik_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 248856, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dik_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 245946, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "dik_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 245214, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "dik_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 263451, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "dik_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 284334, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 110.46343873517786, + "max_sentence1_length": 585, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "hau_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 251688, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + 
"unique_sentence2": 1012 + }, + "hau_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 266454, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "hau_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 303425, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hau_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 271125, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "hau_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 293279, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "hau_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 266762, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "hau_Latn-pap_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279221, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hau_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 298799, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "hau_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 275828, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "hau_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 282000, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hau_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 285637, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hau_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 297606, + "unique_pairs": 
1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "hau_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 267000, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "hau_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 293498, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "hau_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 269475, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hau_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 262522, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hau_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 297064, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hau_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 268665, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "hau_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 253640, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "hau_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288058, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hau_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 272033, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hau_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 268794, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + 
"max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hau_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 285929, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hau_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 289403, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hau_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 263894, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "hau_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 293414, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hau_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 258027, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "hau_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 255180, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "hau_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 300818, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "hau_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 271359, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hau_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 278696, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "hau_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275599, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hau_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 273118, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hau_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 301063, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "hau_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270847, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hau_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 280544, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "hau_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 252144, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + 
"unique_sentence2": 1012 + }, + "hau_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 269109, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hau_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 269526, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "hau_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 286023, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hau_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 285829, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "hau_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 266466, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hau_Latn-pol_Latn": { + 
"num_samples": 1012, + "number_of_characters": 279550, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hau_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 288793, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hau_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274513, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hau_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 279757, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hau_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 268678, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hau_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 279889, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hau_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 274627, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hau_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 270439, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hau_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 281230, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "hau_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 283485, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hau_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 276913, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hau_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 274814, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hau_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 249113, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "hau_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 262297, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "hau_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 267515, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hau_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 299747, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + 
"max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "hau_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 256067, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "hau_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277894, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "hau_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 260710, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "hau_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 272240, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hau_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 269986, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hau_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 272717, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "hau_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 278249, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hau_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 274341, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hau_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 282385, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "hau_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 279039, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hau_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 286355, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "hau_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 281094, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hau_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 278171, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "hau_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 289390, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "hau_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 287131, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + 
"unique_sentence2": 1012 + }, + "hau_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 288218, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "hau_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 295244, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "hau_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 269850, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hau_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 289961, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "hau_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 276849, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hau_Latn-ron_Latn": { + 
"num_samples": 1012, + "number_of_characters": 288638, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hau_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 277438, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "hau_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 273304, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hau_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 227199, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "hau_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 272254, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "hau_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 280195, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hau_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 296205, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "hau_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 206032, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "hau_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 264724, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "hau_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 287332, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "hau_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 294200, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "hau_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 278662, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "hau_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 248048, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "hau_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 279442, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hau_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 275327, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "hau_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 276995, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + 
"max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hau_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 270127, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hau_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 286594, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hau_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282163, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "hau_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 264484, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hau_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 304813, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + 
"min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "hau_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 256374, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hau_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277891, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hau_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 297705, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "hau_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 197010, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "hau_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 286061, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hau_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 303236, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hau_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 282688, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "hau_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 263971, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "hau_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 267240, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "hau_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 293825, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + 
"unique_sentence2": 1012 + }, + "hau_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 285848, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hau_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 284586, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "hau_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 270621, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "hau_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 277753, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "hau_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 287574, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hau_Latn-san_Deva": { + 
"num_samples": 1012, + "number_of_characters": 268324, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "hau_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273349, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "hau_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 279190, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "hau_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 256495, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "hau_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 298752, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "hau_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 262229, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "hau_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 309464, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "hau_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 282264, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hau_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 273510, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hau_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 279858, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hau_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 272537, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "hau_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 281725, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "hau_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 253630, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "hau_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 267324, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hau_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 296485, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "hau_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 268423, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + 
"max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "hau_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 270396, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "hau_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 272205, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "hau_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 278638, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hau_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285938, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "hau_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 266911, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + 
"min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "hau_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 253837, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "hau_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 279328, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "hau_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 302664, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "hau_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 278107, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hau_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 281733, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "hau_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 266970, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hau_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 328351, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "hau_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 306236, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "hau_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 180086, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "hau_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 266003, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + 
"unique_sentence2": 1012 + }, + "hau_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 268652, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "hau_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 292548, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "hau_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 266172, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "hau_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 269915, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "hau_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 291176, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "hau_Latn-sin_Sinh": { + 
"num_samples": 1012, + "number_of_characters": 271147, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "hau_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 266176, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "hau_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 183315, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "hau_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 276430, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hau_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 274105, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "hau_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 286210, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "hau_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 266468, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "hau_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 287612, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "hau_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 282407, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "hau_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 271777, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "hau_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 230767, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "hau_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 180762, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "hau_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 268238, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "hau_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 281324, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hau_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 272882, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "hau_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 283997, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + 
"max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hau_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 283065, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "hau_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 287239, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "hau_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 270922, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "hau_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 309078, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "hau_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 286268, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "hau_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 278684, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "hau_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 275150, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "hau_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 266495, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hau_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275471, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "hau_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 275145, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "hau_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 290718, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hau_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 292791, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "hau_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 303102, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "hau_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 288429, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "hau_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 257128, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + 
"unique_sentence2": 1012 + }, + "hau_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 293876, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "hau_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 261347, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "hau_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 283024, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "hau_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 277613, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "hau_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 274906, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "hau_Latn-sna_Latn": { + 
"num_samples": 1012, + "number_of_characters": 287670, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "hau_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 298245, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "hau_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 283806, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "hau_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 251856, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "hau_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 270867, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "hau_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 286322, + 
"unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "hau_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 269966, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "hau_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 259655, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "hau_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 280329, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "hau_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273289, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "hau_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 272922, + "unique_pairs": 1012, + "min_sentence1_length": 39, + 
"average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "hau_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 242121, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "hau_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277134, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "hau_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 274224, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hau_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 273492, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "hau_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 291729, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + 
"max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "hau_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 312612, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 138.40612648221344, + "max_sentence1_length": 372, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "kea_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 242421, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "kea_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 257187, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "kea_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 294158, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kea_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 261858, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kea_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 284012, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "kea_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 257495, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "kea_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 269954, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kea_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 289532, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "kea_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 266561, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "kea_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 272733, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kea_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 276370, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kea_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 288339, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "kea_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 257733, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "kea_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 284231, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 
+ }, + "kea_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 260208, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kea_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 253255, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kea_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 287797, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kea_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 259398, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "kea_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 244373, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "kea_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 278791, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kea_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 262766, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kea_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 259527, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kea_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 276662, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kea_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 280136, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kea_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 254627, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "kea_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 284147, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kea_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 248760, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "kea_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 245913, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "kea_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 291551, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "kea_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 262092, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kea_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 269429, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "kea_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266332, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kea_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 263851, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kea_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 291796, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "kea_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261580, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kea_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 271277, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "kea_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 242877, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "kea_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 259842, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kea_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 260259, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "kea_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 276756, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kea_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 276562, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "kea_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 257199, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kea_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 270283, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kea_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 279526, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kea_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265246, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kea_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 270490, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kea_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 259411, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kea_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 270622, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kea_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 265360, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kea_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 261172, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + 
"kea_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 271963, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "kea_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 274218, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kea_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 267646, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kea_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 265547, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kea_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 239846, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "kea_Latn-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 253030, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "kea_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 258248, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kea_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 290480, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "kea_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 246800, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "kea_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268627, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "kea_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 251443, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "kea_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 262973, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kea_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 260719, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kea_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 263450, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "kea_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 268982, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kea_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 265074, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kea_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 273118, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "kea_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 269772, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kea_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 277088, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "kea_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 271827, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kea_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 268904, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "kea_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 280123, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "kea_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 277864, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kea_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 278951, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "kea_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 285977, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "kea_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 260583, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kea_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 280694, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "kea_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 267582, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kea_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 279371, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kea_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 268171, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "kea_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 264037, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kea_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 217932, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "kea_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 262987, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "kea_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 270928, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kea_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 286938, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "kea_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 196765, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"kea_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 255457, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "kea_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 278065, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "kea_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 284933, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "kea_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 269395, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "kea_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 238781, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "kea_Latn-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 270175, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kea_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 266060, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "kea_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 267728, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kea_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 260860, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kea_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 277327, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kea_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272896, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "kea_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 255217, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kea_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 295546, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "kea_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 247107, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kea_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268624, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kea_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 288438, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "kea_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 187743, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "kea_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 276794, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kea_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 293969, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kea_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 273421, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "kea_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 254704, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "kea_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 257973, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "kea_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 284558, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "kea_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 276581, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kea_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 275319, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "kea_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 261354, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "kea_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 268486, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "kea_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 278307, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kea_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 259057, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "kea_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264082, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kea_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 269923, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "kea_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 247228, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "kea_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 289485, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "kea_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 252962, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "kea_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 300197, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "kea_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 272997, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + 
"kea_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 264243, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kea_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 270591, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kea_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 263270, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "kea_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 272458, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "kea_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 244363, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "kea_Latn-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 258057, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kea_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 287218, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "kea_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 259156, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "kea_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 261129, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "kea_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 262938, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "kea_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 269371, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kea_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276671, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "kea_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 257644, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "kea_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 244570, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "kea_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 270061, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "kea_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 293397, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "kea_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 268840, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kea_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 272466, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "kea_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 257703, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kea_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 319084, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "kea_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 296969, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + 
"unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "kea_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 170819, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "kea_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 256736, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "kea_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 259385, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "kea_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 283281, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "kea_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 256905, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "kea_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 260648, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "kea_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 281909, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "kea_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 261880, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "kea_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 256909, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "kea_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 174048, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + 
"max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "kea_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 267163, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kea_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 264838, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "kea_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 276943, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "kea_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 257201, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "kea_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 278345, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + 
"kea_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 273140, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "kea_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 262510, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "kea_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 221500, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "kea_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 171495, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "kea_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 258971, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "kea_Latn-cym_Latn": { + "num_samples": 1012, + 
"number_of_characters": 272057, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "kea_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 263615, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "kea_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 274730, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kea_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 273798, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "kea_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 277972, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "kea_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 261655, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "kea_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 299811, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "kea_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 277001, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "kea_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 269417, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "kea_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 265883, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "kea_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 257228, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "kea_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266204, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "kea_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 265878, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "kea_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 281451, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kea_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 283524, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "kea_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 293835, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + 
"unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "kea_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 279162, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "kea_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 247861, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "kea_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 284609, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "kea_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 252080, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "kea_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 273757, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "kea_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 268346, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "kea_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 265639, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "kea_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 278403, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "kea_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 288978, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "kea_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 274539, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + 
"max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "kea_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 242589, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "kea_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 270867, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "kea_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 277055, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "kea_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 260699, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "kea_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 250388, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + 
"kea_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 271062, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "kea_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264022, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "kea_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 263655, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "kea_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 232854, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "kea_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267867, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "kea_Latn-lvs_Latn": { + "num_samples": 1012, + 
"number_of_characters": 264957, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kea_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 264225, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "kea_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 282462, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "kea_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 303345, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 129.2490118577075, + "max_sentence1_length": 360, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "lus_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 257876, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "lus_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 272642, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "lus_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 309613, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lus_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 277313, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "lus_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 299467, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "lus_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 272950, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "lus_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 285409, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lus_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 304987, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "lus_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 282016, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "lus_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 288188, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lus_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 291825, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lus_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 303794, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 
1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "lus_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 273188, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "lus_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 299686, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "lus_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 275663, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lus_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 268710, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lus_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 303252, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lus_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 274853, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "lus_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 259828, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "lus_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 294246, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lus_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 278221, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lus_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 274982, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lus_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 292117, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lus_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 295591, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lus_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 270082, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "lus_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 299602, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lus_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 264215, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, 
+ "lus_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 261368, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "lus_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 307006, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "lus_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 277547, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lus_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 284884, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "lus_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281787, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lus_Latn-mar_Deva": { + "num_samples": 1012, + 
"number_of_characters": 279306, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lus_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 307251, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "lus_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277035, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lus_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 286732, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "lus_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 258332, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "lus_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 275297, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lus_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 275714, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "lus_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 292211, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lus_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 292017, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "lus_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 272654, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lus_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 285738, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lus_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 294981, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lus_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280701, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lus_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 285945, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lus_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 274866, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lus_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 286077, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + 
"unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lus_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 280815, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lus_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 276627, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lus_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 287418, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lus_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 289673, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lus_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 283101, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + 
"average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lus_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 281002, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lus_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 255301, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "lus_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 268485, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "lus_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 273703, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lus_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 305935, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + 
"max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "lus_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 262255, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "lus_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284082, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "lus_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 266898, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "lus_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 278428, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lus_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 276174, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + 
}, + "lus_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 278905, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lus_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 284437, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lus_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 280529, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lus_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 288573, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lus_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 285227, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lus_Latn-mlt_Latn": { + "num_samples": 1012, + 
"number_of_characters": 292543, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "lus_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 287282, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lus_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 284359, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "lus_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 295578, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lus_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 293319, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lus_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 294406, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "lus_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 301432, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "lus_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 276038, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lus_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 296149, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lus_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 283037, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lus_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 294826, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lus_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 283626, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "lus_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 279492, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lus_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 233387, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "lus_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 278442, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lus_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 286383, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lus_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 302393, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "lus_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 212220, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "lus_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 270912, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "lus_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 293520, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "lus_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 300388, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + 
"average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "lus_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 284850, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "lus_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 254236, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "lus_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 285630, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lus_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 281515, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "lus_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 283183, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + 
"max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lus_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 276315, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lus_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 292782, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lus_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288351, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lus_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 270672, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lus_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 311001, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, 
+ "lus_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 262562, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lus_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284079, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lus_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 303893, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "lus_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 203198, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "lus_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 292249, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lus_Latn-mya_Mymr": { + "num_samples": 1012, + 
"number_of_characters": 309424, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lus_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 288876, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "lus_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 270159, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lus_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 273428, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lus_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 300013, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "lus_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 292036, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lus_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 290774, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lus_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 276809, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "lus_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 283941, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "lus_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 293762, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lus_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 274512, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "lus_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279537, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lus_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 285378, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "lus_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 262683, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lus_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 304940, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lus_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 268417, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + 
"unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "lus_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 315652, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "lus_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 288452, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lus_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 279698, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lus_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 286046, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lus_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 278725, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lus_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 287913, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lus_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 259818, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "lus_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 273512, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lus_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 302673, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "lus_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 274611, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + 
"max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "lus_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 276584, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "lus_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 278393, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "lus_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 284826, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lus_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 292126, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "lus_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 273099, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, 
+ "lus_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 260025, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "lus_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 285516, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lus_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 308852, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "lus_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 284295, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lus_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 287921, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lus_Latn-npi_Deva": { + "num_samples": 1012, + 
"number_of_characters": 273158, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lus_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 334539, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "lus_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 312424, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lus_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 186274, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "lus_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 272191, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lus_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 274840, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lus_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 298736, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "lus_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 272360, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "lus_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 276103, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lus_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 297364, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "lus_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 277335, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lus_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 272364, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lus_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 189503, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "lus_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 282618, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lus_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 280293, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lus_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 292398, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lus_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 272656, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "lus_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 293800, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "lus_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 288595, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "lus_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 277965, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lus_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 236955, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "lus_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 186950, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "lus_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 274426, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lus_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 287512, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lus_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 279070, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "lus_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 290185, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + 
"max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lus_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 289253, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "lus_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 293427, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lus_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 277110, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lus_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 315266, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "lus_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 292456, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, 
+ "lus_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 284872, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "lus_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 281338, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lus_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 272683, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lus_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281659, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lus_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 281333, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "lus_Latn-oci_Latn": { + "num_samples": 1012, + 
"number_of_characters": 296906, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lus_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 298979, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "lus_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 309290, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lus_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 294617, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "lus_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 263316, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "lus_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 300064, + "unique_pairs": 1012, + 
"min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lus_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 267535, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "lus_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 289212, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "lus_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 283801, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lus_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 281094, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lus_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 293858, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 
144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "lus_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 304433, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lus_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 289994, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "lus_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 258044, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "lus_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 286322, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "lus_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 277055, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lus_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 276154, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "lus_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 265843, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "lus_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 286517, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lus_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279477, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "lus_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 279110, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "lus_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 248309, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "lus_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 283322, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lus_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 280412, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lus_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 279680, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lus_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 297917, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + 
"max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "lus_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 318800, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 144.52075098814228, + "max_sentence1_length": 418, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "pag_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 241520, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "pag_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 256286, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "pag_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 293257, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pag_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 260957, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + 
"pag_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 283111, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "pag_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 256594, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "pag_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 269053, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pag_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 288631, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "pag_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 265660, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "pag_Latn-ace_Latn": { + "num_samples": 1012, + 
"number_of_characters": 271832, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pag_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 275469, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pag_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 287438, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "pag_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 256832, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "pag_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 283330, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "pag_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 259307, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pag_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 252354, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pag_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 286896, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pag_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 258497, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "pag_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 243472, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "pag_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277890, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pag_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 261865, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pag_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 258626, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pag_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 275761, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pag_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 279235, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "pag_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 253726, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + 
"unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "pag_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 283246, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pag_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 247859, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "pag_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 245012, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "pag_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 290650, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "pag_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 261191, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pag_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 268528, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "pag_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265431, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pag_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 262950, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pag_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 290895, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "pag_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 260679, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + 
"max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pag_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 270376, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "pag_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 241976, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "pag_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 258941, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pag_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 259358, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "pag_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 275855, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + 
"pag_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 275661, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "pag_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 256298, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pag_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 269382, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pag_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 278625, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pag_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264345, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pag_Latn-afr_Latn": { + "num_samples": 1012, + 
"number_of_characters": 269589, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pag_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 258510, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pag_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 269721, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "pag_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 264459, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pag_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 260271, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pag_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 271062, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pag_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 273317, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pag_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 266745, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pag_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 264646, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pag_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 238945, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "pag_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 252129, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "pag_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 257347, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pag_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 289579, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "pag_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 245899, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "pag_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267726, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "pag_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 250542, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "pag_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 262072, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pag_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 259818, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pag_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 262549, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pag_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 268081, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pag_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 264173, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pag_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 272217, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pag_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 268871, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pag_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 276187, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "pag_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 270926, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pag_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 268003, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + 
"max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "pag_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 279222, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pag_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 276963, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "pag_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 278050, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "pag_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 285076, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "pag_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 259682, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + 
"pag_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 279793, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pag_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 266681, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pag_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 278470, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pag_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 267270, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "pag_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 263136, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pag_Latn-amh_Ethi": { + "num_samples": 1012, + 
"number_of_characters": 217031, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "pag_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 262086, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "pag_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 270027, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pag_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 286037, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "pag_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 195864, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "pag_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 254556, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "pag_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 277164, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "pag_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 284032, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "pag_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 268494, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "pag_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 237880, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "pag_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 269274, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pag_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 265159, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "pag_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 266827, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pag_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 259959, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pag_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 276426, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pag_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271995, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + 
"unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pag_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 254316, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pag_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 294645, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pag_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 246206, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pag_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267723, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pag_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 287537, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + 
"average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "pag_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 186842, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "pag_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 275893, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pag_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 293068, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pag_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 272520, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "pag_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 253803, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + 
"max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pag_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 257072, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pag_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 283657, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "pag_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 275680, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pag_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 274418, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "pag_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 260453, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + 
"pag_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 267585, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "pag_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 277406, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pag_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 258156, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "pag_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263181, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pag_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 269022, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "pag_Latn-ars_Arab": { + "num_samples": 1012, + 
"number_of_characters": 246327, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pag_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 288584, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pag_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 252061, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "pag_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 299296, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "pag_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 272096, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pag_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 263342, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pag_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 269690, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pag_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 262369, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pag_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 271557, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pag_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 243462, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "pag_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 257156, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pag_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 286317, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "pag_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 258255, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "pag_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 260228, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "pag_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 262037, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "pag_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 268470, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pag_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275770, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "pag_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 256743, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pag_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 243669, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "pag_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 269160, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pag_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 292496, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "pag_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 267939, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pag_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 271565, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pag_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 256802, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pag_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 318183, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "pag_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 296068, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + 
"max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pag_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 169918, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "pag_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 255835, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pag_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 258484, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pag_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 282380, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "pag_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 256004, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + 
"pag_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 259747, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pag_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 281008, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "pag_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 260979, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pag_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 256008, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pag_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 173147, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "pag_Latn-ast_Latn": { + "num_samples": 1012, + 
"number_of_characters": 266262, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pag_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 263937, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pag_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 276042, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pag_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 256300, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "pag_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 277444, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "pag_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 272239, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "pag_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 261609, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pag_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 220599, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "pag_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 170594, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "pag_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 258070, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pag_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 271156, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pag_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 262714, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "pag_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 273829, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pag_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 272897, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "pag_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 277071, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "pag_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 260754, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pag_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 298910, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "pag_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 276100, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pag_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 268516, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "pag_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 264982, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pag_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 256327, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pag_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265303, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pag_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 264977, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "pag_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 280550, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pag_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 282623, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "pag_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 292934, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + 
"max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pag_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 278261, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "pag_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 246960, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "pag_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 283708, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pag_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 251179, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "pag_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 272856, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + 
"pag_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 267445, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pag_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 264738, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pag_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 277502, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "pag_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 288077, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pag_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 273638, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "pag_Latn-dik_Latn": { + "num_samples": 1012, + 
"number_of_characters": 241688, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "pag_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 269966, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "pag_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 260699, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pag_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 276154, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pag_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 249487, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "pag_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 270161, + "unique_pairs": 1012, + 
"min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pag_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263121, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "pag_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 262754, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "pag_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 231953, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "pag_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266966, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pag_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 264056, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 
128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pag_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 263324, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pag_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 281561, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "pag_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 302444, + "unique_pairs": 1012, + "min_sentence1_length": 31, + "average_sentence1_length": 128.3586956521739, + "max_sentence1_length": 339, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "snd_Arab-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 231209, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "snd_Arab-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 245975, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "snd_Arab-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 282946, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "snd_Arab-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 250646, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "snd_Arab-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 272800, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "snd_Arab-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 246283, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "snd_Arab-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 258742, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + 
"average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "snd_Arab-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 278320, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "snd_Arab-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 255349, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "snd_Arab-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 261521, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "snd_Arab-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 265158, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "snd_Arab-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 277127, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, 
+ "unique_sentence2": 1012 + }, + "snd_Arab-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 246521, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "snd_Arab-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 273019, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "snd_Arab-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 248996, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "snd_Arab-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 242043, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "snd_Arab-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 276585, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "snd_Arab-twi_Latn": { + 
"num_samples": 1012, + "number_of_characters": 248186, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "snd_Arab-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 233161, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "snd_Arab-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267579, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "snd_Arab-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 251554, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "snd_Arab-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 248315, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "snd_Arab-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 265450, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "snd_Arab-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 268924, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "snd_Arab-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 243415, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "snd_Arab-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 272935, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "snd_Arab-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 237548, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "snd_Arab-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 234701, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "snd_Arab-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 280339, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "snd_Arab-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 250880, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "snd_Arab-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 258217, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "snd_Arab-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 255120, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "snd_Arab-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 252639, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + 
"max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "snd_Arab-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 280584, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "snd_Arab-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250368, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "snd_Arab-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 260065, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "snd_Arab-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 231665, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "snd_Arab-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 248630, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "snd_Arab-est_Latn": { + "num_samples": 1012, + "number_of_characters": 249047, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "snd_Arab-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 265544, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "snd_Arab-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 265350, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "snd_Arab-min_Arab": { + "num_samples": 1012, + "number_of_characters": 245987, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "snd_Arab-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 259071, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 
137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "snd_Arab-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 268314, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "snd_Arab-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254034, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "snd_Arab-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 259278, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "snd_Arab-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 248199, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "snd_Arab-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 259410, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "snd_Arab-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 254148, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "snd_Arab-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 249960, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "snd_Arab-min_Latn": { + "num_samples": 1012, + "number_of_characters": 260751, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "snd_Arab-por_Latn": { + "num_samples": 1012, + "number_of_characters": 263006, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "snd_Arab-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 256434, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "snd_Arab-umb_Latn": { + 
"num_samples": 1012, + "number_of_characters": 254335, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "snd_Arab-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 228634, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "snd_Arab-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 241818, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "snd_Arab-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 247036, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "snd_Arab-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 279268, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "snd_Arab-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 235588, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "snd_Arab-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257415, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "snd_Arab-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 240231, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "snd_Arab-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 251761, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "snd_Arab-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 249507, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "snd_Arab-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 252238, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "snd_Arab-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 257770, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "snd_Arab-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 253862, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "snd_Arab-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 261906, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "snd_Arab-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 258560, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "snd_Arab-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 265876, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + 
"max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "snd_Arab-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 260615, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "snd_Arab-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 257692, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "snd_Arab-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 268911, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "snd_Arab-als_Latn": { + "num_samples": 1012, + "number_of_characters": 266652, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "snd_Arab-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 267739, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "snd_Arab-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 274765, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "snd_Arab-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 249371, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "snd_Arab-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 269482, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "snd_Arab-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 256370, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "snd_Arab-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 268159, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "snd_Arab-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 256959, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "snd_Arab-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 252825, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "snd_Arab-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 206720, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "snd_Arab-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 251775, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "snd_Arab-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 259716, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + 
"unique_sentence2": 1012 + }, + "snd_Arab-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 275726, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "snd_Arab-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 185553, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "snd_Arab-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 244245, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "snd_Arab-run_Latn": { + "num_samples": 1012, + "number_of_characters": 266853, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "snd_Arab-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 273721, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "snd_Arab-vie_Latn": { + 
"num_samples": 1012, + "number_of_characters": 258183, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "snd_Arab-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 227569, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "snd_Arab-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 258963, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "snd_Arab-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 254848, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "snd_Arab-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 256516, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "snd_Arab-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 249648, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "snd_Arab-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 266115, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "snd_Arab-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 261684, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "snd_Arab-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 244005, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "snd_Arab-war_Latn": { + "num_samples": 1012, + "number_of_characters": 284334, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "snd_Arab-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 235895, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "snd_Arab-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 257412, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "snd_Arab-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 277226, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "snd_Arab-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 176531, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "snd_Arab-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 265582, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "snd_Arab-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 282757, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + 
"max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "snd_Arab-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 262209, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "snd_Arab-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 243492, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "snd_Arab-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 246761, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "snd_Arab-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 273346, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "snd_Arab-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 265369, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + 
"min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "snd_Arab-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 264107, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "snd_Arab-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 250142, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "snd_Arab-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 257274, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "snd_Arab-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 267095, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "snd_Arab-san_Deva": { + "num_samples": 1012, + "number_of_characters": 247845, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "snd_Arab-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 252870, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "snd_Arab-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 258711, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "snd_Arab-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 236016, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "snd_Arab-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 278273, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "snd_Arab-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 241750, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + 
"unique_sentence2": 1012 + }, + "snd_Arab-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 288985, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "snd_Arab-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 261785, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "snd_Arab-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 253031, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "snd_Arab-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 259379, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "snd_Arab-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 252058, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "snd_Arab-ydd_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 261246, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "snd_Arab-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 233151, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "snd_Arab-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 246845, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "snd_Arab-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 276006, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "snd_Arab-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 247944, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "snd_Arab-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 249917, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "snd_Arab-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 251726, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "snd_Arab-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 258159, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "snd_Arab-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 265459, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "snd_Arab-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 246432, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "snd_Arab-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 233358, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "snd_Arab-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 258849, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "snd_Arab-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 282185, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "snd_Arab-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 257628, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "snd_Arab-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 261254, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "snd_Arab-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 246491, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + 
"max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "snd_Arab-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 307872, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "snd_Arab-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 285757, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "snd_Arab-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 159607, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "snd_Arab-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 245524, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "snd_Arab-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 248173, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "snd_Arab-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 272069, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "snd_Arab-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 245693, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "snd_Arab-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 249436, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "snd_Arab-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 270697, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "snd_Arab-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 250668, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 
129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "snd_Arab-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 245697, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "snd_Arab-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 162836, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "snd_Arab-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 255951, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "snd_Arab-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 253626, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "snd_Arab-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 265731, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + 
"unique_sentence2": 1012 + }, + "snd_Arab-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 245989, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "snd_Arab-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 267133, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "snd_Arab-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 261928, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "snd_Arab-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 251298, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "snd_Arab-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 210288, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "snd_Arab-zho_Hant": { + 
"num_samples": 1012, + "number_of_characters": 160283, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "snd_Arab-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 247759, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "snd_Arab-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 260845, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "snd_Arab-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 252403, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "snd_Arab-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 263518, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "snd_Arab-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 262586, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "snd_Arab-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 266760, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "snd_Arab-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 250443, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "snd_Arab-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 288599, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "snd_Arab-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 265789, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "snd_Arab-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 258205, + "unique_pairs": 1012, + "min_sentence1_length": 36, + 
"average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "snd_Arab-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 254671, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "snd_Arab-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 246016, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "snd_Arab-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 254992, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "snd_Arab-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 254666, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "snd_Arab-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 270239, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + 
"max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "snd_Arab-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 272312, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "snd_Arab-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 282623, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "snd_Arab-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 267950, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "snd_Arab-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 236649, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "snd_Arab-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 273397, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + 
"min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "snd_Arab-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 240868, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "snd_Arab-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 262545, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "snd_Arab-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 257134, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "snd_Arab-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 254427, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "snd_Arab-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 267191, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 
145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "snd_Arab-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 277766, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "snd_Arab-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 263327, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "snd_Arab-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 231377, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "snd_Arab-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 259655, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "snd_Arab-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 250388, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + 
"unique_sentence2": 1012 + }, + "snd_Arab-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 265843, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "snd_Arab-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 249487, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "snd_Arab-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 259850, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "snd_Arab-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 252810, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "snd_Arab-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 252443, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "snd_Arab-heb_Hebr": { + 
"num_samples": 1012, + "number_of_characters": 221642, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "snd_Arab-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 256655, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "snd_Arab-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 253745, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "snd_Arab-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 253013, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "snd_Arab-som_Latn": { + "num_samples": 1012, + "number_of_characters": 271250, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "snd_Arab-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 292133, + 
"unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 118.1699604743083, + "max_sentence1_length": 307, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tuk_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 251883, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tuk_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 266649, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tuk_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 303620, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tuk_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 271320, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tuk_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 293474, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tuk_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 266957, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tuk_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 279416, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tuk_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 298994, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tuk_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 276023, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tuk_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 282195, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 
397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tuk_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 285832, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tuk_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 297801, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tuk_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 267195, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tuk_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 293693, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tuk_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 269670, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tuk_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 262717, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tuk_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 297259, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tuk_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 268860, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tuk_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 253835, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tuk_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 288253, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + 
"max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tuk_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 272228, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tuk_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 268989, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tuk_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 286124, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tuk_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 289598, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tuk_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 264089, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + 
"tuk_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 293609, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tuk_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 258222, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tuk_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 255375, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tuk_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 301013, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tuk_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 271554, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tuk_Latn-hun_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278891, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tuk_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275794, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tuk_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 273313, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tuk_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 301258, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tuk_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271042, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tuk_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 280739, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tuk_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 252339, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tuk_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 269304, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tuk_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 269721, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tuk_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 286218, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tuk_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 286024, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tuk_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 266661, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tuk_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 279745, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tuk_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 288988, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tuk_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274708, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tuk_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 279952, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 
1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tuk_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 268873, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tuk_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 280084, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tuk_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 274822, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tuk_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 270634, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tuk_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 281425, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tuk_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 283680, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tuk_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 277108, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tuk_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 275009, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tuk_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 249308, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tuk_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 262492, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 
1012 + }, + "tuk_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 267710, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tuk_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 299942, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "tuk_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 256262, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tuk_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278089, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tuk_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 260905, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tuk_Latn-swe_Latn": { + "num_samples": 1012, + 
"number_of_characters": 272435, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tuk_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 270181, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tuk_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 272912, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tuk_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 278444, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tuk_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 274536, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tuk_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 282580, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tuk_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 279234, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tuk_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 286550, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tuk_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 281289, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tuk_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 278366, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tuk_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 289585, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tuk_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 287326, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tuk_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 288413, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tuk_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 295439, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tuk_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 270045, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tuk_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 290156, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 
1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tuk_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 277044, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tuk_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 288833, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tuk_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 277633, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tuk_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 273499, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tuk_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 227394, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 
86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tuk_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 272449, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tuk_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 280390, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tuk_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 296400, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tuk_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 206227, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tuk_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 264919, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 
1012 + }, + "tuk_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 287527, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tuk_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 294395, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tuk_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 278857, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tuk_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 248243, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tuk_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 279637, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tuk_Latn-fon_Latn": { + "num_samples": 1012, + 
"number_of_characters": 275522, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tuk_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 277190, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tuk_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 270322, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tuk_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 286789, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tuk_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282358, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tuk_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 264679, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tuk_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 305008, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tuk_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 256569, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tuk_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278086, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tuk_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 297900, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tuk_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 197205, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tuk_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 286256, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tuk_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 303431, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tuk_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 282883, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tuk_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 264166, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tuk_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 267435, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 
1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tuk_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 294020, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tuk_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 286043, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tuk_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 284781, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tuk_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 270816, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tuk_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 277948, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tuk_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 287769, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tuk_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 268519, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tuk_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273544, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tuk_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 279385, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tuk_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 256690, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + 
"unique_sentence2": 1012 + }, + "tuk_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 298947, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tuk_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 262424, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tuk_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 309659, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tuk_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 282459, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tuk_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 273705, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tuk_Latn-sat_Olck": { + "num_samples": 
1012, + "number_of_characters": 280053, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tuk_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 272732, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tuk_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 281920, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tuk_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 253825, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tuk_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 267519, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tuk_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 296680, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tuk_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 268618, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "tuk_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 270591, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tuk_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 272400, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tuk_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 278833, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tuk_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286133, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tuk_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 267106, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tuk_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 254032, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tuk_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 279523, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tuk_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 302859, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tuk_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 278302, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 
1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tuk_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 281928, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tuk_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 267165, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tuk_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 328546, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tuk_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 306431, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tuk_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 180281, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 
39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tuk_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 266198, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tuk_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 268847, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tuk_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 292743, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tuk_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 266367, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tuk_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 270110, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + 
"unique_sentence2": 1012 + }, + "tuk_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 291371, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tuk_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 271342, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tuk_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 266371, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tuk_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 183510, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tuk_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 276625, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tuk_Latn-crh_Latn": { + "num_samples": 
1012, + "number_of_characters": 274300, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tuk_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 286405, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tuk_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 266663, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tuk_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 287807, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tuk_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 282602, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tuk_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 271972, + "unique_pairs": 1012, + 
"min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tuk_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 230962, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tuk_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 180957, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tuk_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 268433, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tuk_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 281519, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tuk_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 273077, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 
138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tuk_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 284192, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tuk_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 283260, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tuk_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 287434, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tuk_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 271117, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tuk_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 309273, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 
1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tuk_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 286463, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tuk_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 278879, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tuk_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 275345, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tuk_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 266690, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tuk_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275666, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tuk_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 275340, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tuk_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 290913, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tuk_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 292986, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tuk_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 303297, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tuk_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 288624, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + 
"unique_sentence2": 1012 + }, + "tuk_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 257323, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tuk_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 294071, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tuk_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 261542, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tuk_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 283219, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tuk_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 277808, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tuk_Latn-ory_Orya": { + 
"num_samples": 1012, + "number_of_characters": 275101, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tuk_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 287865, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tuk_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 298440, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tuk_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 284001, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tuk_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 252051, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tuk_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 280329, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tuk_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 271062, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tuk_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 286517, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tuk_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 270161, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tuk_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 259850, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tuk_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 273484, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tuk_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 273117, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tuk_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 242316, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tuk_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 277329, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tuk_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 274419, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tuk_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 273687, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + 
"max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tuk_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 291924, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "tuk_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 312807, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 138.598814229249, + "max_sentence1_length": 397, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 244843, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "bak_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 259609, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "bak_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 296580, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + 
"min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bak_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 264280, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "bak_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 286434, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "bak_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 259917, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "bak_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 272376, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bak_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 291954, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 268983, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 275155, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 278792, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 290761, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "bak_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 260155, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 
1011 + }, + "bak_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 286653, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "bak_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 262630, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bak_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 255677, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bak_Cyrl-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 290219, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bak_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 261820, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "bak_Cyrl-acm_Arab": { + "num_samples": 1012, + 
"number_of_characters": 246795, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "bak_Cyrl-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281213, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bak_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 265188, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bak_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 261949, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 279084, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bak_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 282558, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bak_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 257049, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "bak_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 286569, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 251182, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "bak_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 248335, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "bak_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 293973, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "bak_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 264514, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bak_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 271851, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268754, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bak_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 266273, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bak_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 294218, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + 
"unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "bak_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264002, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bak_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 273699, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "bak_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 245299, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 262264, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bak_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 262681, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "bak_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 279178, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 278984, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "bak_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 259621, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bak_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 272705, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 281948, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + 
"max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267668, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bak_Cyrl-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 272912, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bak_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 261833, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bak_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 273044, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 267782, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + 
}, + "bak_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 263594, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bak_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 274385, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "bak_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 276640, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bak_Cyrl-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 270068, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bak_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 267969, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ajp_Arab": { + "num_samples": 1012, + 
"number_of_characters": 242268, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "bak_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 255452, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 260670, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 292902, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "bak_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 249222, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "bak_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271049, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "bak_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 253865, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "bak_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 265395, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bak_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 263141, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bak_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 265872, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "bak_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 271404, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bak_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 267496, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 275540, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "bak_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 272194, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bak_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 279510, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "bak_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 274249, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bak_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 271326, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "bak_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 282545, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "bak_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 280286, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bak_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 281373, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "bak_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 288399, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + 
"average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "bak_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 263005, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 283116, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "bak_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 270004, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 281793, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bak_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 270593, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + 
"max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "bak_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 266459, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bak_Cyrl-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 220354, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "bak_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 265409, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "bak_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 273350, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 289360, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, 
+ "bak_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 199187, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "bak_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 257879, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "bak_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 280487, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "bak_Cyrl-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 287355, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "bak_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 271817, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bak_Cyrl-apc_Arab": { + "num_samples": 1012, + 
"number_of_characters": 241203, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "bak_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 272597, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bak_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 268482, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "bak_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 270150, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bak_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 263282, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bak_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 279749, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bak_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275318, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "bak_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 257639, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bak_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 297968, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "bak_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 249529, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bak_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271046, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bak_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 290860, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "bak_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 190165, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "bak_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 279216, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bak_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 296391, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bak_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 275843, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + 
"unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "bak_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 257126, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "bak_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 260395, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "bak_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 286980, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "bak_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 279003, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bak_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 277741, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + 
"average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 263776, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "bak_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 270908, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "bak_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 280729, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bak_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 261479, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "bak_Cyrl-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266504, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + 
"max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bak_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 272345, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 249650, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 291907, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "bak_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 255384, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 302619, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + 
}, + "bak_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 275419, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bak_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 266665, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bak_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 273013, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 265692, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 274880, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ary_Arab": { + "num_samples": 1012, + 
"number_of_characters": 246785, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "bak_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 260479, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bak_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 289640, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 261578, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "bak_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 263551, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "bak_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 265360, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "bak_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 271793, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279093, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "bak_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 260066, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "bak_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 246992, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "bak_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 272483, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "bak_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 295819, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "bak_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 271262, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bak_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 274888, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "bak_Cyrl-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 260125, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bak_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 321506, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + 
"unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 299391, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "bak_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 173241, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "bak_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 259158, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 261807, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "bak_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 285703, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + 
"average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 259327, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 263070, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "bak_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 284331, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "bak_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 264302, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 259331, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + 
"max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "bak_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 176470, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 269585, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bak_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 267260, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "bak_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 279365, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 259623, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + 
"bak_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 280767, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "bak_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 275562, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "bak_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 264932, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 223922, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "bak_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 173917, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "bak_Cyrl-awa_Deva": { + "num_samples": 1012, + 
"number_of_characters": 261393, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bak_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 274479, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bak_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 266037, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "bak_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 277152, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bak_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 276220, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "bak_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 280394, + "unique_pairs": 1012, + 
"min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "bak_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 264077, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 302233, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "bak_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 279423, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 271839, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "bak_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 268305, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 
131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "bak_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 259650, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268626, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "bak_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 268300, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "bak_Cyrl-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 283873, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bak_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 285946, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + 
"unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "bak_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 296257, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "bak_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 281584, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "bak_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 250283, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "bak_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 287031, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "bak_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 254502, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + 
"average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "bak_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 276179, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "bak_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 270768, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "bak_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 268061, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "bak_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 280825, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 291400, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + 
"max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "bak_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 276961, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "bak_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 245011, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "bak_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 273289, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "bak_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 264022, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "bak_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 279477, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, 
+ "bak_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 263121, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "bak_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 252810, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 273484, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "bak_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 266077, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "bak_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 235276, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "bak_Cyrl-khk_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 270289, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "bak_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 267379, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bak_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 266647, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "bak_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 284884, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "bak_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 305767, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 131.64229249011856, + "max_sentence1_length": 389, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "dyu_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 244476, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "dyu_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 259242, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "dyu_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 296213, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "dyu_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 263913, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "dyu_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 286067, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "dyu_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 259550, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "dyu_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 272009, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dyu_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 291587, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "dyu_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 268616, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "dyu_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 274788, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dyu_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 278425, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 
1010, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dyu_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 290394, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "dyu_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 259788, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "dyu_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 286286, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "dyu_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 262263, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dyu_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 255310, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + 
"average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "dyu_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 289852, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dyu_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 261453, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "dyu_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 246428, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "dyu_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280846, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dyu_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 264821, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + 
"max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dyu_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 261582, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dyu_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 278717, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "dyu_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 282191, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "dyu_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 256682, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "dyu_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 286202, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + 
"dyu_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 250815, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "dyu_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 247968, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "dyu_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 293606, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "dyu_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 264147, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dyu_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 271484, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "dyu_Latn-kir_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 268387, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dyu_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 265906, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dyu_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 293851, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "dyu_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 263635, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dyu_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 273332, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "dyu_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 244932, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "dyu_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 261897, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "dyu_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 262314, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "dyu_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 278811, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dyu_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 278617, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "dyu_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 259254, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "dyu_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 272338, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dyu_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 281581, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dyu_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267301, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dyu_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 272545, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dyu_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 261466, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + 
"unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dyu_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 272677, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "dyu_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 267415, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "dyu_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 263227, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dyu_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 274018, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "dyu_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 276273, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + 
"average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dyu_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 269701, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "dyu_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 267602, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "dyu_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 241901, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "dyu_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 255085, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "dyu_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 260303, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + 
"max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dyu_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 292535, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "dyu_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 248855, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "dyu_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270682, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "dyu_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 253498, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "dyu_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 265028, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + 
}, + "dyu_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 262774, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "dyu_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 265505, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "dyu_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 271037, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dyu_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 267129, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "dyu_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 275173, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "dyu_Latn-knc_Latn": { + "num_samples": 1012, + 
"number_of_characters": 271827, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "dyu_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 279143, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "dyu_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 273882, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "dyu_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 270959, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "dyu_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 282178, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "dyu_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 279919, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "dyu_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 281006, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "dyu_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 288032, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "dyu_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 262638, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "dyu_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 282749, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "dyu_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 269637, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dyu_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 281426, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dyu_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 270226, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "dyu_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 266092, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dyu_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 219987, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "dyu_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 265042, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + 
"unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "dyu_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 272983, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "dyu_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 288993, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "dyu_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 198820, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "dyu_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 257512, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "dyu_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 280120, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + 
"average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "dyu_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 286988, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "dyu_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 271450, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "dyu_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 240836, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "dyu_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 272230, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dyu_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 268115, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + 
"max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "dyu_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 269783, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dyu_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 262915, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dyu_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 279382, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "dyu_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274951, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "dyu_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 257272, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, 
+ "dyu_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 297601, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "dyu_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 249162, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "dyu_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270679, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dyu_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 290493, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "dyu_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 189798, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "dyu_Latn-lij_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278849, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dyu_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 296024, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dyu_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 275476, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "dyu_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 256759, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "dyu_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 260028, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "dyu_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 286613, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "dyu_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 278636, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dyu_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 277374, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "dyu_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 263409, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "dyu_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 270541, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "dyu_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 280362, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dyu_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 261112, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "dyu_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266137, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "dyu_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 271978, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "dyu_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 249283, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "dyu_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 291540, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + 
"unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "dyu_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 255017, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "dyu_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 302252, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "dyu_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 275052, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "dyu_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 266298, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dyu_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 272646, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + 
"average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "dyu_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 265325, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "dyu_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 274513, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "dyu_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 246418, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "dyu_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 260112, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dyu_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 289273, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + 
"max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "dyu_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 261211, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "dyu_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 263184, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "dyu_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 264993, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "dyu_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 271426, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dyu_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 278726, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, 
+ "dyu_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 259699, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "dyu_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 246625, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "dyu_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 272116, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "dyu_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 295452, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "dyu_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 270895, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dyu_Latn-lmo_Latn": { + "num_samples": 1012, + 
"number_of_characters": 274521, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "dyu_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 259758, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dyu_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 321139, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "dyu_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 299024, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "dyu_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 172874, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "dyu_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 258791, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "dyu_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 261440, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "dyu_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 285336, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "dyu_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 258960, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "dyu_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 262703, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "dyu_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 283964, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "dyu_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 263935, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "dyu_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 258964, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "dyu_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 176103, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "dyu_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 269218, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dyu_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 266893, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + 
"unique_sentence1": 1010, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "dyu_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 278998, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "dyu_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 259256, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "dyu_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 280400, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "dyu_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 275195, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "dyu_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 264565, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + 
"average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "dyu_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 223555, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "dyu_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 173550, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "dyu_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 261026, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "dyu_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 274112, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "dyu_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 265670, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + 
"max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "dyu_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 276785, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dyu_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 275853, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "dyu_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 280027, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "dyu_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 263710, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "dyu_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 301866, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + 
}, + "dyu_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 279056, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "dyu_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 271472, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "dyu_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 267938, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "dyu_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 259283, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "dyu_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268259, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "dyu_Latn-lug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 267933, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "dyu_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 283506, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "dyu_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 285579, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "dyu_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 295890, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "dyu_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 281217, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "dyu_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 249916, + "unique_pairs": 1012, + 
"min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "dyu_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 286664, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "dyu_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 254135, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "dyu_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 275812, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "dyu_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 270401, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "dyu_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 267694, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "dyu_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 280458, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "dyu_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 291033, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "dyu_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 276594, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "dyu_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 244644, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "dyu_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 272922, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + 
"unique_sentence1": 1010, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "dyu_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 263655, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "dyu_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 279110, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "dyu_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 262754, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "dyu_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 252443, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "dyu_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 273117, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + 
"average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "dyu_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266077, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "dyu_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 234909, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "dyu_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269922, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "dyu_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 267012, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "dyu_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 266280, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + 
"max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "dyu_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 284517, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "dyu_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 305400, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 131.27964426877472, + "max_sentence1_length": 342, + "unique_sentence1": 1010, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "heb_Hebr-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 213675, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "heb_Hebr-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 228441, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "heb_Hebr-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 265412, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, 
+ "heb_Hebr-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 233112, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "heb_Hebr-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 255266, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "heb_Hebr-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 228749, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "heb_Hebr-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 241208, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "heb_Hebr-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 260786, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "heb_Hebr-tur_Latn": { + "num_samples": 1012, + 
"number_of_characters": 237815, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "heb_Hebr-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 243987, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "heb_Hebr-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 247624, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "heb_Hebr-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 259593, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "heb_Hebr-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 228987, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "heb_Hebr-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 255485, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "heb_Hebr-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 231462, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "heb_Hebr-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 224509, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "heb_Hebr-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 259051, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "heb_Hebr-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 230652, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "heb_Hebr-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 215627, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "heb_Hebr-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 250045, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "heb_Hebr-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 234020, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "heb_Hebr-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 230781, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "heb_Hebr-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 247916, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "heb_Hebr-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 251390, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + 
"unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "heb_Hebr-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 225881, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "heb_Hebr-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 255401, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "heb_Hebr-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 220014, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "heb_Hebr-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 217167, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "heb_Hebr-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 262805, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + 
"average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "heb_Hebr-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 233346, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "heb_Hebr-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 240683, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "heb_Hebr-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 237586, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "heb_Hebr-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 235105, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "heb_Hebr-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 263050, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + 
"max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "heb_Hebr-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 232834, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "heb_Hebr-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 242531, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "heb_Hebr-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 214131, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "heb_Hebr-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 231096, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "heb_Hebr-est_Latn": { + "num_samples": 1012, + "number_of_characters": 231513, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + 
}, + "heb_Hebr-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 248010, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "heb_Hebr-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 247816, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "heb_Hebr-min_Arab": { + "num_samples": 1012, + "number_of_characters": 228453, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "heb_Hebr-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 241537, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "heb_Hebr-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 250780, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "heb_Hebr-ukr_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 236500, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "heb_Hebr-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 241744, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "heb_Hebr-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 230665, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "heb_Hebr-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 241876, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "heb_Hebr-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 236614, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "heb_Hebr-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 232426, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "heb_Hebr-min_Latn": { + "num_samples": 1012, + "number_of_characters": 243217, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "heb_Hebr-por_Latn": { + "num_samples": 1012, + "number_of_characters": 245472, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "heb_Hebr-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 238900, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "heb_Hebr-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 236801, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "heb_Hebr-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 211100, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "heb_Hebr-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 224284, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "heb_Hebr-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 229502, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "heb_Hebr-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 261734, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "heb_Hebr-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 218054, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "heb_Hebr-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 239881, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "heb_Hebr-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 222697, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "heb_Hebr-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 234227, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "heb_Hebr-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 231973, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "heb_Hebr-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 234704, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "heb_Hebr-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 240236, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + 
"average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "heb_Hebr-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 236328, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "heb_Hebr-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 244372, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "heb_Hebr-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 241026, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "heb_Hebr-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 248342, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "heb_Hebr-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 243081, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + 
"max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "heb_Hebr-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 240158, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "heb_Hebr-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 251377, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "heb_Hebr-als_Latn": { + "num_samples": 1012, + "number_of_characters": 249118, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "heb_Hebr-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 250205, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "heb_Hebr-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 257231, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, 
+ "heb_Hebr-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 231837, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "heb_Hebr-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 251948, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "heb_Hebr-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 238836, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "heb_Hebr-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 250625, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "heb_Hebr-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 239425, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "heb_Hebr-vec_Latn": { + "num_samples": 1012, + 
"number_of_characters": 235291, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "heb_Hebr-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 189186, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "heb_Hebr-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 234241, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "heb_Hebr-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 242182, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "heb_Hebr-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 258192, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "heb_Hebr-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 168019, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "heb_Hebr-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 226711, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "heb_Hebr-run_Latn": { + "num_samples": 1012, + "number_of_characters": 249319, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "heb_Hebr-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 256187, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "heb_Hebr-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 240649, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "heb_Hebr-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 210035, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "heb_Hebr-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 241429, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "heb_Hebr-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 237314, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "heb_Hebr-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 238982, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "heb_Hebr-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 232114, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "heb_Hebr-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 248581, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + 
"unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "heb_Hebr-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 244150, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "heb_Hebr-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 226471, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "heb_Hebr-war_Latn": { + "num_samples": 1012, + "number_of_characters": 266800, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "heb_Hebr-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 218361, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "heb_Hebr-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 239878, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "heb_Hebr-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 259692, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "heb_Hebr-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 158997, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "heb_Hebr-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 248048, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "heb_Hebr-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 265223, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "heb_Hebr-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 244675, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + 
"max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "heb_Hebr-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 225958, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "heb_Hebr-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 229227, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "heb_Hebr-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 255812, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "heb_Hebr-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 247835, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "heb_Hebr-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 246573, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + 
}, + "heb_Hebr-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 232608, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "heb_Hebr-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 239740, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "heb_Hebr-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 249561, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "heb_Hebr-san_Deva": { + "num_samples": 1012, + "number_of_characters": 230311, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "heb_Hebr-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 235336, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "heb_Hebr-xho_Latn": { + "num_samples": 1012, + 
"number_of_characters": 241177, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "heb_Hebr-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 218482, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "heb_Hebr-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 260739, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "heb_Hebr-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 224216, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "heb_Hebr-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 271451, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "heb_Hebr-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 244251, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "heb_Hebr-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 235497, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "heb_Hebr-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 241845, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "heb_Hebr-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 234524, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "heb_Hebr-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 243712, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "heb_Hebr-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 215617, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "heb_Hebr-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 229311, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "heb_Hebr-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 258472, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "heb_Hebr-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 230410, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "heb_Hebr-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 232383, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "heb_Hebr-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 234192, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "heb_Hebr-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 240625, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "heb_Hebr-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 247925, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "heb_Hebr-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 228898, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "heb_Hebr-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 215824, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "heb_Hebr-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 241315, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "heb_Hebr-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 264651, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "heb_Hebr-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 240094, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "heb_Hebr-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 243720, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "heb_Hebr-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 228957, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "heb_Hebr-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 290338, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + 
"max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "heb_Hebr-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 268223, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "heb_Hebr-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 142073, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "heb_Hebr-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 227990, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "heb_Hebr-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 230639, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "heb_Hebr-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 254535, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, 
+ "heb_Hebr-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 228159, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "heb_Hebr-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 231902, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "heb_Hebr-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 253163, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "heb_Hebr-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 233134, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "heb_Hebr-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 228163, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "heb_Hebr-zho_Hans": { + "num_samples": 1012, + 
"number_of_characters": 145302, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "heb_Hebr-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 238417, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "heb_Hebr-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 236092, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "heb_Hebr-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 248197, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "heb_Hebr-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 228455, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "heb_Hebr-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 249599, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "heb_Hebr-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 244394, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "heb_Hebr-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 233764, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "heb_Hebr-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 192754, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "heb_Hebr-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 142749, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "heb_Hebr-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 230225, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "heb_Hebr-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 243311, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "heb_Hebr-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 234869, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "heb_Hebr-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 245984, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "heb_Hebr-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 245052, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "heb_Hebr-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 249226, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + 
"unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "heb_Hebr-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 232909, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "heb_Hebr-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 271065, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "heb_Hebr-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 248255, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "heb_Hebr-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 240671, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "heb_Hebr-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 237137, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + 
"average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "heb_Hebr-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 228482, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "heb_Hebr-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 237458, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "heb_Hebr-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 237132, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "heb_Hebr-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 252705, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "heb_Hebr-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 254778, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + 
"max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "heb_Hebr-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 265089, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "heb_Hebr-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 250416, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "heb_Hebr-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 219115, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "heb_Hebr-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 255863, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "heb_Hebr-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 223334, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + 
}, + "heb_Hebr-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 245011, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "heb_Hebr-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 239600, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "heb_Hebr-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 236893, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "heb_Hebr-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 249657, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "heb_Hebr-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 260232, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "heb_Hebr-azj_Latn": { + "num_samples": 1012, + 
"number_of_characters": 245793, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "heb_Hebr-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 213843, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "heb_Hebr-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 242121, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "heb_Hebr-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 232854, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "heb_Hebr-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 248309, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "heb_Hebr-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 231953, + "unique_pairs": 1012, + 
"min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "heb_Hebr-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 221642, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "heb_Hebr-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 242316, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "heb_Hebr-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 235276, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "heb_Hebr-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 234909, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "heb_Hebr-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 239121, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 
100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "heb_Hebr-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 236211, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "heb_Hebr-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 235479, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "heb_Hebr-som_Latn": { + "num_samples": 1012, + "number_of_characters": 253716, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "heb_Hebr-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 274599, + "unique_pairs": 1012, + "min_sentence1_length": 30, + "average_sentence1_length": 100.84387351778656, + "max_sentence1_length": 281, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 248688, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + 
"unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 263454, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "khk_Cyrl-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 300425, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "khk_Cyrl-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 268125, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "khk_Cyrl-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 290279, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "khk_Cyrl-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 263762, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + 
"average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "khk_Cyrl-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 276221, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "khk_Cyrl-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 295799, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 272828, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 279000, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 282637, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, 
+ "unique_sentence2": 1012 + }, + "khk_Cyrl-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 294606, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "khk_Cyrl-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 264000, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "khk_Cyrl-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 290498, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "khk_Cyrl-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 266475, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "khk_Cyrl-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 259522, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "khk_Cyrl-spa_Latn": { + 
"num_samples": 1012, + "number_of_characters": 294064, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "khk_Cyrl-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 265665, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "khk_Cyrl-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 250640, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 285058, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "khk_Cyrl-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 269033, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "khk_Cyrl-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 265794, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 282929, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "khk_Cyrl-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 286403, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "khk_Cyrl-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 260894, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "khk_Cyrl-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 290414, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 255027, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "khk_Cyrl-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 252180, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 297818, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "khk_Cyrl-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 268359, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "khk_Cyrl-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 275696, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272599, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "khk_Cyrl-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 270118, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "khk_Cyrl-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 298063, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "khk_Cyrl-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267847, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "khk_Cyrl-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 277544, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "khk_Cyrl-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 249144, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 266109, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "khk_Cyrl-est_Latn": { + "num_samples": 1012, + "number_of_characters": 266526, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "khk_Cyrl-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 283023, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 282829, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "khk_Cyrl-min_Arab": { + "num_samples": 1012, + "number_of_characters": 263466, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 
124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "khk_Cyrl-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 276550, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 285793, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271513, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "khk_Cyrl-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 276757, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 265678, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + 
"unique_sentence2": 1012 + }, + "khk_Cyrl-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 276889, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 271627, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 267439, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "khk_Cyrl-min_Latn": { + "num_samples": 1012, + "number_of_characters": 278230, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "khk_Cyrl-por_Latn": { + "num_samples": 1012, + "number_of_characters": 280485, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "khk_Cyrl-sun_Latn": { + 
"num_samples": 1012, + "number_of_characters": 273913, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "khk_Cyrl-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 271814, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 246113, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 259297, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 264515, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 296747, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "khk_Cyrl-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 253067, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "khk_Cyrl-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274894, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "khk_Cyrl-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 257710, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "khk_Cyrl-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 269240, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "khk_Cyrl-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 266986, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "khk_Cyrl-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 269717, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 275249, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "khk_Cyrl-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 271341, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 279385, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "khk_Cyrl-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 276039, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "khk_Cyrl-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 283355, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "khk_Cyrl-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 278094, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "khk_Cyrl-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 275171, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "khk_Cyrl-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 286390, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "khk_Cyrl-als_Latn": { + "num_samples": 1012, + "number_of_characters": 284131, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 285218, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "khk_Cyrl-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 292244, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "khk_Cyrl-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 266850, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 286961, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "khk_Cyrl-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 273849, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 285638, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "khk_Cyrl-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 274438, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "khk_Cyrl-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 270304, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "khk_Cyrl-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 224199, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 269254, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "khk_Cyrl-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 277195, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 293205, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 203032, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "khk_Cyrl-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 261724, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "khk_Cyrl-run_Latn": { + "num_samples": 1012, + "number_of_characters": 284332, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "khk_Cyrl-tam_Taml": { + 
"num_samples": 1012, + "number_of_characters": 291200, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "khk_Cyrl-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 275662, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "khk_Cyrl-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 245048, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 276442, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "khk_Cyrl-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 272327, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "khk_Cyrl-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 273995, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "khk_Cyrl-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 267127, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "khk_Cyrl-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 283594, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "khk_Cyrl-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279163, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "khk_Cyrl-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 261484, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "khk_Cyrl-war_Latn": { + "num_samples": 1012, + "number_of_characters": 301813, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "khk_Cyrl-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 253374, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 274891, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "khk_Cyrl-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 294705, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "khk_Cyrl-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 194010, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "khk_Cyrl-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 283061, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "khk_Cyrl-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 300236, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "khk_Cyrl-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 279688, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "khk_Cyrl-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 260971, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "khk_Cyrl-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 264240, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "khk_Cyrl-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 290825, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "khk_Cyrl-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 282848, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "khk_Cyrl-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 281586, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 267621, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "khk_Cyrl-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 274753, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "khk_Cyrl-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 284574, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 
145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "khk_Cyrl-san_Deva": { + "num_samples": 1012, + "number_of_characters": 265324, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "khk_Cyrl-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270349, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "khk_Cyrl-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 276190, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 253495, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 295752, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + 
"unique_sentence2": 1012 + }, + "khk_Cyrl-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 259229, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 306464, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "khk_Cyrl-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 279264, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "khk_Cyrl-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 270510, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "khk_Cyrl-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 276858, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tel_Telu": { + 
"num_samples": 1012, + "number_of_characters": 269537, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 278725, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 250630, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "khk_Cyrl-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 264324, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "khk_Cyrl-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 293485, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 265423, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "khk_Cyrl-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 267396, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "khk_Cyrl-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 269205, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "khk_Cyrl-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 275638, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282938, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "khk_Cyrl-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 263911, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "khk_Cyrl-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 250837, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "khk_Cyrl-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 276328, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "khk_Cyrl-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 299664, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "khk_Cyrl-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 275107, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "khk_Cyrl-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 278733, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "khk_Cyrl-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 263970, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "khk_Cyrl-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 325351, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 303236, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "khk_Cyrl-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 177086, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "khk_Cyrl-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 263003, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 265652, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "khk_Cyrl-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 289548, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 263172, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 266915, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "khk_Cyrl-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 288176, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "khk_Cyrl-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 268147, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 263176, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "khk_Cyrl-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 180315, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 273430, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "khk_Cyrl-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 271105, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + 
"unique_sentence2": 1012 + }, + "khk_Cyrl-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 283210, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 263468, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 284612, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "khk_Cyrl-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 279407, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "khk_Cyrl-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 268777, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tir_Ethi": { + 
"num_samples": 1012, + "number_of_characters": 227767, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "khk_Cyrl-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 177762, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "khk_Cyrl-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 265238, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "khk_Cyrl-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 278324, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "khk_Cyrl-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 269882, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "khk_Cyrl-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 280997, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "khk_Cyrl-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 280065, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "khk_Cyrl-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 284239, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "khk_Cyrl-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 267922, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 306078, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "khk_Cyrl-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 283268, + "unique_pairs": 1012, + "min_sentence1_length": 44, + 
"average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 275684, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "khk_Cyrl-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 272150, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "khk_Cyrl-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 263495, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "khk_Cyrl-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 272471, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "khk_Cyrl-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 272145, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + 
"max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "khk_Cyrl-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 287718, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "khk_Cyrl-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 289791, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "khk_Cyrl-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 300102, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "khk_Cyrl-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 285429, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "khk_Cyrl-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 254128, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + 
"min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "khk_Cyrl-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 290876, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "khk_Cyrl-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 258347, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "khk_Cyrl-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 280024, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "khk_Cyrl-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 274613, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "khk_Cyrl-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 271906, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "khk_Cyrl-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 284670, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 295245, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "khk_Cyrl-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 280806, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "khk_Cyrl-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 248856, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "khk_Cyrl-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 277134, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + 
"unique_sentence2": 1012 + }, + "khk_Cyrl-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 267867, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "khk_Cyrl-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 283322, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "khk_Cyrl-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 266966, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "khk_Cyrl-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 256655, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 277329, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "khk_Cyrl-bak_Cyrl": { + 
"num_samples": 1012, + "number_of_characters": 270289, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "khk_Cyrl-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 269922, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "khk_Cyrl-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 239121, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "khk_Cyrl-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 271224, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "khk_Cyrl-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 270492, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "khk_Cyrl-som_Latn": { + "num_samples": 1012, + "number_of_characters": 288729, + 
"unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "khk_Cyrl-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 309612, + "unique_pairs": 1012, + "min_sentence1_length": 44, + "average_sentence1_length": 135.4416996047431, + "max_sentence1_length": 355, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "lvs_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 245778, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "lvs_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 260544, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "lvs_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 297515, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lvs_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 265215, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "lvs_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 287369, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "lvs_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 260852, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "lvs_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 273311, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lvs_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 292889, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "lvs_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 269918, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "lvs_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 276090, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lvs_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 279727, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lvs_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 291696, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "lvs_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 261090, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "lvs_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 287588, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "lvs_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 263565, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lvs_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 256612, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lvs_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 291154, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lvs_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 262755, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "lvs_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 247730, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "lvs_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282148, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lvs_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 266123, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lvs_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 262884, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lvs_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 280019, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lvs_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 283493, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + 
"unique_sentence2": 1012 + }, + "lvs_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 257984, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "lvs_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 287504, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lvs_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 252117, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "lvs_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 249270, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "lvs_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 294908, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "lvs_Latn-epo_Latn": { + 
"num_samples": 1012, + "number_of_characters": 265449, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lvs_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 272786, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "lvs_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269689, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lvs_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 267208, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lvs_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 295153, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "lvs_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264937, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lvs_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 274634, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "lvs_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 246234, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "lvs_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 263199, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lvs_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 263616, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "lvs_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 280113, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lvs_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 279919, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "lvs_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 260556, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lvs_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 273640, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lvs_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 282883, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lvs_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268603, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lvs_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 273847, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lvs_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 262768, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lvs_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 273979, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "lvs_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 268717, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lvs_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 264529, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lvs_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 275320, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "lvs_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 277575, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lvs_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 271003, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lvs_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 268904, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lvs_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 243203, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "lvs_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 256387, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "lvs_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 261605, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lvs_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 293837, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "lvs_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 250157, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "lvs_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271984, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + 
"unique_sentence2": 1012 + }, + "lvs_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 254800, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "lvs_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 266330, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lvs_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 264076, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lvs_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 266807, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "lvs_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 272339, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lvs_Latn-fao_Latn": { + 
"num_samples": 1012, + "number_of_characters": 268431, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lvs_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 276475, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "lvs_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 273129, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lvs_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 280445, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "lvs_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 275184, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lvs_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 272261, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "lvs_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 283480, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "lvs_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 281221, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lvs_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 282308, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "lvs_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 289334, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "lvs_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 263940, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lvs_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 284051, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "lvs_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 270939, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lvs_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 282728, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lvs_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 271528, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "lvs_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 267394, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lvs_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 221289, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "lvs_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 266344, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "lvs_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 274285, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lvs_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 290295, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "lvs_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 200122, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "lvs_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 258814, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "lvs_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 281422, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "lvs_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 288290, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "lvs_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 272752, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "lvs_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 242138, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 
106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "lvs_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 273532, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lvs_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 269417, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "lvs_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 271085, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lvs_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 264217, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lvs_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 280684, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + 
"unique_sentence2": 1012 + }, + "lvs_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 276253, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "lvs_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 258574, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lvs_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 298903, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "lvs_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 250464, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lvs_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271981, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lvs_Latn-fra_Latn": { + 
"num_samples": 1012, + "number_of_characters": 291795, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "lvs_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 191100, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "lvs_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 280151, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lvs_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 297326, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lvs_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 276778, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "lvs_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 258061, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "lvs_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 261330, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "lvs_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 287915, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "lvs_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 279938, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lvs_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 278676, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "lvs_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 264711, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "lvs_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 271843, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "lvs_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 281664, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lvs_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 262414, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "lvs_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267439, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lvs_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 273280, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "lvs_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 250585, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "lvs_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 292842, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "lvs_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 256319, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "lvs_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 303554, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "lvs_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 276354, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lvs_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 267600, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lvs_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 273948, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lvs_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 266627, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "lvs_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 275815, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "lvs_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 247720, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "lvs_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 261414, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lvs_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 290575, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "lvs_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 262513, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "lvs_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 264486, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "lvs_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 266295, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + 
"unique_sentence2": 1012 + }, + "lvs_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 272728, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lvs_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 280028, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "lvs_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 261001, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "lvs_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 247927, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "lvs_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 273418, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "lvs_Latn-gla_Latn": { + 
"num_samples": 1012, + "number_of_characters": 296754, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "lvs_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 272197, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lvs_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 275823, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "lvs_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 261060, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lvs_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 322441, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "lvs_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 300326, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "lvs_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 174176, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "lvs_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 260093, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "lvs_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 262742, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "lvs_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 286638, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "lvs_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 260262, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "lvs_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 264005, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "lvs_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 285266, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "lvs_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 265237, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "lvs_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 260266, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "lvs_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 177405, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "lvs_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 270520, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lvs_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 268195, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "lvs_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 280300, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "lvs_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 260558, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "lvs_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 281702, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "lvs_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 276497, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "lvs_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 265867, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "lvs_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 224857, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "lvs_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 174852, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "lvs_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 262328, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 
126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "lvs_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 275414, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lvs_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 266972, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "lvs_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 278087, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lvs_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 277155, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "lvs_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 281329, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + 
"unique_sentence2": 1012 + }, + "lvs_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 265012, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "lvs_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 303168, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "lvs_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 280358, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "lvs_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 272774, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "lvs_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 269240, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "lvs_Latn-guj_Gujr": { + 
"num_samples": 1012, + "number_of_characters": 260585, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "lvs_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 269561, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "lvs_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 269235, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "lvs_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 284808, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lvs_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 286881, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "lvs_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 297192, + 
"unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "lvs_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 282519, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "lvs_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 251218, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "lvs_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 287966, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "lvs_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 255437, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "lvs_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 277114, + "unique_pairs": 1012, + "min_sentence1_length": 38, + 
"average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "lvs_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 271703, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "lvs_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 268996, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "lvs_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 281760, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "lvs_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 292335, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "lvs_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 277896, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "lvs_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 245946, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "lvs_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 274224, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "lvs_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 264957, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "lvs_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 280412, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "lvs_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 264056, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "lvs_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 253745, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "lvs_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 274419, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "lvs_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267379, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "lvs_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 267012, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "lvs_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 236211, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 
100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "lvs_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271224, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "lvs_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 267582, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "lvs_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 285819, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "lvs_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 306702, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 132.56620553359684, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "pan_Guru-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 245046, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + 
"unique_sentence2": 1012 + }, + "pan_Guru-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 259812, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "pan_Guru-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 296783, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pan_Guru-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 264483, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "pan_Guru-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 286637, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "pan_Guru-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 260120, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "pan_Guru-pap_Latn": { + 
"num_samples": 1012, + "number_of_characters": 272579, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pan_Guru-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 292157, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "pan_Guru-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 269186, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "pan_Guru-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 275358, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pan_Guru-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 278995, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pan_Guru-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 290964, + "unique_pairs": 
1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "pan_Guru-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 260358, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "pan_Guru-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 286856, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "pan_Guru-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 262833, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pan_Guru-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 255880, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pan_Guru-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 290422, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pan_Guru-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 262023, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "pan_Guru-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 246998, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "pan_Guru-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 281416, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pan_Guru-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 265391, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pan_Guru-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 262152, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pan_Guru-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 279287, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pan_Guru-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 282761, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "pan_Guru-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 257252, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "pan_Guru-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 286772, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pan_Guru-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 251385, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "pan_Guru-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 248538, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "pan_Guru-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 294176, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "pan_Guru-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 264717, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pan_Guru-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 272054, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "pan_Guru-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268957, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 
133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pan_Guru-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 266476, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pan_Guru-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 294421, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "pan_Guru-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 264205, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pan_Guru-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 273902, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "pan_Guru-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 245502, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + 
"unique_sentence2": 1012 + }, + "pan_Guru-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 262467, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pan_Guru-est_Latn": { + "num_samples": 1012, + "number_of_characters": 262884, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "pan_Guru-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 279381, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pan_Guru-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 279187, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "pan_Guru-min_Arab": { + "num_samples": 1012, + "number_of_characters": 259824, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pan_Guru-pol_Latn": { + 
"num_samples": 1012, + "number_of_characters": 272908, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pan_Guru-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 282151, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pan_Guru-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 267871, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pan_Guru-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 273115, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pan_Guru-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 262036, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pan_Guru-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 273247, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "pan_Guru-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 267985, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pan_Guru-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 263797, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pan_Guru-min_Latn": { + "num_samples": 1012, + "number_of_characters": 274588, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "pan_Guru-por_Latn": { + "num_samples": 1012, + "number_of_characters": 276843, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pan_Guru-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 270271, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pan_Guru-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 268172, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pan_Guru-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 242471, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "pan_Guru-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 255655, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "pan_Guru-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 260873, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pan_Guru-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 293105, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "pan_Guru-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 249425, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "pan_Guru-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271252, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "pan_Guru-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 254068, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "pan_Guru-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 265598, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pan_Guru-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 263344, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pan_Guru-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 266075, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "pan_Guru-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 271607, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pan_Guru-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 267699, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pan_Guru-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 275743, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "pan_Guru-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 272397, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 
137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pan_Guru-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 279713, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "pan_Guru-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 274452, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pan_Guru-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 271529, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "pan_Guru-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 282748, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "pan_Guru-als_Latn": { + "num_samples": 1012, + "number_of_characters": 280489, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + 
"unique_sentence2": 1012 + }, + "pan_Guru-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 281576, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "pan_Guru-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 288602, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "pan_Guru-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 263208, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pan_Guru-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 283319, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "pan_Guru-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 270207, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pan_Guru-ron_Latn": { + 
"num_samples": 1012, + "number_of_characters": 281996, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pan_Guru-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 270796, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "pan_Guru-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 266662, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pan_Guru-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 220557, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "pan_Guru-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 265612, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "pan_Guru-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 273553, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pan_Guru-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 289563, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "pan_Guru-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 199390, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "pan_Guru-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 258082, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "pan_Guru-run_Latn": { + "num_samples": 1012, + "number_of_characters": 280690, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "pan_Guru-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 287558, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "pan_Guru-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 272020, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "pan_Guru-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 241406, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "pan_Guru-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 272800, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pan_Guru-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 268685, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "pan_Guru-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 270353, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pan_Guru-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 263485, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pan_Guru-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 279952, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pan_Guru-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 275521, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "pan_Guru-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 257842, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pan_Guru-war_Latn": { + "num_samples": 1012, + "number_of_characters": 298171, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "pan_Guru-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 249732, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pan_Guru-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 271249, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pan_Guru-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 291063, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "pan_Guru-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 190368, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "pan_Guru-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 279419, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pan_Guru-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 296594, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pan_Guru-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 276046, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "pan_Guru-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 257329, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "pan_Guru-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 260598, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "pan_Guru-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 287183, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + 
"unique_sentence2": 1012 + }, + "pan_Guru-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 279206, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pan_Guru-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 277944, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "pan_Guru-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 263979, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "pan_Guru-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 271111, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "pan_Guru-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 280932, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pan_Guru-san_Deva": { + 
"num_samples": 1012, + "number_of_characters": 261682, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "pan_Guru-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266707, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "pan_Guru-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 272548, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "pan_Guru-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 249853, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "pan_Guru-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 292110, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "pan_Guru-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 255587, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "pan_Guru-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 302822, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "pan_Guru-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 275622, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pan_Guru-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 266868, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pan_Guru-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 273216, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pan_Guru-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 265895, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "pan_Guru-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 275083, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "pan_Guru-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 246988, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "pan_Guru-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 260682, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pan_Guru-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 289843, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "pan_Guru-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 261781, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "pan_Guru-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 263754, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "pan_Guru-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 265563, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "pan_Guru-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 271996, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pan_Guru-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 279296, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "pan_Guru-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 260269, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "pan_Guru-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 247195, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "pan_Guru-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 272686, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "pan_Guru-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 296022, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "pan_Guru-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 271465, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pan_Guru-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 275091, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 
139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "pan_Guru-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 260328, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pan_Guru-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 321709, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "pan_Guru-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 299594, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "pan_Guru-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 173444, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "pan_Guru-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 259361, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + 
"unique_sentence2": 1012 + }, + "pan_Guru-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 262010, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "pan_Guru-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 285906, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "pan_Guru-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 259530, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "pan_Guru-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 263273, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "pan_Guru-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 284534, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "pan_Guru-sin_Sinh": { + 
"num_samples": 1012, + "number_of_characters": 264505, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "pan_Guru-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 259534, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "pan_Guru-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 176673, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "pan_Guru-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 269788, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pan_Guru-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 267463, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "pan_Guru-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 279568, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "pan_Guru-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 259826, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "pan_Guru-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 280970, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "pan_Guru-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 275765, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "pan_Guru-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 265135, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "pan_Guru-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 224125, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "pan_Guru-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 174120, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "pan_Guru-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 261596, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "pan_Guru-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 274682, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pan_Guru-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 266240, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "pan_Guru-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 277355, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pan_Guru-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 276423, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "pan_Guru-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 280597, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "pan_Guru-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 264280, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "pan_Guru-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 302436, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "pan_Guru-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 279626, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "pan_Guru-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 272042, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "pan_Guru-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 268508, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "pan_Guru-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 259853, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "pan_Guru-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 268829, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "pan_Guru-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 268503, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 
133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "pan_Guru-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 284076, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pan_Guru-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 286149, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "pan_Guru-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 296460, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "pan_Guru-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 281787, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "pan_Guru-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 250486, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + 
"unique_sentence2": 1012 + }, + "pan_Guru-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 287234, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "pan_Guru-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 254705, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "pan_Guru-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 276382, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "pan_Guru-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 270971, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "pan_Guru-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 268264, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pan_Guru-sna_Latn": { + 
"num_samples": 1012, + "number_of_characters": 281028, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "pan_Guru-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 291603, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "pan_Guru-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 277164, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "pan_Guru-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 245214, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "pan_Guru-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 273492, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "pan_Guru-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 264225, + 
"unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "pan_Guru-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 279680, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "pan_Guru-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 263324, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "pan_Guru-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 253013, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "pan_Guru-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 273687, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "pan_Guru-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 266647, + "unique_pairs": 1012, + "min_sentence1_length": 37, + 
"average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "pan_Guru-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 266280, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "pan_Guru-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 235479, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "pan_Guru-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 270492, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "pan_Guru-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 267582, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pan_Guru-som_Latn": { + "num_samples": 1012, + "number_of_characters": 285087, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + 
"max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + }, + "pan_Guru-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 305970, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "som_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 263283, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "som_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 278049, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "som_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 315020, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "som_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 282720, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + 
"min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "som_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 304874, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "som_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 278357, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "som_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 290816, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "som_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 310394, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "som_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 287423, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 
134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "som_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 293595, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "som_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 297232, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "som_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 309201, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "som_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 278595, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "som_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 305093, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 
1009 + }, + "som_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 281070, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "som_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 274117, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "som_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 308659, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "som_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 280260, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "som_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 265235, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "som_Latn-bel_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 299653, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "som_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 283628, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "som_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 280389, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "som_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 297524, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "som_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 300998, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "som_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 275489, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "som_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 305009, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "som_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 269622, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "som_Latn-acq_Arab": { + "num_samples": 1012, + "number_of_characters": 266775, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "som_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 312413, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "som_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 282954, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "som_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 290291, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "som_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287194, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "som_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 284713, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "som_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 312658, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "som_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 282442, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "som_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 292139, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "som_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 263739, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "som_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 280704, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "som_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 281121, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "som_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 297618, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + 
"average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "som_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 297424, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "som_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 278061, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "som_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 291145, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "som_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 300388, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "som_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 286108, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + 
"max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "som_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 291352, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "som_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 280273, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "som_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 291484, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "som_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 286222, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "som_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 282034, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + 
}, + "som_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 292825, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "som_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 295080, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "som_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 288508, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "som_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 286409, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "som_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 260708, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "som_Latn-bjn_Arab": { + "num_samples": 1012, + 
"number_of_characters": 273892, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "som_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 279110, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "som_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 311342, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + "unique_sentence2": 1012 + }, + "som_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 267662, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "som_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289489, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "som_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 272305, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "som_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 283835, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "som_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 281581, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "som_Latn-aka_Latn": { + "num_samples": 1012, + "number_of_characters": 284312, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "som_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 289844, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "som_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 285936, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "som_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 293980, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "som_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 290634, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "som_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 297950, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "som_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 292689, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "som_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 289766, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + 
"unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "som_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 300985, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "som_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 298726, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "som_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 299813, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "som_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 306839, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "som_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 281445, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "som_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 301556, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "som_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 288444, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "som_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 300233, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "som_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 289033, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "som_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 284899, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + 
"max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "som_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 238794, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "som_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 283849, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "som_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 291790, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "som_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 307800, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "som_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 217627, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + 
"som_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 276319, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "som_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 298927, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "som_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 305795, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "som_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 290257, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "som_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 259643, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "som_Latn-bug_Latn": { + "num_samples": 1012, + 
"number_of_characters": 291037, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "som_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 286922, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "som_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 288590, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "som_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 281722, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "som_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 298189, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "som_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 293758, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "som_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 276079, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "som_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 316408, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "som_Latn-arb_Arab": { + "num_samples": 1012, + "number_of_characters": 267969, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "som_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 289486, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "som_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 309300, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "som_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 208605, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "som_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 297656, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "som_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 314831, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "som_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 294283, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "som_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 275566, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + 
"unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "som_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 278835, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "som_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 305420, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "som_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 297443, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "som_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 296181, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "som_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 282216, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "som_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 289348, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "som_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 299169, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "som_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 279919, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "som_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284944, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "som_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 290785, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + 
"max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "som_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 268090, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "som_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 310347, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "som_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 273824, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "som_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 321059, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "som_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 293859, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, 
+ "som_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 285105, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "som_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 291453, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "som_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 284132, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "som_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 293320, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "som_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 265225, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "som_Latn-ces_Latn": { + "num_samples": 1012, + 
"number_of_characters": 278919, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "som_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 308080, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "som_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 280018, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + "unique_sentence2": 1010 + }, + "som_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 281991, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "som_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 283800, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "som_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 290233, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "som_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 297533, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "som_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 278506, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "som_Latn-arz_Arab": { + "num_samples": 1012, + "number_of_characters": 265432, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "som_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 290923, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "som_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 314259, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "som_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 289702, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "som_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 293328, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "som_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 278565, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "som_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 339946, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "som_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 317831, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + 
"unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "som_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 191681, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "som_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 277598, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "som_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 280247, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "som_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 304143, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "som_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 277767, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + 
"average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "som_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 281510, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "som_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 302771, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "som_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 282742, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "som_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 277771, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "som_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 194910, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + 
"max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "som_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 288025, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "som_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 285700, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "som_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 297805, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "som_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 278063, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "som_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 299207, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, 
+ "som_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 294002, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "som_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 283372, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "som_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 242362, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "som_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 192357, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "som_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 279833, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "som_Latn-cym_Latn": { + "num_samples": 1012, + 
"number_of_characters": 292919, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "som_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 284477, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "som_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 295592, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "som_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 294660, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "som_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 298834, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "som_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 282517, + "unique_pairs": 1012, + 
"min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "som_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 320673, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "som_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 297863, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "som_Latn-ayr_Latn": { + "num_samples": 1012, + "number_of_characters": 290279, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "som_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 286745, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "som_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 278090, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 
149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "som_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 287066, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "som_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 286740, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "som_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 302313, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "som_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 304386, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "som_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 314697, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + 
"unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "som_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 300024, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "som_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 268723, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "som_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 305471, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "som_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 272942, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "som_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 294619, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + 
"average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "som_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 289208, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "som_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 286501, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "som_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 299265, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "som_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 309840, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "som_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 295401, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + 
"max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "som_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 263451, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "som_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 291729, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "som_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 282462, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "som_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 297917, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "som_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 281561, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, 
+ "som_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 271250, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "som_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 291924, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "som_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 284884, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "som_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 284517, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "som_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 253716, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "som_Latn-khk_Cyrl": { + "num_samples": 1012, + 
"number_of_characters": 288729, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "som_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 285819, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "som_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 285087, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "som_Latn-tum_Latn": { + "num_samples": 1012, + "number_of_characters": 324207, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 149.86363636363637, + "max_sentence1_length": 414, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 170.4990118577075, + "max_sentence2_length": 542, + "unique_sentence2": 1012 + }, + "tum_Latn-ace_Arab": { + "num_samples": 1012, + "number_of_characters": 284166, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 110.29743083003953, + "max_sentence2_length": 295, + "unique_sentence2": 1012 + }, + "tum_Latn-bam_Latn": { + "num_samples": 1012, + "number_of_characters": 298932, + "unique_pairs": 1012, + 
"min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 124.88833992094861, + "max_sentence2_length": 372, + "unique_sentence2": 1009 + }, + "tum_Latn-dzo_Tibt": { + "num_samples": 1012, + "number_of_characters": 335903, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 161.42094861660078, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tum_Latn-hin_Deva": { + "num_samples": 1012, + "number_of_characters": 303603, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.50395256916997, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "tum_Latn-khm_Khmr": { + "num_samples": 1012, + "number_of_characters": 325757, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 151.39525691699606, + "max_sentence2_length": 507, + "unique_sentence2": 1012 + }, + "tum_Latn-mag_Deva": { + "num_samples": 1012, + "number_of_characters": 299240, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 125.19268774703558, + "max_sentence2_length": 343, + "unique_sentence2": 1012 + }, + "tum_Latn-pap_Latn": { + "num_samples": 1012, + "number_of_characters": 311699, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 
170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 137.50395256916997, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tum_Latn-sot_Latn": { + "num_samples": 1012, + "number_of_characters": 331277, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 156.8498023715415, + "max_sentence2_length": 461, + "unique_sentence2": 1012 + }, + "tum_Latn-tur_Latn": { + "num_samples": 1012, + "number_of_characters": 308306, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 134.151185770751, + "max_sentence2_length": 386, + "unique_sentence2": 1012 + }, + "tum_Latn-ace_Latn": { + "num_samples": 1012, + "number_of_characters": 314478, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.25, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tum_Latn-ban_Latn": { + "num_samples": 1012, + "number_of_characters": 318115, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 143.84387351778656, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tum_Latn-ell_Grek": { + "num_samples": 1012, + "number_of_characters": 330084, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 155.67094861660078, + "max_sentence2_length": 464, + "unique_sentence2": 1012 + }, + "tum_Latn-hne_Deva": { + "num_samples": 1012, + "number_of_characters": 299478, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 125.42786561264822, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "tum_Latn-kik_Latn": { + "num_samples": 1012, + "number_of_characters": 325976, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 151.61166007905138, + "max_sentence2_length": 515, + "unique_sentence2": 1009 + }, + "tum_Latn-mai_Deva": { + "num_samples": 1012, + "number_of_characters": 301953, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87351778656127, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tum_Latn-pbt_Arab": { + "num_samples": 1012, + "number_of_characters": 295000, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tum_Latn-spa_Latn": { + "num_samples": 1012, + "number_of_characters": 329542, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 
155.13537549407116, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tum_Latn-twi_Latn": { + "num_samples": 1012, + "number_of_characters": 301143, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 127.07312252964427, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "tum_Latn-acm_Arab": { + "num_samples": 1012, + "number_of_characters": 286118, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 112.22628458498023, + "max_sentence2_length": 303, + "unique_sentence2": 1012 + }, + "tum_Latn-bel_Cyrl": { + "num_samples": 1012, + "number_of_characters": 320536, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.23616600790513, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tum_Latn-eng_Latn": { + "num_samples": 1012, + "number_of_characters": 304511, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tum_Latn-hrv_Latn": { + "num_samples": 1012, + "number_of_characters": 301272, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 127.2005928853755, + "max_sentence2_length": 350, + 
"unique_sentence2": 1012 + }, + "tum_Latn-kin_Latn": { + "num_samples": 1012, + "number_of_characters": 318407, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.1324110671937, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tum_Latn-mal_Mlym": { + "num_samples": 1012, + "number_of_characters": 321881, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tum_Latn-pes_Arab": { + "num_samples": 1012, + "number_of_characters": 296372, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 122.3586956521739, + "max_sentence2_length": 324, + "unique_sentence2": 1012 + }, + "tum_Latn-srd_Latn": { + "num_samples": 1012, + "number_of_characters": 325892, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 151.52865612648222, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tum_Latn-tzm_Tfng": { + "num_samples": 1012, + "number_of_characters": 290505, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 116.56126482213439, + "max_sentence2_length": 330, + "unique_sentence2": 1012 + }, + "tum_Latn-acq_Arab": { + 
"num_samples": 1012, + "number_of_characters": 287658, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 113.74802371541502, + "max_sentence2_length": 318, + "unique_sentence2": 1012 + }, + "tum_Latn-bem_Latn": { + "num_samples": 1012, + "number_of_characters": 333296, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 158.84486166007906, + "max_sentence2_length": 422, + "unique_sentence2": 1012 + }, + "tum_Latn-epo_Latn": { + "num_samples": 1012, + "number_of_characters": 303837, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 129.73517786561266, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tum_Latn-hun_Latn": { + "num_samples": 1012, + "number_of_characters": 311174, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 136.98517786561266, + "max_sentence2_length": 393, + "unique_sentence2": 1012 + }, + "tum_Latn-kir_Cyrl": { + "num_samples": 1012, + "number_of_characters": 308077, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.92490118577075, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tum_Latn-mar_Deva": { + "num_samples": 1012, + "number_of_characters": 305596, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47332015810278, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tum_Latn-plt_Latn": { + "num_samples": 1012, + "number_of_characters": 333541, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 159.08695652173913, + "max_sentence2_length": 479, + "unique_sentence2": 1012 + }, + "tum_Latn-srp_Cyrl": { + "num_samples": 1012, + "number_of_characters": 303325, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 129.22924901185772, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tum_Latn-uig_Arab": { + "num_samples": 1012, + "number_of_characters": 313022, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 138.81126482213438, + "max_sentence2_length": 354, + "unique_sentence2": 1011 + }, + "tum_Latn-aeb_Arab": { + "num_samples": 1012, + "number_of_characters": 284622, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 110.74802371541502, + "max_sentence2_length": 305, + "unique_sentence2": 1012 + }, + "tum_Latn-ben_Beng": { + "num_samples": 1012, + "number_of_characters": 301587, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tum_Latn-est_Latn": { + "num_samples": 1012, + "number_of_characters": 302004, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.92391304347827, + "max_sentence2_length": 356, + "unique_sentence2": 1012 + }, + "tum_Latn-hye_Armn": { + "num_samples": 1012, + "number_of_characters": 318501, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 144.22529644268775, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tum_Latn-kmb_Latn": { + "num_samples": 1012, + "number_of_characters": 318307, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 144.03359683794466, + "max_sentence2_length": 426, + "unique_sentence2": 1012 + }, + "tum_Latn-min_Arab": { + "num_samples": 1012, + "number_of_characters": 298944, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 124.9001976284585, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tum_Latn-pol_Latn": { + "num_samples": 1012, + "number_of_characters": 312028, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + 
"max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 137.82905138339922, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tum_Latn-ssw_Latn": { + "num_samples": 1012, + "number_of_characters": 321271, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 146.96245059288538, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tum_Latn-ukr_Cyrl": { + "num_samples": 1012, + "number_of_characters": 306991, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 132.85177865612647, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tum_Latn-afr_Latn": { + "num_samples": 1012, + "number_of_characters": 312235, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.03359683794466, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tum_Latn-bho_Deva": { + "num_samples": 1012, + "number_of_characters": 301156, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.08596837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tum_Latn-eus_Latn": { + "num_samples": 1012, + "number_of_characters": 312367, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + 
"min_sentence2_length": 39, + "average_sentence2_length": 138.16403162055337, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tum_Latn-ibo_Latn": { + "num_samples": 1012, + "number_of_characters": 307105, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 132.96442687747034, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tum_Latn-kmr_Latn": { + "num_samples": 1012, + "number_of_characters": 302917, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 128.82608695652175, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tum_Latn-min_Latn": { + "num_samples": 1012, + "number_of_characters": 313708, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 139.4891304347826, + "max_sentence2_length": 363, + "unique_sentence2": 1011 + }, + "tum_Latn-por_Latn": { + "num_samples": 1012, + "number_of_characters": 315963, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 141.7173913043478, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tum_Latn-sun_Latn": { + "num_samples": 1012, + "number_of_characters": 309391, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
135.22332015810278, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tum_Latn-umb_Latn": { + "num_samples": 1012, + "number_of_characters": 307292, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.149209486166, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tum_Latn-ajp_Arab": { + "num_samples": 1012, + "number_of_characters": 281591, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 107.75296442687747, + "max_sentence2_length": 310, + "unique_sentence2": 1012 + }, + "tum_Latn-bjn_Arab": { + "num_samples": 1012, + "number_of_characters": 294775, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 120.78063241106719, + "max_sentence2_length": 319, + "unique_sentence2": 1012 + }, + "tum_Latn-ewe_Latn": { + "num_samples": 1012, + "number_of_characters": 299993, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 125.93675889328063, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tum_Latn-ilo_Latn": { + "num_samples": 1012, + "number_of_characters": 332225, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 157.78656126482213, + "max_sentence2_length": 432, + 
"unique_sentence2": 1012 + }, + "tum_Latn-knc_Arab": { + "num_samples": 1012, + "number_of_characters": 288545, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 25, + "average_sentence2_length": 114.62450592885375, + "max_sentence2_length": 405, + "unique_sentence2": 1012 + }, + "tum_Latn-mkd_Cyrl": { + "num_samples": 1012, + "number_of_characters": 310372, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.19268774703556, + "max_sentence2_length": 364, + "unique_sentence2": 1012 + }, + "tum_Latn-prs_Arab": { + "num_samples": 1012, + "number_of_characters": 293188, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.21245059288538, + "max_sentence2_length": 320, + "unique_sentence2": 1012 + }, + "tum_Latn-swe_Latn": { + "num_samples": 1012, + "number_of_characters": 304718, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.60573122529644, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tum_Latn-urd_Arab": { + "num_samples": 1012, + "number_of_characters": 302464, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tum_Latn-aka_Latn": { + 
"num_samples": 1012, + "number_of_characters": 305195, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 131.07707509881422, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "tum_Latn-bjn_Latn": { + "num_samples": 1012, + "number_of_characters": 310727, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 136.54347826086956, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tum_Latn-fao_Latn": { + "num_samples": 1012, + "number_of_characters": 306819, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 132.6818181818182, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tum_Latn-ind_Latn": { + "num_samples": 1012, + "number_of_characters": 314863, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 140.6304347826087, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "tum_Latn-knc_Latn": { + "num_samples": 1012, + "number_of_characters": 311517, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.32411067193675, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tum_Latn-mlt_Latn": { + "num_samples": 1012, + "number_of_characters": 318833, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.55335968379447, + "max_sentence2_length": 400, + "unique_sentence2": 1012 + }, + "tum_Latn-quy_Latn": { + "num_samples": 1012, + "number_of_characters": 313572, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 139.35474308300394, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tum_Latn-swh_Latn": { + "num_samples": 1012, + "number_of_characters": 310649, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.46640316205534, + "max_sentence2_length": 384, + "unique_sentence2": 1012 + }, + "tum_Latn-uzn_Latn": { + "num_samples": 1012, + "number_of_characters": 321868, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 147.55237154150197, + "max_sentence2_length": 394, + "unique_sentence2": 1012 + }, + "tum_Latn-als_Latn": { + "num_samples": 1012, + "number_of_characters": 319609, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 145.3201581027668, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tum_Latn-bod_Tibt": { + "num_samples": 1012, + "number_of_characters": 320696, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.39426877470356, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "tum_Latn-fij_Latn": { + "num_samples": 1012, + "number_of_characters": 327722, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 153.33695652173913, + "max_sentence2_length": 543, + "unique_sentence2": 1012 + }, + "tum_Latn-isl_Latn": { + "num_samples": 1012, + "number_of_characters": 302328, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.24407114624506, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tum_Latn-kon_Latn": { + "num_samples": 1012, + "number_of_characters": 322439, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 148.11660079051384, + "max_sentence2_length": 410, + "unique_sentence2": 1012 + }, + "tum_Latn-mni_Beng": { + "num_samples": 1012, + "number_of_characters": 309327, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1600790513834, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tum_Latn-ron_Latn": { + "num_samples": 1012, + "number_of_characters": 321116, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + 
"max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 146.8092885375494, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tum_Latn-szl_Latn": { + "num_samples": 1012, + "number_of_characters": 309916, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.7420948616601, + "max_sentence2_length": 372, + "unique_sentence2": 1011 + }, + "tum_Latn-vec_Latn": { + "num_samples": 1012, + "number_of_characters": 305782, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 131.65711462450594, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tum_Latn-amh_Ethi": { + "num_samples": 1012, + "number_of_characters": 259677, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 26, + "average_sentence2_length": 86.09881422924902, + "max_sentence2_length": 219, + "unique_sentence2": 1012 + }, + "tum_Latn-bos_Latn": { + "num_samples": 1012, + "number_of_characters": 304732, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 130.6195652173913, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "tum_Latn-fin_Latn": { + "num_samples": 1012, + "number_of_characters": 312673, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + 
"min_sentence2_length": 40, + "average_sentence2_length": 138.46640316205534, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tum_Latn-ita_Latn": { + "num_samples": 1012, + "number_of_characters": 328683, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 154.28656126482213, + "max_sentence2_length": 455, + "unique_sentence2": 1012 + }, + "tum_Latn-kor_Hang": { + "num_samples": 1012, + "number_of_characters": 238510, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 20, + "average_sentence2_length": 65.18280632411067, + "max_sentence2_length": 177, + "unique_sentence2": 1012 + }, + "tum_Latn-mos_Latn": { + "num_samples": 1012, + "number_of_characters": 297202, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 123.17885375494072, + "max_sentence2_length": 342, + "unique_sentence2": 1012 + }, + "tum_Latn-run_Latn": { + "num_samples": 1012, + "number_of_characters": 319810, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 145.5187747035573, + "max_sentence2_length": 411, + "unique_sentence2": 1011 + }, + "tum_Latn-tam_Taml": { + "num_samples": 1012, + "number_of_characters": 326678, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tum_Latn-vie_Latn": { + "num_samples": 1012, + "number_of_characters": 311140, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 136.95158102766797, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "tum_Latn-apc_Arab": { + "num_samples": 1012, + "number_of_characters": 280526, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 106.7005928853755, + "max_sentence2_length": 306, + "unique_sentence2": 1012 + }, + "tum_Latn-bug_Latn": { + "num_samples": 1012, + "number_of_characters": 311920, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 137.72233201581028, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tum_Latn-fon_Latn": { + "num_samples": 1012, + "number_of_characters": 307805, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.65612648221344, + "max_sentence2_length": 481, + "unique_sentence2": 1012 + }, + "tum_Latn-jav_Latn": { + "num_samples": 1012, + "number_of_characters": 309473, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 135.30434782608697, + "max_sentence2_length": 397, + 
"unique_sentence2": 1012 + }, + "tum_Latn-lao_Laoo": { + "num_samples": 1012, + "number_of_characters": 302605, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 128.5177865612648, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tum_Latn-mri_Latn": { + "num_samples": 1012, + "number_of_characters": 319072, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 144.7895256916996, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tum_Latn-rus_Cyrl": { + "num_samples": 1012, + "number_of_characters": 314641, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 140.41106719367588, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "tum_Latn-taq_Latn": { + "num_samples": 1012, + "number_of_characters": 296962, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.94169960474308, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tum_Latn-war_Latn": { + "num_samples": 1012, + "number_of_characters": 337291, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 162.79249011857706, + "max_sentence2_length": 434, + "unique_sentence2": 1012 + }, + "tum_Latn-arb_Arab": { + 
"num_samples": 1012, + "number_of_characters": 288852, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 114.92786561264822, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tum_Latn-bul_Cyrl": { + "num_samples": 1012, + "number_of_characters": 310369, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 136.1897233201581, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tum_Latn-fra_Latn": { + "num_samples": 1012, + "number_of_characters": 330183, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 47, + "average_sentence2_length": 155.7687747035573, + "max_sentence2_length": 415, + "unique_sentence2": 1012 + }, + "tum_Latn-jpn_Jpan": { + "num_samples": 1012, + "number_of_characters": 229488, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 17, + "average_sentence2_length": 56.26778656126482, + "max_sentence2_length": 139, + "unique_sentence2": 1012 + }, + "tum_Latn-lij_Latn": { + "num_samples": 1012, + "number_of_characters": 318539, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 144.26284584980237, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tum_Latn-mya_Mymr": { + "num_samples": 1012, + "number_of_characters": 335714, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 54, + "average_sentence2_length": 161.23418972332016, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tum_Latn-sag_Latn": { + "num_samples": 1012, + "number_of_characters": 315166, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 140.9298418972332, + "max_sentence2_length": 406, + "unique_sentence2": 1012 + }, + "tum_Latn-taq_Tfng": { + "num_samples": 1012, + "number_of_characters": 296449, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 122.43478260869566, + "max_sentence2_length": 312, + "unique_sentence2": 1012 + }, + "tum_Latn-wol_Latn": { + "num_samples": 1012, + "number_of_characters": 299718, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.66501976284584, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "tum_Latn-arb_Latn": { + "num_samples": 1012, + "number_of_characters": 326303, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 151.93478260869566, + "max_sentence2_length": 420, + "unique_sentence2": 1012 + }, + "tum_Latn-cat_Latn": { + "num_samples": 1012, + "number_of_characters": 318326, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 144.05237154150197, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tum_Latn-fur_Latn": { + "num_samples": 1012, + "number_of_characters": 317064, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 142.80533596837944, + "max_sentence2_length": 399, + "unique_sentence2": 1012 + }, + "tum_Latn-kab_Latn": { + "num_samples": 1012, + "number_of_characters": 303099, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 129.00592885375494, + "max_sentence2_length": 358, + "unique_sentence2": 1012 + }, + "tum_Latn-lim_Latn": { + "num_samples": 1012, + "number_of_characters": 310231, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.05335968379447, + "max_sentence2_length": 416, + "unique_sentence2": 1012 + }, + "tum_Latn-nld_Latn": { + "num_samples": 1012, + "number_of_characters": 320052, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 145.7579051383399, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tum_Latn-san_Deva": { + "num_samples": 1012, + "number_of_characters": 300802, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + 
"max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73616600790514, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "tum_Latn-tat_Cyrl": { + "num_samples": 1012, + "number_of_characters": 305827, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.70158102766797, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tum_Latn-xho_Latn": { + "num_samples": 1012, + "number_of_characters": 311668, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 137.47332015810278, + "max_sentence2_length": 395, + "unique_sentence2": 1012 + }, + "tum_Latn-ars_Arab": { + "num_samples": 1012, + "number_of_characters": 288973, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 115.04743083003953, + "max_sentence2_length": 316, + "unique_sentence2": 1012 + }, + "tum_Latn-ceb_Latn": { + "num_samples": 1012, + "number_of_characters": 331230, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 156.80335968379447, + "max_sentence2_length": 427, + "unique_sentence2": 1012 + }, + "tum_Latn-fuv_Latn": { + "num_samples": 1012, + "number_of_characters": 294707, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + 
"min_sentence2_length": 36, + "average_sentence2_length": 120.71343873517786, + "max_sentence2_length": 308, + "unique_sentence2": 1012 + }, + "tum_Latn-kac_Latn": { + "num_samples": 1012, + "number_of_characters": 341942, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 52, + "average_sentence2_length": 167.38833992094862, + "max_sentence2_length": 513, + "unique_sentence2": 1012 + }, + "tum_Latn-lin_Latn": { + "num_samples": 1012, + "number_of_characters": 314742, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 140.5108695652174, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tum_Latn-nno_Latn": { + "num_samples": 1012, + "number_of_characters": 305988, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.8606719367589, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tum_Latn-sat_Olck": { + "num_samples": 1012, + "number_of_characters": 312336, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tum_Latn-tel_Telu": { + "num_samples": 1012, + "number_of_characters": 305015, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 
130.899209486166, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "tum_Latn-ydd_Hebr": { + "num_samples": 1012, + "number_of_characters": 314203, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 139.97826086956522, + "max_sentence2_length": 385, + "unique_sentence2": 1012 + }, + "tum_Latn-ary_Arab": { + "num_samples": 1012, + "number_of_characters": 286108, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.21640316205534, + "max_sentence2_length": 315, + "unique_sentence2": 1011 + }, + "tum_Latn-ces_Latn": { + "num_samples": 1012, + "number_of_characters": 299802, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 125.74802371541502, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tum_Latn-gaz_Latn": { + "num_samples": 1012, + "number_of_characters": 328963, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 154.56324110671937, + "max_sentence2_length": 456, + "unique_sentence2": 1012 + }, + "tum_Latn-kam_Latn": { + "num_samples": 1012, + "number_of_characters": 300901, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.83399209486166, + "max_sentence2_length": 398, + 
"unique_sentence2": 1010 + }, + "tum_Latn-lit_Latn": { + "num_samples": 1012, + "number_of_characters": 302874, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 128.78359683794466, + "max_sentence2_length": 357, + "unique_sentence2": 1012 + }, + "tum_Latn-nob_Latn": { + "num_samples": 1012, + "number_of_characters": 304683, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.57114624505928, + "max_sentence2_length": 351, + "unique_sentence2": 1012 + }, + "tum_Latn-scn_Latn": { + "num_samples": 1012, + "number_of_characters": 311116, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 136.92786561264822, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tum_Latn-tgk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 318416, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 144.1413043478261, + "max_sentence2_length": 355, + "unique_sentence2": 1011 + }, + "tum_Latn-yor_Latn": { + "num_samples": 1012, + "number_of_characters": 299389, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 125.3399209486166, + "max_sentence2_length": 391, + "unique_sentence2": 1012 + }, + "tum_Latn-arz_Arab": { + 
"num_samples": 1012, + "number_of_characters": 286315, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 112.4209486166008, + "max_sentence2_length": 322, + "unique_sentence2": 1012 + }, + "tum_Latn-cjk_Latn": { + "num_samples": 1012, + "number_of_characters": 311806, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 137.6096837944664, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + }, + "tum_Latn-gla_Latn": { + "num_samples": 1012, + "number_of_characters": 335142, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 160.6689723320158, + "max_sentence2_length": 478, + "unique_sentence2": 1011 + }, + "tum_Latn-kan_Knda": { + "num_samples": 1012, + "number_of_characters": 310585, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.40316205533597, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tum_Latn-lmo_Latn": { + "num_samples": 1012, + "number_of_characters": 314211, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 45, + "average_sentence2_length": 139.98616600790513, + "max_sentence2_length": 379, + "unique_sentence2": 1012 + }, + "tum_Latn-npi_Deva": { + "num_samples": 1012, + "number_of_characters": 299448, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tum_Latn-shn_Mymr": { + "num_samples": 1012, + "number_of_characters": 360829, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 57, + "average_sentence2_length": 186.0513833992095, + "max_sentence2_length": 597, + "unique_sentence2": 1012 + }, + "tum_Latn-tgl_Latn": { + "num_samples": 1012, + "number_of_characters": 338714, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 53, + "average_sentence2_length": 164.1986166007905, + "max_sentence2_length": 435, + "unique_sentence2": 1012 + }, + "tum_Latn-yue_Hant": { + "num_samples": 1012, + "number_of_characters": 212564, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 39.544466403162055, + "max_sentence2_length": 118, + "unique_sentence2": 1012 + }, + "tum_Latn-asm_Beng": { + "num_samples": 1012, + "number_of_characters": 298481, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.44268774703558, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "tum_Latn-ckb_Arab": { + "num_samples": 1012, + "number_of_characters": 301130, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.06027667984189, + "max_sentence2_length": 353, + "unique_sentence2": 1012 + }, + "tum_Latn-gle_Latn": { + "num_samples": 1012, + "number_of_characters": 325026, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 150.67292490118578, + "max_sentence2_length": 443, + "unique_sentence2": 1012 + }, + "tum_Latn-kas_Arab": { + "num_samples": 1012, + "number_of_characters": 298650, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 124.6096837944664, + "max_sentence2_length": 315, + "unique_sentence2": 1012 + }, + "tum_Latn-ltg_Latn": { + "num_samples": 1012, + "number_of_characters": 302393, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 128.3083003952569, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "tum_Latn-nso_Latn": { + "num_samples": 1012, + "number_of_characters": 323654, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 149.3171936758893, + "max_sentence2_length": 423, + "unique_sentence2": 1012 + }, + "tum_Latn-sin_Sinh": { + "num_samples": 1012, + "number_of_characters": 303625, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + 
"max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 129.52569169960475, + "max_sentence2_length": 401, + "unique_sentence2": 1012 + }, + "tum_Latn-tha_Thai": { + "num_samples": 1012, + "number_of_characters": 298654, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 124.61363636363636, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "tum_Latn-zho_Hans": { + "num_samples": 1012, + "number_of_characters": 215793, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 10, + "average_sentence2_length": 42.73517786561265, + "max_sentence2_length": 130, + "unique_sentence2": 1012 + }, + "tum_Latn-ast_Latn": { + "num_samples": 1012, + "number_of_characters": 308908, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 134.74604743083003, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tum_Latn-crh_Latn": { + "num_samples": 1012, + "number_of_characters": 306583, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 27, + "average_sentence2_length": 132.4486166007905, + "max_sentence2_length": 382, + "unique_sentence2": 1012 + }, + "tum_Latn-glg_Latn": { + "num_samples": 1012, + "number_of_characters": 318688, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 144.4100790513834, + "max_sentence2_length": 374, + "unique_sentence2": 1012 + }, + "tum_Latn-kas_Deva": { + "num_samples": 1012, + "number_of_characters": 298946, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 124.90217391304348, + "max_sentence2_length": 452, + "unique_sentence2": 1012 + }, + "tum_Latn-ltz_Latn": { + "num_samples": 1012, + "number_of_characters": 320090, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 145.79545454545453, + "max_sentence2_length": 417, + "unique_sentence2": 1012 + }, + "tum_Latn-nus_Latn": { + "num_samples": 1012, + "number_of_characters": 314885, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 28, + "average_sentence2_length": 140.65217391304347, + "max_sentence2_length": 490, + "unique_sentence2": 1012 + }, + "tum_Latn-slk_Latn": { + "num_samples": 1012, + "number_of_characters": 304255, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.14822134387353, + "max_sentence2_length": 370, + "unique_sentence2": 1012 + }, + "tum_Latn-tir_Ethi": { + "num_samples": 1012, + "number_of_characters": 263245, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 29, + "average_sentence2_length": 
89.62450592885375, + "max_sentence2_length": 246, + "unique_sentence2": 1012 + }, + "tum_Latn-zho_Hant": { + "num_samples": 1012, + "number_of_characters": 213240, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 12, + "average_sentence2_length": 40.212450592885375, + "max_sentence2_length": 152, + "unique_sentence2": 1012 + }, + "tum_Latn-awa_Deva": { + "num_samples": 1012, + "number_of_characters": 300716, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "tum_Latn-cym_Latn": { + "num_samples": 1012, + "number_of_characters": 313802, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 139.5820158102767, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tum_Latn-grn_Latn": { + "num_samples": 1012, + "number_of_characters": 305360, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 131.2401185770751, + "max_sentence2_length": 360, + "unique_sentence2": 1010 + }, + "tum_Latn-kat_Geor": { + "num_samples": 1012, + "number_of_characters": 316475, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 142.22332015810278, + "max_sentence2_length": 369, + 
"unique_sentence2": 1012 + }, + "tum_Latn-lua_Latn": { + "num_samples": 1012, + "number_of_characters": 315543, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 141.30237154150197, + "max_sentence2_length": 407, + "unique_sentence2": 1010 + }, + "tum_Latn-nya_Latn": { + "num_samples": 1012, + "number_of_characters": 319717, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 145.42687747035572, + "max_sentence2_length": 421, + "unique_sentence2": 1012 + }, + "tum_Latn-slv_Latn": { + "num_samples": 1012, + "number_of_characters": 303400, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 129.30335968379447, + "max_sentence2_length": 361, + "unique_sentence2": 1012 + }, + "tum_Latn-tpi_Latn": { + "num_samples": 1012, + "number_of_characters": 341556, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 49, + "average_sentence2_length": 167.00691699604744, + "max_sentence2_length": 500, + "unique_sentence2": 1012 + }, + "tum_Latn-zsm_Latn": { + "num_samples": 1012, + "number_of_characters": 318746, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 144.4673913043478, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "tum_Latn-ayr_Latn": { + 
"num_samples": 1012, + "number_of_characters": 311162, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 136.97332015810278, + "max_sentence2_length": 510, + "unique_sentence2": 1012 + }, + "tum_Latn-dan_Latn": { + "num_samples": 1012, + "number_of_characters": 307628, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 32, + "average_sentence2_length": 133.4812252964427, + "max_sentence2_length": 369, + "unique_sentence2": 1012 + }, + "tum_Latn-guj_Gujr": { + "num_samples": 1012, + "number_of_characters": 298973, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "tum_Latn-kaz_Cyrl": { + "num_samples": 1012, + "number_of_characters": 307949, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.79841897233203, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "tum_Latn-lug_Latn": { + "num_samples": 1012, + "number_of_characters": 307623, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.47628458498025, + "max_sentence2_length": 396, + "unique_sentence2": 1012 + }, + "tum_Latn-oci_Latn": { + "num_samples": 1012, + "number_of_characters": 323196, + 
"unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 148.86462450592884, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tum_Latn-smo_Latn": { + "num_samples": 1012, + "number_of_characters": 325269, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 51, + "average_sentence2_length": 150.91304347826087, + "max_sentence2_length": 412, + "unique_sentence2": 1011 + }, + "tum_Latn-tsn_Latn": { + "num_samples": 1012, + "number_of_characters": 335580, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 46, + "average_sentence2_length": 161.10177865612647, + "max_sentence2_length": 440, + "unique_sentence2": 1012 + }, + "tum_Latn-zul_Latn": { + "num_samples": 1012, + "number_of_characters": 320907, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 146.60276679841897, + "max_sentence2_length": 425, + "unique_sentence2": 1012 + }, + "tum_Latn-azb_Arab": { + "num_samples": 1012, + "number_of_characters": 289606, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 115.67292490118577, + "max_sentence2_length": 327, + "unique_sentence2": 1012 + }, + "tum_Latn-deu_Latn": { + "num_samples": 1012, + "number_of_characters": 326354, + "unique_pairs": 1012, + "min_sentence1_length": 47, + 
"average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 151.98517786561266, + "max_sentence2_length": 408, + "unique_sentence2": 1012 + }, + "tum_Latn-hat_Latn": { + "num_samples": 1012, + "number_of_characters": 293825, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 119.84189723320158, + "max_sentence2_length": 333, + "unique_sentence2": 1010 + }, + "tum_Latn-kbp_Latn": { + "num_samples": 1012, + "number_of_characters": 315502, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 141.26185770750988, + "max_sentence2_length": 377, + "unique_sentence2": 1012 + }, + "tum_Latn-luo_Latn": { + "num_samples": 1012, + "number_of_characters": 310091, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 135.91501976284584, + "max_sentence2_length": 392, + "unique_sentence2": 1012 + }, + "tum_Latn-ory_Orya": { + "num_samples": 1012, + "number_of_characters": 307384, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "tum_Latn-sna_Latn": { + "num_samples": 1012, + "number_of_characters": 320148, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + 
"max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 145.85276679841897, + "max_sentence2_length": 424, + "unique_sentence2": 1012 + }, + "tum_Latn-tso_Latn": { + "num_samples": 1012, + "number_of_characters": 330723, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 156.30237154150197, + "max_sentence2_length": 429, + "unique_sentence2": 1012 + }, + "tum_Latn-azj_Latn": { + "num_samples": 1012, + "number_of_characters": 316284, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 142.03458498023716, + "max_sentence2_length": 383, + "unique_sentence2": 1012 + }, + "tum_Latn-dik_Latn": { + "num_samples": 1012, + "number_of_characters": 284334, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 110.46343873517786, + "max_sentence2_length": 585, + "unique_sentence2": 1012 + }, + "tum_Latn-hau_Latn": { + "num_samples": 1012, + "number_of_characters": 312612, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 138.40612648221344, + "max_sentence2_length": 372, + "unique_sentence2": 1012 + }, + "tum_Latn-kea_Latn": { + "num_samples": 1012, + "number_of_characters": 303345, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + 
"min_sentence2_length": 37, + "average_sentence2_length": 129.2490118577075, + "max_sentence2_length": 360, + "unique_sentence2": 1012 + }, + "tum_Latn-lus_Latn": { + "num_samples": 1012, + "number_of_characters": 318800, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 144.52075098814228, + "max_sentence2_length": 418, + "unique_sentence2": 1012 + }, + "tum_Latn-pag_Latn": { + "num_samples": 1012, + "number_of_characters": 302444, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 31, + "average_sentence2_length": 128.3586956521739, + "max_sentence2_length": 339, + "unique_sentence2": 1012 + }, + "tum_Latn-snd_Arab": { + "num_samples": 1012, + "number_of_characters": 292133, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 118.1699604743083, + "max_sentence2_length": 307, + "unique_sentence2": 1012 + }, + "tum_Latn-tuk_Latn": { + "num_samples": 1012, + "number_of_characters": 312807, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 138.598814229249, + "max_sentence2_length": 397, + "unique_sentence2": 1012 + }, + "tum_Latn-bak_Cyrl": { + "num_samples": 1012, + "number_of_characters": 305767, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 
131.64229249011856, + "max_sentence2_length": 389, + "unique_sentence2": 1012 + }, + "tum_Latn-dyu_Latn": { + "num_samples": 1012, + "number_of_characters": 305400, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 131.27964426877472, + "max_sentence2_length": 342, + "unique_sentence2": 1010 + }, + "tum_Latn-heb_Hebr": { + "num_samples": 1012, + "number_of_characters": 274599, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 30, + "average_sentence2_length": 100.84387351778656, + "max_sentence2_length": 281, + "unique_sentence2": 1012 + }, + "tum_Latn-khk_Cyrl": { + "num_samples": 1012, + "number_of_characters": 309612, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 44, + "average_sentence2_length": 135.4416996047431, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tum_Latn-lvs_Latn": { + "num_samples": 1012, + "number_of_characters": 306702, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 132.56620553359684, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "tum_Latn-pan_Guru": { + "num_samples": 1012, + "number_of_characters": 305970, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + 
"unique_sentence2": 1012 + }, + "tum_Latn-som_Latn": { + "num_samples": 1012, + "number_of_characters": 324207, + "unique_pairs": 1012, + "min_sentence1_length": 47, + "average_sentence1_length": 170.4990118577075, + "max_sentence1_length": 542, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 149.86363636363637, + "max_sentence2_length": 414, + "unique_sentence2": 1012 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/InstructionReranking/Core17InstructionRetrieval.json b/mteb/descriptive_stats/InstructionReranking/Core17InstructionRetrieval.json index 6f2a7d170e..14f312bbe9 100644 --- a/mteb/descriptive_stats/InstructionReranking/Core17InstructionRetrieval.json +++ b/mteb/descriptive_stats/InstructionReranking/Core17InstructionRetrieval.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 44459412, "num_samples": 19939, - "num_queries": 40, + "number_of_characters": 44459412, "num_documents": 19899, - "min_document_length": 55, - "average_document_length": 0.22061410121111613, - "max_document_length": 278, + "min_document_length": 8, + "average_document_length": 2234.0329664807277, + "max_document_length": 2960, "unique_documents": 19899, - "min_query_length": 8, - "average_query_length": 1111375.55, - "max_query_length": 2960, + "num_queries": 40, + "min_query_length": 55, + "average_query_length": 109.75, + "max_query_length": 278, "unique_queries": 40, "none_queries": 0, + "num_relevant_docs": 9480, "min_relevant_docs_per_query": 135, "average_relevant_docs_per_query": 43.6, "max_relevant_docs_per_query": 379, @@ -22,8 +23,9 @@ "average_instruction_length": 13015, "max_instruction_length": 837, "unique_instructions": 40, - "min_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_top_ranked_per_query": 1000 + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at 
end of file diff --git a/mteb/descriptive_stats/InstructionReranking/News21InstructionRetrieval.json b/mteb/descriptive_stats/InstructionReranking/News21InstructionRetrieval.json index 5c4f7a6ba6..c6106c7e4a 100644 --- a/mteb/descriptive_stats/InstructionReranking/News21InstructionRetrieval.json +++ b/mteb/descriptive_stats/InstructionReranking/News21InstructionRetrieval.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 72717436, "num_samples": 30985, - "num_queries": 64, + "number_of_characters": 72717436, "num_documents": 30921, - "min_document_length": 35, - "average_document_length": 0.15413473044209436, - "max_document_length": 159, + "min_document_length": 3, + "average_document_length": 2351.5626920216037, + "max_document_length": 5056, "unique_documents": 30921, - "min_query_length": 3, - "average_query_length": 1136135.46875, - "max_query_length": 5056, + "num_queries": 64, + "min_query_length": 35, + "average_query_length": 74.46875, + "max_query_length": 159, "unique_queries": 64, "none_queries": 0, + "num_relevant_docs": 8554, "min_relevant_docs_per_query": 83, "average_relevant_docs_per_query": 26.796875, "max_relevant_docs_per_query": 217, @@ -22,8 +23,9 @@ "average_instruction_length": 29015, "max_instruction_length": 879, "unique_instructions": 64, - "min_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_top_ranked_per_query": 1000 + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/InstructionReranking/Robust04InstructionRetrieval.json b/mteb/descriptive_stats/InstructionReranking/Robust04InstructionRetrieval.json index b765c4dc0d..4838d5e0ba 100644 --- a/mteb/descriptive_stats/InstructionReranking/Robust04InstructionRetrieval.json +++ b/mteb/descriptive_stats/InstructionReranking/Robust04InstructionRetrieval.json @@ -1,18 +1,19 @@ { "test": { - 
"number_of_characters": 109145153, "num_samples": 47596, - "num_queries": 104, + "number_of_characters": 109145153, "num_documents": 47492, - "min_document_length": 43, - "average_document_length": 0.21224627305651478, - "max_document_length": 425, + "min_document_length": 48, + "average_document_length": 2297.9675103175273, + "max_document_length": 6597, "unique_documents": 47492, - "min_query_length": 48, - "average_query_length": 1049375.701923077, - "max_query_length": 6597, + "num_queries": 104, + "min_query_length": 43, + "average_query_length": 96.92307692307692, + "max_query_length": 425, "unique_queries": 104, "none_queries": 0, + "num_relevant_docs": 36930, "min_relevant_docs_per_query": 195, "average_relevant_docs_per_query": 26.75, "max_relevant_docs_per_query": 570, @@ -22,8 +23,9 @@ "average_instruction_length": 29957, "max_instruction_length": 923, "unique_instructions": 104, - "min_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_top_ranked_per_query": 1000 + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json b/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json deleted file mode 100644 index 897b23d7c7..0000000000 --- a/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "test": { - "num_samples": 19919, - "num_docs": 19899, - "num_queries": 20, - "number_of_characters": 44450333, - "min_document_length": 7, - "average_document_length": 2233.0329664807277, - "max_document_length": 2959, - "unique_docs": 19143, - "min_query_length": 55, - "average_query_length": 109.75, - "max_query_length": 278, - "unique_queries": 20, - "min_instruction_length": 102, - "average_instruction_length": 295.55, - "max_instruction_length": 811, - 
"unique_instructions": 20, - "min_changed_instruction_length": 151, - "average_changed_instruction_length": 355.2, - "max_changed_instruction_length": 837, - "unique_changed_instructions": 20, - "min_average_relevant_docs_per_query": 4, - "average_relevant_docs_per_query": 32.7, - "max_average_relevant_docs_per_query": 55, - "min_average_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_average_top_ranked_per_query": 1000 - } -} \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/AlloprofReranking.json b/mteb/descriptive_stats/Reranking/AlloprofReranking.json index 8e39a283ce..9ad03e8379 100644 --- a/mteb/descriptive_stats/Reranking/AlloprofReranking.json +++ b/mteb/descriptive_stats/Reranking/AlloprofReranking.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 102329333, "num_samples": 27355, - "num_queries": 2316, + "number_of_characters": 102329333, "num_documents": 25039, - "min_document_length": 8, - "average_document_length": 15.79020727664843, - "max_document_length": 2863, + "min_document_length": 42, + "average_document_length": 4071.0077079755583, + "max_document_length": 47972, "unique_documents": 25039, - "min_query_length": 42, - "average_query_length": 44012.93696027634, - "max_query_length": 47972, + "num_queries": 2316, + "min_query_length": 8, + "average_query_length": 170.71286701208982, + "max_query_length": 2863, "unique_queries": 2316, "none_queries": 0, + "num_relevant_docs": 25039, "min_relevant_docs_per_query": 10, "average_relevant_docs_per_query": 1.2845423143350605, "max_relevant_docs_per_query": 37, @@ -22,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 2316, "min_top_ranked_per_query": 10, "average_top_ranked_per_query": 10.811312607944732, "max_top_ranked_per_query": 37 diff --git a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json 
b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json index 27103ee3cc..b4a5705502 100644 --- a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json +++ b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 397127, "num_samples": 7581, - "num_queries": 361, + "number_of_characters": 397127, "num_documents": 7220, - "min_document_length": 17, - "average_document_length": 2.5065096952908585, + "min_document_length": 15, + "average_document_length": 52.49722991689751, "max_document_length": 148, "unique_documents": 7220, - "min_query_length": 15, - "average_query_length": 1049.94459833795, + "num_queries": 361, + "min_query_length": 17, + "average_query_length": 50.13019390581717, "max_query_length": 148, "unique_queries": 361, "none_queries": 0, + "num_relevant_docs": 7220, "min_relevant_docs_per_query": 20, "average_relevant_docs_per_query": 5.470914127423823, "max_relevant_docs_per_query": 20, @@ -22,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 361, "min_top_ranked_per_query": 20, "average_top_ranked_per_query": 20.0, "max_top_ranked_per_query": 20 diff --git a/mteb/descriptive_stats/Reranking/CMedQAv1-reranking.json b/mteb/descriptive_stats/Reranking/CMedQAv1-reranking.json index 5dee6893ad..97538feb92 100644 --- a/mteb/descriptive_stats/Reranking/CMedQAv1-reranking.json +++ b/mteb/descriptive_stats/Reranking/CMedQAv1-reranking.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 10719709, "num_samples": 101000, - "num_queries": 1000, + "number_of_characters": 10719709, "num_documents": 100000, - "min_document_length": 3, - "average_document_length": 0.55717, - "max_document_length": 505, + "min_document_length": 5, + "average_document_length": 106.63992, + "max_document_length": 265, "unique_documents": 100000, - "min_query_length": 5, - "average_query_length": 10663.992, - "max_query_length": 
265, + "num_queries": 1000, + "min_query_length": 3, + "average_query_length": 55.717, + "max_query_length": 505, "unique_queries": 1000, "none_queries": 0, + "num_relevant_docs": 100000, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.931, "max_relevant_docs_per_query": 100, @@ -22,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1000, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 diff --git a/mteb/descriptive_stats/Reranking/CMedQAv2-reranking.json b/mteb/descriptive_stats/Reranking/CMedQAv2-reranking.json index 58a7b0efcd..438bab1126 100644 --- a/mteb/descriptive_stats/Reranking/CMedQAv2-reranking.json +++ b/mteb/descriptive_stats/Reranking/CMedQAv2-reranking.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 10110234, "num_samples": 101000, - "num_queries": 1000, + "number_of_characters": 10110234, "num_documents": 100000, "min_document_length": 11, - "average_document_length": 0.48848, - "max_document_length": 153, + "average_document_length": 100.61386, + "max_document_length": 264, "unique_documents": 100000, + "num_queries": 1000, "min_query_length": 11, - "average_query_length": 10061.386, - "max_query_length": 264, + "average_query_length": 48.848, + "max_query_length": 153, "unique_queries": 1000, "none_queries": 0, + "num_relevant_docs": 100000, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.91, "max_relevant_docs_per_query": 100, @@ -22,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1000, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 diff --git a/mteb/descriptive_stats/Reranking/ESCIReranking.json b/mteb/descriptive_stats/Reranking/ESCIReranking.json index 3d052c309e..376a3ef48b 100644 --- 
a/mteb/descriptive_stats/Reranking/ESCIReranking.json +++ b/mteb/descriptive_stats/Reranking/ESCIReranking.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 117894609, "num_samples": 158627, - "num_queries": 10395, + "number_of_characters": 117894609, "num_documents": 148232, "min_document_length": 1, - "average_document_length": 1.4161921852231636, - "max_document_length": 143, + "average_document_length": 793.9222570025365, + "max_document_length": 8640, "unique_documents": 148232, + "num_queries": 10395, "min_query_length": 1, - "average_query_length": 11321.277922077921, - "max_query_length": 8640, + "average_query_length": 20.194805194805195, + "max_query_length": 143, "unique_queries": 10395, "none_queries": 0, + "num_relevant_docs": 148232, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 10.277825877825878, "max_relevant_docs_per_query": 74, @@ -22,24 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 10395, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 14.25993265993266, "max_top_ranked_per_query": 74, "hf_subset_descriptive_stats": { "us": { - "number_of_characters": 74993786, "num_samples": 93896, - "num_queries": 6694, + "number_of_characters": 74993786, "num_documents": 87202, "min_document_length": 1, - "average_document_length": 1.7313823077452353, - "max_document_length": 143, + "average_document_length": 858.2693745556295, + "max_document_length": 4608, "unique_documents": 87202, + "num_queries": 6694, "min_query_length": 1, - "average_query_length": 11180.58051986854, - "max_query_length": 4608, + "average_query_length": 22.554526441589484, + "max_query_length": 143, "unique_queries": 6694, "none_queries": 0, + "num_relevant_docs": 87202, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 9.446519270988945, "max_relevant_docs_per_query": 74, @@ -49,24 +52,26 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 6694, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 13.026889752016732, "max_top_ranked_per_query": 74 }, "es": { - "number_of_characters": 32170187, "num_samples": 33785, - "num_queries": 1851, + "number_of_characters": 32170187, "num_documents": 31934, - "min_document_length": 3, - "average_document_length": 1.2324481743596167, - "max_document_length": 59, + "min_document_length": 1, + "average_document_length": 1006.1636500281832, + "max_document_length": 8640, "unique_documents": 31934, - "min_query_length": 1, - "average_query_length": 17358.633171258778, - "max_query_length": 8640, + "num_queries": 1851, + "min_query_length": 3, + "average_query_length": 21.262560777957862, + "max_query_length": 59, "unique_queries": 1851, "none_queries": 0, + "num_relevant_docs": 31934, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 12.038357644516477, "max_relevant_docs_per_query": 41, @@ -76,24 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1851, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 17.252296056185845, "max_top_ranked_per_query": 41 }, "jp": { - "number_of_characters": 10730636, "num_samples": 30946, - "num_queries": 1850, + "number_of_characters": 10730636, "num_documents": 29096, "min_document_length": 1, - "average_document_length": 0.6732196865548529, - "max_document_length": 60, + "average_document_length": 368.12785262579047, + "max_document_length": 3940, "unique_documents": 29096, + "num_queries": 1850, "min_query_length": 1, - "average_query_length": 5789.755675675676, - "max_query_length": 3940, + "average_query_length": 10.588108108108107, + "max_query_length": 60, "unique_queries": 1850, "none_queries": 0, + "num_relevant_docs": 29096, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 11.524324324324324, 
"max_relevant_docs_per_query": 50, @@ -103,6 +110,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1850, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 15.727567567567567, "max_top_ranked_per_query": 50 diff --git a/mteb/descriptive_stats/Reranking/InstructIR.json b/mteb/descriptive_stats/Reranking/InstructIR.json index a2a5bf6dce..f52e771372 100644 --- a/mteb/descriptive_stats/Reranking/InstructIR.json +++ b/mteb/descriptive_stats/Reranking/InstructIR.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 10114013, "num_samples": 25978, - "num_queries": 9906, + "number_of_characters": 10114013, "num_documents": 16072, - "min_document_length": 24, - "average_document_length": 19.262008461921354, - "max_document_length": 40, + "min_document_length": 36, + "average_document_length": 610.031981085117, + "max_document_length": 1700, "unique_documents": 16072, - "min_query_length": 36, - "average_query_length": 989.7470220068645, - "max_query_length": 1700, + "num_queries": 9906, + "min_query_length": 24, + "average_query_length": 31.25166565717747, + "max_query_length": 40, "unique_queries": 9906, + "none_queries": 0, + "num_relevant_docs": 9906, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": 3063330, "max_instruction_length": 616, "unique_instructions": 9906, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Reranking/MIRACLReranking.json b/mteb/descriptive_stats/Reranking/MIRACLReranking.json index 8f37b97947..b718bf8fed 100644 --- a/mteb/descriptive_stats/Reranking/MIRACLReranking.json +++ b/mteb/descriptive_stats/Reranking/MIRACLReranking.json @@ -1,19 +1,19 @@ { "dev": { - "number_of_characters": 584993395, "num_samples": 1260008, - "num_queries": 
12524, + "number_of_characters": 584993395, "num_documents": 1247484, - "num_relevant_docs": 1247483, - "min_document_length": 5, - "average_document_length": 0.3661874621237627, - "max_document_length": 176, + "min_document_length": 7, + "average_document_length": 468.57240814311047, + "max_document_length": 48058, "unique_documents": 1247484, - "min_query_length": 7, - "average_query_length": 46673.31379750878, - "max_query_length": 48058, + "num_queries": 12524, + "min_query_length": 5, + "average_query_length": 36.47500798466943, + "max_query_length": 176, "unique_queries": 12524, "none_queries": 0, + "num_relevant_docs": 1247483, "min_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 1.8850207601405302, "max_relevant_docs_per_query": 100, @@ -23,25 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 12524, "min_top_ranked_per_query": 1, "average_top_ranked_per_query": 99.60747365059086, "max_top_ranked_per_query": 100, "hf_subset_descriptive_stats": { "ar": { - "number_of_characters": 139203930, "num_samples": 290077, - "num_queries": 2896, + "number_of_characters": 139203930, "num_documents": 287181, - "num_relevant_docs": 287181, - "min_document_length": 12, - "average_document_length": 0.29728986249090295, - "max_document_length": 101, + "min_document_length": 9, + "average_document_length": 484.42812720897274, + "max_document_length": 48058, "unique_documents": 287181, - "min_query_length": 9, - "average_query_length": 48038.17472375691, - "max_query_length": 48058, + "num_queries": 2896, + "min_query_length": 12, + "average_query_length": 29.480662983425415, + "max_query_length": 101, "unique_queries": 2896, "none_queries": 0, + "num_relevant_docs": 287181, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.7178867403314917, "max_relevant_docs_per_query": 100, @@ -51,25 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, 
"unique_instructions": null, + "num_top_ranked": 2896, "min_top_ranked_per_query": 1, "average_top_ranked_per_query": 99.16470994475138, "max_top_ranked_per_query": 100 }, "bn": { - "number_of_characters": 22936306, "num_samples": 41466, - "num_queries": 411, + "number_of_characters": 22936306, "num_documents": 41055, - "num_relevant_docs": 41055, - "min_document_length": 16, - "average_document_length": 0.470320302033857, - "max_document_length": 112, + "min_document_length": 12, + "average_document_length": 558.202338326635, + "max_document_length": 16749, "unique_documents": 41055, - "min_query_length": 12, - "average_query_length": 55759.11678832117, - "max_query_length": 16749, + "num_queries": 411, + "min_query_length": 16, + "average_query_length": 46.98053527980535, + "max_query_length": 112, "unique_queries": 411, "none_queries": 0, + "num_relevant_docs": 41055, "min_relevant_docs_per_query": 55, "average_relevant_docs_per_query": 1.9172749391727495, "max_relevant_docs_per_query": 100, @@ -79,25 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 411, "min_top_ranked_per_query": 55, "average_top_ranked_per_query": 99.8905109489051, "max_top_ranked_per_query": 100 }, "de": { - "number_of_characters": 16502961, "num_samples": 30704, - "num_queries": 304, + "number_of_characters": 16502961, "num_documents": 30400, - "num_relevant_docs": 30400, - "min_document_length": 15, - "average_document_length": 0.4606578947368421, - "max_document_length": 87, + "min_document_length": 13, + "average_document_length": 542.3999013157895, + "max_document_length": 5224, "unique_documents": 30400, - "min_query_length": 13, - "average_query_length": 54239.99013157895, - "max_query_length": 5224, + "num_queries": 304, + "min_query_length": 15, + "average_query_length": 46.06578947368421, + "max_query_length": 87, "unique_queries": 304, "none_queries": 0, + "num_relevant_docs": 30400, 
"min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.542763157894737, "max_relevant_docs_per_query": 100, @@ -107,25 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 304, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "en": { - "number_of_characters": 51198664, "num_samples": 79487, - "num_queries": 787, + "number_of_characters": 51198664, "num_documents": 78700, - "num_relevant_docs": 78700, - "min_document_length": 16, - "average_document_length": 0.40310038119440916, - "max_document_length": 122, + "min_document_length": 19, + "average_document_length": 650.1517153748412, + "max_document_length": 8110, "unique_documents": 78700, - "min_query_length": 19, - "average_query_length": 65015.171537484115, - "max_query_length": 8110, + "num_queries": 787, + "min_query_length": 16, + "average_query_length": 40.31003811944092, + "max_query_length": 122, "unique_queries": 787, "none_queries": 0, + "num_relevant_docs": 78700, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 2.3824650571791612, "max_relevant_docs_per_query": 100, @@ -135,25 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 787, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "es": { - "number_of_characters": 34643777, "num_samples": 62317, - "num_queries": 617, + "number_of_characters": 34643777, "num_documents": 61700, - "num_relevant_docs": 61700, - "min_document_length": 19, - "average_document_length": 0.47573743922204215, - "max_document_length": 88, + "min_document_length": 21, + "average_document_length": 561.0117341977309, + "max_document_length": 21550, "unique_documents": 61700, - "min_query_length": 21, - "average_query_length": 56101.1734197731, - "max_query_length": 21550, + 
"num_queries": 617, + "min_query_length": 19, + "average_query_length": 47.573743922204216, + "max_query_length": 88, "unique_queries": 617, "none_queries": 0, + "num_relevant_docs": 61700, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 3.053484602917342, "max_relevant_docs_per_query": 100, @@ -163,25 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 617, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "fa": { - "number_of_characters": 27767876, "num_samples": 63832, - "num_queries": 632, + "number_of_characters": 27767876, "num_documents": 63200, - "num_relevant_docs": 63200, - "min_document_length": 18, - "average_document_length": 0.411503164556962, - "max_document_length": 82, + "min_document_length": 14, + "average_document_length": 438.95362341772153, + "max_document_length": 8151, "unique_documents": 63200, - "min_query_length": 14, - "average_query_length": 43895.362341772154, - "max_query_length": 8151, + "num_queries": 632, + "min_query_length": 18, + "average_query_length": 41.1503164556962, + "max_query_length": 82, "unique_queries": 632, "none_queries": 0, + "num_relevant_docs": 63200, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.4667721518987342, "max_relevant_docs_per_query": 100, @@ -191,25 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 632, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "fi": { - "number_of_characters": 52848523, "num_samples": 117879, - "num_queries": 1183, + "number_of_characters": 52848523, "num_documents": 116696, - "num_relevant_docs": 116696, - "min_document_length": 14, - "average_document_length": 0.3929526290532666, - "max_document_length": 130, + "min_document_length": 13, + 
"average_document_length": 452.48052204017273, + "max_document_length": 6755, "unique_documents": 116696, - "min_query_length": 13, - "average_query_length": 44634.54522400676, - "max_query_length": 6755, + "num_queries": 1183, + "min_query_length": 14, + "average_query_length": 38.76246830092984, + "max_query_length": 130, "unique_queries": 1183, "none_queries": 0, + "num_relevant_docs": 116696, "min_relevant_docs_per_query": 3, "average_relevant_docs_per_query": 1.7557058326289094, "max_relevant_docs_per_query": 100, @@ -219,25 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1183, "min_top_ranked_per_query": 3, "average_top_ranked_per_query": 98.64412510566356, "max_top_ranked_per_query": 100 }, "fr": { - "number_of_characters": 17084953, "num_samples": 34643, - "num_queries": 343, + "number_of_characters": 17084953, "num_documents": 34300, - "num_relevant_docs": 34300, - "min_document_length": 16, - "average_document_length": 0.4388338192419825, - "max_document_length": 83, + "min_document_length": 25, + "average_document_length": 497.66475218658894, + "max_document_length": 4404, "unique_documents": 34300, - "min_query_length": 25, - "average_query_length": 49766.475218658896, - "max_query_length": 4404, + "num_queries": 343, + "min_query_length": 16, + "average_query_length": 43.883381924198254, + "max_query_length": 83, "unique_queries": 343, "none_queries": 0, + "num_relevant_docs": 34300, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.3877551020408163, "max_relevant_docs_per_query": 100, @@ -247,25 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 343, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "hi": { - "number_of_characters": 21162593, "num_samples": 35350, - "num_queries": 350, + "number_of_characters": 
21162593, "num_documents": 35000, - "num_relevant_docs": 35000, - "min_document_length": 24, - "average_document_length": 0.5334, - "max_document_length": 120, + "min_document_length": 13, + "average_document_length": 604.1121142857143, + "max_document_length": 29681, "unique_documents": 35000, - "min_query_length": 13, - "average_query_length": 60411.21142857143, - "max_query_length": 29681, + "num_queries": 350, + "min_query_length": 24, + "average_query_length": 53.34, + "max_query_length": 120, "unique_queries": 350, "none_queries": 0, + "num_relevant_docs": 35000, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.9142857142857144, "max_relevant_docs_per_query": 100, @@ -275,25 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 350, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "id": { - "number_of_characters": 51428701, "num_samples": 94149, - "num_queries": 939, + "number_of_characters": 51428701, "num_documents": 93210, - "num_relevant_docs": 93210, - "min_document_length": 13, - "average_document_length": 0.3831563137002468, - "max_document_length": 93, + "min_document_length": 9, + "average_document_length": 551.3677395129278, + "max_document_length": 13961, "unique_documents": 93210, - "min_query_length": 9, - "average_query_length": 54731.615548455804, - "max_query_length": 13961, + "num_queries": 939, + "min_query_length": 13, + "average_query_length": 38.03407880724175, + "max_query_length": 93, "unique_queries": 939, "none_queries": 0, + "num_relevant_docs": 93210, "min_relevant_docs_per_query": 3, "average_relevant_docs_per_query": 2.774227902023429, "max_relevant_docs_per_query": 100, @@ -303,25 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 939, "min_top_ranked_per_query": 3, "average_top_ranked_per_query": 
99.26517571884985, "max_top_ranked_per_query": 100 }, "ja": { - "number_of_characters": 17053080, "num_samples": 80497, - "num_queries": 797, + "number_of_characters": 17053080, "num_documents": 79700, - "num_relevant_docs": 79700, "min_document_length": 7, - "average_document_length": 0.177465495608532, - "max_document_length": 48, + "average_document_length": 213.78840652446675, + "max_document_length": 6592, "unique_documents": 79700, + "num_queries": 797, "min_query_length": 7, - "average_query_length": 21378.840652446674, - "max_query_length": 6592, + "average_query_length": 17.7465495608532, + "max_query_length": 48, "unique_queries": 797, "none_queries": 0, + "num_relevant_docs": 79700, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.7465495608531996, "max_relevant_docs_per_query": 100, @@ -331,25 +342,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 797, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "ko": { - "number_of_characters": 5439444, "num_samples": 21414, - "num_queries": 213, + "number_of_characters": 5439444, "num_documents": 21201, - "num_relevant_docs": 21200, - "min_document_length": 5, - "average_document_length": 0.21725390311777745, - "max_document_length": 92, + "min_document_length": 11, + "average_document_length": 256.34819112306025, + "max_document_length": 4838, "unique_documents": 21201, - "min_query_length": 11, - "average_query_length": 25515.671361502347, - "max_query_length": 4838, + "num_queries": 213, + "min_query_length": 5, + "average_query_length": 21.624413145539908, + "max_query_length": 92, "unique_queries": 213, "none_queries": 0, + "num_relevant_docs": 21200, "min_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 1.9812206572769953, "max_relevant_docs_per_query": 100, @@ -359,25 +371,26 @@ "average_instruction_length": null, "max_instruction_length": 
null, "unique_instructions": null, + "num_top_ranked": 213, "min_top_ranked_per_query": 1, "average_top_ranked_per_query": 99.53521126760563, "max_top_ranked_per_query": 100 }, "ru": { - "number_of_characters": 59556512, "num_samples": 125947, - "num_queries": 1247, + "number_of_characters": 59556512, "num_documents": 124700, - "num_relevant_docs": 124700, - "min_document_length": 15, - "average_document_length": 0.4415878107457899, - "max_document_length": 108, + "min_document_length": 8, + "average_document_length": 477.1567441860465, + "max_document_length": 12427, "unique_documents": 124700, - "min_query_length": 8, - "average_query_length": 47715.67441860465, - "max_query_length": 12427, + "num_queries": 1247, + "min_query_length": 15, + "average_query_length": 44.15878107457899, + "max_query_length": 108, "unique_queries": 1247, "none_queries": 0, + "num_relevant_docs": 124700, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.9534883720930232, "max_relevant_docs_per_query": 100, @@ -387,25 +400,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1247, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "sw": { - "number_of_characters": 14840684, "num_samples": 48581, - "num_queries": 481, + "number_of_characters": 14840684, "num_documents": 48100, - "num_relevant_docs": 48100, - "min_document_length": 13, - "average_document_length": 0.38885654885654886, - "max_document_length": 75, + "min_document_length": 10, + "average_document_length": 308.14927234927234, + "max_document_length": 6048, "unique_documents": 48100, - "min_query_length": 10, - "average_query_length": 30814.927234927236, - "max_query_length": 6048, + "num_queries": 481, + "min_query_length": 13, + "average_query_length": 38.88565488565489, + "max_query_length": 75, "unique_queries": 481, "none_queries": 0, + "num_relevant_docs": 48100, 
"min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.3846153846153846, "max_relevant_docs_per_query": 100, @@ -415,25 +429,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 481, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "te": { - "number_of_characters": 3910478, "num_samples": 8484, - "num_queries": 84, + "number_of_characters": 3910478, "num_documents": 8400, - "num_relevant_docs": 8400, - "min_document_length": 24, - "average_document_length": 0.3846428571428571, - "max_document_length": 64, + "min_document_length": 19, + "average_document_length": 465.1484523809524, + "max_document_length": 8736, "unique_documents": 8400, - "min_query_length": 19, - "average_query_length": 46514.84523809524, - "max_query_length": 8736, + "num_queries": 84, + "min_query_length": 24, + "average_query_length": 38.464285714285715, + "max_query_length": 64, "unique_queries": 84, "none_queries": 0, + "num_relevant_docs": 8400, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.119047619047619, "max_relevant_docs_per_query": 100, @@ -443,25 +458,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 84, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "th": { - "number_of_characters": 38321622, "num_samples": 73671, - "num_queries": 730, + "number_of_characters": 38321622, "num_documents": 72941, - "num_relevant_docs": 72941, - "min_document_length": 14, - "average_document_length": 0.42866152095529264, - "max_document_length": 176, + "min_document_length": 15, + "average_document_length": 524.949685362142, + "max_document_length": 12078, "unique_documents": 72941, - "min_query_length": 15, - "average_query_length": 52452.54109589041, - "max_query_length": 12078, + "num_queries": 730, 
+ "min_query_length": 14, + "average_query_length": 42.83150684931507, + "max_query_length": 176, "unique_queries": 730, "none_queries": 0, + "num_relevant_docs": 72941, "min_relevant_docs_per_query": 41, "average_relevant_docs_per_query": 1.632876712328767, "max_relevant_docs_per_query": 100, @@ -471,25 +487,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 730, "min_top_ranked_per_query": 41, "average_top_ranked_per_query": 99.91917808219178, "max_top_ranked_per_query": 100 }, "yo": { - "number_of_characters": 4939804, "num_samples": 12019, - "num_queries": 119, + "number_of_characters": 4939804, "num_documents": 11900, - "num_relevant_docs": 11900, - "min_document_length": 25, - "average_document_length": 0.376890756302521, - "max_document_length": 56, + "min_document_length": 7, + "average_document_length": 414.7326890756303, + "max_document_length": 5793, "unique_documents": 11900, - "min_query_length": 7, - "average_query_length": 41473.268907563026, - "max_query_length": 5793, + "num_queries": 119, + "min_query_length": 25, + "average_query_length": 37.6890756302521, + "max_query_length": 56, "unique_queries": 119, "none_queries": 0, + "num_relevant_docs": 11900, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 0.8823529411764706, "max_relevant_docs_per_query": 100, @@ -499,25 +516,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 119, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 }, "zh": { - "number_of_characters": 6153487, "num_samples": 39491, - "num_queries": 391, + "number_of_characters": 6153487, "num_documents": 39100, - "num_relevant_docs": 39100, "min_document_length": 7, - "average_document_length": 0.10859335038363171, - "max_document_length": 22, + "average_document_length": 157.26959079283887, + "max_document_length": 
2629, "unique_documents": 39100, + "num_queries": 391, "min_query_length": 7, - "average_query_length": 15726.959079283888, - "max_query_length": 2629, + "average_query_length": 10.859335038363172, + "max_query_length": 22, "unique_queries": 391, "none_queries": 0, + "num_relevant_docs": 39100, "min_relevant_docs_per_query": 100, "average_relevant_docs_per_query": 1.4194373401534526, "max_relevant_docs_per_query": 100, @@ -527,6 +545,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 391, "min_top_ranked_per_query": 100, "average_top_ranked_per_query": 100.0, "max_top_ranked_per_query": 100 diff --git a/mteb/descriptive_stats/Reranking/MMarcoReranking.json b/mteb/descriptive_stats/Reranking/MMarcoReranking.json index 7658ad1fea..0ef42510c9 100644 --- a/mteb/descriptive_stats/Reranking/MMarcoReranking.json +++ b/mteb/descriptive_stats/Reranking/MMarcoReranking.json @@ -1,18 +1,19 @@ { "dev": { - "number_of_characters": 12381331, "num_samples": 100126, - "num_queries": 100, + "number_of_characters": 12381331, "num_documents": 100026, - "min_document_length": 4, - "average_document_length": 0.011437026373142983, - "max_document_length": 61, + "min_document_length": 13, + "average_document_length": 123.76968988063103, + "max_document_length": 803, "unique_documents": 100026, - "min_query_length": 13, - "average_query_length": 123801.87, - "max_query_length": 803, + "num_queries": 100, + "min_query_length": 4, + "average_query_length": 11.44, + "max_query_length": 61, "unique_queries": 100, "none_queries": 0, + "num_relevant_docs": 100026, "min_relevant_docs_per_query": 1000, "average_relevant_docs_per_query": 1.07, "max_relevant_docs_per_query": 1002, @@ -22,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 100, "min_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.26, "max_top_ranked_per_query": 1002 diff 
--git a/mteb/descriptive_stats/Reranking/MindSmallReranking.json b/mteb/descriptive_stats/Reranking/MindSmallReranking.json index a71e415fa6..3b4f6734e8 100644 --- a/mteb/descriptive_stats/Reranking/MindSmallReranking.json +++ b/mteb/descriptive_stats/Reranking/MindSmallReranking.json @@ -1,19 +1,19 @@ { "test": { - "number_of_characters": 162620316, "num_samples": 2367791, - "num_queries": 2362514, + "number_of_characters": 162620316, "num_documents": 5277, - "num_relevant_docs": 97006943, "min_document_length": 11, - "average_document_length": 30751.748341860904, - "max_document_length": 251, + "average_document_length": 65.06348303960584, + "max_document_length": 176, "unique_documents": 5277, + "num_queries": 2362514, "min_query_length": 11, - "average_query_length": 0.14532823932471933, - "max_query_length": 176, + "average_query_length": 68.68826004840606, + "max_query_length": 251, "unique_queries": 2362514, "none_queries": 0, + "num_relevant_docs": 97006943, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.8289660928993436, "max_relevant_docs_per_query": 295, @@ -23,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 2362514, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 41.06168556038187, "max_top_ranked_per_query": 295 diff --git a/mteb/descriptive_stats/Reranking/NevIR.json b/mteb/descriptive_stats/Reranking/NevIR.json index 4a5eddde01..6e6531dbe6 100644 --- a/mteb/descriptive_stats/Reranking/NevIR.json +++ b/mteb/descriptive_stats/Reranking/NevIR.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 3829988, "num_samples": 7878, - "num_queries": 2766, + "number_of_characters": 3829988, "num_documents": 5112, - "min_document_length": 19, - "average_document_length": 36.754890453834115, - "max_document_length": 168, + "min_document_length": 95, + "average_document_length": 712.460289514867, + "max_document_length": 1317, 
"unique_documents": 5112, - "min_query_length": 95, - "average_query_length": 1316.737888647867, - "max_query_length": 1317, + "num_queries": 2766, + "min_query_length": 19, + "average_query_length": 67.9287780187997, + "max_query_length": 168, "unique_queries": 2766, + "none_queries": 0, + "num_relevant_docs": 2766, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,8 +23,9 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, - "min_top_ranked_per_query": 2, - "average_top_ranked_per_query": 2.0, - "max_top_ranked_per_query": 2 + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/RuBQReranking.json b/mteb/descriptive_stats/Reranking/RuBQReranking.json index d4f56b1f17..b2f29a6500 100644 --- a/mteb/descriptive_stats/Reranking/RuBQReranking.json +++ b/mteb/descriptive_stats/Reranking/RuBQReranking.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 17186357, "num_samples": 38998, - "num_queries": 1551, + "number_of_characters": 17186357, "num_documents": 37447, - "min_document_length": 13, - "average_document_length": 1.7734932037279354, - "max_document_length": 142, + "min_document_length": 1, + "average_document_length": 457.17801158971344, + "max_document_length": 11010, "unique_documents": 37447, - "min_query_length": 1, - "average_query_length": 11038.00451321728, - "max_query_length": 11010, + "num_queries": 1551, + "min_query_length": 13, + "average_query_length": 42.818826563507415, + "max_query_length": 142, "unique_queries": 1551, "none_queries": 0, + "num_relevant_docs": 37447, "min_relevant_docs_per_query": 10, "average_relevant_docs_per_query": 1.6776273372018053, "max_relevant_docs_per_query": 29, @@ -22,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": 
null, "unique_instructions": null, + "num_top_ranked": 1551, "min_top_ranked_per_query": 10, "average_top_ranked_per_query": 24.143778207607994, "max_top_ranked_per_query": 29 diff --git a/mteb/descriptive_stats/Reranking/SciDocsRR.json b/mteb/descriptive_stats/Reranking/SciDocsRR.json index f706e7e175..d8deb62db0 100644 --- a/mteb/descriptive_stats/Reranking/SciDocsRR.json +++ b/mteb/descriptive_stats/Reranking/SciDocsRR.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 8761260, "num_samples": 122578, - "num_queries": 3978, + "number_of_characters": 8761260, "num_documents": 118600, - "min_document_length": 13, - "average_document_length": 2.343684654300169, + "min_document_length": 8, + "average_document_length": 71.52865935919056, "max_document_length": 300, "unique_documents": 118600, - "min_query_length": 8, - "average_query_length": 2132.5537958773252, + "num_queries": 3978, + "min_query_length": 13, + "average_query_length": 69.87456008044244, "max_query_length": 300, "unique_queries": 3978, "none_queries": 0, + "num_relevant_docs": 118600, "min_relevant_docs_per_query": 26, "average_relevant_docs_per_query": 4.92684766214178, "max_relevant_docs_per_query": 60, @@ -22,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 3978, "min_top_ranked_per_query": 26, "average_top_ranked_per_query": 29.813976872800403, "max_top_ranked_per_query": 60 diff --git a/mteb/descriptive_stats/Reranking/StackOverflowDupQuestions.json b/mteb/descriptive_stats/Reranking/StackOverflowDupQuestions.json index 15d02e7ae8..b1143d8c67 100644 --- a/mteb/descriptive_stats/Reranking/StackOverflowDupQuestions.json +++ b/mteb/descriptive_stats/Reranking/StackOverflowDupQuestions.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 4138870, "num_samples": 92462, - "num_queries": 2992, + "number_of_characters": 4138870, "num_documents": 89470, - "min_document_length": 13, - "average_document_length": 
1.7777690846093663, - "max_document_length": 149, + "min_document_length": 10, + "average_document_length": 44.482094556834696, + "max_document_length": 150, "unique_documents": 89470, - "min_query_length": 10, - "average_query_length": 1330.1514037433155, - "max_query_length": 150, + "num_queries": 2992, + "min_query_length": 13, + "average_query_length": 53.160762032085564, + "max_query_length": 149, "unique_queries": 2992, "none_queries": 0, + "num_relevant_docs": 89470, "min_relevant_docs_per_query": 20, "average_relevant_docs_per_query": 1.1587566844919786, "max_relevant_docs_per_query": 30, @@ -22,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 2992, "min_top_ranked_per_query": 20, "average_top_ranked_per_query": 29.90307486631016, "max_top_ranked_per_query": 30 diff --git a/mteb/descriptive_stats/Reranking/SyntecReranking.json b/mteb/descriptive_stats/Reranking/SyntecReranking.json index 6ce98070b7..563a05eec8 100644 --- a/mteb/descriptive_stats/Reranking/SyntecReranking.json +++ b/mteb/descriptive_stats/Reranking/SyntecReranking.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 1753367, "num_samples": 1117, - "num_queries": 100, + "number_of_characters": 1753367, "num_documents": 1017, - "min_document_length": 18, - "average_document_length": 7.160275319567355, - "max_document_length": 175, + "min_document_length": 100, + "average_document_length": 1716.897738446411, + "max_document_length": 6947, "unique_documents": 1017, - "min_query_length": 100, - "average_query_length": 17460.85, - "max_query_length": 6947, + "num_queries": 100, + "min_query_length": 18, + "average_query_length": 72.82, + "max_query_length": 175, "unique_queries": 100, "none_queries": 0, + "num_relevant_docs": 1017, "min_relevant_docs_per_query": 10, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 11, @@ -22,6 +23,7 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 100, "min_top_ranked_per_query": 10, "average_top_ranked_per_query": 10.17, "max_top_ranked_per_query": 11 diff --git a/mteb/descriptive_stats/Reranking/T2Reranking.json b/mteb/descriptive_stats/Reranking/T2Reranking.json index f880ef6f79..50844d1284 100644 --- a/mteb/descriptive_stats/Reranking/T2Reranking.json +++ b/mteb/descriptive_stats/Reranking/T2Reranking.json @@ -1,18 +1,19 @@ { "dev": { - "number_of_characters": 81980036, "num_samples": 103330, - "num_queries": 5908, + "number_of_characters": 81980036, "num_documents": 97422, - "min_document_length": 4, - "average_document_length": 0.6639465418488637, - "max_document_length": 29, + "min_document_length": 1, + "average_document_length": 840.8301307712837, + "max_document_length": 120026, "unique_documents": 97422, - "min_query_length": 1, - "average_query_length": 13865.157921462423, - "max_query_length": 120026, + "num_queries": 5908, + "min_query_length": 4, + "average_query_length": 10.948375084631008, + "max_query_length": 29, "unique_queries": 5908, "none_queries": 0, + "num_relevant_docs": 97422, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 7.522681110358835, "max_relevant_docs_per_query": 335, @@ -22,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 5908, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 16.489844278943806, "max_top_ranked_per_query": 335 diff --git a/mteb/descriptive_stats/Reranking/VoyageMMarcoReranking.json b/mteb/descriptive_stats/Reranking/VoyageMMarcoReranking.json index 51b405b439..da4009b072 100644 --- a/mteb/descriptive_stats/Reranking/VoyageMMarcoReranking.json +++ b/mteb/descriptive_stats/Reranking/VoyageMMarcoReranking.json @@ -1,19 +1,19 @@ { "test": { - "number_of_characters": 8824820, "num_samples": 55423, - "num_queries": 2048, + "number_of_characters": 8824820, 
"num_documents": 53375, - "num_relevant_docs": 53375, - "min_document_length": 3, - "average_document_length": 0.6108852459016394, - "max_document_length": 73, + "min_document_length": 19, + "average_document_length": 164.72532084309134, + "max_document_length": 1192, "unique_documents": 53375, - "min_query_length": 19, - "average_query_length": 4293.0732421875, - "max_query_length": 1192, + "num_queries": 2048, + "min_query_length": 3, + "average_query_length": 15.9208984375, + "max_query_length": 73, "unique_queries": 2048, "none_queries": 0, + "num_relevant_docs": 53375, "min_relevant_docs_per_query": 26, "average_relevant_docs_per_query": 1.06201171875, "max_relevant_docs_per_query": 29, @@ -23,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 2048, "min_top_ranked_per_query": 26, "average_top_ranked_per_query": 26.06201171875, "max_top_ranked_per_query": 29 diff --git a/mteb/descriptive_stats/Reranking/WebLINXCandidatesReranking.json b/mteb/descriptive_stats/Reranking/WebLINXCandidatesReranking.json index e31afea818..d7d53ee599 100644 --- a/mteb/descriptive_stats/Reranking/WebLINXCandidatesReranking.json +++ b/mteb/descriptive_stats/Reranking/WebLINXCandidatesReranking.json @@ -1,19 +1,19 @@ { "validation": { - "number_of_characters": 102848781, "num_samples": 317809, - "num_queries": 1301, + "number_of_characters": 102848781, "num_documents": 316508, - "num_relevant_docs": 316508, - "min_document_length": 142, - "average_document_length": 6.772091068788151, - "max_document_length": 9356, + "min_document_length": 152, + "average_document_length": 318.17634941296905, + "max_document_length": 1605, "unique_documents": 316508, - "min_query_length": 152, - "average_query_length": 77406.11837048424, - "max_query_length": 1605, + "num_queries": 1301, + "min_query_length": 142, + "average_query_length": 1647.5180630284397, + "max_query_length": 9356, "unique_queries": 1301, "none_queries": 
0, + "num_relevant_docs": 316508, "min_relevant_docs_per_query": 21, "average_relevant_docs_per_query": 1.01076095311299, "max_relevant_docs_per_query": 945, @@ -23,25 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1301, "min_top_ranked_per_query": 21, "average_top_ranked_per_query": 243.2805534204458, "max_top_ranked_per_query": 945 }, "test_iid": { - "number_of_characters": 131631330, "num_samples": 407410, - "num_queries": 1438, + "number_of_characters": 131631330, "num_documents": 405972, - "num_relevant_docs": 405972, - "min_document_length": 173, - "average_document_length": 6.101763175785522, - "max_document_length": 10467, + "min_document_length": 153, + "average_document_length": 318.135696550501, + "max_document_length": 1471, "unique_documents": 405972, - "min_query_length": 153, - "average_query_length": 89815.14951321279, - "max_query_length": 1471, + "num_queries": 1438, + "min_query_length": 173, + "average_query_length": 1722.6321279554938, + "max_query_length": 10467, "unique_queries": 1438, "none_queries": 0, + "num_relevant_docs": 405972, "min_relevant_docs_per_query": 15, "average_relevant_docs_per_query": 1.0528511821974966, "max_relevant_docs_per_query": 1149, @@ -51,25 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1438, "min_top_ranked_per_query": 15, "average_top_ranked_per_query": 282.317107093185, "max_top_ranked_per_query": 1149 }, "test_cat": { - "number_of_characters": 402615943, "num_samples": 1261751, - "num_queries": 3560, + "number_of_characters": 402615943, "num_documents": 1258191, - "num_relevant_docs": 1258191, - "min_document_length": 161, - "average_document_length": 6.082371436451222, - "max_document_length": 8502, + "min_document_length": 156, + "average_document_length": 313.91351392594606, + "max_document_length": 1590, "unique_documents": 1258191, - 
"min_query_length": 156, - "average_query_length": 110944.70730337079, - "max_query_length": 1590, + "num_queries": 3560, + "min_query_length": 161, + "average_query_length": 2149.6587078651687, + "max_query_length": 8502, "unique_queries": 3560, "none_queries": 0, + "num_relevant_docs": 1258191, "min_relevant_docs_per_query": 14, "average_relevant_docs_per_query": 1.0016853932584269, "max_relevant_docs_per_query": 1245, @@ -79,25 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 3560, "min_top_ranked_per_query": 14, "average_top_ranked_per_query": 353.4244382022472, "max_top_ranked_per_query": 1245 }, "test_geo": { - "number_of_characters": 371063547, "num_samples": 1155697, - "num_queries": 4916, + "number_of_characters": 371063547, "num_documents": 1150781, - "num_relevant_docs": 1150781, - "min_document_length": 146, - "average_document_length": 7.444432085687894, - "max_document_length": 19082, + "min_document_length": 154, + "average_document_length": 315.00053963351843, + "max_document_length": 1289, "unique_documents": 1150781, - "min_query_length": 154, - "average_query_length": 73738.12774613507, - "max_query_length": 1289, + "num_queries": 4916, + "min_query_length": 146, + "average_query_length": 1742.6588689991863, + "max_query_length": 19082, "unique_queries": 4916, "none_queries": 0, + "num_relevant_docs": 1150781, "min_relevant_docs_per_query": 3, "average_relevant_docs_per_query": 1.0024410089503661, "max_relevant_docs_per_query": 1274, @@ -107,25 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 4916, "min_top_ranked_per_query": 3, "average_top_ranked_per_query": 234.08889340927584, "max_top_ranked_per_query": 1274 }, "test_vis": { - "number_of_characters": 534911902, "num_samples": 1612156, - "num_queries": 5298, + "number_of_characters": 534911902, "num_documents": 1606858, - 
"num_relevant_docs": 1606858, - "min_document_length": 176, - "average_document_length": 5.7279492027298, - "max_document_length": 28468, + "min_document_length": 154, + "average_document_length": 327.165126601106, + "max_document_length": 1796, "unique_documents": 1606858, - "min_query_length": 154, - "average_query_length": 99227.61438278596, - "max_query_length": 1796, + "num_queries": 5298, + "min_query_length": 176, + "average_query_length": 1737.2595318988297, + "max_query_length": 28468, "unique_queries": 5298, "none_queries": 0, + "num_relevant_docs": 1606858, "min_relevant_docs_per_query": 11, "average_relevant_docs_per_query": 1.0152887882219706, "max_relevant_docs_per_query": 1819, @@ -135,25 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 5298, "min_top_ranked_per_query": 11, "average_top_ranked_per_query": 303.2952057380143, "max_top_ranked_per_query": 1819 }, "test_web": { - "number_of_characters": 277932894, "num_samples": 837319, - "num_queries": 3144, + "number_of_characters": 277932894, "num_documents": 834175, - "num_relevant_docs": 834175, - "min_document_length": 146, - "average_document_length": 6.902769802499476, - "max_document_length": 15329, + "min_document_length": 157, + "average_document_length": 326.280188209908, + "max_document_length": 1542, "unique_documents": 834175, - "min_query_length": 157, - "average_query_length": 86569.58524173028, - "max_query_length": 1542, + "num_queries": 3144, + "min_query_length": 146, + "average_query_length": 1831.4624681933842, + "max_query_length": 15329, "unique_queries": 3144, "none_queries": 0, + "num_relevant_docs": 834175, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0588422391857506, "max_relevant_docs_per_query": 1064, @@ -163,6 +168,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 3144, "min_top_ranked_per_query": 
2, "average_top_ranked_per_query": 265.3228371501272, "max_top_ranked_per_query": 1064 diff --git a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json index 18eed2cfca..9fc0d00802 100644 --- a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json +++ b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 83866932, "num_samples": 240000, - "num_queries": 24000, + "number_of_characters": 83866932, "num_documents": 216000, - "min_document_length": 7, - "average_document_length": 6.565689814814815, - "max_document_length": 180, + "min_document_length": 100, + "average_document_length": 381.70714351851854, + "max_document_length": 9461, "unique_documents": 216000, - "min_query_length": 100, - "average_query_length": 3435.3642916666668, - "max_query_length": 9461, + "num_queries": 24000, + "min_query_length": 7, + "average_query_length": 59.091208333333334, + "max_query_length": 180, "unique_queries": 24000, "none_queries": 0, + "num_relevant_docs": 216000, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -22,24 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 24000, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9, "hf_subset_descriptive_stats": { "bg": { - "number_of_characters": 5145316, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5145316, "num_documents": 13500, - "min_document_length": 18, - "average_document_length": 6.758518518518518, - "max_document_length": 166, + "min_document_length": 100, + "average_document_length": 374.376, + "max_document_length": 4869, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3369.384, - "max_query_length": 4869, + 
"num_queries": 1500, + "min_query_length": 18, + "average_query_length": 60.82666666666667, + "max_query_length": 166, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -49,24 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "bn": { - "number_of_characters": 5390581, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5390581, "num_documents": 13500, - "min_document_length": 7, - "average_document_length": 5.2518518518518515, - "max_document_length": 123, + "min_document_length": 100, + "average_document_length": 394.05044444444445, + "max_document_length": 5104, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3546.454, - "max_query_length": 5104, + "num_queries": 1500, + "min_query_length": 7, + "average_query_length": 47.266666666666666, + "max_query_length": 123, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -76,24 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "cs": { - "number_of_characters": 5079180, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5079180, "num_documents": 13500, - "min_document_length": 17, - "average_document_length": 6.2524444444444445, - "max_document_length": 137, + "min_document_length": 100, + "average_document_length": 369.9831111111111, + "max_document_length": 3487, "unique_documents": 13500, - "min_query_length": 100, - 
"average_query_length": 3329.848, - "max_query_length": 3487, + "num_queries": 1500, + "min_query_length": 17, + "average_query_length": 56.272, + "max_query_length": 137, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -103,24 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "da": { - "number_of_characters": 4746132, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 4746132, "num_documents": 13500, - "min_document_length": 17, - "average_document_length": 6.30562962962963, - "max_document_length": 137, + "min_document_length": 100, + "average_document_length": 345.2597037037037, + "max_document_length": 2563, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3107.3373333333334, - "max_query_length": 2563, + "num_queries": 1500, + "min_query_length": 17, + "average_query_length": 56.75066666666667, + "max_query_length": 137, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -130,24 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "de": { - "number_of_characters": 5483592, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5483592, "num_documents": 13500, - "min_document_length": 20, - "average_document_length": 7.778222222222222, - "max_document_length": 180, + "min_document_length": 100, + "average_document_length": 398.4137777777778, + "max_document_length": 3083, 
"unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3585.724, - "max_query_length": 3083, + "num_queries": 1500, + "min_query_length": 20, + "average_query_length": 70.004, + "max_query_length": 180, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -157,24 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "en": { - "number_of_characters": 6217884, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 6217884, "num_documents": 13500, - "min_document_length": 18, - "average_document_length": 7.596888888888889, - "max_document_length": 162, + "min_document_length": 100, + "average_document_length": 452.9871111111111, + "max_document_length": 3662, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 4076.884, - "max_query_length": 3662, + "num_queries": 1500, + "min_query_length": 18, + "average_query_length": 68.372, + "max_query_length": 162, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -184,24 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "fa": { - "number_of_characters": 4732619, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 4732619, "num_documents": 13500, - "min_document_length": 12, - "average_document_length": 5.407481481481481, - "max_document_length": 119, + "min_document_length": 100, + "average_document_length": 
345.1568888888889, + "max_document_length": 4707, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3106.412, - "max_query_length": 4707, + "num_queries": 1500, + "min_query_length": 12, + "average_query_length": 48.66733333333333, + "max_query_length": 119, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -211,24 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "fi": { - "number_of_characters": 5209132, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5209132, "num_documents": 13500, - "min_document_length": 14, - "average_document_length": 6.149259259259259, - "max_document_length": 132, + "min_document_length": 100, + "average_document_length": 379.71237037037037, + "max_document_length": 2574, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3417.4113333333335, - "max_query_length": 2574, + "num_queries": 1500, + "min_query_length": 14, + "average_query_length": 55.343333333333334, + "max_query_length": 132, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -238,24 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "hi": { - "number_of_characters": 5620959, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5620959, "num_documents": 13500, - "min_document_length": 13, - "average_document_length": 5.641925925925926, - 
"max_document_length": 125, + "min_document_length": 100, + "average_document_length": 410.72540740740743, + "max_document_length": 5912, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3696.5286666666666, - "max_query_length": 5912, + "num_queries": 1500, + "min_query_length": 13, + "average_query_length": 50.77733333333333, + "max_query_length": 125, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -265,24 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "it": { - "number_of_characters": 5420496, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5420496, "num_documents": 13500, - "min_document_length": 23, - "average_document_length": 7.783851851851852, - "max_document_length": 156, + "min_document_length": 100, + "average_document_length": 393.73437037037036, + "max_document_length": 9461, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3543.6093333333333, - "max_query_length": 9461, + "num_queries": 1500, + "min_query_length": 23, + "average_query_length": 70.05466666666666, + "max_query_length": 156, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -292,24 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "nl": { - "number_of_characters": 5169556, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5169556, "num_documents": 
13500, - "min_document_length": 18, - "average_document_length": 7.260518518518518, - "max_document_length": 136, + "min_document_length": 100, + "average_document_length": 375.6695555555556, + "max_document_length": 3641, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3381.026, - "max_query_length": 3641, + "num_queries": 1500, + "min_query_length": 18, + "average_query_length": 65.34466666666667, + "max_query_length": 136, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -319,24 +342,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "pt": { - "number_of_characters": 5474356, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5474356, "num_documents": 13500, - "min_document_length": 18, - "average_document_length": 7.235481481481481, - "max_document_length": 176, + "min_document_length": 100, + "average_document_length": 398.27237037037037, + "max_document_length": 3057, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3584.4513333333334, - "max_query_length": 3057, + "num_queries": 1500, + "min_query_length": 18, + "average_query_length": 65.11933333333333, + "max_query_length": 176, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -346,24 +371,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "ro": { - "number_of_characters": 4796113, "num_samples": 15000, - 
"num_queries": 1500, + "number_of_characters": 4796113, "num_documents": 13500, - "min_document_length": 14, - "average_document_length": 6.885925925925926, - "max_document_length": 169, + "min_document_length": 100, + "average_document_length": 348.3817037037037, + "max_document_length": 4213, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3135.4353333333333, - "max_query_length": 4213, + "num_queries": 1500, + "min_query_length": 14, + "average_query_length": 61.973333333333336, + "max_query_length": 169, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -373,24 +400,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "sr": { - "number_of_characters": 5271732, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5271732, "num_documents": 13500, - "min_document_length": 15, - "average_document_length": 6.185481481481482, - "max_document_length": 146, + "min_document_length": 100, + "average_document_length": 384.3131851851852, + "max_document_length": 3668, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3458.8186666666666, - "max_query_length": 3668, + "num_queries": 1500, + "min_query_length": 15, + "average_query_length": 55.669333333333334, + "max_query_length": 146, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -400,24 +429,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, 
"max_top_ranked_per_query": 9 }, "no": { - "number_of_characters": 5036586, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5036586, "num_documents": 13500, - "min_document_length": 14, - "average_document_length": 6.143111111111111, - "max_document_length": 129, + "min_document_length": 100, + "average_document_length": 366.93733333333336, + "max_document_length": 2841, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3302.436, - "max_query_length": 2841, + "num_queries": 1500, + "min_query_length": 14, + "average_query_length": 55.288, + "max_query_length": 129, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -427,24 +458,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, "min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 }, "sv": { - "number_of_characters": 5072698, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5072698, "num_documents": 13500, - "min_document_length": 17, - "average_document_length": 6.414444444444444, - "max_document_length": 133, + "min_document_length": 100, + "average_document_length": 369.340962962963, + "max_document_length": 3680, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3324.0686666666666, - "max_query_length": 3680, + "num_queries": 1500, + "min_query_length": 17, + "average_query_length": 57.73, + "max_query_length": 133, "unique_queries": 1500, "none_queries": 0, + "num_relevant_docs": 13500, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 9, @@ -454,6 +487,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 1500, 
"min_top_ranked_per_query": 9, "average_top_ranked_per_query": 9.0, "max_top_ranked_per_query": 9 diff --git a/mteb/descriptive_stats/Retrieval/AILACasedocs.json b/mteb/descriptive_stats/Retrieval/AILACasedocs.json index dec3b677d4..820bd013f9 100644 --- a/mteb/descriptive_stats/Retrieval/AILACasedocs.json +++ b/mteb/descriptive_stats/Retrieval/AILACasedocs.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 5164499, "num_samples": 236, - "num_queries": 50, + "number_of_characters": 5164499, "num_documents": 186, - "min_document_length": 1174, - "average_document_length": 816.7795698924731, - "max_document_length": 5936, + "min_document_length": 1014, + "average_document_length": 26949.344086021505, + "max_document_length": 222891, "unique_documents": 186, - "min_query_length": 1014, - "average_query_length": 100251.56, - "max_query_length": 222891, + "num_queries": 50, + "min_query_length": 1174, + "average_query_length": 3038.42, + "max_query_length": 5936, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 195, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 3.9, "max_relevant_docs_per_query": 22, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/AILAStatutes.json b/mteb/descriptive_stats/Retrieval/AILAStatutes.json index 9ced1b695f..06ebab8858 100644 --- a/mteb/descriptive_stats/Retrieval/AILAStatutes.json +++ b/mteb/descriptive_stats/Retrieval/AILAStatutes.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 313841, "num_samples": 132, - "num_queries": 50, + "number_of_characters": 313841, "num_documents": 82, - "min_document_length": 1174, - "average_document_length": 1852.6951219512196, - "max_document_length": 5936, + "min_document_length": 164, + 
"average_document_length": 1974.6341463414635, + "max_document_length": 26039, "unique_documents": 82, - "min_query_length": 164, - "average_query_length": 3238.4, - "max_query_length": 26039, + "num_queries": 50, + "min_query_length": 1174, + "average_query_length": 3038.42, + "max_query_length": 5936, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 217, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 4.34, "max_relevant_docs_per_query": 5, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/ARCChallenge.json b/mteb/descriptive_stats/Retrieval/ARCChallenge.json index 1634911bd4..6329f648f0 100644 --- a/mteb/descriptive_stats/Retrieval/ARCChallenge.json +++ b/mteb/descriptive_stats/Retrieval/ARCChallenge.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 452856, "num_samples": 10522, - "num_queries": 1172, + "number_of_characters": 452856, "num_documents": 9350, - "min_document_length": 13, - "average_document_length": 16.49144385026738, - "max_document_length": 831, + "min_document_length": 2, + "average_document_length": 31.94235294117647, + "max_document_length": 193, "unique_documents": 9350, - "min_query_length": 2, - "average_query_length": 254.830204778157, - "max_query_length": 193, + "num_queries": 1172, + "min_query_length": 13, + "average_query_length": 131.56569965870307, + "max_query_length": 831, "unique_queries": 1172, + "none_queries": 0, + "num_relevant_docs": 1172, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, 
"average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json b/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json index 6ddd5ce0b8..4f6659026f 100644 --- a/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json @@ -1,19 +1,19 @@ { "test": { - "number_of_characters": 9355954, "num_samples": 4872, - "num_queries": 2316, + "number_of_characters": 9355954, "num_documents": 2556, - "num_relevant_docs": 2316, - "min_document_length": 8, - "average_document_length": 154.68348982785602, - "max_document_length": 2863, + "min_document_length": 9, + "average_document_length": 3505.705399061033, + "max_document_length": 47930, "unique_documents": 2556, - "min_query_length": 9, - "average_query_length": 3868.990932642487, - "max_query_length": 47930, + "num_queries": 2316, + "min_query_length": 8, + "average_query_length": 170.71286701208982, + "max_query_length": 2863, "unique_queries": 2316, "none_queries": 0, + "num_relevant_docs": 2316, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -23,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/AlphaNLI.json b/mteb/descriptive_stats/Retrieval/AlphaNLI.json index 3c3751f9c7..4a4055a790 100644 --- a/mteb/descriptive_stats/Retrieval/AlphaNLI.json +++ b/mteb/descriptive_stats/Retrieval/AlphaNLI.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 10880076, "num_samples": 242879, - "num_queries": 1532, + "number_of_characters": 10880076, "num_documents": 241347, - "min_document_length": 51, - "average_document_length": 0.6541618499504862, - "max_document_length": 153, + "min_document_length": 
2, + "average_document_length": 44.42647308646886, + "max_document_length": 185, "unique_documents": 241347, - "min_query_length": 2, - "average_query_length": 6998.822454308094, - "max_query_length": 185, + "num_queries": 1532, + "min_query_length": 51, + "average_query_length": 103.05483028720627, + "max_query_length": 153, "unique_queries": 1532, + "none_queries": 0, + "num_relevant_docs": 1532, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/AppsRetrieval.json b/mteb/descriptive_stats/Retrieval/AppsRetrieval.json index caaab2453b..902275f328 100644 --- a/mteb/descriptive_stats/Retrieval/AppsRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/AppsRetrieval.json @@ -1,20 +1,31 @@ { "test": { - "number_of_characters": 11335620, "num_samples": 12530, - "num_queries": 3765, + "number_of_characters": 11335620, "num_documents": 8765, - "min_document_length": 152, - "average_document_length": 717.2737022247576, - "max_document_length": 5742, + "min_document_length": 6, + "average_document_length": 576.0086708499715, + "max_document_length": 289049, "unique_documents": 8765, - "min_query_length": 6, - "average_query_length": 1340.9604249667996, - "max_query_length": 289049, + "num_queries": 3765, + "min_query_length": 152, + "average_query_length": 1669.8284196547145, + "max_query_length": 5742, "unique_queries": 3765, + "none_queries": 0, + "num_relevant_docs": 3765, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 3765 + "unique_relevant_docs": 3765, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/ArguAna-PL.json b/mteb/descriptive_stats/Retrieval/ArguAna-PL.json index c72e0a2944..fac5caa5c6 100644 --- a/mteb/descriptive_stats/Retrieval/ArguAna-PL.json +++ b/mteb/descriptive_stats/Retrieval/ArguAna-PL.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 10931281, "num_samples": 10080, - "num_queries": 1406, + "number_of_characters": 10931281, "num_documents": 8674, - "min_document_length": 229, - "average_document_length": 198.53262623933594, - "max_document_length": 5541, + "min_document_length": 3, + "average_document_length": 1061.702674659903, + "max_document_length": 6506, "unique_documents": 8674, - "min_query_length": 3, - "average_query_length": 6549.935277382646, - "max_query_length": 6506, + "num_queries": 1406, + "min_query_length": 229, + "average_query_length": 1224.8022759601706, + "max_query_length": 5541, "unique_queries": 1406, + "none_queries": 0, + "num_relevant_docs": 1406, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/ArguAna.json b/mteb/descriptive_stats/Retrieval/ArguAna.json index 5eb59cda5e..0e101d956b 100644 --- a/mteb/descriptive_stats/Retrieval/ArguAna.json +++ b/mteb/descriptive_stats/Retrieval/ArguAna.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 10613204, "num_samples": 10080, - "num_queries": 1406, + "number_of_characters": 10613204, 
"num_documents": 8674, - "min_document_length": 251, - "average_document_length": 193.33237260779342, - "max_document_length": 5500, + "min_document_length": 3, + "average_document_length": 1030.2327645838136, + "max_document_length": 6674, "unique_documents": 8674, - "min_query_length": 3, - "average_query_length": 6355.7887624466575, - "max_query_length": 6674, + "num_queries": 1406, + "min_query_length": 251, + "average_query_length": 1192.7204836415362, + "max_query_length": 5500, "unique_queries": 1406, + "none_queries": 0, + "num_relevant_docs": 1406, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/AutoRAGRetrieval.json b/mteb/descriptive_stats/Retrieval/AutoRAGRetrieval.json index 9685cec463..5ce9a04b27 100644 --- a/mteb/descriptive_stats/Retrieval/AutoRAGRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/AutoRAGRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 601650, "num_samples": 834, - "num_queries": 114, + "number_of_characters": 601650, "num_documents": 720, - "min_document_length": 34, - "average_document_length": 11.022222222222222, - "max_document_length": 157, + "min_document_length": 8, + "average_document_length": 824.6027777777778, + "max_document_length": 2485, "unique_documents": 720, - "min_query_length": 8, - "average_query_length": 5208.017543859649, - "max_query_length": 2485, + "num_queries": 114, + "min_query_length": 34, + "average_query_length": 69.6140350877193, + "max_query_length": 157, "unique_queries": 114, + "none_queries": 0, + "num_relevant_docs": 114, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, 
@@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/BSARDRetrieval.json b/mteb/descriptive_stats/Retrieval/BSARDRetrieval.json index 155754d26c..d345954c0c 100644 --- a/mteb/descriptive_stats/Retrieval/BSARDRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/BSARDRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 19955744, "num_samples": 22855, - "num_queries": 222, + "number_of_characters": 19955744, "num_documents": 22633, - "min_document_length": 36, - "average_document_length": 1.4200061856581099, - "max_document_length": 293, + "min_document_length": 1, + "average_document_length": 880.2900631820793, + "max_document_length": 39566, "unique_documents": 22633, - "min_query_length": 1, - "average_query_length": 89745.96846846846, - "max_query_length": 39566, + "num_queries": 222, + "min_query_length": 36, + "average_query_length": 144.77027027027026, + "max_query_length": 293, "unique_queries": 222, + "none_queries": 0, + "num_relevant_docs": 222, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json b/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json index 78c8a7e121..d67f579e79 100644 --- a/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json @@ -1,6789 +1,10936 @@ { "test": { - "number_of_characters": 25574620, "num_samples": 521866, - "num_queries": 338378, + 
"number_of_characters": 114639237, "num_documents": 183488, - "min_document_length": 4, - "average_document_length": 137.38034094872688, - "max_document_length": 237, + "min_document_length": 45, + "average_document_length": 487.3975028339728, + "max_document_length": 1926, "unique_documents": 183488, - "min_query_length": 2, - "average_query_length": 1.0845149507355678, - "max_query_length": 2, + "num_queries": 338378, + "min_query_length": 4, + "average_query_length": 74.49551684802204, + "max_query_length": 237, "unique_queries": 338378, + "none_queries": 0, + "num_relevant_docs": 338392, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0000413738481817, "max_relevant_docs_per_query": 2, "unique_relevant_docs": 183488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "acm_Arab-acm_Arab": { - "number_of_characters": 51232, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 253495, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 102.98360655737704, - "max_document_length": 129, + "min_document_length": 135, + "average_document_length": 416.4733606557377, + "max_document_length": 1289, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 55.84, + "max_query_length": 129, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "acm_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 272852, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 135, + "average_document_length": 416.4733606557377, + "max_document_length": 1289, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-acm_Arab": { - "number_of_characters": 51232, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 282305, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 102.98360655737704, - "max_document_length": 129, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 55.84, + "max_query_length": 
129, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "afr_Latn-afr_Latn": { - "number_of_characters": 71217, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 316030, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 143.93647540983608, - "max_document_length": 159, + "min_document_length": 172, + "average_document_length": 503.6659836065574, + "max_document_length": 1421, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 78.04555555555555, + "max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "afr_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 315402, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 
172, + "average_document_length": 503.6659836065574, + "max_document_length": 1421, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-afr_Latn": { - "number_of_characters": 71217, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 302290, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 143.93647540983608, - "max_document_length": 159, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 78.04555555555555, + "max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null }, "als_Latn-als_Latn": { - "number_of_characters": 69498, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 329122, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 140.4139344262295, - "max_document_length": 175, + "min_document_length": 177, + "average_document_length": 534.016393442623, + "max_document_length": 1547, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 76.13555555555556, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "als_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 330213, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 177, + "average_document_length": 534.016393442623, + "max_document_length": 1547, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - 
"unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-als_Latn": { - "number_of_characters": 69498, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 300571, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 140.4139344262295, - "max_document_length": 175, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 76.13555555555556, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "amh_Ethi-amh_Ethi": { - "number_of_characters": 45221, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 200341, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 90.66598360655738, - "max_document_length": 100, + "min_document_length": 92, + "average_document_length": 319.8688524590164, + "max_document_length": 915, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - 
"max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 49.16111111111111, + "max_query_length": 100, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "amh_Ethi-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 225709, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 92, + "average_document_length": 319.8688524590164, + "max_document_length": 915, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-amh_Ethi": { - "number_of_characters": 45221, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 276294, "num_documents": 
488, - "min_document_length": 13, - "average_document_length": 90.66598360655738, - "max_document_length": 100, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 49.16111111111111, + "max_query_length": 100, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "apc_Arab-apc_Arab": { - "number_of_characters": 51248, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 242083, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 103.01639344262296, - "max_document_length": 134, + "min_document_length": 119, + "average_document_length": 393.0553278688525, + "max_document_length": 1214, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 55.85777777777778, + "max_query_length": 134, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "apc_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 261424, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 119, + "average_document_length": 393.0553278688525, + "max_document_length": 1214, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-apc_Arab": { - "number_of_characters": 51248, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 282321, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 103.01639344262296, - "max_document_length": 134, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 55.85777777777778, + "max_query_length": 134, "unique_queries": 900, + "none_queries": 0, + 
"num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "arb_Arab-arb_Arab": { - "number_of_characters": 53671, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 258613, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 107.98155737704919, - "max_document_length": 134, + "min_document_length": 142, + "average_document_length": 421.96311475409834, + "max_document_length": 1275, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 58.55, + "max_query_length": 134, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "arb_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 275531, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 142, + "average_document_length": 421.96311475409834, + 
"max_document_length": 1275, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-arb_Arab": { - "number_of_characters": 53671, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 284744, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 107.98155737704919, - "max_document_length": 134, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 58.55, + "max_query_length": 134, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "arb_Latn-arb_Latn": { - 
"number_of_characters": 61298, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 331464, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 123.61065573770492, - "max_document_length": 160, + "min_document_length": 142, + "average_document_length": 555.6188524590164, + "max_document_length": 1708, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 67.02444444444444, + "max_query_length": 160, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "arb_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 340755, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 142, + "average_document_length": 555.6188524590164, + "max_document_length": 1708, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-arb_Latn": { - "number_of_characters": 61298, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 292371, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 123.61065573770492, - "max_document_length": 160, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 67.02444444444444, + "max_query_length": 160, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ars_Arab-ars_Arab": { - "number_of_characters": 51765, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 256996, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 104.07581967213115, - "max_document_length": 119, + "min_document_length": 144, + "average_document_length": 422.5553278688525, + "max_document_length": 1284, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 
14, + "average_query_length": 56.43222222222222, + "max_query_length": 119, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ars_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 275820, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 144, + "average_document_length": 422.5553278688525, + "max_document_length": 1284, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ars_Arab": { - "number_of_characters": 51765, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 282838, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 
104.07581967213115, - "max_document_length": 119, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 56.43222222222222, + "max_query_length": 119, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ary_Arab-ary_Arab": { - "number_of_characters": 60261, "num_samples": 1386, - "num_queries": 898, + "number_of_characters": 259925, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 121.48565573770492, - "max_document_length": 138, + "min_document_length": 141, + "average_document_length": 411.1475409836066, + "max_document_length": 1189, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.086859688195991, - "max_query_length": 2, + "num_queries": 898, + "min_query_length": 13, + "average_query_length": 66.01893095768374, + "max_query_length": 138, "unique_queries": 898, + "none_queries": 0, + "num_relevant_docs": 898, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ary_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 270253, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 141, + "average_document_length": 411.1475409836066, + "max_document_length": 1189, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ary_Arab": { - "number_of_characters": 60261, "num_samples": 1386, - "num_queries": 898, + "number_of_characters": 291334, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 121.48565573770492, - "max_document_length": 138, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.086859688195991, - "max_query_length": 2, + "num_queries": 898, + "min_query_length": 13, + "average_query_length": 66.01893095768374, + "max_query_length": 138, "unique_queries": 898, + "none_queries": 0, + "num_relevant_docs": 898, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "arz_Arab-arz_Arab": { - "number_of_characters": 52403, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 252508, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 105.38319672131148, - "max_document_length": 115, + "min_document_length": 127, + "average_document_length": 412.05122950819674, + "max_document_length": 1197, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 57.14111111111111, + "max_query_length": 115, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "arz_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 270694, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 127, + "average_document_length": 412.05122950819674, + "max_document_length": 1197, "unique_documents": 488, - 
"min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-arz_Arab": { - "number_of_characters": 52403, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 283476, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 105.38319672131148, - "max_document_length": 115, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 57.14111111111111, + "max_query_length": 115, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "asm_Beng-asm_Beng": { - "number_of_characters": 62410, "num_samples": 1388, - 
"num_queries": 900, + "number_of_characters": 285230, "num_documents": 488, - "min_document_length": 4, - "average_document_length": 125.88934426229508, - "max_document_length": 158, + "min_document_length": 141, + "average_document_length": 458.5983606557377, + "max_document_length": 1267, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 4, + "average_query_length": 68.26, + "max_query_length": 158, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "asm_Beng-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293409, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 141, + "average_document_length": 458.5983606557377, + "max_document_length": 1267, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-asm_Beng": { - "number_of_characters": 62410, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293483, "num_documents": 488, - "min_document_length": 4, - "average_document_length": 125.88934426229508, - "max_document_length": 158, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 4, + "average_query_length": 68.26, + "max_query_length": 158, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "azj_Latn-azj_Latn": { - "number_of_characters": 67137, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 319732, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 135.57581967213116, - "max_document_length": 156, + "min_document_length": 165, + "average_document_length": 519.6127049180328, + "max_document_length": 1549, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 73.51222222222222, + "max_query_length": 156, 
"unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "azj_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 323184, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 165, + "average_document_length": 519.6127049180328, + "max_document_length": 1549, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-azj_Latn": { - "number_of_characters": 67137, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 298210, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 135.57581967213116, - "max_document_length": 156, + "min_document_length": 144, + 
"average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 73.51222222222222, + "max_query_length": 156, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "bam_Latn-bam_Latn": { - "number_of_characters": 66084, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 288276, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 133.41803278688525, - "max_document_length": 166, + "min_document_length": 150, + "average_document_length": 457.3114754098361, + "max_document_length": 1364, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 72.34222222222222, + "max_query_length": 166, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null }, "bam_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 292781, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 150, + "average_document_length": 457.3114754098361, + "max_document_length": 1364, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-bam_Latn": { - "number_of_characters": 66084, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297157, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 133.41803278688525, - "max_document_length": 166, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 72.34222222222222, + "max_query_length": 166, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - 
"unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ben_Beng-ben_Beng": { - "number_of_characters": 63512, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 290810, "num_documents": 488, - "min_document_length": 9, - "average_document_length": 128.14754098360655, - "max_document_length": 175, + "min_document_length": 136, + "average_document_length": 467.7745901639344, + "max_document_length": 1281, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 9, + "average_query_length": 69.48444444444445, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ben_Beng-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297887, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 136, + "average_document_length": 467.7745901639344, + "max_document_length": 1281, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - 
"max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ben_Beng": { - "number_of_characters": 63512, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 294585, "num_documents": 488, - "min_document_length": 9, - "average_document_length": 128.14754098360655, - "max_document_length": 175, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 9, + "average_query_length": 69.48444444444445, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ben_Latn-ben_Latn": { - "number_of_characters": 68285, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 322481, "num_documents": 
488, - "min_document_length": 9, - "average_document_length": 137.92827868852459, - "max_document_length": 185, + "min_document_length": 159, + "average_document_length": 522.8934426229508, + "max_document_length": 1393, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 9, + "average_query_length": 74.78777777777778, + "max_query_length": 185, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ben_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 324785, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 159, + "average_document_length": 522.8934426229508, + "max_document_length": 1393, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ben_Latn": { - "number_of_characters": 68285, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299358, "num_documents": 488, - "min_document_length": 9, - "average_document_length": 137.92827868852459, - "max_document_length": 185, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 9, + "average_query_length": 74.78777777777778, + "max_query_length": 185, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "bod_Tibt-bod_Tibt": { - "number_of_characters": 79188, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 338505, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 160.2704918032787, - "max_document_length": 213, + "min_document_length": 153, + "average_document_length": 533.3872950819672, + "max_document_length": 1455, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 86.90222222222222, + "max_query_length": 213, "unique_queries": 900, + "none_queries": 0, + 
"num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "bod_Tibt-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 329906, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 153, + "average_document_length": 533.3872950819672, + "max_document_length": 1455, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-bod_Tibt": { - "number_of_characters": 79188, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 310261, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 160.2704918032787, - "max_document_length": 213, + "min_document_length": 144, + "average_document_length": 475.51024590163934, 
+ "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 86.90222222222222, + "max_query_length": 213, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "bul_Cyrl-bul_Cyrl": { - "number_of_characters": 66577, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 308123, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 134.42827868852459, - "max_document_length": 177, + "min_document_length": 146, + "average_document_length": 496.97131147540983, + "max_document_length": 1403, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.89, + "max_query_length": 177, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "bul_Cyrl-eng_Latn": { - 
"number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 312135, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 146, + "average_document_length": 496.97131147540983, + "max_document_length": 1403, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-bul_Cyrl": { - "number_of_characters": 66577, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297650, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 134.42827868852459, - "max_document_length": 177, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.89, + "max_query_length": 177, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": 
null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "cat_Latn-cat_Latn": { - "number_of_characters": 68842, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 324284, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 139.06967213114754, - "max_document_length": 163, + "min_document_length": 172, + "average_document_length": 525.4467213114754, + "max_document_length": 1479, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 75.40666666666667, + "max_query_length": 163, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "cat_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 326031, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 172, + "average_document_length": 525.4467213114754, + "max_document_length": 1479, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + 
"average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-cat_Latn": { - "number_of_characters": 68842, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299915, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 139.06967213114754, - "max_document_length": 163, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 75.40666666666667, + "max_query_length": 163, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ceb_Latn-ceb_Latn": { - "number_of_characters": 74053, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 351651, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 
149.74795081967213, - "max_document_length": 184, + "min_document_length": 176, + "average_document_length": 570.8483606557377, + "max_document_length": 1613, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 81.19666666666667, + "max_query_length": 184, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ceb_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 348187, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 176, + "average_document_length": 570.8483606557377, + "max_document_length": 1613, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ceb_Latn": { - "number_of_characters": 74053, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 305126, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 149.74795081967213, - "max_document_length": 184, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 81.19666666666667, + "max_query_length": 184, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ces_Latn-ces_Latn": { - "number_of_characters": 61936, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 285931, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 124.91803278688525, - "max_document_length": 139, + "min_document_length": 145, + "average_document_length": 461.0061475409836, + "max_document_length": 1393, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 67.73333333333333, + "max_query_length": 139, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ces_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 294584, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 145, + "average_document_length": 461.0061475409836, + "max_document_length": 1393, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ces_Latn": { - "number_of_characters": 61936, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293009, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 124.91803278688525, - "max_document_length": 139, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - 
"min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 67.73333333333333, + "max_query_length": 139, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ckb_Arab-ckb_Arab": { - "number_of_characters": 64917, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 289879, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 131.0266393442623, - "max_document_length": 178, + "min_document_length": 150, + "average_document_length": 462.98770491803276, + "max_document_length": 1367, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 71.04555555555555, + "max_query_length": 178, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ckb_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - 
"num_queries": 900, + "number_of_characters": 295551, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 150, + "average_document_length": 462.98770491803276, + "max_document_length": 1367, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ckb_Arab": { - "number_of_characters": 64917, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 295990, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 131.0266393442623, - "max_document_length": 178, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 71.04555555555555, + "max_query_length": 178, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "dan_Latn-dan_Latn": { - "number_of_characters": 66648, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 304541, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 134.5737704918033, - "max_document_length": 159, + "min_document_length": 164, + "average_document_length": 489.4856557377049, + "max_document_length": 1405, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.96888888888888, + "max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "dan_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 308482, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 164, + "average_document_length": 489.4856557377049, + "max_document_length": 1405, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + 
"max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-dan_Latn": { - "number_of_characters": 66648, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297721, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 134.5737704918033, - "max_document_length": 159, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.96888888888888, + "max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "deu_Latn-deu_Latn": { - "number_of_characters": 68768, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 338713, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 138.91803278688525, - "max_document_length": 182, + 
"min_document_length": 173, + "average_document_length": 555.1659836065573, + "max_document_length": 1602, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 75.32444444444444, + "max_query_length": 182, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "deu_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 340534, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 173, + "average_document_length": 555.1659836065573, + "max_document_length": 1602, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-deu_Latn": { - "number_of_characters": 68768, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299841, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 138.91803278688525, - "max_document_length": 182, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 75.32444444444444, + "max_query_length": 182, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ell_Grek-ell_Grek": { - "number_of_characters": 79210, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 355607, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 160.3155737704918, - "max_document_length": 212, + "min_document_length": 181, + "average_document_length": 568.3872950819672, + "max_document_length": 1686, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 86.92666666666666, + "max_query_length": 212, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, 
"max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ell_Grek-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 346986, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 181, + "average_document_length": 568.3872950819672, + "max_document_length": 1686, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ell_Grek": { - "number_of_characters": 79210, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 310283, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 160.3155737704918, - "max_document_length": 212, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - 
"average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 86.92666666666666, + "max_query_length": 212, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 301662, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "est_Latn-est_Latn": { - "number_of_characters": 61779, "num_samples": 1388, - "num_queries": 900, + 
"number_of_characters": 288771, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 124.59631147540983, - "max_document_length": 164, + "min_document_length": 147, + "average_document_length": 467.1475409836066, + "max_document_length": 1411, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 67.55888888888889, + "max_query_length": 164, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "est_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297581, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 147, + "average_document_length": 467.1475409836066, + "max_document_length": 1411, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-est_Latn": { - "number_of_characters": 61779, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 292852, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 124.59631147540983, - "max_document_length": 164, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 67.55888888888889, + "max_query_length": 164, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eus_Latn-eus_Latn": { - "number_of_characters": 67979, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 314025, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 137.3012295081967, - "max_document_length": 169, + "min_document_length": 178, + "average_document_length": 506.19262295081967, + "max_document_length": 1392, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 74.44777777777777, + 
"max_query_length": 169, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eus_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 316635, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 178, + "average_document_length": 506.19262295081967, + "max_document_length": 1392, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-eus_Latn": { - "number_of_characters": 67979, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299052, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 137.3012295081967, - "max_document_length": 169, + 
"min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 74.44777777777777, + "max_query_length": 169, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "fin_Latn-fin_Latn": { - "number_of_characters": 66234, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 312918, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 133.72540983606558, - "max_document_length": 161, + "min_document_length": 156, + "average_document_length": 507.5, + "max_document_length": 1482, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.50888888888889, + "max_query_length": 161, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": 
null, + "max_top_ranked_per_query": null }, "fin_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 317273, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 156, + "average_document_length": 507.5, + "max_document_length": 1482, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-fin_Latn": { - "number_of_characters": 66234, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297307, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 133.72540983606558, - "max_document_length": 161, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.50888888888889, + "max_query_length": 161, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - 
"unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "fra_Latn-fra_Latn": { - "number_of_characters": 82464, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 357130, "num_documents": 488, - "min_document_length": 19, - "average_document_length": 166.98360655737704, - "max_document_length": 204, + "min_document_length": 168, + "average_document_length": 564.8401639344262, + "max_document_length": 1629, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 19, + "average_query_length": 90.54222222222222, + "max_query_length": 204, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "fra_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 345255, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 168, + "average_document_length": 564.8401639344262, + "max_document_length": 1629, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - 
"max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-fra_Latn": { - "number_of_characters": 82464, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 313537, "num_documents": 488, - "min_document_length": 19, - "average_document_length": 166.98360655737704, - "max_document_length": 204, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 19, + "average_query_length": 90.54222222222222, + "max_query_length": 204, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "fuv_Latn-fuv_Latn": { - "number_of_characters": 53555, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 268994, "num_documents": 
488, - "min_document_length": 18, - "average_document_length": 107.7438524590164, - "max_document_length": 122, + "min_document_length": 127, + "average_document_length": 443.4733606557377, + "max_document_length": 1308, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 58.42111111111111, + "max_query_length": 122, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "fuv_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 286028, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 127, + "average_document_length": 443.4733606557377, + "max_document_length": 1308, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-fuv_Latn": { - "number_of_characters": 53555, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 284628, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 107.7438524590164, - "max_document_length": 122, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 58.42111111111111, + "max_query_length": 122, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "gaz_Latn-gaz_Latn": { - "number_of_characters": 78315, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 352346, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 158.48155737704917, - "max_document_length": 191, + "min_document_length": 177, + "average_document_length": 563.5389344262295, + "max_document_length": 1650, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 85.93222222222222, + "max_query_length": 191, "unique_queries": 900, + "none_queries": 0, + 
"num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "gaz_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 344620, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 177, + "average_document_length": 563.5389344262295, + "max_document_length": 1650, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-gaz_Latn": { - "number_of_characters": 78315, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 309388, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 158.48155737704917, - "max_document_length": 191, + "min_document_length": 144, + "average_document_length": 
475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 85.93222222222222, + "max_query_length": 191, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "grn_Latn-grn_Latn": { - "number_of_characters": 68572, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 301997, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 138.51639344262296, - "max_document_length": 161, + "min_document_length": 153, + "average_document_length": 480.3299180327869, + "max_document_length": 1359, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 75.10666666666667, + "max_query_length": 161, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, 
"grn_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 304014, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 153, + "average_document_length": 480.3299180327869, + "max_document_length": 1359, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-grn_Latn": { - "number_of_characters": 68572, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299645, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 138.51639344262296, - "max_document_length": 161, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 75.10666666666667, + "max_query_length": 161, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + 
"unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "guj_Gujr-guj_Gujr": { - "number_of_characters": 57007, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 279627, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 114.81762295081967, - "max_document_length": 138, + "min_document_length": 117, + "average_document_length": 458.1885245901639, + "max_document_length": 1326, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 62.25666666666667, + "max_query_length": 138, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "guj_Gujr-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293209, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 117, + "average_document_length": 458.1885245901639, + "max_document_length": 1326, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + 
"num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-guj_Gujr": { - "number_of_characters": 57007, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 288080, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 114.81762295081967, - "max_document_length": 138, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 62.25666666666667, + "max_query_length": 138, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hat_Latn-hat_Latn": { - "number_of_characters": 64558, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 277653, "num_documents": 488, - 
"min_document_length": 17, - "average_document_length": 130.29098360655738, - "max_document_length": 179, + "min_document_length": 144, + "average_document_length": 438.6700819672131, + "max_document_length": 1260, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 70.64666666666666, + "max_query_length": 179, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hat_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 283684, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 144, + "average_document_length": 438.6700819672131, + "max_document_length": 1260, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-hat_Latn": { - "number_of_characters": 64558, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 295631, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 130.29098360655738, - "max_document_length": 179, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 70.64666666666666, + "max_query_length": 179, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hau_Latn-hau_Latn": { - "number_of_characters": 78240, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 324800, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 158.327868852459, - "max_document_length": 183, + "min_document_length": 166, + "average_document_length": 507.24590163934425, + "max_document_length": 1479, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 85.8488888888889, + "max_query_length": 183, "unique_queries": 900, + "none_queries": 0, + 
"num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hau_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 317149, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 166, + "average_document_length": 507.24590163934425, + "max_document_length": 1479, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-hau_Latn": { - "number_of_characters": 78240, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 309313, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 158.327868852459, - "max_document_length": 183, + "min_document_length": 144, + "average_document_length": 475.51024590163934, 
+ "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 85.8488888888889, + "max_query_length": 183, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "heb_Hebr-heb_Hebr": { - "number_of_characters": 50598, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 230847, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 101.68442622950819, - "max_document_length": 134, + "min_document_length": 110, + "average_document_length": 371.36270491803276, + "max_document_length": 1065, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 55.135555555555555, + "max_query_length": 134, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "heb_Hebr-eng_Latn": 
{ - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 250838, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 110, + "average_document_length": 371.36270491803276, + "max_document_length": 1065, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-heb_Hebr": { - "number_of_characters": 50598, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 281671, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 101.68442622950819, - "max_document_length": 134, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 55.135555555555555, + "max_query_length": 134, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hin_Deva-hin_Deva": { - "number_of_characters": 66332, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 296452, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 133.9262295081967, - "max_document_length": 165, + "min_document_length": 151, + "average_document_length": 473.55737704918033, + "max_document_length": 1348, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.61777777777777, + "max_query_length": 165, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hin_Deva-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 300709, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 151, + "average_document_length": 473.55737704918033, + "max_document_length": 1348, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 
18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-hin_Deva": { - "number_of_characters": 66332, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297405, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 133.9262295081967, - "max_document_length": 165, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.61777777777777, + "max_query_length": 165, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hin_Latn-hin_Latn": { - "number_of_characters": 68307, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 331696, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 
137.9733606557377, - "max_document_length": 170, + "min_document_length": 165, + "average_document_length": 541.7315573770492, + "max_document_length": 1536, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 74.81222222222222, + "max_query_length": 170, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hin_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 333978, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 165, + "average_document_length": 541.7315573770492, + "max_document_length": 1536, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-hin_Latn": { - "number_of_characters": 68307, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299380, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 137.9733606557377, - "max_document_length": 170, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 74.81222222222222, + "max_query_length": 170, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hrv_Latn-hrv_Latn": { - "number_of_characters": 62928, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 290923, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 126.95081967213115, - "max_document_length": 175, + "min_document_length": 145, + "average_document_length": 469.202868852459, + "max_document_length": 1341, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 68.83555555555556, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hrv_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 298584, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 145, + "average_document_length": 469.202868852459, + "max_document_length": 1341, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-hrv_Latn": { - "number_of_characters": 62928, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 294001, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 126.95081967213115, - "max_document_length": 175, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - 
"min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 68.83555555555556, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hun_Latn-hun_Latn": { - "number_of_characters": 67941, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 311548, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 137.2233606557377, - "max_document_length": 176, + "min_document_length": 159, + "average_document_length": 501.1946721311475, + "max_document_length": 1499, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 74.40555555555555, + "max_query_length": 176, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hun_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - 
"num_queries": 900, + "number_of_characters": 314196, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 159, + "average_document_length": 501.1946721311475, + "max_document_length": 1499, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-hun_Latn": { - "number_of_characters": 67941, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299014, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 137.2233606557377, - "max_document_length": 176, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 74.40555555555555, + "max_query_length": 176, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hye_Armn-hye_Armn": { - "number_of_characters": 68859, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 325308, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 139.1045081967213, - "max_document_length": 193, + "min_document_length": 150, + "average_document_length": 527.5102459016393, + "max_document_length": 1519, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 75.42555555555556, + "max_query_length": 193, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hye_Armn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 327038, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 150, + "average_document_length": 527.5102459016393, + "max_document_length": 1519, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + 
"max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-hye_Armn": { - "number_of_characters": 68859, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299932, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 139.1045081967213, - "max_document_length": 193, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 75.42555555555556, + "max_query_length": 193, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ibo_Latn-ibo_Latn": { - "number_of_characters": 66167, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 300577, "num_documents": 488, - "min_document_length": 19, - "average_document_length": 133.58811475409837, - "max_document_length": 156, + 
"min_document_length": 168, + "average_document_length": 482.3483606557377, + "max_document_length": 1345, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 19, + "average_query_length": 72.51501668520578, + "max_query_length": 156, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ibo_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 304999, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 168, + "average_document_length": 482.3483606557377, + "max_document_length": 1345, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ibo_Latn": { - "number_of_characters": 66167, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 297240, "num_documents": 488, - "min_document_length": 19, - "average_document_length": 133.58811475409837, - "max_document_length": 156, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 19, + "average_query_length": 72.51501668520578, + "max_query_length": 156, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ilo_Latn-ilo_Latn": { - "number_of_characters": 78161, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 357638, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 158.16598360655738, - "max_document_length": 187, + "min_document_length": 198, + "average_document_length": 574.6987704918033, + "max_document_length": 1663, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 85.7611111111111, + "max_query_length": 187, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ilo_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 350066, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 198, + "average_document_length": 574.6987704918033, + "max_document_length": 1663, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ilo_Latn": { - "number_of_characters": 78161, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 309234, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 158.16598360655738, - "max_document_length": 187, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - 
"min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 85.7611111111111, + "max_query_length": 187, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ind_Latn-ind_Latn": { - "number_of_characters": 74871, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 325731, "num_documents": 488, - "min_document_length": 19, - "average_document_length": 151.42418032786884, - "max_document_length": 207, + "min_document_length": 169, + "average_document_length": 516.0573770491803, + "max_document_length": 1462, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 19, + "average_query_length": 82.10555555555555, + "max_query_length": 207, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ind_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - 
"num_queries": 900, + "number_of_characters": 321449, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 169, + "average_document_length": 516.0573770491803, + "max_document_length": 1462, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ind_Latn": { - "number_of_characters": 74871, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 305944, "num_documents": 488, - "min_document_length": 19, - "average_document_length": 151.42418032786884, - "max_document_length": 207, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 19, + "average_query_length": 82.10555555555555, + "max_query_length": 207, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "isl_Latn-isl_Latn": { - "number_of_characters": 70522, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299267, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 142.5122950819672, - "max_document_length": 170, + "min_document_length": 168, + "average_document_length": 470.73975409836066, + "max_document_length": 1295, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 77.27333333333333, + "max_query_length": 170, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "isl_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299334, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 168, + "average_document_length": 470.73975409836066, + "max_document_length": 1295, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + 
"max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-isl_Latn": { - "number_of_characters": 70522, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 301595, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 142.5122950819672, - "max_document_length": 170, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 77.27333333333333, + "max_query_length": 170, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ita_Latn-ita_Latn": { - "number_of_characters": 76124, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 348884, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 153.99180327868854, - "max_document_length": 185, + 
"min_document_length": 147, + "average_document_length": 560.9344262295082, + "max_document_length": 1482, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 83.49777777777778, + "max_query_length": 185, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ita_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 343349, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 147, + "average_document_length": 560.9344262295082, + "max_document_length": 1482, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ita_Latn": { - "number_of_characters": 76124, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 307197, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 153.99180327868854, - "max_document_length": 185, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 83.49777777777778, + "max_query_length": 185, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "jav_Latn-jav_Latn": { - "number_of_characters": 71722, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 311906, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 144.97131147540983, - "max_document_length": 174, + "min_document_length": 152, + "average_document_length": 494.1803278688525, + "max_document_length": 1485, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 78.60666666666667, + "max_query_length": 174, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, 
"max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "jav_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 310773, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 152, + "average_document_length": 494.1803278688525, + "max_document_length": 1485, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-jav_Latn": { - "number_of_characters": 71722, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 302795, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 144.97131147540983, - "max_document_length": 174, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - 
"average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 78.60666666666667, + "max_query_length": 174, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "jpn_Jpan-jpn_Jpan": { - "number_of_characters": 33187, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 133592, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 66.0061475409836, - "max_document_length": 76, + "min_document_length": 66, + "average_document_length": 207.74795081967213, + "max_document_length": 609, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 35.79, + "max_query_length": 76, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "jpn_Jpan-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + 
"number_of_characters": 170994, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 66, + "average_document_length": 207.74795081967213, + "max_document_length": 609, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-jpn_Jpan": { - "number_of_characters": 33187, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 264260, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 66.0061475409836, - "max_document_length": 76, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 35.79, + "max_query_length": 76, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kac_Latn-kac_Latn": { - "number_of_characters": 89655, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 384060, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 181.71926229508196, - "max_document_length": 195, + "min_document_length": 208, + "average_document_length": 605.2889344262295, + "max_document_length": 1682, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 18, + "average_query_length": 98.64182424916574, + "max_query_length": 195, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 899, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kac_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 364994, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 208, + "average_document_length": 605.2889344262295, + "max_document_length": 1682, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + 
"none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-kac_Latn": { - "number_of_characters": 89655, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 320728, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 181.71926229508196, - "max_document_length": 195, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 18, + "average_query_length": 98.64182424916574, + "max_query_length": 195, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 899, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kan_Knda-kan_Knda": { - "number_of_characters": 65899, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 308390, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 133.0389344262295, - "max_document_length": 165, + "min_document_length": 149, + 
"average_document_length": 498.9077868852459, + "max_document_length": 1436, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.13666666666667, + "max_query_length": 165, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kan_Knda-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 313080, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 149, + "average_document_length": 498.9077868852459, + "max_document_length": 1436, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null }, "eng_Latn-kan_Knda": { - "number_of_characters": 65899, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 296972, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 133.0389344262295, - "max_document_length": 165, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.13666666666667, + "max_query_length": 165, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kat_Geor-kat_Geor": { - "number_of_characters": 68309, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 321960, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 137.97745901639345, - "max_document_length": 175, + "min_document_length": 173, + "average_document_length": 521.7766393442623, + "max_document_length": 1518, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 74.81444444444445, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - 
"unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kat_Geor-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 324240, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 173, + "average_document_length": 521.7766393442623, + "max_document_length": 1518, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-kat_Geor": { - "number_of_characters": 68309, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299382, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 137.97745901639345, - "max_document_length": 175, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - 
"max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 74.81444444444445, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kaz_Cyrl-kaz_Cyrl": { - "number_of_characters": 64657, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 301928, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 130.49385245901638, - "max_document_length": 158, + "min_document_length": 142, + "average_document_length": 488.2110655737705, + "max_document_length": 1425, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 70.75666666666666, + "max_query_length": 158, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kaz_Cyrl-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 307860, "num_documents": 
488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 142, + "average_document_length": 488.2110655737705, + "max_document_length": 1425, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-kaz_Cyrl": { - "number_of_characters": 64657, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 295730, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 130.49385245901638, - "max_document_length": 158, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 70.75666666666666, + "max_query_length": 158, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kea_Latn-kea_Latn": { - "number_of_characters": 69323, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 298468, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 140.05532786885246, - "max_document_length": 183, + "min_document_length": 142, + "average_document_length": 471.5594262295082, + "max_document_length": 1360, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 75.94111111111111, + "max_query_length": 183, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kea_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299734, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 142, + "average_document_length": 471.5594262295082, + "max_document_length": 1360, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + 
"num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-kea_Latn": { - "number_of_characters": 69323, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 300396, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 140.05532786885246, - "max_document_length": 183, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 75.94111111111111, + "max_query_length": 183, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "khk_Cyrl-khk_Cyrl": { - "number_of_characters": 66977, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 308369, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 135.24795081967213, - "max_document_length": 162, + "min_document_length": 142, + "average_document_length": 496.655737704918, 
+ "max_document_length": 1491, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 73.33444444444444, + "max_query_length": 162, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "khk_Cyrl-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 311981, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 142, + "average_document_length": 496.655737704918, + "max_document_length": 1491, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-khk_Cyrl": { 
- "number_of_characters": 66977, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 298050, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 135.24795081967213, - "max_document_length": 162, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 73.33444444444444, + "max_query_length": 162, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "khm_Khmr-khm_Khmr": { - "number_of_characters": 69150, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 342632, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 139.70081967213116, - "max_document_length": 169, + "min_document_length": 187, + "average_document_length": 562.4139344262295, + "max_document_length": 1617, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 75.74888888888889, + "max_query_length": 169, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "khm_Khmr-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 344071, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 187, + "average_document_length": 562.4139344262295, + "max_document_length": 1617, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-khm_Khmr": { - "number_of_characters": 69150, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 300223, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 139.70081967213116, - "max_document_length": 169, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 
16, + "average_query_length": 75.74888888888889, + "max_query_length": 169, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kin_Latn-kin_Latn": { - "number_of_characters": 72803, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 330102, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 147.18647540983608, - "max_document_length": 194, + "min_document_length": 181, + "average_document_length": 529.2520491803278, + "max_document_length": 1606, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 13, + "average_query_length": 79.89655172413794, + "max_query_length": 194, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kin_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 327888, "num_documents": 488, - "min_document_length": 18, - 
"average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 181, + "average_document_length": 529.2520491803278, + "max_document_length": 1606, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-kin_Latn": { - "number_of_characters": 72803, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 303876, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 147.18647540983608, - "max_document_length": 194, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 13, + "average_query_length": 79.89655172413794, + "max_query_length": 194, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kir_Cyrl-kir_Cyrl": { - "number_of_characters": 67957, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 305031, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 137.25614754098362, - "max_document_length": 182, + "min_document_length": 141, + "average_document_length": 487.80737704918033, + "max_document_length": 1398, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 74.42333333333333, + "max_query_length": 182, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kir_Cyrl-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 307663, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 141, + "average_document_length": 487.80737704918033, + "max_document_length": 1398, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + 
"num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-kir_Cyrl": { - "number_of_characters": 67957, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299030, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 137.25614754098362, - "max_document_length": 182, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 74.42333333333333, + "max_query_length": 182, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kor_Hang-kor_Hang": { - "number_of_characters": 32708, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 149501, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 65.02459016393442, - "max_document_length": 88, + "min_document_length": 77, + "average_document_length": 241.32991803278688, 
+ "max_document_length": 696, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 35.257777777777775, + "max_query_length": 88, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "kor_Hang-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 187382, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 77, + "average_document_length": 241.32991803278688, + "max_document_length": 696, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-kor_Hang": { - 
"number_of_characters": 32708, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 263781, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 65.02459016393442, - "max_document_length": 88, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 35.257777777777775, + "max_query_length": 88, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "lao_Laoo-lao_Laoo": { - "number_of_characters": 57958, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 287147, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 116.76639344262296, - "max_document_length": 142, + "min_document_length": 158, + "average_document_length": 471.6495901639344, + "max_document_length": 1380, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 63.31333333333333, + "max_query_length": 142, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "lao_Laoo-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299778, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 158, + "average_document_length": 471.6495901639344, + "max_document_length": 1380, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-lao_Laoo": { - "number_of_characters": 57958, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 289031, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 116.76639344262296, - "max_document_length": 142, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 
15, + "average_query_length": 63.31333333333333, + "max_query_length": 142, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "lin_Latn-lin_Latn": { - "number_of_characters": 74223, "num_samples": 1386, - "num_queries": 898, + "number_of_characters": 323543, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 150.09631147540983, - "max_document_length": 183, + "min_document_length": 157, + "average_document_length": 512.9016393442623, + "max_document_length": 1474, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.086859688195991, - "max_query_length": 2, + "num_queries": 898, + "min_query_length": 17, + "average_query_length": 81.56681514476615, + "max_query_length": 183, "unique_queries": 898, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0022271714922049, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "lin_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 319909, "num_documents": 488, - "min_document_length": 18, - 
"average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 157, + "average_document_length": 512.9016393442623, + "max_document_length": 1474, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-lin_Latn": { - "number_of_characters": 74223, "num_samples": 1386, - "num_queries": 898, + "number_of_characters": 305296, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 150.09631147540983, - "max_document_length": 183, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.086859688195991, - "max_query_length": 2, + "num_queries": 898, + "min_query_length": 17, + "average_query_length": 81.56681514476615, + "max_query_length": 183, "unique_queries": 898, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0022271714922049, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "lit_Latn-lit_Latn": { - "number_of_characters": 62805, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293168, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 126.69877049180327, - "max_document_length": 167, + "min_document_length": 157, + "average_document_length": 474.0553278688525, + "max_document_length": 1418, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 68.69888888888889, + "max_query_length": 167, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "lit_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 300952, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 157, + "average_document_length": 474.0553278688525, + "max_document_length": 1418, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + 
"num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-lit_Latn": { - "number_of_characters": 62805, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293878, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 126.69877049180327, - "max_document_length": 167, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 68.69888888888889, + "max_query_length": 167, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "lug_Latn-lug_Latn": { - "number_of_characters": 71566, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 307631, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 144.6516393442623, - "max_document_length": 237, + "min_document_length": 158, + "average_document_length": 
485.73975409836066, + "max_document_length": 1436, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 12, + "average_query_length": 78.52057842046719, + "max_query_length": 237, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "lug_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 306654, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 158, + "average_document_length": 485.73975409836066, + "max_document_length": 1436, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null }, "eng_Latn-lug_Latn": { - "number_of_characters": 71566, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 302639, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 144.6516393442623, - "max_document_length": 237, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 12, + "average_query_length": 78.52057842046719, + "max_query_length": 237, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "luo_Latn-luo_Latn": { - "number_of_characters": 66805, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 308627, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 134.8954918032787, - "max_document_length": 178, + "min_document_length": 169, + "average_document_length": 497.53688524590166, + "max_document_length": 1442, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 73.14333333333333, + "max_query_length": 178, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 
1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "luo_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 312411, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 169, + "average_document_length": 497.53688524590166, + "max_document_length": 1442, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-luo_Latn": { - "number_of_characters": 66805, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297878, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 134.8954918032787, - "max_document_length": 178, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - 
"max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 73.14333333333333, + "max_query_length": 178, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "lvs_Latn-lvs_Latn": { - "number_of_characters": 63957, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 300741, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 129.0594262295082, - "max_document_length": 172, + "min_document_length": 170, + "average_document_length": 487.21311475409834, + "max_document_length": 1511, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 69.97888888888889, + "max_query_length": 172, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "lvs_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 307373, "num_documents": 
488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 170, + "average_document_length": 487.21311475409834, + "max_document_length": 1511, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-lvs_Latn": { - "number_of_characters": 63957, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 295030, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 129.0594262295082, - "max_document_length": 172, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 69.97888888888889, + "max_query_length": 172, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mal_Mlym-mal_Mlym": { - "number_of_characters": 73599, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 335793, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 148.81762295081967, - "max_document_length": 191, + "min_document_length": 157, + "average_document_length": 539.2827868852459, + "max_document_length": 1581, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 80.69222222222223, + "max_query_length": 191, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mal_Mlym-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 332783, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 157, + "average_document_length": 539.2827868852459, + "max_document_length": 1581, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + 
"num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-mal_Mlym": { - "number_of_characters": 73599, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 304672, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 148.81762295081967, - "max_document_length": 191, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 80.69222222222223, + "max_query_length": 191, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mar_Deva-mar_Deva": { - "number_of_characters": 62671, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 295288, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 126.42418032786885, - "max_document_length": 160, + "min_document_length": 147, + "average_document_length": 
478.67418032786884, + "max_document_length": 1387, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 15, + "average_query_length": 68.62625139043382, + "max_query_length": 160, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mar_Deva-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 303206, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 147, + "average_document_length": 478.67418032786884, + "max_document_length": 1387, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null }, "eng_Latn-mar_Deva": { - "number_of_characters": 62671, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 293744, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 126.42418032786885, - "max_document_length": 160, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 15, + "average_query_length": 68.62625139043382, + "max_query_length": 160, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mkd_Cyrl-mkd_Cyrl": { - "number_of_characters": 67588, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 308552, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 136.5, - "max_document_length": 180, + "min_document_length": 152, + "average_document_length": 495.77868852459017, + "max_document_length": 1492, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 74.01333333333334, + "max_query_length": 180, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - 
"unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mkd_Cyrl-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 311553, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 152, + "average_document_length": 495.77868852459017, + "max_document_length": 1492, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-mkd_Cyrl": { - "number_of_characters": 67588, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 298661, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 136.5, - "max_document_length": 180, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - 
"max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 74.01333333333334, + "max_query_length": 180, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mlt_Latn-mlt_Latn": { - "number_of_characters": 68480, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 324143, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 138.327868852459, - "max_document_length": 185, + "min_document_length": 162, + "average_document_length": 525.8995901639345, + "max_document_length": 1590, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 75.00444444444445, + "max_query_length": 185, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mlt_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 326252, "num_documents": 
488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 162, + "average_document_length": 525.8995901639345, + "max_document_length": 1590, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-mlt_Latn": { - "number_of_characters": 68480, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299553, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 138.327868852459, - "max_document_length": 185, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 75.00444444444445, + "max_query_length": 185, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mri_Latn-mri_Latn": { - "number_of_characters": 74519, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 330273, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 150.702868852459, - "max_document_length": 185, + "min_document_length": 175, + "average_document_length": 526.0860655737705, + "max_document_length": 1590, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 81.71444444444444, + "max_query_length": 185, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mri_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 326343, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 175, + "average_document_length": 526.0860655737705, + "max_document_length": 1590, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + 
"num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-mri_Latn": { - "number_of_characters": 74519, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 305592, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 150.702868852459, - "max_document_length": 185, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 81.71444444444444, + "max_query_length": 185, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mya_Mymr-mya_Mymr": { - "number_of_characters": 81331, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 368465, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 164.66188524590163, - "max_document_length": 171, + "min_document_length": 182, + "average_document_length": 590.389344262295, + 
"max_document_length": 1724, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 89.28333333333333, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "mya_Mymr-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 357723, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 182, + "average_document_length": 590.389344262295, + "max_document_length": 1724, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-mya_Mymr": { - 
"number_of_characters": 81331, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 312404, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 164.66188524590163, - "max_document_length": 171, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 89.28333333333333, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "nld_Latn-nld_Latn": { - "number_of_characters": 68789, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 326035, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 138.9610655737705, - "max_document_length": 183, + "min_document_length": 180, + "average_document_length": 529.1434426229508, + "max_document_length": 1617, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 75.34777777777778, + "max_query_length": 183, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "nld_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 327835, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 180, + "average_document_length": 529.1434426229508, + "max_document_length": 1617, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-nld_Latn": { - "number_of_characters": 68789, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299862, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 138.9610655737705, - "max_document_length": 183, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 
16, + "average_query_length": 75.34777777777778, + "max_query_length": 183, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "nob_Latn-nob_Latn": { - "number_of_characters": 64917, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297760, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 131.0266393442623, - "max_document_length": 168, + "min_document_length": 153, + "average_document_length": 479.13729508196724, + "max_document_length": 1422, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 71.04555555555555, + "max_query_length": 168, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "nob_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 303432, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 
142.64959016393442, - "max_document_length": 171, + "min_document_length": 153, + "average_document_length": 479.13729508196724, + "max_document_length": 1422, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-nob_Latn": { - "number_of_characters": 64917, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 295990, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 131.0266393442623, - "max_document_length": 168, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 71.04555555555555, + "max_query_length": 168, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "npi_Deva-npi_Deva": { - "number_of_characters": 61183, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 283203, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 123.375, - "max_document_length": 154, + "min_document_length": 132, + "average_document_length": 456.9590163934426, + "max_document_length": 1385, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 66.89666666666666, + "max_query_length": 154, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "npi_Deva-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 292609, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 132, + "average_document_length": 456.9590163934426, + "max_document_length": 1385, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-npi_Deva": { - "number_of_characters": 61183, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 292256, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 123.375, - "max_document_length": 154, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 66.89666666666666, + "max_query_length": 154, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "npi_Latn-npi_Latn": { - "number_of_characters": 65683, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 316506, "num_documents": 488, - "min_document_length": 20, - "average_document_length": 132.59631147540983, - "max_document_length": 154, + "min_document_length": 149, + "average_document_length": 515.9815573770492, + "max_document_length": 1502, "unique_documents": 488, - 
"min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 20, + "average_query_length": 71.89666666666666, + "max_query_length": 154, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "npi_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 321412, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 149, + "average_document_length": 515.9815573770492, + "max_document_length": 1502, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-npi_Latn": { - "number_of_characters": 65683, "num_samples": 1388, - 
"num_queries": 900, + "number_of_characters": 296756, "num_documents": 488, - "min_document_length": 20, - "average_document_length": 132.59631147540983, - "max_document_length": 154, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 20, + "average_query_length": 71.89666666666666, + "max_query_length": 154, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "nso_Latn-nso_Latn": { - "number_of_characters": 79073, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 345532, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 160.03483606557376, - "max_document_length": 235, + "min_document_length": 179, + "average_document_length": 548.0225409836065, + "max_document_length": 1458, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 86.77444444444444, + "max_query_length": 235, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "nso_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 337048, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 179, + "average_document_length": 548.0225409836065, + "max_document_length": 1458, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-nso_Latn": { - "number_of_characters": 79073, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 310146, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 160.03483606557376, - "max_document_length": 235, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 86.77444444444444, + 
"max_query_length": 235, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "nya_Latn-nya_Latn": { - "number_of_characters": 82685, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 341517, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 167.43647540983608, - "max_document_length": 215, + "min_document_length": 148, + "average_document_length": 532.3934426229508, + "max_document_length": 1487, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 90.78777777777778, + "max_query_length": 215, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "nya_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 329421, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + 
"min_document_length": 148, + "average_document_length": 532.3934426229508, + "max_document_length": 1487, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-nya_Latn": { - "number_of_characters": 82685, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 313758, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 167.43647540983608, - "max_document_length": 215, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 90.78777777777778, + "max_query_length": 215, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ory_Orya-ory_Orya": { - "number_of_characters": 66638, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 303701, "num_documents": 488, - "min_document_length": 10, - "average_document_length": 134.55327868852459, - "max_document_length": 168, + "min_document_length": 139, + "average_document_length": 487.78483606557376, + "max_document_length": 1389, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 10, + "average_query_length": 72.95777777777778, + "max_query_length": 168, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ory_Orya-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 307652, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 139, + "average_document_length": 487.78483606557376, + "max_document_length": 1389, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, 
"max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ory_Orya": { - "number_of_characters": 66638, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297711, "num_documents": 488, - "min_document_length": 10, - "average_document_length": 134.55327868852459, - "max_document_length": 168, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 10, + "average_query_length": 72.95777777777778, + "max_query_length": 168, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "pan_Guru-pan_Guru": { - "number_of_characters": 66944, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 300327, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 135.18032786885246, - "max_document_length": 157, + "min_document_length": 126, + "average_document_length": 480.2438524590164, + "max_document_length": 1417, "unique_documents": 488, - "min_query_length": 2, - 
"average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 73.29777777777778, + "max_query_length": 157, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "pan_Guru-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 303972, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 126, + "average_document_length": 480.2438524590164, + "max_document_length": 1417, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-pan_Guru": { - "number_of_characters": 66944, "num_samples": 1388, - "num_queries": 900, + 
"number_of_characters": 298017, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 135.18032786885246, - "max_document_length": 157, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 73.29777777777778, + "max_query_length": 157, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "pbt_Arab-pbt_Arab": { - "number_of_characters": 61880, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 282129, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 124.80327868852459, - "max_document_length": 155, + "min_document_length": 137, + "average_document_length": 453.3299180327869, + "max_document_length": 1389, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 67.67111111111112, + "max_query_length": 155, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "pbt_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 290838, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 137, + "average_document_length": 453.3299180327869, + "max_document_length": 1389, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-pbt_Arab": { - "number_of_characters": 61880, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 292953, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 124.80327868852459, - "max_document_length": 155, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 67.67111111111112, + 
"max_query_length": 155, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "pes_Arab-pes_Arab": { - "number_of_characters": 59252, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 277313, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 119.41803278688525, - "max_document_length": 152, + "min_document_length": 131, + "average_document_length": 448.84631147540983, + "max_document_length": 1248, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 64.75111111111111, + "max_query_length": 152, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "pes_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 288650, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + 
"min_document_length": 131, + "average_document_length": 448.84631147540983, + "max_document_length": 1248, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-pes_Arab": { - "number_of_characters": 59252, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 290325, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 119.41803278688525, - "max_document_length": 152, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 64.75111111111111, + "max_query_length": 152, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "plt_Latn-plt_Latn": { - "number_of_characters": 86472, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 369158, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 175.19672131147541, - "max_document_length": 222, + "min_document_length": 166, + "average_document_length": 581.2745901639345, + "max_document_length": 1660, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 94.99555555555555, + "max_query_length": 222, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "plt_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 353275, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 166, + "average_document_length": 581.2745901639345, + "max_document_length": 1660, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, 
"max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-plt_Latn": { - "number_of_characters": 86472, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 317545, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 175.19672131147541, - "max_document_length": 222, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 94.99555555555555, + "max_query_length": 222, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "pol_Latn-pol_Latn": { - "number_of_characters": 67664, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 312660, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 136.65573770491804, - "max_document_length": 196, + "min_document_length": 139, + "average_document_length": 504.0409836065574, + "max_document_length": 1424, "unique_documents": 488, - "min_query_length": 2, - 
"average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 74.09777777777778, + "max_query_length": 196, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "pol_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 315585, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 139, + "average_document_length": 504.0409836065574, + "max_document_length": 1424, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-pol_Latn": { - "number_of_characters": 67664, "num_samples": 1388, - "num_queries": 900, + 
"number_of_characters": 298737, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 136.65573770491804, - "max_document_length": 196, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 74.09777777777778, + "max_query_length": 196, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "por_Latn-por_Latn": { - "number_of_characters": 71281, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 322739, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 144.06762295081967, - "max_document_length": 179, + "min_document_length": 166, + "average_document_length": 517.2827868852459, + "max_document_length": 1480, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 78.11666666666666, + "max_query_length": 179, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "por_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 322047, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 166, + "average_document_length": 517.2827868852459, + "max_document_length": 1480, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-por_Latn": { - "number_of_characters": 71281, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 302354, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 144.06762295081967, - "max_document_length": 179, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 78.11666666666666, + 
"max_query_length": 179, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ron_Latn-ron_Latn": { - "number_of_characters": 71844, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 331883, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 145.22131147540983, - "max_document_length": 181, + "min_document_length": 185, + "average_document_length": 534.8668032786885, + "max_document_length": 1442, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 78.74222222222222, + "max_query_length": 181, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ron_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 330628, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + 
"min_document_length": 185, + "average_document_length": 534.8668032786885, + "max_document_length": 1442, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ron_Latn": { - "number_of_characters": 71844, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 302917, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 145.22131147540983, - "max_document_length": 181, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 78.74222222222222, + "max_query_length": 181, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "rus_Cyrl-rus_Cyrl": { - "number_of_characters": 75823, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 328690, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 153.375, - "max_document_length": 196, + "min_document_length": 145, + "average_document_length": 520.1700819672132, + "max_document_length": 1527, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 83.16333333333333, + "max_query_length": 196, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "rus_Cyrl-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 323456, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 145, + "average_document_length": 520.1700819672132, + "max_document_length": 1527, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, 
"max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-rus_Cyrl": { - "number_of_characters": 75823, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 306896, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 153.375, - "max_document_length": 196, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 83.16333333333333, + "max_query_length": 196, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "shn_Mymr-shn_Mymr": { - "number_of_characters": 69288, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 398284, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 139.98360655737704, - "max_document_length": 159, + "min_document_length": 231, + "average_document_length": 676.172131147541, + "max_document_length": 1926, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 
1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 75.90222222222222, + "max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "shn_Mymr-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 399585, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 231, + "average_document_length": 676.172131147541, + "max_document_length": 1926, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-shn_Mymr": { - "number_of_characters": 69288, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 
300361, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 139.98360655737704, - "max_document_length": 159, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 75.90222222222222, + "max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sin_Latn-sin_Latn": { - "number_of_characters": 85996, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 373325, "num_documents": 488, - "min_document_length": 19, - "average_document_length": 174.22131147540983, - "max_document_length": 224, + "min_document_length": 175, + "average_document_length": 590.7889344262295, + "max_document_length": 1721, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 19, + "average_query_length": 94.46666666666667, + "max_query_length": 224, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sin_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 357918, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 175, + "average_document_length": 590.7889344262295, + "max_document_length": 1721, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-sin_Latn": { - "number_of_characters": 85996, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 317069, "num_documents": 488, - "min_document_length": 19, - "average_document_length": 174.22131147540983, - "max_document_length": 224, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 19, + "average_query_length": 94.46666666666667, + "max_query_length": 224, "unique_queries": 900, 
+ "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sin_Sinh-sin_Sinh": { - "number_of_characters": 63902, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 296516, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 128.94672131147541, - "max_document_length": 159, + "min_document_length": 142, + "average_document_length": 478.66803278688525, + "max_document_length": 1417, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 69.91777777777777, + "max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sin_Sinh-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 303203, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 142, + 
"average_document_length": 478.66803278688525, + "max_document_length": 1417, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-sin_Sinh": { - "number_of_characters": 63902, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 294975, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 128.94672131147541, - "max_document_length": 159, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 69.91777777777777, + "max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null }, "slk_Latn-slk_Latn": { - "number_of_characters": 62663, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 294354, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 126.4077868852459, - "max_document_length": 146, + "min_document_length": 147, + "average_document_length": 476.7766393442623, + "max_document_length": 1402, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 68.5411111111111, + "max_query_length": 146, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "slk_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 302280, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 147, + "average_document_length": 476.7766393442623, + "max_document_length": 1402, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - 
"unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-slk_Latn": { - "number_of_characters": 62663, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293736, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 126.4077868852459, - "max_document_length": 146, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 68.5411111111111, + "max_query_length": 146, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "slv_Latn-slv_Latn": { - "number_of_characters": 62895, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293644, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 126.88319672131148, - "max_document_length": 176, + "min_document_length": 149, + "average_document_length": 474.84631147540983, + "max_document_length": 1451, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - 
"max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 68.79888888888888, + "max_query_length": 176, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "slv_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 301338, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 149, + "average_document_length": 474.84631147540983, + "max_document_length": 1451, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-slv_Latn": { - "number_of_characters": 62895, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293968, "num_documents": 
488, - "min_document_length": 13, - "average_document_length": 126.88319672131148, - "max_document_length": 176, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 68.79888888888888, + "max_query_length": 176, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sna_Latn-sna_Latn": { - "number_of_characters": 74071, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 332997, "num_documents": 488, - "min_document_length": 20, - "average_document_length": 149.78483606557376, - "max_document_length": 191, + "min_document_length": 168, + "average_document_length": 532.5860655737705, + "max_document_length": 1492, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 20, + "average_query_length": 81.30700778642937, + "max_query_length": 191, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 899, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sna_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 329515, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 168, + "average_document_length": 532.5860655737705, + "max_document_length": 1492, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-sna_Latn": { - "number_of_characters": 74071, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 305144, "num_documents": 488, - "min_document_length": 20, - "average_document_length": 149.78483606557376, - "max_document_length": 191, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 20, + "average_query_length": 81.30700778642937, + "max_query_length": 191, "unique_queries": 899, + "none_queries": 0, + 
"num_relevant_docs": 899, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "snd_Arab-snd_Arab": { - "number_of_characters": 58057, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 267647, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 116.96926229508196, - "max_document_length": 164, + "min_document_length": 136, + "average_document_length": 431.48770491803276, + "max_document_length": 1346, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 63.42333333333333, + "max_query_length": 164, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "snd_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 280179, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 136, + "average_document_length": 
431.48770491803276, + "max_document_length": 1346, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-snd_Arab": { - "number_of_characters": 58057, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 289130, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 116.96926229508196, - "max_document_length": 164, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 63.42333333333333, + "max_query_length": 164, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, 
"som_Latn-som_Latn": { - "number_of_characters": 82838, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 346394, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 167.75, - "max_document_length": 201, + "min_document_length": 167, + "average_document_length": 542.0737704918033, + "max_document_length": 1649, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 90.95777777777778, + "max_query_length": 201, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "som_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 334145, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 167, + "average_document_length": 542.0737704918033, + "max_document_length": 1649, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 
488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-som_Latn": { - "number_of_characters": 82838, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 313911, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 167.75, - "max_document_length": 201, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 90.95777777777778, + "max_query_length": 201, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sot_Latn-sot_Latn": { - "number_of_characters": 75794, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 354601, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 153.3155737704918, - "max_document_length": 186, + "min_document_length": 194, + "average_document_length": 573.3258196721312, + "max_document_length": 1555, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + 
"average_query_length": 83.13111111111111, + "max_query_length": 186, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sot_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 349396, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 194, + "average_document_length": 573.3258196721312, + "max_document_length": 1555, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-sot_Latn": { - "number_of_characters": 75794, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 306867, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 
153.3155737704918, - "max_document_length": 186, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 83.13111111111111, + "max_query_length": 186, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "spa_Latn-spa_Latn": { - "number_of_characters": 74920, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 349338, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 151.52459016393442, - "max_document_length": 180, + "min_document_length": 167, + "average_document_length": 564.3319672131148, + "max_document_length": 1641, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 82.16, + "max_query_length": 180, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "spa_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 345007, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 167, + "average_document_length": 564.3319672131148, + "max_document_length": 1641, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-spa_Latn": { - "number_of_characters": 74920, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 305993, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 151.52459016393442, - "max_document_length": 180, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 82.16, + "max_query_length": 180, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "srp_Cyrl-srp_Cyrl": { - "number_of_characters": 61657, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 290942, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 124.34631147540983, - "max_document_length": 160, + "min_document_length": 156, + "average_document_length": 471.84631147540983, + "max_document_length": 1368, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 13, + "average_query_length": 67.49833147942158, + "max_query_length": 160, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "srp_Cyrl-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299874, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 156, + "average_document_length": 471.84631147540983, + "max_document_length": 1368, 
"unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-srp_Cyrl": { - "number_of_characters": 61657, "num_samples": 1387, - "num_queries": 899, + "number_of_characters": 292730, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 124.34631147540983, - "max_document_length": 160, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.085650723025584, - "max_query_length": 2, + "num_queries": 899, + "min_query_length": 13, + "average_query_length": 67.49833147942158, + "max_query_length": 160, "unique_queries": 899, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011123470522802, "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ssw_Latn-ssw_Latn": { - 
"number_of_characters": 73964, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 334112, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 149.5655737704918, - "max_document_length": 182, + "min_document_length": 181, + "average_document_length": 535.0901639344262, + "max_document_length": 1599, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 81.09777777777778, + "max_query_length": 182, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ssw_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 330737, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 181, + "average_document_length": 535.0901639344262, + "max_document_length": 1599, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ssw_Latn": { - "number_of_characters": 73964, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 305037, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 149.5655737704918, - "max_document_length": 182, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 81.09777777777778, + "max_query_length": 182, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sun_Latn-sun_Latn": { - "number_of_characters": 71320, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 312052, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 144.14754098360655, - "max_document_length": 173, + "min_document_length": 140, + "average_document_length": 495.3032786885246, + "max_document_length": 1511, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 
15, + "average_query_length": 78.16, + "max_query_length": 173, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sun_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 311321, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 140, + "average_document_length": 495.3032786885246, + "max_document_length": 1511, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-sun_Latn": { - "number_of_characters": 71320, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 302393, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 
144.14754098360655, - "max_document_length": 173, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 78.16, + "max_query_length": 173, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "swe_Latn-swe_Latn": { - "number_of_characters": 62785, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 296381, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 126.6577868852459, - "max_document_length": 154, + "min_document_length": 163, + "average_document_length": 480.6803278688525, + "max_document_length": 1400, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 68.67666666666666, + "max_query_length": 154, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + 
"min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "swe_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 304185, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 163, + "average_document_length": 480.6803278688525, + "max_document_length": 1400, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-swe_Latn": { - "number_of_characters": 62785, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293858, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 126.6577868852459, - "max_document_length": 154, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 68.67666666666666, + "max_query_length": 154, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "swh_Latn-swh_Latn": { - "number_of_characters": 73480, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 316064, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 148.5737704918033, - "max_document_length": 194, + "min_document_length": 181, + "average_document_length": 499.0983606557377, + "max_document_length": 1457, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 80.56, + "max_query_length": 194, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "swh_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 313173, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 181, + "average_document_length": 499.0983606557377, + "max_document_length": 1457, "unique_documents": 488, - 
"min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-swh_Latn": { - "number_of_characters": 73480, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 304553, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 148.5737704918033, - "max_document_length": 194, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 80.56, + "max_query_length": 194, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tam_Taml-tam_Taml": { - "number_of_characters": 73991, "num_samples": 1388, - 
"num_queries": 900, + "number_of_characters": 344113, "num_documents": 488, - "min_document_length": 20, - "average_document_length": 149.62090163934425, - "max_document_length": 181, + "min_document_length": 181, + "average_document_length": 555.5286885245902, + "max_document_length": 1600, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 20, + "average_query_length": 81.12777777777778, + "max_query_length": 181, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tam_Taml-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 340711, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 181, + "average_document_length": 555.5286885245902, + "max_document_length": 1600, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-tam_Taml": { - "number_of_characters": 73991, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 305064, "num_documents": 488, - "min_document_length": 20, - "average_document_length": 149.62090163934425, - "max_document_length": 181, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 20, + "average_query_length": 81.12777777777778, + "max_query_length": 181, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tel_Telu-tel_Telu": { - "number_of_characters": 65945, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299953, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 133.13319672131146, - "max_document_length": 149, + "min_document_length": 132, + "average_document_length": 481.5245901639344, + "max_document_length": 1369, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 72.18777777777778, + 
"max_query_length": 149, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tel_Telu-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 304597, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 132, + "average_document_length": 481.5245901639344, + "max_document_length": 1369, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-tel_Telu": { - "number_of_characters": 65945, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297018, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 133.13319672131146, - "max_document_length": 149, + 
"min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 72.18777777777778, + "max_query_length": 149, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tgk_Cyrl-tgk_Cyrl": { - "number_of_characters": 67829, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 324769, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 136.99385245901638, - "max_document_length": 171, + "min_document_length": 146, + "average_document_length": 528.516393442623, + "max_document_length": 1582, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 74.28111111111112, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tgk_Cyrl-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 327529, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 146, + "average_document_length": 528.516393442623, + "max_document_length": 1582, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-tgk_Cyrl": { - "number_of_characters": 67829, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 298902, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 136.99385245901638, - "max_document_length": 171, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 74.28111111111112, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, 
"max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tgl_Latn-tgl_Latn": { - "number_of_characters": 75087, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 365753, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 151.86680327868854, - "max_document_length": 184, + "min_document_length": 184, + "average_document_length": 597.6270491803278, + "max_document_length": 1740, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 82.34555555555555, + "max_query_length": 184, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tgl_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 361255, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 184, + "average_document_length": 597.6270491803278, + "max_document_length": 1740, "unique_documents": 488, - "min_query_length": 2, - 
"average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-tgl_Latn": { - "number_of_characters": 75087, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 306160, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 151.86680327868854, - "max_document_length": 184, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 82.34555555555555, + "max_query_length": 184, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tha_Thai-tha_Thai": { - "number_of_characters": 54496, "num_samples": 1388, - "num_queries": 900, + 
"number_of_characters": 276129, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 109.67213114754098, - "max_document_length": 123, + "min_document_length": 126, + "average_document_length": 456.1659836065574, + "max_document_length": 1245, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 59.46666666666667, + "max_query_length": 123, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tha_Thai-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 292222, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 126, + "average_document_length": 456.1659836065574, + "max_document_length": 1245, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-tha_Thai": { - "number_of_characters": 54496, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 285569, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 109.67213114754098, - "max_document_length": 123, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 59.46666666666667, + "max_query_length": 123, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tir_Ethi-tir_Ethi": { - "number_of_characters": 47775, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 206715, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 95.89959016393442, - "max_document_length": 110, + "min_document_length": 109, + "average_document_length": 327.6967213114754, + "max_document_length": 1000, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 51.99888888888889, + 
"max_query_length": 110, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tir_Ethi-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 229529, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 109, + "average_document_length": 327.6967213114754, + "max_document_length": 1000, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-tir_Ethi": { - "number_of_characters": 47775, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 278848, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 95.89959016393442, - "max_document_length": 110, + 
"min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 51.99888888888889, + "max_query_length": 110, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tsn_Latn-tsn_Latn": { - "number_of_characters": 79391, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 367171, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 160.68647540983608, - "max_document_length": 204, + "min_document_length": 197, + "average_document_length": 591.7131147540983, + "max_document_length": 1750, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 87.12777777777778, + "max_query_length": 204, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tsn_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 358369, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 197, + "average_document_length": 591.7131147540983, + "max_document_length": 1750, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-tsn_Latn": { - "number_of_characters": 79391, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 310464, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 160.68647540983608, - "max_document_length": 204, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 87.12777777777778, + "max_query_length": 204, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, 
"max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tso_Latn-tso_Latn": { - "number_of_characters": 83501, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 360513, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 169.10860655737704, - "max_document_length": 215, + "min_document_length": 197, + "average_document_length": 569.6475409836065, + "max_document_length": 1589, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 91.69444444444444, + "max_query_length": 215, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tso_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 347601, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 197, + "average_document_length": 569.6475409836065, + "max_document_length": 1589, "unique_documents": 488, - "min_query_length": 2, - 
"average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-tso_Latn": { - "number_of_characters": 83501, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 314574, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 169.10860655737704, - "max_document_length": 215, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 91.69444444444444, + "max_query_length": 215, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tur_Latn-tur_Latn": { - "number_of_characters": 65382, "num_samples": 1388, - "num_queries": 900, + 
"number_of_characters": 303058, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 131.9795081967213, - "max_document_length": 158, + "min_document_length": 143, + "average_document_length": 489.0409836065574, + "max_document_length": 1536, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 71.56222222222222, + "max_query_length": 158, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "tur_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 308265, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 143, + "average_document_length": 489.0409836065574, + "max_document_length": 1536, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-tur_Latn": { - "number_of_characters": 65382, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 296455, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 131.9795081967213, - "max_document_length": 158, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 71.56222222222222, + "max_query_length": 158, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ukr_Cyrl-ukr_Cyrl": { - "number_of_characters": 65850, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 303074, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 132.93852459016392, - "max_document_length": 159, + "min_document_length": 136, + "average_document_length": 488.11475409836066, + "max_document_length": 1388, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.08222222222223, + 
"max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ukr_Cyrl-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 307813, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 136, + "average_document_length": 488.11475409836066, + "max_document_length": 1388, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-ukr_Cyrl": { - "number_of_characters": 65850, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 296923, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 132.93852459016392, - "max_document_length": 159, + 
"min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.08222222222223, + "max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "urd_Arab-urd_Arab": { - "number_of_characters": 64450, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293055, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 130.06967213114754, - "max_document_length": 187, + "min_document_length": 133, + "average_document_length": 470.452868852459, + "max_document_length": 1366, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 70.52666666666667, + "max_query_length": 187, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + 
"average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "urd_Arab-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 299194, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 133, + "average_document_length": 470.452868852459, + "max_document_length": 1366, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-urd_Arab": { - "number_of_characters": 64450, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 295523, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 130.06967213114754, - "max_document_length": 187, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 70.52666666666667, + "max_query_length": 187, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, 
"max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "urd_Latn-urd_Latn": { - "number_of_characters": 82039, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 369244, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 166.1127049180328, - "max_document_length": 230, + "min_document_length": 163, + "average_document_length": 590.5348360655738, + "max_document_length": 1697, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 90.07, + "max_query_length": 230, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "urd_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 357794, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 163, + "average_document_length": 590.5348360655738, + "max_document_length": 1697, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 
1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-urd_Latn": { - "number_of_characters": 82039, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 313112, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 166.1127049180328, - "max_document_length": 230, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 90.07, + "max_query_length": 230, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "uzn_Latn-uzn_Latn": { - "number_of_characters": 70828, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 333002, 
"num_documents": 488, - "min_document_length": 11, - "average_document_length": 143.13934426229508, - "max_document_length": 175, + "min_document_length": 170, + "average_document_length": 539.2418032786885, + "max_document_length": 1548, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 77.61333333333333, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "uzn_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 332763, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 170, + "average_document_length": 539.2418032786885, + "max_document_length": 1548, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-uzn_Latn": { - "number_of_characters": 70828, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 301901, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 143.13934426229508, - "max_document_length": 175, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 77.61333333333333, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "vie_Latn-vie_Latn": { - "number_of_characters": 66724, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 309668, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 134.7295081967213, - "max_document_length": 161, + "min_document_length": 150, + "average_document_length": 499.8360655737705, + "max_document_length": 1314, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 73.05333333333333, + "max_query_length": 161, "unique_queries": 900, + 
"none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "vie_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 313533, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 150, + "average_document_length": 499.8360655737705, + "max_document_length": 1314, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-vie_Latn": { - "number_of_characters": 66724, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 297797, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 134.7295081967213, - "max_document_length": 161, + "min_document_length": 144, + 
"average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 73.05333333333333, + "max_query_length": 161, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "war_Latn-war_Latn": { - "number_of_characters": 78444, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 366788, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 158.74590163934425, - "max_document_length": 207, + "min_document_length": 194, + "average_document_length": 592.8688524590164, + "max_document_length": 1675, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 86.07555555555555, + "max_query_length": 207, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null }, "war_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 358933, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 194, + "average_document_length": 592.8688524590164, + "max_document_length": 1675, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-war_Latn": { - "number_of_characters": 78444, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 309517, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 158.74590163934425, - "max_document_length": 207, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 86.07555555555555, + "max_query_length": 207, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - 
"unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "wol_Latn-wol_Latn": { - "number_of_characters": 64521, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 286551, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 130.21516393442624, - "max_document_length": 139, + "min_document_length": 157, + "average_document_length": 456.9795081967213, + "max_document_length": 1299, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 70.60555555555555, + "max_query_length": 139, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "wol_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 292619, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 157, + "average_document_length": 456.9795081967213, + "max_document_length": 1299, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - 
"max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-wol_Latn": { - "number_of_characters": 64521, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 295594, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 130.21516393442624, - "max_document_length": 139, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 70.60555555555555, + "max_query_length": 139, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "xho_Latn-xho_Latn": { - "number_of_characters": 71629, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 317125, "num_documents": 
488, - "min_document_length": 16, - "average_document_length": 144.78073770491804, - "max_document_length": 179, + "min_document_length": 157, + "average_document_length": 505.0655737704918, + "max_document_length": 1534, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 78.50333333333333, + "max_query_length": 179, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "xho_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 316085, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 157, + "average_document_length": 505.0655737704918, + "max_document_length": 1534, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + 
"unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-xho_Latn": { - "number_of_characters": 71629, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 302702, "num_documents": 488, - "min_document_length": 16, - "average_document_length": 144.78073770491804, - "max_document_length": 179, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 16, + "average_query_length": 78.50333333333333, + "max_query_length": 179, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "yor_Latn-yor_Latn": { - "number_of_characters": 62752, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 286022, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 126.59016393442623, - "max_document_length": 143, + "min_document_length": 177, + "average_document_length": 459.5204918032787, + "max_document_length": 1260, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 68.64, + "max_query_length": 143, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 
900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "yor_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293859, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 177, + "average_document_length": 459.5204918032787, + "max_document_length": 1260, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-yor_Latn": { - "number_of_characters": 62752, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 293825, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 126.59016393442623, - "max_document_length": 143, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + 
"max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 68.64, + "max_query_length": 143, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "zho_Hans-zho_Hans": { - "number_of_characters": 20549, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 97536, "num_documents": 488, - "min_document_length": 7, - "average_document_length": 40.10860655737705, - "max_document_length": 64, + "min_document_length": 47, + "average_document_length": 159.76024590163934, + "max_document_length": 459, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 7, + "average_query_length": 21.747777777777777, + "max_query_length": 64, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "zho_Hans-eng_Latn": { - 
"number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 147576, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 47, + "average_document_length": 159.76024590163934, + "max_document_length": 459, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-zho_Hans": { - "number_of_characters": 20549, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 251622, "num_documents": 488, - "min_document_length": 7, - "average_document_length": 40.10860655737705, - "max_document_length": 64, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 7, + "average_query_length": 21.747777777777777, + "max_query_length": 64, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "zho_Hant-zho_Hant": { - "number_of_characters": 19947, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 92060, "num_documents": 488, - "min_document_length": 7, - "average_document_length": 38.875, - "max_document_length": 45, + "min_document_length": 45, + "average_document_length": 149.77254098360655, + "max_document_length": 436, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 7, + "average_query_length": 21.07888888888889, + "max_query_length": 45, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "zho_Hant-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 142702, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 45, + "average_document_length": 149.77254098360655, + "max_document_length": 436, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + 
"average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-zho_Hant": { - "number_of_characters": 19947, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 251020, "num_documents": 488, - "min_document_length": 7, - "average_document_length": 38.875, - "max_document_length": 45, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 7, + "average_query_length": 21.07888888888889, + "max_query_length": 45, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "zsm_Latn-zsm_Latn": { - "number_of_characters": 72008, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 329142, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 145.55737704918033, - 
"max_document_length": 210, + "min_document_length": 173, + "average_document_length": 528.9139344262295, + "max_document_length": 1571, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 78.92444444444445, + "max_query_length": 210, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "zsm_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 327723, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 173, + "average_document_length": 528.9139344262295, + "max_document_length": 1571, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": 
null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-zsm_Latn": { - "number_of_characters": 72008, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 303081, "num_documents": 488, - "min_document_length": 13, - "average_document_length": 145.55737704918033, - "max_document_length": 210, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 13, + "average_query_length": 78.92444444444445, + "max_query_length": 210, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "zul_Latn-zul_Latn": { - "number_of_characters": 69413, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 328527, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 140.23975409836066, - "max_document_length": 171, + "min_document_length": 181, + "average_document_length": 532.9713114754098, + "max_document_length": 1507, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 76.0411111111111, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 
1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "zul_Latn-eng_Latn": { - "number_of_characters": 70589, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 329703, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 142.64959016393442, - "max_document_length": 171, + "min_document_length": 181, + "average_document_length": 532.9713114754098, + "max_document_length": 1507, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 77.34777777777778, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "eng_Latn-zul_Latn": { - "number_of_characters": 69413, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 300486, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 140.23975409836066, - "max_document_length": 171, + "min_document_length": 144, + "average_document_length": 475.51024590163934, + "max_document_length": 1352, "unique_documents": 488, - "min_query_length": 2, - 
"average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 76.0411111111111, + "max_query_length": 171, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "arb_Arab-arb_Latn": { - "number_of_characters": 61298, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 266240, "num_documents": 488, - "min_document_length": 12, - "average_document_length": 123.61065573770492, - "max_document_length": 160, + "min_document_length": 142, + "average_document_length": 421.96311475409834, + "max_document_length": 1275, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 12, + "average_query_length": 67.02444444444444, + "max_query_length": 160, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "arb_Latn-arb_Arab": { - "number_of_characters": 53671, "num_samples": 1388, - "num_queries": 900, + 
"number_of_characters": 323837, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 107.98155737704919, - "max_document_length": 134, + "min_document_length": 142, + "average_document_length": 555.6188524590164, + "max_document_length": 1708, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 58.55, + "max_query_length": 134, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ben_Beng-ben_Latn": { - "number_of_characters": 68285, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 295583, "num_documents": 488, - "min_document_length": 9, - "average_document_length": 137.92827868852459, - "max_document_length": 185, + "min_document_length": 136, + "average_document_length": 467.7745901639344, + "max_document_length": 1281, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 9, + "average_query_length": 74.78777777777778, + "max_query_length": 185, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ben_Latn-ben_Beng": { - "number_of_characters": 63512, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 317708, "num_documents": 488, - "min_document_length": 9, - "average_document_length": 128.14754098360655, - "max_document_length": 175, + "min_document_length": 159, + "average_document_length": 522.8934426229508, + "max_document_length": 1393, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 9, + "average_query_length": 69.48444444444445, + "max_query_length": 175, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hin_Deva-hin_Latn": { - "number_of_characters": 68307, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 298427, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 137.9733606557377, - "max_document_length": 170, + "min_document_length": 151, + "average_document_length": 473.55737704918033, + "max_document_length": 1348, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 74.81222222222222, + "max_query_length": 170, "unique_queries": 900, + 
"none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "hin_Latn-hin_Deva": { - "number_of_characters": 66332, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 329721, "num_documents": 488, - "min_document_length": 14, - "average_document_length": 133.9262295081967, - "max_document_length": 165, + "min_document_length": 165, + "average_document_length": 541.7315573770492, + "max_document_length": 1536, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 14, + "average_query_length": 72.61777777777777, + "max_query_length": 165, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "npi_Deva-npi_Latn": { - "number_of_characters": 65683, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 287703, "num_documents": 488, - "min_document_length": 20, - "average_document_length": 132.59631147540983, - "max_document_length": 154, + "min_document_length": 132, + 
"average_document_length": 456.9590163934426, + "max_document_length": 1385, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 20, + "average_query_length": 71.89666666666666, + "max_query_length": 154, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "npi_Latn-npi_Deva": { - "number_of_characters": 61183, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 312006, "num_documents": 488, - "min_document_length": 18, - "average_document_length": 123.375, - "max_document_length": 154, + "min_document_length": 149, + "average_document_length": 515.9815573770492, + "max_document_length": 1502, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 18, + "average_query_length": 66.89666666666666, + "max_query_length": 154, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null }, "sin_Sinh-sin_Latn": { - "number_of_characters": 85996, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 318610, "num_documents": 488, - "min_document_length": 19, - "average_document_length": 174.22131147540983, - "max_document_length": 224, + "min_document_length": 142, + "average_document_length": 478.66803278688525, + "max_document_length": 1417, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 19, + "average_query_length": 94.46666666666667, + "max_query_length": 224, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "sin_Latn-sin_Sinh": { - "number_of_characters": 63902, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 351231, "num_documents": 488, - "min_document_length": 17, - "average_document_length": 128.94672131147541, - "max_document_length": 159, + "min_document_length": 175, + "average_document_length": 590.7889344262295, + "max_document_length": 1721, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 17, + "average_query_length": 69.91777777777777, + "max_query_length": 159, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - 
"unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "urd_Arab-urd_Latn": { - "number_of_characters": 82039, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 310644, "num_documents": 488, - "min_document_length": 15, - "average_document_length": 166.1127049180328, - "max_document_length": 230, + "min_document_length": 133, + "average_document_length": 470.452868852459, + "max_document_length": 1366, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 2, + "num_queries": 900, + "min_query_length": 15, + "average_query_length": 90.07, + "max_query_length": 230, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "urd_Latn-urd_Arab": { - "number_of_characters": 64450, "num_samples": 1388, - "num_queries": 900, + "number_of_characters": 351655, "num_documents": 488, - "min_document_length": 11, - "average_document_length": 130.06967213114754, - "max_document_length": 187, + "min_document_length": 163, + "average_document_length": 590.5348360655738, + "max_document_length": 1697, "unique_documents": 488, - "min_query_length": 2, - "average_query_length": 1.0844444444444445, - "max_query_length": 
2, + "num_queries": 900, + "min_query_length": 11, + "average_query_length": 70.52666666666667, + "max_query_length": 187, "unique_queries": 900, + "none_queries": 0, + "num_relevant_docs": 900, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 488 + "unique_relevant_docs": 488, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } } diff --git a/mteb/descriptive_stats/Retrieval/BrightRetrieval.json b/mteb/descriptive_stats/Retrieval/BrightRetrieval.json index c07260fc99..66f5029914 100644 --- a/mteb/descriptive_stats/Retrieval/BrightRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/BrightRetrieval.json @@ -1,19 +1,19 @@ { "standard": { - "number_of_characters": 1068198065, "num_samples": 1334550, - "num_queries": 1384, + "number_of_characters": 1068198065, "num_documents": 1333166, - "num_relevant_docs": 8424, - "min_document_length": 12, - "average_document_length": 0.8236686204118617, - "max_document_length": 19341, + "min_document_length": 1, + "average_document_length": 800.42543689233, + "max_document_length": 233623, "unique_documents": 1333166, - "min_query_length": 1, - "average_query_length": 771025.9956647399, - "max_query_length": 233623, + "num_queries": 1384, + "min_query_length": 12, + "average_query_length": 793.4154624277456, + "max_query_length": 19341, "unique_queries": 1384, "none_queries": 0, + "num_relevant_docs": 8424, "min_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 6.086705202312139, "max_relevant_docs_per_query": 85, @@ -23,25 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": 
null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "biology": { - "number_of_characters": 18960905, "num_samples": 57462, - "num_queries": 103, + "number_of_characters": 18960905, "num_documents": 57359, - "num_relevant_docs": 374, - "min_document_length": 89, - "average_document_length": 0.9392248818842728, - "max_document_length": 2195, + "min_document_length": 1, + "average_document_length": 329.6262487142384, + "max_document_length": 31131, "unique_documents": 57359, - "min_query_length": 1, - "average_query_length": 183563.41747572814, - "max_query_length": 31131, + "num_queries": 103, + "min_query_length": 89, + "average_query_length": 523.0388349514564, + "max_query_length": 2195, "unique_queries": 103, "none_queries": 0, + "num_relevant_docs": 374, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 3.6310679611650487, "max_relevant_docs_per_query": 19, @@ -51,25 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "earth_science": { - "number_of_characters": 41046773, "num_samples": 121365, - "num_queries": 116, + "number_of_characters": 41046773, "num_documents": 121249, - "num_relevant_docs": 609, - "min_document_length": 83, - "average_document_length": 0.4561687106697787, - "max_document_length": 1565, + "min_document_length": 2, + "average_document_length": 338.0767099110096, + "max_document_length": 233623, "unique_documents": 121249, - "min_query_length": 2, - "average_query_length": 353374.6810344828, - "max_query_length": 233623, + "num_queries": 116, + "min_query_length": 83, + "average_query_length": 476.8103448275862, + "max_query_length": 1565, "unique_queries": 116, "none_queries": 0, + "num_relevant_docs": 609, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 
5.25, "max_relevant_docs_per_query": 23, @@ -79,25 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "economics": { - "number_of_characters": 19885867, "num_samples": 50323, - "num_queries": 103, + "number_of_characters": 19885867, "num_documents": 50220, - "num_relevant_docs": 823, - "min_document_length": 164, - "average_document_length": 1.5169653524492235, - "max_document_length": 2223, + "min_document_length": 3, + "average_document_length": 394.45808442851455, + "max_document_length": 39672, "unique_documents": 50220, - "min_query_length": 3, - "average_query_length": 192327.03883495147, - "max_query_length": 39672, + "num_queries": 103, + "min_query_length": 164, + "average_query_length": 739.6310679611651, + "max_query_length": 2223, "unique_queries": 103, "none_queries": 0, + "num_relevant_docs": 823, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 7.990291262135922, "max_relevant_docs_per_query": 85, @@ -107,25 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "psychology": { - "number_of_characters": 20378352, "num_samples": 52936, - "num_queries": 101, + "number_of_characters": 20378352, "num_documents": 52835, - "num_relevant_docs": 742, - "min_document_length": 166, - "average_document_length": 1.3251253903662346, - "max_document_length": 2334, + "min_document_length": 3, + "average_document_length": 384.37283997350244, + "max_document_length": 226941, "unique_documents": 52835, - "min_query_length": 3, - "average_query_length": 201072.66336633664, - "max_query_length": 226941, + "num_queries": 101, + "min_query_length": 166, + "average_query_length": 
693.1980198019802, + "max_query_length": 2334, "unique_queries": 101, "none_queries": 0, + "num_relevant_docs": 742, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 7.346534653465347, "max_relevant_docs_per_query": 59, @@ -135,25 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "robotics": { - "number_of_characters": 18256389, "num_samples": 62062, - "num_queries": 101, + "number_of_characters": 18256389, "num_documents": 61961, - "num_relevant_docs": 553, - "min_document_length": 165, - "average_document_length": 3.5527993415212795, - "max_document_length": 19341, + "min_document_length": 3, + "average_document_length": 291.09042784977646, + "max_document_length": 28640, "unique_documents": 61961, - "min_query_length": 3, - "average_query_length": 178576.77227722772, - "max_query_length": 28640, + "num_queries": 101, + "min_query_length": 165, + "average_query_length": 2179.5544554455446, + "max_query_length": 19341, "unique_queries": 101, "none_queries": 0, + "num_relevant_docs": 553, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 5.475247524752476, "max_relevant_docs_per_query": 36, @@ -163,25 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "stackoverflow": { - "number_of_characters": 183787099, "num_samples": 107198, - "num_queries": 117, + "number_of_characters": 183787099, "num_documents": 107081, - "num_relevant_docs": 819, - "min_document_length": 185, - "average_document_length": 1.4127529627104716, - "max_document_length": 12432, + "min_document_length": 1, + "average_document_length": 1714.9244030220113, + "max_document_length": 
4000, "unique_documents": 107081, - "min_query_length": 1, - "average_query_length": 1569536.923076923, - "max_query_length": 4000, + "num_queries": 117, + "min_query_length": 185, + "average_query_length": 1292.982905982906, + "max_query_length": 12432, "unique_queries": 117, "none_queries": 0, + "num_relevant_docs": 819, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 7.0, "max_relevant_docs_per_query": 59, @@ -191,25 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "sustainable_living": { - "number_of_characters": 21000744, "num_samples": 60900, - "num_queries": 108, + "number_of_characters": 21000744, "num_documents": 60792, - "num_relevant_docs": 604, - "min_document_length": 158, - "average_document_length": 1.213103697854981, - "max_document_length": 2843, + "min_document_length": 1, + "average_document_length": 344.2393242531912, + "max_document_length": 158299, "unique_documents": 60792, - "min_query_length": 1, - "average_query_length": 193768.49074074073, - "max_query_length": 158299, + "num_queries": 108, + "min_query_length": 158, + "average_query_length": 682.8425925925926, + "max_query_length": 2843, "unique_queries": 108, "none_queries": 0, + "num_relevant_docs": 604, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 5.592592592592593, "max_relevant_docs_per_query": 59, @@ -219,25 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "pony": { - "number_of_characters": 2094456, "num_samples": 8006, - "num_queries": 112, + "number_of_characters": 2094456, "num_documents": 7894, - "num_relevant_docs": 2519, - "min_document_length": 182, - 
"average_document_length": 5.518748416518875, - "max_document_length": 946, + "min_document_length": 8, + "average_document_length": 259.80377501900176, + "max_document_length": 2583, "unique_documents": 7894, - "min_query_length": 8, - "average_query_length": 18311.526785714286, - "max_query_length": 2583, + "num_queries": 112, + "min_query_length": 182, + "average_query_length": 388.9732142857143, + "max_query_length": 946, "unique_queries": 112, "none_queries": 0, + "num_relevant_docs": 2519, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 22.491071428571427, "max_relevant_docs_per_query": 32, @@ -247,25 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "leetcode": { - "number_of_characters": 438348000, "num_samples": 414074, - "num_queries": 142, + "number_of_characters": 438348000, "num_documents": 413932, - "num_relevant_docs": 262, - "min_document_length": 422, - "average_document_length": 0.5006160432148276, - "max_document_length": 3964, + "min_document_length": 75, + "average_document_length": 1058.4849178125876, + "max_document_length": 103665, "unique_documents": 413932, - "min_query_length": 75, - "average_query_length": 3085498.443661972, - "max_query_length": 103665, + "num_queries": 142, + "min_query_length": 422, + "average_query_length": 1459.3028169014085, + "max_query_length": 3964, "unique_queries": 142, "none_queries": 0, + "num_relevant_docs": 262, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.8450704225352113, "max_relevant_docs_per_query": 5, @@ -275,25 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "aops": { - 
"number_of_characters": 141769723, "num_samples": 188113, - "num_queries": 111, + "number_of_characters": 141769723, "num_documents": 188002, - "num_relevant_docs": 524, - "min_document_length": 85, - "average_document_length": 0.18875863022733802, - "max_document_length": 1167, + "min_document_length": 58, + "average_document_length": 753.8974904522292, + "max_document_length": 7334, "unique_documents": 188002, - "min_query_length": 58, - "average_query_length": 1276885.009009009, - "max_query_length": 7334, + "num_queries": 111, + "min_query_length": 85, + "average_query_length": 319.7027027027027, + "max_query_length": 1167, "unique_queries": 111, "none_queries": 0, + "num_relevant_docs": 524, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 4.7207207207207205, "max_relevant_docs_per_query": 8, @@ -303,25 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "theoremqa_theorems": { - "number_of_characters": 20852144, "num_samples": 23904, - "num_queries": 65, + "number_of_characters": 20852144, "num_documents": 23839, - "num_relevant_docs": 126, - "min_document_length": 13, - "average_document_length": 1.1702672091950166, - "max_document_length": 1255, + "min_document_length": 74, + "average_document_length": 873.5368933260623, + "max_document_length": 19106, "unique_documents": 23839, - "min_query_length": 74, - "average_query_length": 320373.0153846154, - "max_query_length": 19106, + "num_queries": 65, + "min_query_length": 13, + "average_query_length": 429.2, + "max_query_length": 1255, "unique_queries": 65, "none_queries": 0, + "num_relevant_docs": 126, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9384615384615385, "max_relevant_docs_per_query": 6, @@ -331,25 +342,26 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "theoremqa_questions": { - "number_of_characters": 141817613, "num_samples": 188207, - "num_queries": 205, + "number_of_characters": 141817613, "num_documents": 188002, - "num_relevant_docs": 469, - "min_document_length": 12, - "average_document_length": 0.4434899628727354, - "max_document_length": 1255, + "min_document_length": 58, + "average_document_length": 753.8974904522292, + "max_document_length": 7334, "unique_documents": 188002, - "min_query_length": 58, - "average_query_length": 691386.5170731707, - "max_query_length": 7334, + "num_queries": 205, + "min_query_length": 12, + "average_query_length": 406.7170731707317, + "max_query_length": 1255, "unique_queries": 205, "none_queries": 0, + "num_relevant_docs": 469, "min_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 2.2878048780487803, "max_relevant_docs_per_query": 7, @@ -359,6 +371,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null @@ -366,20 +379,20 @@ } }, "long": { - "number_of_characters": 327567114, "num_samples": 6511, - "num_queries": 861, + "number_of_characters": 327567114, "num_documents": 5650, - "num_relevant_docs": 1679, - "min_document_length": 83, - "average_document_length": 131.69982300884956, - "max_document_length": 19341, + "min_document_length": 25, + "average_document_length": 57844.780530973454, + "max_document_length": 9182740, "unique_documents": 5650, - "min_query_length": 25, - "average_query_length": 379585.3774680604, - "max_query_length": 9182740, + "num_queries": 861, + "min_query_length": 83, + "average_query_length": 864.2322880371661, + "max_query_length": 19341, "unique_queries": 861, 
"none_queries": 0, + "num_relevant_docs": 1679, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9500580720092915, "max_relevant_docs_per_query": 12, @@ -389,25 +402,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "biology": { - "number_of_characters": 19398768, "num_samples": 627, - "num_queries": 103, + "number_of_characters": 19398768, "num_documents": 524, - "num_relevant_docs": 134, - "min_document_length": 89, - "average_document_length": 102.81106870229007, - "max_document_length": 2195, + "min_document_length": 142, + "average_document_length": 36917.73854961832, + "max_document_length": 1324203, "unique_documents": 524, - "min_query_length": 142, - "average_query_length": 187814.5145631068, - "max_query_length": 1324203, + "num_queries": 103, + "min_query_length": 89, + "average_query_length": 523.0388349514564, + "max_query_length": 2195, "unique_queries": 103, "none_queries": 0, + "num_relevant_docs": 134, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.3009708737864079, "max_relevant_docs_per_query": 4, @@ -417,25 +431,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "earth_science": { - "number_of_characters": 41705609, "num_samples": 717, - "num_queries": 116, + "number_of_characters": 41705609, "num_documents": 601, - "num_relevant_docs": 187, - "min_document_length": 83, - "average_document_length": 92.02995008319468, - "max_document_length": 1565, + "min_document_length": 33, + "average_document_length": 69301.6622296173, + "max_document_length": 2627263, "unique_documents": 601, - "min_query_length": 
33, - "average_query_length": 359054.3017241379, - "max_query_length": 2627263, + "num_queries": 116, + "min_query_length": 83, + "average_query_length": 476.8103448275862, + "max_query_length": 1565, "unique_queries": 116, "none_queries": 0, + "num_relevant_docs": 187, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6120689655172413, "max_relevant_docs_per_query": 4, @@ -445,25 +460,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "economics": { - "number_of_characters": 19994187, "num_samples": 619, - "num_queries": 103, + "number_of_characters": 19994187, "num_documents": 516, - "num_relevant_docs": 109, - "min_document_length": 164, - "average_document_length": 147.63953488372093, - "max_document_length": 2223, + "min_document_length": 45, + "average_document_length": 38600.78488372093, + "max_document_length": 429509, "unique_documents": 516, - "min_query_length": 45, - "average_query_length": 193378.68932038834, - "max_query_length": 429509, + "num_queries": 103, + "min_query_length": 164, + "average_query_length": 739.6310679611651, + "max_query_length": 2223, "unique_queries": 103, "none_queries": 0, + "num_relevant_docs": 109, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.058252427184466, "max_relevant_docs_per_query": 3, @@ -473,25 +489,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "psychology": { - "number_of_characters": 20490305, "num_samples": 613, - "num_queries": 101, + "number_of_characters": 20490305, "num_documents": 512, - "num_relevant_docs": 116, - "min_document_length": 166, - "average_document_length": 136.744140625, - 
"max_document_length": 2334, + "min_document_length": 25, + "average_document_length": 39883.3828125, + "max_document_length": 669577, "unique_documents": 512, - "min_query_length": 25, - "average_query_length": 202181.10891089108, - "max_query_length": 669577, + "num_queries": 101, + "min_query_length": 166, + "average_query_length": 693.1980198019802, + "max_query_length": 2334, "unique_queries": 101, "none_queries": 0, + "num_relevant_docs": 116, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1485148514851484, "max_relevant_docs_per_query": 5, @@ -501,25 +518,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "robotics": { - "number_of_characters": 18387998, "num_samples": 609, - "num_queries": 101, + "number_of_characters": 18387998, "num_documents": 508, - "num_relevant_docs": 106, - "min_document_length": 165, - "average_document_length": 433.3366141732283, - "max_document_length": 19341, + "min_document_length": 120, + "average_document_length": 35763.509842519685, + "max_document_length": 3589950, "unique_documents": 508, - "min_query_length": 120, - "average_query_length": 179879.8316831683, - "max_query_length": 3589950, + "num_queries": 101, + "min_query_length": 165, + "average_query_length": 2179.5544554455446, + "max_query_length": 19341, "unique_queries": 101, "none_queries": 0, + "num_relevant_docs": 106, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0495049504950495, "max_relevant_docs_per_query": 2, @@ -529,25 +547,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "stackoverflow": { - "number_of_characters": 184328188, "num_samples": 
1975, - "num_queries": 117, + "number_of_characters": 184328188, "num_documents": 1858, - "num_relevant_docs": 129, - "min_document_length": 185, - "average_document_length": 81.42034445640473, - "max_document_length": 12432, + "min_document_length": 43, + "average_document_length": 99126.43110871906, + "max_document_length": 9182740, "unique_documents": 1858, - "min_query_length": 43, - "average_query_length": 1574161.6153846155, - "max_query_length": 9182740, + "num_queries": 117, + "min_query_length": 185, + "average_query_length": 1292.982905982906, + "max_query_length": 12432, "unique_queries": 117, "none_queries": 0, + "num_relevant_docs": 129, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1025641025641026, "max_relevant_docs_per_query": 2, @@ -557,25 +576,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "sustainable_living": { - "number_of_characters": 21155433, "num_samples": 662, - "num_queries": 108, + "number_of_characters": 21155433, "num_documents": 554, - "num_relevant_docs": 129, - "min_document_length": 158, - "average_document_length": 133.1173285198556, - "max_document_length": 2843, + "min_document_length": 32, + "average_document_length": 38053.584837545124, + "max_document_length": 5732347, "unique_documents": 554, - "min_query_length": 32, - "average_query_length": 195200.7962962963, - "max_query_length": 5732347, + "num_queries": 108, + "min_query_length": 158, + "average_query_length": 682.8425925925926, + "max_query_length": 2843, "unique_queries": 108, "none_queries": 0, + "num_relevant_docs": 129, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1944444444444444, "max_relevant_docs_per_query": 5, @@ -585,25 +605,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": 
null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "pony": { - "number_of_characters": 2106626, "num_samples": 689, - "num_queries": 112, + "number_of_characters": 2106626, "num_documents": 577, - "num_relevant_docs": 769, - "min_document_length": 182, - "average_document_length": 75.50259965337955, - "max_document_length": 946, + "min_document_length": 54, + "average_document_length": 3575.4956672443673, + "max_document_length": 108909, "unique_documents": 577, - "min_query_length": 54, - "average_query_length": 18420.1875, - "max_query_length": 108909, + "num_queries": 112, + "min_query_length": 182, + "average_query_length": 388.9732142857143, + "max_query_length": 946, "unique_queries": 112, "none_queries": 0, + "num_relevant_docs": 769, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 6.866071428571429, "max_relevant_docs_per_query": 12, @@ -613,6 +634,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json b/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json index 3d27f624b9..d56fe75d9f 100644 --- a/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json @@ -1,129 +1,206 @@ { "test": { - "number_of_characters": 36843313, "num_samples": 1056326, - "num_queries": 52561, + "number_of_characters": 218197874, "num_documents": 1003765, - "min_document_length": 54, - "average_document_length": 34.70511822986456, - "max_document_length": 334374, + "min_document_length": 3, + "average_document_length": 182.67432217700357, + "max_document_length": 47187, "unique_documents": 1003765, - "min_query_length": 2, - 
"average_query_length": 38.19428854093339, - "max_query_length": 2, + "num_queries": 52561, + "min_query_length": 54, + "average_query_length": 662.7686497593272, + "max_query_length": 334374, "unique_queries": 52561, + "none_queries": 0, + "num_relevant_docs": 52561, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, "unique_relevant_docs": 52561, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 14574651, "num_samples": 295228, - "num_queries": 14918, + "number_of_characters": 86601209, "num_documents": 280310, - "min_document_length": 95, - "average_document_length": 49.994759373550714, - "max_document_length": 14008, + "min_document_length": 3, + "average_document_length": 258.9532232171524, + "max_document_length": 17283, "unique_documents": 280310, - "min_query_length": 2, - "average_query_length": 37.5801045716584, - "max_query_length": 2, + "num_queries": 14918, + "min_query_length": 95, + "average_query_length": 939.4041426464673, + "max_query_length": 14008, "unique_queries": 14918, + "none_queries": 0, + "num_relevant_docs": 14918, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 14918 + "unique_relevant_docs": 14918, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "javascript": { - "number_of_characters": 2587540, "num_samples": 68145, - 
"num_queries": 3291, + "number_of_characters": 13797033, "num_documents": 64854, - "min_document_length": 87, - "average_document_length": 37.89792456903198, - "max_document_length": 334374, + "min_document_length": 4, + "average_document_length": 174.84196811299225, + "max_document_length": 11846, "unique_documents": 64854, - "min_query_length": 2, - "average_query_length": 39.412944393801276, - "max_query_length": 2, + "num_queries": 3291, + "min_query_length": 87, + "average_query_length": 746.8343968398663, + "max_query_length": 334374, "unique_queries": 3291, + "none_queries": 0, + "num_relevant_docs": 3291, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 3291 + "unique_relevant_docs": 3291, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "go": { - "number_of_characters": 3641108, "num_samples": 190562, - "num_queries": 8122, + "number_of_characters": 21010899, "num_documents": 182440, - "min_document_length": 54, - "average_document_length": 17.957838193378645, - "max_document_length": 5280, + "min_document_length": 6, + "average_document_length": 97.20823832492874, + "max_document_length": 2610, "unique_documents": 182440, - "min_query_length": 2, - "average_query_length": 44.9248953459739, - "max_query_length": 2, + "num_queries": 8122, + "min_query_length": 54, + "average_query_length": 403.3770007387343, + "max_query_length": 5280, "unique_queries": 8122, + "none_queries": 0, + "num_relevant_docs": 8122, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 8122 + "unique_relevant_docs": 8122, + "num_instructions": null, + 
"min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ruby": { - "number_of_characters": 629446, "num_samples": 28831, - "num_queries": 1261, + "number_of_characters": 6898839, "num_documents": 27570, - "min_document_length": 83, - "average_document_length": 20.830830612985128, - "max_document_length": 3992, + "min_document_length": 6, + "average_document_length": 229.39909321726515, + "max_document_length": 15982, "unique_documents": 27570, - "min_query_length": 2, - "average_query_length": 43.72720063441713, - "max_query_length": 2, + "num_queries": 1261, + "min_query_length": 83, + "average_query_length": 455.43695479777955, + "max_query_length": 3992, "unique_queries": 1261, + "none_queries": 0, + "num_relevant_docs": 1261, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1261 + "unique_relevant_docs": 1261, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "java": { - "number_of_characters": 6791137, "num_samples": 191821, - "num_queries": 10955, + "number_of_characters": 38384820, "num_documents": 180866, - "min_document_length": 77, - "average_document_length": 35.54789180940586, - "max_document_length": 7615, + "min_document_length": 3, + "average_document_length": 176.680055953026, + "max_document_length": 15958, "unique_documents": 180866, - "min_query_length": 2, - "average_query_length": 33.019808306709265, - "max_query_length": 2, + "num_queries": 10955, + "min_query_length": 77, + 
"average_query_length": 586.8922866271109, + "max_query_length": 7615, "unique_queries": 10955, + "none_queries": 0, + "num_relevant_docs": 10955, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 10955 + "unique_relevant_docs": 10955, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "php": { - "number_of_characters": 8619431, "num_samples": 281739, - "num_queries": 14014, + "number_of_characters": 51505074, "num_documents": 267725, - "min_document_length": 94, - "average_document_length": 30.195091978709495, - "max_document_length": 4904, + "min_document_length": 3, + "average_document_length": 162.1854253431693, + "max_document_length": 47187, "unique_documents": 267725, - "min_query_length": 2, - "average_query_length": 38.20822035107749, - "max_query_length": 2, + "num_queries": 14014, + "min_query_length": 94, + "average_query_length": 576.8503639217925, + "max_query_length": 4904, "unique_queries": 14014, + "none_queries": 0, + "num_relevant_docs": 14014, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 14014 + "unique_relevant_docs": 14014, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } } diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackAndroidRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackAndroidRetrieval.json index 5295fd2e3a..6227d4cc62 100644 --- 
a/mteb/descriptive_stats/Retrieval/CQADupstackAndroidRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackAndroidRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 13713141, "num_samples": 23697, - "num_queries": 699, + "number_of_characters": 13713141, "num_documents": 22998, - "min_document_length": 16, - "average_document_length": 1.57339768675537, - "max_document_length": 127, + "min_document_length": 57, + "average_document_length": 594.701974084703, + "max_document_length": 27831, "unique_documents": 22998, - "min_query_length": 57, - "average_query_length": 19566.460658082975, - "max_query_length": 27831, + "num_queries": 699, + "min_query_length": 16, + "average_query_length": 51.76680972818312, + "max_query_length": 127, "unique_queries": 699, + "none_queries": 0, + "num_relevant_docs": 1696, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.4263233190271816, "max_relevant_docs_per_query": 262, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackEnglishRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackEnglishRetrieval.json index fd788671ee..59e0a79809 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackEnglishRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackEnglishRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 19521569, "num_samples": 41791, - "num_queries": 1570, + "number_of_characters": 19521569, "num_documents": 40221, - "min_document_length": 15, - "average_document_length": 1.886526938663882, - "max_document_length": 149, + "min_document_length": 41, + "average_document_length": 483.4710971880361, + "max_document_length": 6511, "unique_documents": 40221, - "min_query_length": 41, - 
"average_query_length": 12385.790445859873, - "max_query_length": 6511, + "num_queries": 1570, + "min_query_length": 15, + "average_query_length": 48.32993630573248, + "max_query_length": 149, "unique_queries": 1570, + "none_queries": 0, + "num_relevant_docs": 3765, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.3980891719745223, "max_relevant_docs_per_query": 79, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackGamingRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackGamingRetrieval.json index 607914d9b9..1f4d104875 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackGamingRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackGamingRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 22263573, "num_samples": 46896, - "num_queries": 1595, + "number_of_characters": 22263573, "num_documents": 45301, - "min_document_length": 15, - "average_document_length": 1.7172247853248273, - "max_document_length": 149, + "min_document_length": 46, + "average_document_length": 489.74152888457206, + "max_document_length": 28835, "unique_documents": 45301, - "min_query_length": 46, - "average_query_length": 13909.580564263322, - "max_query_length": 28835, + "num_queries": 1595, + "min_query_length": 15, + "average_query_length": 48.772413793103446, + "max_query_length": 149, "unique_queries": 1595, + "none_queries": 0, + "num_relevant_docs": 2263, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.418808777429467, "max_relevant_docs_per_query": 30, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, 
"average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackGisRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackGisRetrieval.json index 9cd07a4019..d95156fae0 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackGisRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackGisRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 38178794, "num_samples": 38522, - "num_queries": 885, + "number_of_characters": 38178794, "num_documents": 37637, - "min_document_length": 15, - "average_document_length": 1.227435767994261, - "max_document_length": 140, + "min_document_length": 52, + "average_document_length": 1013.167813587693, + "max_document_length": 28938, "unique_documents": 37637, - "min_query_length": 52, - "average_query_length": 43087.6802259887, - "max_query_length": 28938, + "num_queries": 885, + "min_query_length": 15, + "average_query_length": 52.2, + "max_query_length": 140, "unique_queries": 885, + "none_queries": 0, + "num_relevant_docs": 1114, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.2587570621468926, "max_relevant_docs_per_query": 22, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackMathematicaRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackMathematicaRetrieval.json index 9c821c8621..3c842c786c 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackMathematicaRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackMathematicaRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 19325188, "num_samples": 17509, - "num_queries": 804, + "number_of_characters": 19325188, "num_documents": 16705, - "min_document_length": 15, - 
"average_document_length": 2.3537862915294823, - "max_document_length": 137, + "min_document_length": 75, + "average_document_length": 1154.4967375037413, + "max_document_length": 28907, "unique_documents": 16705, - "min_query_length": 75, - "average_query_length": 23987.398009950248, - "max_query_length": 28907, + "num_queries": 804, + "min_query_length": 15, + "average_query_length": 48.90547263681592, + "max_query_length": 137, "unique_queries": 804, + "none_queries": 0, + "num_relevant_docs": 1358, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6890547263681592, "max_relevant_docs_per_query": 56, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackPhysicsRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackPhysicsRetrieval.json index 0c7464758d..6bc407e853 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackPhysicsRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackPhysicsRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 31461064, "num_samples": 39355, - "num_queries": 1039, + "number_of_characters": 31461064, "num_documents": 38316, - "min_document_length": 15, - "average_document_length": 1.4470717193861573, - "max_document_length": 149, + "min_document_length": 60, + "average_document_length": 819.6476145735463, + "max_document_length": 20177, "unique_documents": 38316, - "min_query_length": 60, - "average_query_length": 30226.773820981714, - "max_query_length": 20177, + "num_queries": 1039, + "min_query_length": 15, + "average_query_length": 53.36477382098171, + "max_query_length": 149, "unique_queries": 1039, + "none_queries": 0, + "num_relevant_docs": 1933, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 
1.8604427333974976, "max_relevant_docs_per_query": 72, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackProgrammersRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackProgrammersRetrieval.json index 9be488355e..f9ebfb70bc 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackProgrammersRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackProgrammersRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 34048829, "num_samples": 33052, - "num_queries": 876, + "number_of_characters": 34048829, "num_documents": 32176, - "min_document_length": 15, - "average_document_length": 1.5023930880159124, - "max_document_length": 149, + "min_document_length": 61, + "average_document_length": 1056.7033814022875, + "max_document_length": 21955, "unique_documents": 32176, - "min_query_length": 61, - "average_query_length": 38813.34246575343, - "max_query_length": 21955, + "num_queries": 876, + "min_query_length": 15, + "average_query_length": 55.1837899543379, + "max_query_length": 149, "unique_queries": 876, + "none_queries": 0, + "num_relevant_docs": 1675, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9121004566210045, "max_relevant_docs_per_query": 149, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackStatsRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackStatsRetrieval.json index c95f3f7c46..da61925219 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackStatsRetrieval.json +++ 
b/mteb/descriptive_stats/Retrieval/CQADupstackStatsRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 44679836, "num_samples": 42921, - "num_queries": 652, + "number_of_characters": 44679836, "num_documents": 42269, - "min_document_length": 15, - "average_document_length": 0.8686981002626037, - "max_document_length": 138, + "min_document_length": 78, + "average_document_length": 1056.1668598736662, + "max_document_length": 43874, "unique_documents": 42269, - "min_query_length": 78, - "average_query_length": 68471.03834355828, - "max_query_length": 43874, + "num_queries": 652, + "min_query_length": 15, + "average_query_length": 56.31748466257669, + "max_query_length": 138, "unique_queries": 652, + "none_queries": 0, + "num_relevant_docs": 913, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.4003067484662577, "max_relevant_docs_per_query": 18, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackTexRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackTexRetrieval.json index 6f0c7b206b..2346a00bca 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackTexRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackTexRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 88645392, "num_samples": 71090, - "num_queries": 2906, + "number_of_characters": 88645392, "num_documents": 68184, - "min_document_length": 15, - "average_document_length": 2.0003813211310573, - "max_document_length": 133, + "min_document_length": 61, + "average_document_length": 1298.09043177285, + "max_document_length": 31204, "unique_documents": 68184, - "min_query_length": 61, - "average_query_length": 30457.328974535445, - "max_query_length": 31204, + "num_queries": 2906, + 
"min_query_length": 15, + "average_query_length": 46.935306262904334, + "max_query_length": 133, "unique_queries": 2906, + "none_queries": 0, + "num_relevant_docs": 5154, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.7735719201651754, "max_relevant_docs_per_query": 146, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackUnixRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackUnixRetrieval.json index 2169ef7f37..a0d3183fca 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackUnixRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackUnixRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 47711333, "num_samples": 48454, - "num_queries": 1072, + "number_of_characters": 47711333, "num_documents": 47382, - "min_document_length": 15, - "average_document_length": 1.1385547254231565, - "max_document_length": 124, + "min_document_length": 56, + "average_document_length": 1005.8120383267908, + "max_document_length": 32623, "unique_documents": 47382, - "min_query_length": 56, - "average_query_length": 44456.51679104478, - "max_query_length": 32623, + "num_queries": 1072, + "min_query_length": 15, + "average_query_length": 50.32369402985075, + "max_query_length": 124, "unique_queries": 1072, + "none_queries": 0, + "num_relevant_docs": 1693, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.5792910447761195, "max_relevant_docs_per_query": 22, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git 
a/mteb/descriptive_stats/Retrieval/CQADupstackWebmastersRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackWebmastersRetrieval.json index 84cd5016ea..f37c53d0a6 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackWebmastersRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackWebmastersRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 12355347, "num_samples": 17911, - "num_queries": 506, + "number_of_characters": 12355347, "num_documents": 17405, - "min_document_length": 15, - "average_document_length": 1.5098534903763285, - "max_document_length": 135, + "min_document_length": 49, + "average_document_length": 708.3635736857225, + "max_document_length": 24968, "unique_documents": 17405, - "min_query_length": 49, - "average_query_length": 24365.747035573124, - "max_query_length": 24968, + "num_queries": 506, + "min_query_length": 15, + "average_query_length": 51.93478260869565, + "max_query_length": 135, "unique_queries": 506, + "none_queries": 0, + "num_relevant_docs": 1395, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.7569169960474307, "max_relevant_docs_per_query": 207, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CQADupstackWordpressRetrieval.json b/mteb/descriptive_stats/Retrieval/CQADupstackWordpressRetrieval.json index e4fe2a3fc6..84e12c852f 100644 --- a/mteb/descriptive_stats/Retrieval/CQADupstackWordpressRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CQADupstackWordpressRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 54647154, "num_samples": 49146, - "num_queries": 541, + "number_of_characters": 54647154, "num_documents": 48605, - "min_document_length": 15, - "average_document_length": 0.5423516099166752, - 
"max_document_length": 121, + "min_document_length": 65, + "average_document_length": 1123.7690155333814, + "max_document_length": 32392, "unique_documents": 48605, - "min_query_length": 65, - "average_query_length": 100962.64879852126, - "max_query_length": 32392, + "num_queries": 541, + "min_query_length": 15, + "average_query_length": 48.7264325323475, + "max_query_length": 121, "unique_queries": 541, + "none_queries": 0, + "num_relevant_docs": 744, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.3752310536044363, "max_relevant_docs_per_query": 62, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/ClimateFEVER.json b/mteb/descriptive_stats/Retrieval/ClimateFEVER.json index 0b68ede31f..5bfa6d9a5e 100644 --- a/mteb/descriptive_stats/Retrieval/ClimateFEVER.json +++ b/mteb/descriptive_stats/Retrieval/ClimateFEVER.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 2921043175, "num_samples": 5418128, - "num_queries": 1535, + "number_of_characters": 2921043175, "num_documents": 5416593, - "min_document_length": 26, - "average_document_length": 0.03496995251443112, - "max_document_length": 406, + "min_document_length": 2, + "average_document_length": 539.241873443325, + "max_document_length": 374597, "unique_documents": 5416593, - "min_query_length": 2, - "average_query_length": 1902836.3237785017, - "max_query_length": 374597, + "num_queries": 1535, + "min_query_length": 26, + "average_query_length": 123.39934853420195, + "max_query_length": 406, "unique_queries": 1535, + "none_queries": 0, + "num_relevant_docs": 4681, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 3.0495114006514656, "max_relevant_docs_per_query": 5, @@ -21,6 +23,7 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.json b/mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.json index 5a74183b2a..9ef2915e11 100644 --- a/mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 59222302, "num_samples": 48416, - "num_queries": 1000, + "number_of_characters": 59222302, "num_documents": 47416, - "min_document_length": 29, - "average_document_length": 2.570419267757719, - "max_document_length": 406, + "min_document_length": 2, + "average_document_length": 1246.4236333727013, + "max_document_length": 36320, "unique_documents": 47416, - "min_query_length": 2, - "average_query_length": 59100.423, - "max_query_length": 36320, + "num_queries": 1000, + "min_query_length": 29, + "average_query_length": 121.879, + "max_query_length": 406, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 3048, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 3.048, "max_relevant_docs_per_query": 5, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CmedqaRetrieval.json b/mteb/descriptive_stats/Retrieval/CmedqaRetrieval.json index 3d6aef70b4..72e052f10d 100644 --- a/mteb/descriptive_stats/Retrieval/CmedqaRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CmedqaRetrieval.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 30971243, "num_samples": 104000, - "num_queries": 3999, + "number_of_characters": 30971243, "num_documents": 
100001, - "min_document_length": 11, - "average_document_length": 1.938310616893831, - "max_document_length": 153, + "min_document_length": 1, + "average_document_length": 307.7710222897771, + "max_document_length": 60975, "unique_documents": 100001, - "min_query_length": 1, - "average_query_length": 7696.276569142286, - "max_query_length": 60975, + "num_queries": 3999, + "min_query_length": 11, + "average_query_length": 48.470367591897976, + "max_query_length": 153, "unique_queries": 3999, + "none_queries": 0, + "num_relevant_docs": 7449, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.86271567891973, "max_relevant_docs_per_query": 19, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json index 6d73096d42..4a6be04878 100644 --- a/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json @@ -1,255 +1,409 @@ { "train": { - "number_of_characters": 935841, "num_samples": 26000, - "num_queries": 13000, + "number_of_characters": 8103527, "num_documents": 13000, - "min_document_length": 18, - "average_document_length": 70.98776923076923, - "max_document_length": 2532, + "min_document_length": 81, + "average_document_length": 552.3604615384615, + "max_document_length": 2538, "unique_documents": 13000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 13000, + "min_query_length": 18, + "average_query_length": 70.98776923076923, + "max_query_length": 2532, "unique_queries": 13000, + "none_queries": 0, + "num_relevant_docs": 13000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, 
"max_relevant_docs_per_query": 1, "unique_relevant_docs": 13000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 70519, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 667111, "num_documents": 1000, - "min_document_length": 21, - "average_document_length": 69.519, - "max_document_length": 1811, + "min_document_length": 118, + "average_document_length": 597.592, + "max_document_length": 2518, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 21, + "average_query_length": 69.519, + "max_query_length": 1811, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "javascript": { - "number_of_characters": 57880, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 639434, "num_documents": 1000, - "min_document_length": 18, - "average_document_length": 56.88, - "max_document_length": 601, + "min_document_length": 95, + "average_document_length": 582.554, + "max_document_length": 2538, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + 
"min_query_length": 18, + "average_query_length": 56.88, + "max_query_length": 601, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "typescript": { - "number_of_characters": 61092, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 640969, "num_documents": 1000, - "min_document_length": 19, - "average_document_length": 60.092, - "max_document_length": 659, + "min_document_length": 134, + "average_document_length": 580.877, + "max_document_length": 2407, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 19, + "average_query_length": 60.092, + "max_query_length": 659, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "go": { - "number_of_characters": 71797, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 619295, "num_documents": 1000, - "min_document_length": 19, - "average_document_length": 70.797, - "max_document_length": 1529, + 
"min_document_length": 81, + "average_document_length": 548.498, + "max_document_length": 2192, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 19, + "average_query_length": 70.797, + "max_query_length": 1529, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ruby": { - "number_of_characters": 67900, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 585795, "num_documents": 1000, - "min_document_length": 20, - "average_document_length": 66.9, - "max_document_length": 751, + "min_document_length": 100, + "average_document_length": 518.895, + "max_document_length": 2241, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 20, + "average_query_length": 66.9, + "max_query_length": 751, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "java": { - 
"number_of_characters": 63984, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 683316, "num_documents": 1000, - "min_document_length": 23, - "average_document_length": 62.984, - "max_document_length": 807, + "min_document_length": 103, + "average_document_length": 620.332, + "max_document_length": 2366, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 23, + "average_query_length": 62.984, + "max_query_length": 807, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "php": { - "number_of_characters": 62927, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 607379, "num_documents": 1000, - "min_document_length": 21, - "average_document_length": 61.927, - "max_document_length": 766, + "min_document_length": 92, + "average_document_length": 545.452, + "max_document_length": 2316, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 21, + "average_query_length": 61.927, + "max_query_length": 766, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + 
"max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "c": { - "number_of_characters": 98588, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 573456, "num_documents": 1000, - "min_document_length": 20, - "average_document_length": 97.588, - "max_document_length": 1672, + "min_document_length": 85, + "average_document_length": 475.868, + "max_document_length": 2314, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 20, + "average_query_length": 97.588, + "max_query_length": 1672, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "c++": { - "number_of_characters": 115480, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 658926, "num_documents": 1000, - "min_document_length": 22, - "average_document_length": 114.48, - "max_document_length": 1856, + "min_document_length": 97, + "average_document_length": 544.446, + "max_document_length": 2235, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 22, + "average_query_length": 114.48, + "max_query_length": 1856, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "rust": { - "number_of_characters": 68503, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 677051, "num_documents": 1000, - "min_document_length": 19, - "average_document_length": 67.503, - "max_document_length": 2532, + "min_document_length": 105, + "average_document_length": 609.548, + "max_document_length": 2525, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 19, + "average_query_length": 67.503, + "max_query_length": 2532, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "swift": { - "number_of_characters": 58279, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 631899, "num_documents": 1000, - "min_document_length": 19, - "average_document_length": 57.279, - "max_document_length": 727, + "min_document_length": 121, + "average_document_length": 574.62, + "max_document_length": 2139, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + 
"num_queries": 1000, + "min_query_length": 19, + "average_query_length": 57.279, + "max_query_length": 727, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "scala": { - "number_of_characters": 65833, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 560318, "num_documents": 1000, - "min_document_length": 22, - "average_document_length": 64.833, - "max_document_length": 685, + "min_document_length": 118, + "average_document_length": 495.485, + "max_document_length": 2129, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 22, + "average_query_length": 64.833, + "max_query_length": 685, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "shell": { - "number_of_characters": 73059, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 558578, "num_documents": 1000, - "min_document_length": 18, - "average_document_length": 72.059, - 
"max_document_length": 813, + "min_document_length": 103, + "average_document_length": 486.519, + "max_document_length": 2346, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 18, + "average_query_length": 72.059, + "max_query_length": 813, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json b/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json index 1be18319cd..522f13f549 100644 --- a/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json +++ b/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json @@ -1,20 +1,31 @@ { "test": { - "number_of_characters": 156266302, "num_samples": 79660, - "num_queries": 13277, + "number_of_characters": 156266302, "num_documents": 66383, - "min_document_length": 127, - "average_document_length": 885.131117906693, - "max_document_length": 32432, + "min_document_length": 2, + "average_document_length": 1468.879728243677, + "max_document_length": 9403, "unique_documents": 66383, - "min_query_length": 2, - "average_query_length": 7344.177374406869, - "max_query_length": 9403, + "num_queries": 13277, + "min_query_length": 127, + "average_query_length": 4425.522256533855, + "max_query_length": 32432, "unique_queries": 13277, + "none_queries": 0, + "num_relevant_docs": 13277, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - 
"unique_relevant_docs": 13277 + "unique_relevant_docs": 13277, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json b/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json index 4511605dd5..2ac08f6921 100644 --- a/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json +++ b/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json @@ -1,20 +1,31 @@ { "test": { - "number_of_characters": 260957682, "num_samples": 187832, - "num_queries": 31306, + "number_of_characters": 260957682, "num_documents": 156526, - "min_document_length": 26, - "average_document_length": 144.85253568097312, - "max_document_length": 13851, + "min_document_length": 1, + "average_document_length": 1522.3317148588733, + "max_document_length": 11354, "unique_documents": 156526, - "min_query_length": 1, - "average_query_length": 7611.464064396601, - "max_query_length": 11354, + "num_queries": 31306, + "min_query_length": 26, + "average_query_length": 724.2441704465598, + "max_query_length": 13851, "unique_queries": 31306, + "none_queries": 0, + "num_relevant_docs": 31306, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 31306 + "unique_relevant_docs": 31306, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json 
b/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json index a817119b43..d0a2a468eb 100644 --- a/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json @@ -1,129 +1,206 @@ { "test": { - "number_of_characters": 22407915, "num_samples": 1058035, - "num_queries": 52561, + "number_of_characters": 288397099, "num_documents": 1005474, - "min_document_length": 23, - "average_document_length": 20.28592186371801, - "max_document_length": 214210, + "min_document_length": 17, + "average_document_length": 266.5410860947175, + "max_document_length": 139981, "unique_documents": 1005474, - "min_query_length": 2, - "average_query_length": 38.259317745096176, - "max_query_length": 2, + "num_queries": 52561, + "min_query_length": 23, + "average_query_length": 388.06276516809044, + "max_query_length": 214210, "unique_queries": 52561, + "none_queries": 0, + "num_relevant_docs": 52561, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, "unique_relevant_docs": 52561, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 8792958, "num_samples": 295570, - "num_queries": 14918, + "number_of_characters": 111456233, "num_documents": 280652, - "min_document_length": 38, - "average_document_length": 29.330466200133973, - "max_document_length": 8326, + "min_document_length": 27, + "average_document_length": 367.8027557259524, + "max_document_length": 10117, "unique_documents": 280652, - "min_query_length": 2, - "average_query_length": 37.62595522187961, - "max_query_length": 2, + "num_queries": 14918, + "min_query_length": 38, + 
"average_query_length": 551.7934039415471, + "max_query_length": 8326, "unique_queries": 14918, + "none_queries": 0, + "num_relevant_docs": 14918, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 14918 + "unique_relevant_docs": 14918, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "javascript": { - "number_of_characters": 1590642, "num_samples": 68492, - "num_queries": 3291, + "number_of_characters": 18912831, "num_documents": 65201, - "min_document_length": 40, - "average_document_length": 22.395975521847824, - "max_document_length": 214210, + "min_document_length": 18, + "average_document_length": 267.6736706492232, + "max_document_length": 139981, "unique_documents": 65201, - "min_query_length": 2, - "average_query_length": 39.6238225463385, - "max_query_length": 2, + "num_queries": 3291, + "min_query_length": 40, + "average_query_length": 443.70707991491946, + "max_query_length": 214210, "unique_queries": 3291, + "none_queries": 0, + "num_relevant_docs": 3291, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 3291 + "unique_relevant_docs": 3291, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "go": { - "number_of_characters": 2264134, "num_samples": 190857, - "num_queries": 8122, + "number_of_characters": 33346519, "num_documents": 182735, - "min_document_length": 23, - 
"average_document_length": 10.390259118395491, - "max_document_length": 3589, + "min_document_length": 17, + "average_document_length": 172.09541138807563, + "max_document_length": 51246, "unique_documents": 182735, - "min_query_length": 2, - "average_query_length": 44.99753755232701, - "max_query_length": 2, + "num_queries": 8122, + "min_query_length": 23, + "average_query_length": 233.76803742920464, + "max_query_length": 3589, "unique_queries": 8122, + "none_queries": 0, + "num_relevant_docs": 8122, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 8122 + "unique_relevant_docs": 8122, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ruby": { - "number_of_characters": 391703, "num_samples": 28849, - "num_queries": 1261, + "number_of_characters": 5340661, "num_documents": 27588, - "min_document_length": 36, - "average_document_length": 12.198310859794113, - "max_document_length": 2244, + "min_document_length": 23, + "average_document_length": 181.3880672756271, + "max_document_length": 6207, "unique_documents": 27588, - "min_query_length": 2, - "average_query_length": 43.75574940523394, - "max_query_length": 2, + "num_queries": 1261, + "min_query_length": 36, + "average_query_length": 266.8731165741475, + "max_query_length": 2244, "unique_queries": 1261, + "none_queries": 0, + "num_relevant_docs": 1261, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1261 + "unique_relevant_docs": 1261, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": 
null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "java": { - "number_of_characters": 4114584, "num_samples": 192016, - "num_queries": 10955, + "number_of_characters": 49449127, "num_documents": 181061, - "min_document_length": 38, - "average_document_length": 20.724849636310413, - "max_document_length": 5066, + "min_document_length": 25, + "average_document_length": 252.3827052761224, + "max_document_length": 15047, "unique_documents": 181061, - "min_query_length": 2, - "average_query_length": 33.055408489274306, - "max_query_length": 2, + "num_queries": 10955, + "min_query_length": 38, + "average_query_length": 342.5341853035144, + "max_query_length": 5066, "unique_queries": 10955, + "none_queries": 0, + "num_relevant_docs": 10955, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 10955 + "unique_relevant_docs": 10955, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "php": { - "number_of_characters": 5253894, "num_samples": 282251, - "num_queries": 14014, + "number_of_characters": 69891728, "num_documents": 268237, - "min_document_length": 40, - "average_document_length": 17.586760961388624, - "max_document_length": 2995, + "min_document_length": 29, + "average_document_length": 242.97284863758543, + "max_document_length": 6966, "unique_documents": 268237, - "min_query_length": 2, - "average_query_length": 38.28129013843299, - "max_query_length": 2, + "num_queries": 14014, + "min_query_length": 40, + "average_query_length": 336.62194947909234, + "max_query_length": 2995, "unique_queries": 14014, + "none_queries": 0, + 
"num_relevant_docs": 14014, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 14014 + "unique_relevant_docs": 14014, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json index 853c4c79c6..b272377985 100644 --- a/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json @@ -1,129 +1,206 @@ { "test": { - "number_of_characters": 1950074, "num_samples": 12000, - "num_queries": 6000, + "number_of_characters": 6496327, "num_documents": 6000, - "min_document_length": 2, - "average_document_length": 324.01233333333334, - "max_document_length": 17533, + "min_document_length": 69, + "average_document_length": 758.7088333333334, + "max_document_length": 334374, "unique_documents": 6000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 6000, + "min_query_length": 2, + "average_query_length": 324.01233333333334, + "max_query_length": 17533, "unique_queries": 6000, + "none_queries": 0, + "num_relevant_docs": 6000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, "unique_relevant_docs": 6000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "python": { - 
"number_of_characters": 467546, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 1329388, "num_documents": 1000, - "min_document_length": 8, - "average_document_length": 466.546, - "max_document_length": 8636, + "min_document_length": 91, + "average_document_length": 862.842, + "max_document_length": 10914, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 8, + "average_query_length": 466.546, + "max_query_length": 8636, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "javascript": { - "number_of_characters": 187018, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 1601650, "num_documents": 1000, - "min_document_length": 2, - "average_document_length": 186.018, - "max_document_length": 7657, + "min_document_length": 95, + "average_document_length": 1415.632, + "max_document_length": 334374, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 2, + "average_query_length": 186.018, + "max_query_length": 7657, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + 
"average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "go": { - "number_of_characters": 126213, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 688942, "num_documents": 1000, - "min_document_length": 14, - "average_document_length": 125.213, - "max_document_length": 1501, + "min_document_length": 69, + "average_document_length": 563.729, + "max_document_length": 15904, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 14, + "average_query_length": 125.213, + "max_query_length": 1501, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "ruby": { - "number_of_characters": 314818, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 891452, "num_documents": 1000, - "min_document_length": 5, - "average_document_length": 313.818, - "max_document_length": 17533, + "min_document_length": 79, + "average_document_length": 577.634, + "max_document_length": 8171, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 5, + "average_query_length": 313.818, + "max_query_length": 17533, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, 
"min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "java": { - "number_of_characters": 691360, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 1110647, "num_documents": 1000, - "min_document_length": 2, - "average_document_length": 690.36, - "max_document_length": 6473, + "min_document_length": 106, + "average_document_length": 420.287, + "max_document_length": 9142, "unique_documents": 1000, - "min_query_length": 1, - "average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 2, + "average_query_length": 690.36, + "max_query_length": 6473, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null }, "php": { - "number_of_characters": 163119, "num_samples": 2000, - "num_queries": 1000, + "number_of_characters": 874248, "num_documents": 1000, - "min_document_length": 5, - "average_document_length": 162.119, - "max_document_length": 1240, + "min_document_length": 108, + "average_document_length": 712.129, + "max_document_length": 15584, "unique_documents": 1000, - "min_query_length": 1, - 
"average_query_length": 1.0, - "max_query_length": 1, + "num_queries": 1000, + "min_query_length": 5, + "average_query_length": 162.119, + "max_query_length": 1240, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1000 + "unique_relevant_docs": 1000, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json b/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json index 07081e69c3..877f0e1a4c 100644 --- a/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json +++ b/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json @@ -1,20 +1,31 @@ { "test": { - "number_of_characters": 1744286, "num_samples": 1229, - "num_queries": 221, + "number_of_characters": 1744286, "num_documents": 1008, "min_document_length": 8, - "average_document_length": 221.90178571428572, - "max_document_length": 4147, + "average_document_length": 1508.5406746031747, + "max_document_length": 10852, "unique_documents": 1008, + "num_queries": 221, "min_query_length": 8, - "average_query_length": 6880.58371040724, - "max_query_length": 10852, + "average_query_length": 1012.1131221719457, + "max_query_length": 4147, "unique_queries": 221, + "none_queries": 0, + "num_relevant_docs": 221, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 221 + "unique_relevant_docs": 221, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + 
"num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json b/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json index 042658caad..abe9973c60 100644 --- a/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json +++ b/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json @@ -1,20 +1,31 @@ { "test": { - "number_of_characters": 1543912, "num_samples": 996, - "num_queries": 180, + "number_of_characters": 1543912, "num_documents": 816, - "min_document_length": 376, - "average_document_length": 411.97549019607845, - "max_document_length": 8285, + "min_document_length": 58, + "average_document_length": 1480.0735294117646, + "max_document_length": 8469, "unique_documents": 816, - "min_query_length": 58, - "average_query_length": 6709.666666666667, - "max_query_length": 8469, + "num_queries": 180, + "min_query_length": 376, + "average_query_length": 1867.6222222222223, + "max_query_length": 8285, "unique_queries": 180, + "none_queries": 0, + "num_relevant_docs": 180, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 180 + "unique_relevant_docs": 180, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CosQA.json b/mteb/descriptive_stats/Retrieval/CosQA.json index d8f17d4b21..43e95a8efc 100644 --- a/mteb/descriptive_stats/Retrieval/CosQA.json +++ b/mteb/descriptive_stats/Retrieval/CosQA.json @@ -1,20 +1,31 @@ { "test": { - "number_of_characters": 5728450, "num_samples": 21104, - "num_queries": 
500, + "number_of_characters": 5728450, "num_documents": 20604, - "min_document_length": 18, - "average_document_length": 0.8933702193748787, - "max_document_length": 83, + "min_document_length": 88, + "average_document_length": 277.132741215298, + "max_document_length": 6396, "unique_documents": 20604, - "min_query_length": 88, - "average_query_length": 11420.086, - "max_query_length": 6396, + "num_queries": 500, + "min_query_length": 18, + "average_query_length": 36.814, + "max_query_length": 83, "unique_queries": 500, + "none_queries": 0, + "num_relevant_docs": 500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 500 + "unique_relevant_docs": 500, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CovidRetrieval.json b/mteb/descriptive_stats/Retrieval/CovidRetrieval.json index 86bc2cee79..eb23c0d4d8 100644 --- a/mteb/descriptive_stats/Retrieval/CovidRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CovidRetrieval.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 33266467, "num_samples": 100950, - "num_queries": 949, + "number_of_characters": 33266467, "num_documents": 100001, - "min_document_length": 8, - "average_document_length": 0.24607753922460776, - "max_document_length": 91, + "min_document_length": 1, + "average_document_length": 332.4152658473415, + "max_document_length": 60975, "unique_documents": 100001, - "min_query_length": 1, - "average_query_length": 35028.302423603796, - "max_query_length": 60975, + "num_queries": 949, + "min_query_length": 8, + "average_query_length": 25.9304531085353, + "max_query_length": 91, 
"unique_queries": 949, + "none_queries": 0, + "num_relevant_docs": 959, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0105374077976819, "max_relevant_docs_per_query": 4, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT19.json b/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT19.json index e42129467e..80dea699c2 100644 --- a/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT19.json +++ b/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT19.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 2662337, "num_samples": 17676, - "num_queries": 2946, + "number_of_characters": 2662337, "num_documents": 14730, - "min_document_length": 43, - "average_document_length": 29.88336727766463, - "max_document_length": 613, + "min_document_length": 38, + "average_document_length": 150.8591310251188, + "max_document_length": 648, "unique_documents": 14730, - "min_query_length": 38, - "average_query_length": 754.2956551255941, - "max_query_length": 648, + "num_queries": 2946, + "min_query_length": 43, + "average_query_length": 149.41683638832316, + "max_query_length": 613, "unique_queries": 2946, + "none_queries": 0, + "num_relevant_docs": 2946, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "deu-fra": { - "number_of_characters": 1311631, "num_samples": 8838, - "num_queries": 
1473, + "number_of_characters": 1311631, "num_documents": 7365, - "min_document_length": 43, - "average_document_length": 30.591174473862864, - "max_document_length": 588, + "min_document_length": 38, + "average_document_length": 147.49857433808555, + "max_document_length": 648, "unique_documents": 7365, - "min_query_length": 38, - "average_query_length": 737.4928716904277, - "max_query_length": 648, + "num_queries": 1473, + "min_query_length": 43, + "average_query_length": 152.95587236931433, + "max_query_length": 588, "unique_queries": 1473, + "none_queries": 0, + "num_relevant_docs": 1473, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fra-deu": { - "number_of_characters": 1350706, "num_samples": 8838, - "num_queries": 1473, + "number_of_characters": 1350706, "num_documents": 7365, - "min_document_length": 44, - "average_document_length": 29.175560081466394, - "max_document_length": 613, + "min_document_length": 43, + "average_document_length": 154.21968771215208, + "max_document_length": 627, "unique_documents": 7365, - "min_query_length": 43, - "average_query_length": 771.0984385607603, - "max_query_length": 627, + "num_queries": 1473, + "min_query_length": 44, + "average_query_length": 145.877800407332, + "max_query_length": 613, "unique_queries": 1473, + "none_queries": 0, + "num_relevant_docs": 1473, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -73,6 +81,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null 
diff --git a/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT21.json b/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT21.json index 42807d55d3..5bc2438123 100644 --- a/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT21.json +++ b/mteb/descriptive_stats/Retrieval/CrossLingualSemanticDiscriminationWMT21.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1881809, "num_samples": 10716, - "num_queries": 1786, + "number_of_characters": 1881809, "num_documents": 8930, "min_document_length": 40, - "average_document_length": 34.872228443449046, - "max_document_length": 567, + "average_document_length": 175.85666293393058, + "max_document_length": 572, "unique_documents": 8930, + "num_queries": 1786, "min_query_length": 40, - "average_query_length": 879.2833146696529, - "max_query_length": 572, + "average_query_length": 174.36114221724523, + "max_query_length": 567, "unique_queries": 1786, + "none_queries": 0, + "num_relevant_docs": 1786, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "deu-fra": { - "number_of_characters": 944833, "num_samples": 5358, - "num_queries": 893, + "number_of_characters": 944833, "num_documents": 4465, - "min_document_length": 40, - "average_document_length": 34.34602463605823, - "max_document_length": 543, + "min_document_length": 45, + "average_document_length": 177.26270996640537, + "max_document_length": 567, "unique_documents": 4465, - "min_query_length": 45, - "average_query_length": 886.3135498320269, - "max_query_length": 567, + "num_queries": 893, + "min_query_length": 40, + "average_query_length": 171.73012318029114, + 
"max_query_length": 543, "unique_queries": 893, + "none_queries": 0, + "num_relevant_docs": 893, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fra-deu": { - "number_of_characters": 936976, "num_samples": 5358, - "num_queries": 893, + "number_of_characters": 936976, "num_documents": 4465, - "min_document_length": 50, - "average_document_length": 35.39843225083987, - "max_document_length": 567, + "min_document_length": 40, + "average_document_length": 174.45061590145576, + "max_document_length": 572, "unique_documents": 4465, - "min_query_length": 40, - "average_query_length": 872.2530795072788, - "max_query_length": 572, + "num_queries": 893, + "min_query_length": 50, + "average_query_length": 176.99216125419932, + "max_query_length": 567, "unique_queries": 893, + "none_queries": 0, + "num_relevant_docs": 893, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -73,6 +81,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/DBPedia-PL.json b/mteb/descriptive_stats/Retrieval/DBPedia-PL.json index 3d86e72c82..30dbdfd37f 100644 --- a/mteb/descriptive_stats/Retrieval/DBPedia-PL.json +++ b/mteb/descriptive_stats/Retrieval/DBPedia-PL.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1449670678, "num_samples": 4636322, - "num_queries": 400, + "number_of_characters": 1449670678, "num_documents": 4635922, - "min_document_length": 2, - "average_document_length": 
0.003058722730882875, - "max_document_length": 90, + "min_document_length": 7, + "average_document_length": 312.7007956561823, + "max_document_length": 42899, "unique_documents": 4635922, - "min_query_length": 7, - "average_query_length": 3624141.245, - "max_query_length": 42899, + "num_queries": 400, + "min_query_length": 2, + "average_query_length": 35.45, + "max_query_length": 90, "unique_queries": 400, + "none_queries": 0, + "num_relevant_docs": 43515, "min_relevant_docs_per_query": 21, "average_relevant_docs_per_query": 38.215, "max_relevant_docs_per_query": 1499, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/DBPedia-PLHardNegatives.json b/mteb/descriptive_stats/Retrieval/DBPedia-PLHardNegatives.json index 26ce134325..90d27d7f19 100644 --- a/mteb/descriptive_stats/Retrieval/DBPedia-PLHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/DBPedia-PLHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 32284954, "num_samples": 88942, - "num_queries": 400, + "number_of_characters": 32284954, "num_documents": 88542, - "min_document_length": 2, - "average_document_length": 0.16014998531770233, - "max_document_length": 90, + "min_document_length": 7, + "average_document_length": 364.468546000768, + "max_document_length": 12359, "unique_documents": 88542, - "min_query_length": 7, - "average_query_length": 80676.935, - "max_query_length": 12359, + "num_queries": 400, + "min_query_length": 2, + "average_query_length": 35.45, + "max_query_length": 90, "unique_queries": 400, + "none_queries": 0, + "num_relevant_docs": 43515, "min_relevant_docs_per_query": 21, "average_relevant_docs_per_query": 38.215, "max_relevant_docs_per_query": 1499, @@ -21,6 +23,7 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/DBPedia.json b/mteb/descriptive_stats/Retrieval/DBPedia.json index 3eebad06db..e596234b14 100644 --- a/mteb/descriptive_stats/Retrieval/DBPedia.json +++ b/mteb/descriptive_stats/Retrieval/DBPedia.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 1437939026, "num_samples": 4635989, - "num_queries": 67, + "number_of_characters": 1437939026, "num_documents": 4635922, - "min_document_length": 5, - "average_document_length": 0.0004038031701137336, - "max_document_length": 79, + "min_document_length": 8, + "average_document_length": 310.17285321021365, + "max_document_length": 42899, "unique_documents": 4635922, - "min_query_length": 8, - "average_query_length": 21461748.56716418, - "max_query_length": 42899, + "num_queries": 67, + "min_query_length": 5, + "average_query_length": 27.940298507462686, + "max_query_length": 79, "unique_queries": 67, + "none_queries": 0, + "num_relevant_docs": 5673, "min_relevant_docs_per_query": 36, "average_relevant_docs_per_query": 20.970149253731343, "max_relevant_docs_per_query": 164, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test": { - "number_of_characters": 1437950788, "num_samples": 4636322, - "num_queries": 400, + "number_of_characters": 1437950788, "num_documents": 4635922, - "min_document_length": 6, - "average_document_length": 0.0029409468062663695, - "max_document_length": 88, + "min_document_length": 8, + "average_document_length": 310.17285321021365, + "max_document_length": 42899, "unique_documents": 4635922, - "min_query_length": 8, - "average_query_length": 3594842.885, - 
"max_query_length": 42899, + "num_queries": 400, + "min_query_length": 6, + "average_query_length": 34.085, + "max_query_length": 88, "unique_queries": 400, + "none_queries": 0, + "num_relevant_docs": 43515, "min_relevant_docs_per_query": 21, "average_relevant_docs_per_query": 38.215, "max_relevant_docs_per_query": 1499, @@ -47,6 +52,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.json b/mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.json index bd2425dbb4..f11c95065d 100644 --- a/mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 30600110, "num_samples": 90470, - "num_queries": 400, + "number_of_characters": 30600110, "num_documents": 90070, - "min_document_length": 6, - "average_document_length": 0.15137115576773622, - "max_document_length": 88, + "min_document_length": 8, + "average_document_length": 339.58561119129564, + "max_document_length": 5857, "unique_documents": 90070, - "min_query_length": 8, - "average_query_length": 76466.19, - "max_query_length": 5857, + "num_queries": 400, + "min_query_length": 6, + "average_query_length": 34.085, + "max_query_length": 88, "unique_queries": 400, + "none_queries": 0, + "num_relevant_docs": 43515, "min_relevant_docs_per_query": 21, "average_relevant_docs_per_query": 38.215, "max_relevant_docs_per_query": 1499, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/DanFeverRetrieval.json 
b/mteb/descriptive_stats/Retrieval/DanFeverRetrieval.json index ee7b5d859f..a2a500939b 100644 --- a/mteb/descriptive_stats/Retrieval/DanFeverRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/DanFeverRetrieval.json @@ -1,17 +1,19 @@ { "train": { - "number_of_characters": 1108138, "num_samples": 8897, - "num_queries": 6373, + "number_of_characters": 1108138, "num_documents": 2524, - "min_document_length": 11, - "average_document_length": 126.92868462757528, - "max_document_length": 188, + "min_document_length": 28, + "average_document_length": 312.1117274167987, + "max_document_length": 1748, "unique_documents": 2524, - "min_query_length": 28, - "average_query_length": 123.61054448454416, - "max_query_length": 1748, + "num_queries": 6373, + "min_query_length": 11, + "average_query_length": 50.26957476855484, + "max_query_length": 188, "unique_queries": 6373, + "none_queries": 0, + "num_relevant_docs": 6382, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 0.48721167425074535, "max_relevant_docs_per_query": 3, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/DuRetrieval.json b/mteb/descriptive_stats/Retrieval/DuRetrieval.json index dcf728482f..5d4976317f 100644 --- a/mteb/descriptive_stats/Retrieval/DuRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/DuRetrieval.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 33151109, "num_samples": 102001, - "num_queries": 2000, + "number_of_characters": 33151109, "num_documents": 100001, - "min_document_length": 3, - "average_document_length": 0.1857781422185778, - "max_document_length": 55, + "min_document_length": 1, + "average_document_length": 331.3219967800322, + "max_document_length": 60975, "unique_documents": 100001, - "min_query_length": 1, - 
"average_query_length": 16566.2655, - "max_query_length": 60975, + "num_queries": 2000, + "min_query_length": 3, + "average_query_length": 9.289, + "max_query_length": 55, "unique_queries": 2000, + "none_queries": 0, + "num_relevant_docs": 9839, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 4.9195, "max_relevant_docs_per_query": 31, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/EcomRetrieval.json b/mteb/descriptive_stats/Retrieval/EcomRetrieval.json index 7267b3e790..3b22bb6002 100644 --- a/mteb/descriptive_stats/Retrieval/EcomRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/EcomRetrieval.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 3334588, "num_samples": 101902, - "num_queries": 1000, + "number_of_characters": 3334588, "num_documents": 100902, - "min_document_length": 3, - "average_document_length": 0.06737230183742642, - "max_document_length": 34, + "min_document_length": 2, + "average_document_length": 32.98041664189015, + "max_document_length": 121, "unique_documents": 100902, - "min_query_length": 2, - "average_query_length": 3327.79, - "max_query_length": 121, + "num_queries": 1000, + "min_query_length": 3, + "average_query_length": 6.798, + "max_query_length": 34, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/EstQA.json 
b/mteb/descriptive_stats/Retrieval/EstQA.json index 5b9ed73cf2..8dadad152f 100644 --- a/mteb/descriptive_stats/Retrieval/EstQA.json +++ b/mteb/descriptive_stats/Retrieval/EstQA.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 128536, "num_samples": 724, - "num_queries": 603, + "number_of_characters": 128536, "num_documents": 121, - "min_document_length": 19, - "average_document_length": 275.68595041322317, - "max_document_length": 115, + "min_document_length": 510, + "average_document_length": 786.595041322314, + "max_document_length": 2725, "unique_documents": 121, - "min_query_length": 510, - "average_query_length": 157.8407960199005, - "max_query_length": 2725, + "num_queries": 603, + "min_query_length": 19, + "average_query_length": 55.32006633499171, + "max_query_length": 115, "unique_queries": 603, + "none_queries": 0, + "num_relevant_docs": 603, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/FEVER.json b/mteb/descriptive_stats/Retrieval/FEVER.json index 18a770b3e0..48a37c3582 100644 --- a/mteb/descriptive_stats/Retrieval/FEVER.json +++ b/mteb/descriptive_stats/Retrieval/FEVER.json @@ -1,19 +1,19 @@ { "test": { - "number_of_characters": 2921128337, "num_samples": 5423234, - "num_queries": 6666, + "number_of_characters": 2921128337, "num_documents": 5416568, - "num_relevant_docs": 7937, - "min_document_length": 14, - "average_document_length": 0.061047881241406, - "max_document_length": 189, + "min_document_length": 2, + "average_document_length": 539.2340070317589, + "max_document_length": 374597, "unique_documents": 5416568, - "min_query_length": 2, - "average_query_length": 438163.46639663965, - 
"max_query_length": 374597, + "num_queries": 6666, + "min_query_length": 14, + "average_query_length": 49.60546054605461, + "max_query_length": 189, "unique_queries": 6666, "none_queries": 0, + "num_relevant_docs": 7937, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1906690669066906, "max_relevant_docs_per_query": 15, @@ -23,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/FEVERHardNegatives.json b/mteb/descriptive_stats/Retrieval/FEVERHardNegatives.json index 1f5a27ff36..dce7a498ce 100644 --- a/mteb/descriptive_stats/Retrieval/FEVERHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/FEVERHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 114054968, "num_samples": 164698, - "num_queries": 1000, + "number_of_characters": 114054968, "num_documents": 163698, - "min_document_length": 15, - "average_document_length": 0.30311915845031706, - "max_document_length": 172, + "min_document_length": 2, + "average_document_length": 696.4370242764114, + "max_document_length": 29033, "unique_documents": 163698, - "min_query_length": 2, - "average_query_length": 114005.348, - "max_query_length": 29033, + "num_queries": 1000, + "min_query_length": 15, + "average_query_length": 49.62, + "max_query_length": 172, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1171, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.171, "max_relevant_docs_per_query": 15, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/FQuADRetrieval.json 
b/mteb/descriptive_stats/Retrieval/FQuADRetrieval.json index fe47d8b370..0e17d9f220 100644 --- a/mteb/descriptive_stats/Retrieval/FQuADRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/FQuADRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 257749, "num_samples": 669, - "num_queries": 400, + "number_of_characters": 264521, "num_documents": 269, - "min_document_length": 18, - "average_document_length": 87.0185873605948, - "max_document_length": 169, + "min_document_length": 513, + "average_document_length": 896.3308550185874, + "max_document_length": 1879, "unique_documents": 269, - "min_query_length": 501, - "average_query_length": 585.8525, - "max_query_length": 1854, + "num_queries": 400, + "min_query_length": 18, + "average_query_length": 58.52, + "max_query_length": 169, "unique_queries": 400, + "none_queries": 0, + "num_relevant_docs": 400, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "validation": { - "number_of_characters": 89794, "num_samples": 197, - "num_queries": 100, + "number_of_characters": 92241, "num_documents": 97, - "min_document_length": 18, - "average_document_length": 55.8041237113402, - "max_document_length": 107, + "min_document_length": 513, + "average_document_length": 895.1340206185567, + "max_document_length": 1844, "unique_documents": 97, - "min_query_length": 501, - "average_query_length": 843.81, - "max_query_length": 1815, + "num_queries": 100, + "min_query_length": 18, + "average_query_length": 54.13, + "max_query_length": 107, "unique_queries": 100, + "none_queries": 0, + "num_relevant_docs": 100, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 
1, @@ -47,6 +52,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/FaithDial.json b/mteb/descriptive_stats/Retrieval/FaithDial.json index 5ba596ba3a..1b2cfea8a1 100644 --- a/mteb/descriptive_stats/Retrieval/FaithDial.json +++ b/mteb/descriptive_stats/Retrieval/FaithDial.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 507681, "num_samples": 5581, - "num_queries": 2042, + "number_of_characters": 507681, "num_documents": 3539, - "min_document_length": 1, - "average_document_length": 2.842610907035886, - "max_document_length": 9, + "min_document_length": 24, + "average_document_length": 140.61062447018932, + "max_document_length": 471, "unique_documents": 3539, - "min_query_length": 24, - "average_query_length": 243.69294809010773, - "max_query_length": 471, + "num_queries": 2042, + "min_query_length": 1, + "average_query_length": 4.926542605288932, + "max_query_length": 9, "unique_queries": 2042, + "none_queries": 0, + "num_relevant_docs": 2042, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/FeedbackQARetrieval.json b/mteb/descriptive_stats/Retrieval/FeedbackQARetrieval.json index 59a04e8600..f18e4469f7 100644 --- a/mteb/descriptive_stats/Retrieval/FeedbackQARetrieval.json +++ b/mteb/descriptive_stats/Retrieval/FeedbackQARetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 2923673, "num_samples": 4356, - "num_queries": 1992, + "number_of_characters": 2923673, 
"num_documents": 2364, - "min_document_length": 17, - "average_document_length": 60.949661590524535, - "max_document_length": 206, + "min_document_length": 118, + "average_document_length": 1175.7986463620982, + "max_document_length": 15869, "unique_documents": 2364, - "min_query_length": 118, - "average_query_length": 1395.375502008032, - "max_query_length": 15869, + "num_queries": 1992, + "min_query_length": 17, + "average_query_length": 72.33182730923694, + "max_query_length": 206, "unique_queries": 1992, + "none_queries": 0, + "num_relevant_docs": 1992, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/FiQA-PL.json b/mteb/descriptive_stats/Retrieval/FiQA-PL.json index bfe7debd7c..85b0d55c2a 100644 --- a/mteb/descriptive_stats/Retrieval/FiQA-PL.json +++ b/mteb/descriptive_stats/Retrieval/FiQA-PL.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 45938883, "num_samples": 58286, - "num_queries": 648, + "number_of_characters": 45938883, "num_documents": 57638, - "min_document_length": 14, - "average_document_length": 0.7870675595960998, - "max_document_length": 185, + "min_document_length": 1, + "average_document_length": 796.2371699226205, + "max_document_length": 16991, "unique_documents": 57638, - "min_query_length": 1, - "average_query_length": 70823.33024691358, - "max_query_length": 16991, + "num_queries": 648, + "min_query_length": 14, + "average_query_length": 70.00771604938272, + "max_query_length": 185, "unique_queries": 648, + "none_queries": 0, + "num_relevant_docs": 1706, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.632716049382716, "max_relevant_docs_per_query": 15, @@ 
-21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/FiQA2018.json b/mteb/descriptive_stats/Retrieval/FiQA2018.json index e7f77ea43a..e2fcf1bf5f 100644 --- a/mteb/descriptive_stats/Retrieval/FiQA2018.json +++ b/mteb/descriptive_stats/Retrieval/FiQA2018.json @@ -1,17 +1,19 @@ { "train": { - "number_of_characters": 44616372, "num_samples": 63138, - "num_queries": 5500, + "number_of_characters": 44616372, "num_documents": 57638, - "min_document_length": 14, - "average_document_length": 5.868298691835248, - "max_document_length": 158, + "min_document_length": 1, + "average_document_length": 768.2108157812554, + "max_document_length": 16991, "unique_documents": 57638, - "min_query_length": 1, - "average_query_length": 8050.57, - "max_query_length": 16991, + "num_queries": 5500, + "min_query_length": 14, + "average_query_length": 61.49763636363636, + "max_query_length": 158, "unique_queries": 5500, + "none_queries": 0, + "num_relevant_docs": 14166, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.5756363636363635, "max_relevant_docs_per_query": 23, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "dev": { - "number_of_characters": 44309513, "num_samples": 58138, - "num_queries": 500, + "number_of_characters": 44309513, "num_documents": 57638, - "min_document_length": 15, - "average_document_length": 0.5443977931225927, - "max_document_length": 166, + "min_document_length": 1, + "average_document_length": 768.2108157812554, + "max_document_length": 16991, "unique_documents": 57638, - "min_query_length": 1, - 
"average_query_length": 88556.27, - "max_query_length": 16991, + "num_queries": 500, + "min_query_length": 15, + "average_query_length": 62.756, + "max_query_length": 166, "unique_queries": 500, + "none_queries": 0, + "num_relevant_docs": 1238, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.476, "max_relevant_docs_per_query": 20, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test": { - "number_of_characters": 44318767, "num_samples": 58286, - "num_queries": 648, + "number_of_characters": 44318767, "num_documents": 57638, - "min_document_length": 16, - "average_document_length": 0.7049515944342274, - "max_document_length": 147, + "min_document_length": 1, + "average_document_length": 768.2108157812554, + "max_document_length": 16991, "unique_documents": 57638, - "min_query_length": 1, - "average_query_length": 68330.45524691358, - "max_query_length": 16991, + "num_queries": 648, + "min_query_length": 16, + "average_query_length": 62.7037037037037, + "max_query_length": 147, "unique_queries": 648, + "none_queries": 0, + "num_relevant_docs": 1706, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.632716049382716, "max_relevant_docs_per_query": 15, @@ -73,6 +81,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/GeorgianFAQRetrieval.json b/mteb/descriptive_stats/Retrieval/GeorgianFAQRetrieval.json index b2cd429220..3c5ed5e73e 100644 --- a/mteb/descriptive_stats/Retrieval/GeorgianFAQRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/GeorgianFAQRetrieval.json @@ -1,17 +1,19 @@ { "test": { - 
"number_of_characters": 1470108, "num_samples": 5131, - "num_queries": 2565, + "number_of_characters": 1470108, "num_documents": 2566, - "min_document_length": 16, - "average_document_length": 61.67147310989868, - "max_document_length": 1089, + "min_document_length": 45, + "average_document_length": 511.24668745128605, + "max_document_length": 4847, "unique_documents": 2566, - "min_query_length": 45, - "average_query_length": 511.44600389863547, - "max_query_length": 4847, + "num_queries": 2565, + "min_query_length": 16, + "average_query_length": 61.69551656920078, + "max_query_length": 1089, "unique_queries": 2565, + "none_queries": 0, + "num_relevant_docs": 2566, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0003898635477584, "max_relevant_docs_per_query": 2, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/GerDaLIR.json b/mteb/descriptive_stats/Retrieval/GerDaLIR.json index db7989c665..54539741b0 100644 --- a/mteb/descriptive_stats/Retrieval/GerDaLIR.json +++ b/mteb/descriptive_stats/Retrieval/GerDaLIR.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 2047828528, "num_samples": 143743, - "num_queries": 12298, + "number_of_characters": 2047828528, "num_documents": 131445, - "min_document_length": 3, - "average_document_length": 96.11887101068888, - "max_document_length": 23560, + "min_document_length": 8, + "average_document_length": 15483.237726805888, + "max_document_length": 1170783, "unique_documents": 131445, - "min_query_length": 8, - "average_query_length": 165489.85062611807, - "max_query_length": 1170783, + "num_queries": 12298, + "min_query_length": 3, + "average_query_length": 1027.3495690356156, + "max_query_length": 23560, "unique_queries": 12298, + "none_queries": 0, + 
"num_relevant_docs": 14394, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1704342169458448, "max_relevant_docs_per_query": 9, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/GerDaLIRSmall.json b/mteb/descriptive_stats/Retrieval/GerDaLIRSmall.json index 9423b5ce57..d4f8f54137 100644 --- a/mteb/descriptive_stats/Retrieval/GerDaLIRSmall.json +++ b/mteb/descriptive_stats/Retrieval/GerDaLIRSmall.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 209081381, "num_samples": 22203, - "num_queries": 12234, + "number_of_characters": 209081381, "num_documents": 9969, - "min_document_length": 150, - "average_document_length": 1265.3312268030895, - "max_document_length": 23560, + "min_document_length": 151, + "average_document_length": 19707.823653325308, + "max_document_length": 427235, "unique_documents": 9969, - "min_query_length": 151, - "average_query_length": 16059.121628249142, - "max_query_length": 427235, + "num_queries": 12234, + "min_query_length": 150, + "average_query_length": 1031.0680889324833, + "max_query_length": 23560, "unique_queries": 12234, + "none_queries": 0, + "num_relevant_docs": 14320, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1705084191597188, "max_relevant_docs_per_query": 9, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/GermanDPR.json b/mteb/descriptive_stats/Retrieval/GermanDPR.json index e26b779a64..ef806814cf 100644 --- a/mteb/descriptive_stats/Retrieval/GermanDPR.json +++ 
b/mteb/descriptive_stats/Retrieval/GermanDPR.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 3084969, "num_samples": 3901, - "num_queries": 1025, + "number_of_characters": 3771263, "num_documents": 2876, - "min_document_length": 16, - "average_document_length": 22.946453407510432, - "max_document_length": 133, + "min_document_length": 478, + "average_document_length": 1288.3410987482614, + "max_document_length": 4784, "unique_documents": 2876, - "min_query_length": 456, - "average_query_length": 2945.3414634146343, - "max_query_length": 2446, + "num_queries": 1025, + "min_query_length": 16, + "average_query_length": 64.38439024390244, + "max_query_length": 133, "unique_queries": 1025, + "none_queries": 0, + "num_relevant_docs": 1025, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/GermanGovServiceRetrieval.json b/mteb/descriptive_stats/Retrieval/GermanGovServiceRetrieval.json index 47ffa53fab..20358f9b5d 100644 --- a/mteb/descriptive_stats/Retrieval/GermanGovServiceRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/GermanGovServiceRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 151500, "num_samples": 461, - "num_queries": 356, + "number_of_characters": 155150, "num_documents": 105, - "min_document_length": 17, - "average_document_length": 231.16190476190476, - "max_document_length": 172, + "min_document_length": 636, + "average_document_length": 1246.4571428571428, + "max_document_length": 1539, "unique_documents": 105, - "min_query_length": 589, - "average_query_length": 357.3820224719101, - "max_query_length": 1493, + "num_queries": 356, + "min_query_length": 17, + 
"average_query_length": 68.17977528089888, + "max_query_length": 172, "unique_queries": 356, + "none_queries": 0, + "num_relevant_docs": 356, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/GermanQuAD-Retrieval.json b/mteb/descriptive_stats/Retrieval/GermanQuAD-Retrieval.json index cf2db0ed7c..8832893c2d 100644 --- a/mteb/descriptive_stats/Retrieval/GermanQuAD-Retrieval.json +++ b/mteb/descriptive_stats/Retrieval/GermanQuAD-Retrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1045149, "num_samples": 2678, - "num_queries": 2204, + "number_of_characters": 1045149, "num_documents": 474, - "min_document_length": 15, - "average_document_length": 263.86497890295357, - "max_document_length": 130, + "min_document_length": 507, + "average_document_length": 1941.090717299578, + "max_document_length": 11647, "unique_documents": 474, - "min_query_length": 507, - "average_query_length": 417.4578039927405, - "max_query_length": 11647, + "num_queries": 2204, + "min_query_length": 15, + "average_query_length": 56.74773139745916, + "max_query_length": 130, "unique_queries": 2204, + "none_queries": 0, + "num_relevant_docs": 2204, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/GreekCivicsQA.json b/mteb/descriptive_stats/Retrieval/GreekCivicsQA.json index 
fb179201f5..3d76e4bc6c 100644 --- a/mteb/descriptive_stats/Retrieval/GreekCivicsQA.json +++ b/mteb/descriptive_stats/Retrieval/GreekCivicsQA.json @@ -1,17 +1,19 @@ { "default": { - "number_of_characters": 468846, "num_samples": 814, - "num_queries": 407, + "number_of_characters": 468846, "num_documents": 407, - "min_document_length": 18, - "average_document_length": 77.06142506142506, - "max_document_length": 313, + "min_document_length": 110, + "average_document_length": 1074.894348894349, + "max_document_length": 5057, "unique_documents": 407, - "min_query_length": 110, - "average_query_length": 1074.894348894349, - "max_query_length": 5057, + "num_queries": 407, + "min_query_length": 18, + "average_query_length": 77.06142506142506, + "max_query_length": 313, "unique_queries": 407, + "none_queries": 0, + "num_relevant_docs": 407, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/HagridRetrieval.json b/mteb/descriptive_stats/Retrieval/HagridRetrieval.json index 8ed81e0f75..812ea084d6 100644 --- a/mteb/descriptive_stats/Retrieval/HagridRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/HagridRetrieval.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 133142, "num_samples": 992, - "num_queries": 496, + "number_of_characters": 133142, "num_documents": 496, - "min_document_length": 16, - "average_document_length": 40.064516129032256, - "max_document_length": 122, + "min_document_length": 27, + "average_document_length": 228.36693548387098, + "max_document_length": 1205, "unique_documents": 496, - "min_query_length": 27, - "average_query_length": 228.36693548387098, - "max_query_length": 1205, + "num_queries": 
496, + "min_query_length": 16, + "average_query_length": 40.064516129032256, + "max_query_length": 122, "unique_queries": 496, + "none_queries": 0, + "num_relevant_docs": 496, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/HellaSwag.json b/mteb/descriptive_stats/Retrieval/HellaSwag.json index eb7ac0ecdb..1199fad18d 100644 --- a/mteb/descriptive_stats/Retrieval/HellaSwag.json +++ b/mteb/descriptive_stats/Retrieval/HellaSwag.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 29811884, "num_samples": 209204, - "num_queries": 10042, + "number_of_characters": 29811884, "num_documents": 199162, - "min_document_length": 30, - "average_document_length": 11.32141673612436, - "max_document_length": 499, + "min_document_length": 2, + "average_document_length": 138.36519014671472, + "max_document_length": 506, "unique_documents": 199162, - "min_query_length": 2, - "average_query_length": 2744.1832304321847, - "max_query_length": 506, + "num_queries": 10042, + "min_query_length": 30, + "average_query_length": 224.53654650468033, + "max_query_length": 499, "unique_queries": 10042, + "none_queries": 0, + "num_relevant_docs": 10042, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/HotpotQA-PL.json b/mteb/descriptive_stats/Retrieval/HotpotQA-PL.json index 1727a39137..18bd608e77 
100644 --- a/mteb/descriptive_stats/Retrieval/HotpotQA-PL.json +++ b/mteb/descriptive_stats/Retrieval/HotpotQA-PL.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1535470621, "num_samples": 5240734, - "num_queries": 7405, + "number_of_characters": 1535470621, "num_documents": 5233329, - "min_document_length": 24, - "average_document_length": 0.13391361406859764, - "max_document_length": 293, + "min_document_length": 7, + "average_document_length": 293.26835882093405, + "max_document_length": 9292, "unique_documents": 5233329, - "min_query_length": 7, - "average_query_length": 207261.28386225522, - "max_query_length": 9292, + "num_queries": 7405, + "min_query_length": 24, + "average_query_length": 94.64064821066847, + "max_query_length": 293, "unique_queries": 7405, + "none_queries": 0, + "num_relevant_docs": 14810, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/HotpotQA-PLHardNegatives.json b/mteb/descriptive_stats/Retrieval/HotpotQA-PLHardNegatives.json index 5d501ccdfe..b0c2decba3 100644 --- a/mteb/descriptive_stats/Retrieval/HotpotQA-PLHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/HotpotQA-PLHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 93585678, "num_samples": 213774, - "num_queries": 1000, + "number_of_characters": 93585678, "num_documents": 212774, - "min_document_length": 32, - "average_document_length": 0.4472397943357741, - "max_document_length": 268, + "min_document_length": 11, + "average_document_length": 439.3888210025661, + "max_document_length": 6315, "unique_documents": 212774, - "min_query_length": 11, - "average_query_length": 93490.517, - 
"max_query_length": 6315, + "num_queries": 1000, + "min_query_length": 32, + "average_query_length": 95.161, + "max_query_length": 268, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 2000, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/HotpotQA.json b/mteb/descriptive_stats/Retrieval/HotpotQA.json index ae51a60fc8..02875ca5ae 100644 --- a/mteb/descriptive_stats/Retrieval/HotpotQA.json +++ b/mteb/descriptive_stats/Retrieval/HotpotQA.json @@ -1,19 +1,19 @@ { "train": { - "number_of_characters": 1520922083, "num_samples": 5318329, - "num_queries": 85000, + "number_of_characters": 1520922083, "num_documents": 5233329, - "num_relevant_docs": 170000, - "min_document_length": 13, - "average_document_length": 1.7143430118763792, - "max_document_length": 654, + "min_document_length": 9, + "average_document_length": 288.9079517072212, + "max_document_length": 8276, "unique_documents": 5233329, - "min_query_length": 9, - "average_query_length": 17787.651317647058, - "max_query_length": 8276, + "num_queries": 85000, + "min_query_length": 13, + "average_query_length": 105.54965882352941, + "max_query_length": 654, "unique_queries": 85000, "none_queries": 0, + "num_relevant_docs": 170000, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -23,25 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "dev": { - "number_of_characters": 1512524238, "num_samples": 5238776, - 
"num_queries": 5447, + "number_of_characters": 1512524238, "num_documents": 5233329, - "num_relevant_docs": 10894, - "min_document_length": 18, - "average_document_length": 0.10965792519446035, - "max_document_length": 630, + "min_document_length": 9, + "average_document_length": 288.9079517072212, + "max_document_length": 8276, "unique_documents": 5233329, - "min_query_length": 9, - "average_query_length": 277574.8782816229, - "max_query_length": 8276, + "num_queries": 5447, + "min_query_length": 18, + "average_query_length": 105.35634294106848, + "max_query_length": 630, "unique_queries": 5447, "none_queries": 0, + "num_relevant_docs": 10894, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -51,25 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test": { - "number_of_characters": 1512632888, "num_samples": 5240734, - "num_queries": 7405, + "number_of_characters": 1512632888, "num_documents": 5233329, - "num_relevant_docs": 14810, - "min_document_length": 32, - "average_document_length": 0.13041908888204812, - "max_document_length": 288, + "min_document_length": 9, + "average_document_length": 288.9079517072212, + "max_document_length": 8276, "unique_documents": 5233329, - "min_query_length": 9, - "average_query_length": 204179.65725860905, - "max_query_length": 8276, + "num_queries": 7405, + "min_query_length": 32, + "average_query_length": 92.17096556380824, + "max_query_length": 288, "unique_queries": 7405, "none_queries": 0, + "num_relevant_docs": 14810, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -79,6 +81,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, 
"min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.json b/mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.json index 20a1e4dd0d..2741774d24 100644 --- a/mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 84600920, "num_samples": 226621, - "num_queries": 1000, + "number_of_characters": 84600920, "num_documents": 225621, - "min_document_length": 34, - "average_document_length": 0.41035187327420763, - "max_document_length": 288, + "min_document_length": 9, + "average_document_length": 374.558822095461, + "max_document_length": 3463, "unique_documents": 225621, - "min_query_length": 9, - "average_query_length": 84508.336, - "max_query_length": 3463, + "num_queries": 1000, + "min_query_length": 34, + "average_query_length": 92.584, + "max_query_length": 288, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 2000, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/HunSum2AbstractiveRetrieval.json b/mteb/descriptive_stats/Retrieval/HunSum2AbstractiveRetrieval.json index 72b908be1b..93787b9096 100644 --- a/mteb/descriptive_stats/Retrieval/HunSum2AbstractiveRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/HunSum2AbstractiveRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 5321531, "num_samples": 3996, - "num_queries": 1998, + "number_of_characters": 5419061, "num_documents": 1998, - "min_document_length": 30, - 
"average_document_length": 201.2112112112112, - "max_document_length": 748, + "min_document_length": 323, + "average_document_length": 2511.0315315315315, + "max_document_length": 14909, "unique_documents": 1998, - "min_query_length": 309, - "average_query_length": 2462.2177177177177, - "max_query_length": 14850, + "num_queries": 1998, + "min_query_length": 30, + "average_query_length": 201.2112112112112, + "max_query_length": 748, "unique_queries": 1998, + "none_queries": 0, + "num_relevant_docs": 1998, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/IndicQARetrieval.json b/mteb/descriptive_stats/Retrieval/IndicQARetrieval.json index e5a62aa559..38c60de1a3 100644 --- a/mteb/descriptive_stats/Retrieval/IndicQARetrieval.json +++ b/mteb/descriptive_stats/Retrieval/IndicQARetrieval.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 6031160, "num_samples": 21319, - "num_queries": 18560, + "number_of_characters": 6031160, "num_documents": 2759, - "min_document_length": 8, - "average_document_length": 395.7480971366437, - "max_document_length": 226, + "min_document_length": 146, + "average_document_length": 1790.2468285610728, + "max_document_length": 14782, "unique_documents": 2759, - "min_query_length": 146, - "average_query_length": 266.1255926724138, - "max_query_length": 14782, + "num_queries": 18560, + "min_query_length": 8, + "average_query_length": 58.829148706896554, + "max_query_length": 226, "unique_queries": 18560, "none_queries": 0, + "num_relevant_docs": 18574, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0007543103448275, "max_relevant_docs_per_query": 2, @@ -22,24 +23,26 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "as": { - "number_of_characters": 451360, "num_samples": 2035, - "num_queries": 1785, + "number_of_characters": 451360, "num_documents": 250, - "min_document_length": 13, - "average_document_length": 404.16, - "max_document_length": 184, + "min_document_length": 355, + "average_document_length": 1401.28, + "max_document_length": 6654, "unique_documents": 250, - "min_query_length": 355, - "average_query_length": 196.2577030812325, - "max_query_length": 6654, + "num_queries": 1785, + "min_query_length": 13, + "average_query_length": 56.60504201680672, + "max_query_length": 184, "unique_queries": 1785, "none_queries": 0, + "num_relevant_docs": 1788, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0016806722689076, "max_relevant_docs_per_query": 2, @@ -49,24 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "bn": { - "number_of_characters": 649559, "num_samples": 2012, - "num_queries": 1762, + "number_of_characters": 649559, "num_documents": 250, - "min_document_length": 15, - "average_document_length": 402.224, - "max_document_length": 202, + "min_document_length": 684, + "average_document_length": 2196.012, + "max_document_length": 6767, "unique_documents": 250, - "min_query_length": 684, - "average_query_length": 311.5794551645857, - "max_query_length": 6767, + "num_queries": 1762, + "min_query_length": 15, + "average_query_length": 57.069239500567534, + "max_query_length": 202, "unique_queries": 1762, "none_queries": 0, + "num_relevant_docs": 1763, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0005675368898979, "max_relevant_docs_per_query": 2, @@ -76,24 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "gu": { - "number_of_characters": 359851, "num_samples": 2263, - "num_queries": 2015, + "number_of_characters": 359851, "num_documents": 248, - "min_document_length": 9, - "average_document_length": 490.51612903225805, - "max_document_length": 173, + "min_document_length": 147, + "average_document_length": 960.4959677419355, + "max_document_length": 3253, "unique_documents": 248, - "min_query_length": 147, - "average_query_length": 118.21488833746898, - "max_query_length": 3253, + "num_queries": 2015, + "min_query_length": 9, + "average_query_length": 60.3712158808933, + "max_query_length": 173, "unique_queries": 2015, "none_queries": 0, + "num_relevant_docs": 2017, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0009925558312656, "max_relevant_docs_per_query": 2, @@ -103,24 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hi": { - "number_of_characters": 747350, "num_samples": 1805, - "num_queries": 1544, + "number_of_characters": 747350, "num_documents": 261, - "min_document_length": 19, - "average_document_length": 312.63984674329504, - "max_document_length": 138, + "min_document_length": 1156, + "average_document_length": 2550.770114942529, + "max_document_length": 8857, "unique_documents": 261, - "min_query_length": 1156, - "average_query_length": 431.18588082901556, - "max_query_length": 8857, + "num_queries": 1544, + "min_query_length": 19, + "average_query_length": 52.84909326424871, + "max_query_length": 138, 
"unique_queries": 1544, "none_queries": 0, + "num_relevant_docs": 1547, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0019430051813472, "max_relevant_docs_per_query": 2, @@ -130,24 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "kn": { - "number_of_characters": 303604, "num_samples": 1774, - "num_queries": 1517, + "number_of_characters": 303604, "num_documents": 257, - "min_document_length": 14, - "average_document_length": 298.6031128404669, - "max_document_length": 133, + "min_document_length": 146, + "average_document_length": 882.7354085603113, + "max_document_length": 3130, "unique_documents": 257, - "min_query_length": 146, - "average_query_length": 149.547132498352, - "max_query_length": 3130, + "num_queries": 1517, + "min_query_length": 14, + "average_query_length": 50.58734344100198, + "max_query_length": 133, "unique_queries": 1517, "none_queries": 0, + "num_relevant_docs": 1517, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -157,24 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ml": { - "number_of_characters": 743604, "num_samples": 1834, - "num_queries": 1587, + "number_of_characters": 743604, "num_documents": 247, - "min_document_length": 25, - "average_document_length": 487.8987854251012, - "max_document_length": 219, + "min_document_length": 859, + "average_document_length": 2522.6437246963565, + "max_document_length": 11919, "unique_documents": 247, - "min_query_length": 859, - "average_query_length": 392.6231884057971, - "max_query_length": 11919, + "num_queries": 1587, + 
"min_query_length": 25, + "average_query_length": 75.93635790800252, + "max_query_length": 219, "unique_queries": 1587, "none_queries": 0, + "num_relevant_docs": 1587, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -184,24 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "mr": { - "number_of_characters": 521991, "num_samples": 1850, - "num_queries": 1600, + "number_of_characters": 521991, "num_documents": 250, - "min_document_length": 13, - "average_document_length": 376.224, - "max_document_length": 215, + "min_document_length": 746, + "average_document_length": 1711.74, + "max_document_length": 6702, "unique_documents": 250, - "min_query_length": 746, - "average_query_length": 267.459375, - "max_query_length": 6702, + "num_queries": 1600, + "min_query_length": 13, + "average_query_length": 58.785, + "max_query_length": 215, "unique_queries": 1600, "none_queries": 0, + "num_relevant_docs": 1600, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -211,24 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "or": { - "number_of_characters": 294386, "num_samples": 1928, - "num_queries": 1676, + "number_of_characters": 294386, "num_documents": 252, - "min_document_length": 8, - "average_document_length": 366.27777777777777, - "max_document_length": 195, + "min_document_length": 260, + "average_document_length": 801.9206349206349, + "max_document_length": 2277, "unique_documents": 252, - "min_query_length": 260, - "average_query_length": 120.57517899761336, - 
"max_query_length": 2277, + "num_queries": 1676, + "min_query_length": 8, + "average_query_length": 55.072792362768496, + "max_query_length": 195, "unique_queries": 1676, "none_queries": 0, + "num_relevant_docs": 1678, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011933174224343, "max_relevant_docs_per_query": 2, @@ -238,24 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "pa": { - "number_of_characters": 432818, "num_samples": 1778, - "num_queries": 1537, + "number_of_characters": 432818, "num_documents": 241, - "min_document_length": 13, - "average_document_length": 372.4190871369295, - "max_document_length": 226, + "min_document_length": 422, + "average_document_length": 1423.5062240663901, + "max_document_length": 6082, "unique_documents": 241, - "min_query_length": 422, - "average_query_length": 223.20429407937542, - "max_query_length": 6082, + "num_queries": 1537, + "min_query_length": 13, + "average_query_length": 58.394925178919976, + "max_query_length": 226, "unique_queries": 1537, "none_queries": 0, + "num_relevant_docs": 1539, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0013012361743656, "max_relevant_docs_per_query": 2, @@ -265,24 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ta": { - "number_of_characters": 676404, "num_samples": 2056, - "num_queries": 1803, + "number_of_characters": 676404, "num_documents": 253, - "min_document_length": 19, - "average_document_length": 385.27272727272725, - "max_document_length": 196, + "min_document_length": 769, + "average_document_length": 2288.2608695652175, + 
"max_document_length": 6940, "unique_documents": 253, - "min_query_length": 769, - "average_query_length": 321.0926234054354, - "max_query_length": 6940, + "num_queries": 1803, + "min_query_length": 19, + "average_query_length": 54.06211869107044, + "max_query_length": 196, "unique_queries": 1803, "none_queries": 0, + "num_relevant_docs": 1804, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0005546311702718, "max_relevant_docs_per_query": 2, @@ -292,24 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "te": { - "number_of_characters": 850233, "num_samples": 1984, - "num_queries": 1734, + "number_of_characters": 850233, "num_documents": 250, - "min_document_length": 13, - "average_document_length": 464.756, - "max_document_length": 147, + "min_document_length": 1072, + "average_document_length": 2936.176, + "max_document_length": 14782, "unique_documents": 250, - "min_query_length": 1072, - "average_query_length": 423.32410611303345, - "max_query_length": 14782, + "num_queries": 1734, + "min_query_length": 13, + "average_query_length": 67.00634371395617, + "max_query_length": 147, "unique_queries": 1734, "none_queries": 0, + "num_relevant_docs": 1734, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -319,6 +342,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/JaGovFaqsRetrieval.json b/mteb/descriptive_stats/Retrieval/JaGovFaqsRetrieval.json index 7c008ace3b..50a21a7858 100644 --- a/mteb/descriptive_stats/Retrieval/JaGovFaqsRetrieval.json +++ 
b/mteb/descriptive_stats/Retrieval/JaGovFaqsRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 4909152, "num_samples": 24842, - "num_queries": 2048, + "number_of_characters": 4909152, "num_documents": 22794, - "min_document_length": 5, - "average_document_length": 5.344345003070984, - "max_document_length": 597, + "min_document_length": 2, + "average_document_length": 210.02601561814512, + "max_document_length": 4922, "unique_documents": 22794, - "min_query_length": 2, - "average_query_length": 2337.56494140625, - "max_query_length": 4922, + "num_queries": 2048, + "min_query_length": 5, + "average_query_length": 59.48193359375, + "max_query_length": 597, "unique_queries": 2048, + "none_queries": 0, + "num_relevant_docs": 2048, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/JaQuADRetrieval.json b/mteb/descriptive_stats/Retrieval/JaQuADRetrieval.json index 9a427835ae..d7405ce5da 100644 --- a/mteb/descriptive_stats/Retrieval/JaQuADRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/JaQuADRetrieval.json @@ -1,17 +1,19 @@ { "validation": { - "number_of_characters": 508977, "num_samples": 5062, - "num_queries": 2048, + "number_of_characters": 532741, "num_documents": 3014, - "min_document_length": 8, - "average_document_length": 20.946250829462507, - "max_document_length": 146, + "min_document_length": 4, + "average_document_length": 155.80922362309224, + "max_document_length": 1287, "unique_documents": 3014, - "min_query_length": 1, - "average_query_length": 217.69775390625, - "max_query_length": 1284, + "num_queries": 2048, + "min_query_length": 8, + "average_query_length": 30.826171875, + 
"max_query_length": 146, "unique_queries": 2048, + "none_queries": 0, + "num_relevant_docs": 4096, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json b/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json index 4598b2af77..271db0c524 100644 --- a/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json @@ -1,20 +1,31 @@ { "test": { - "number_of_characters": 428294530, "num_samples": 115226, - "num_queries": 997, + "number_of_characters": 428294530, "num_documents": 114229, - "min_document_length": 16, - "average_document_length": 0.4425671239352529, - "max_document_length": 98, + "min_document_length": 8, + "average_document_length": 3748.995228882333, + "max_document_length": 188424, "unique_documents": 114229, - "min_query_length": 8, - "average_query_length": 429532.5737211635, - "max_query_length": 188424, + "num_queries": 997, + "min_query_length": 16, + "average_query_length": 50.70611835506519, + "max_query_length": 98, "unique_queries": 997, + "none_queries": 0, + "num_relevant_docs": 997, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 989 + "unique_relevant_docs": 989, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Retrieval/Ko-StrategyQA.json b/mteb/descriptive_stats/Retrieval/Ko-StrategyQA.json index bfb2c3a8b8..5cd5df9c34 100644 --- a/mteb/descriptive_stats/Retrieval/Ko-StrategyQA.json +++ b/mteb/descriptive_stats/Retrieval/Ko-StrategyQA.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 2976191, "num_samples": 9843, - "num_queries": 592, + "number_of_characters": 2976191, "num_documents": 9251, - "min_document_length": 10, - "average_document_length": 1.4560588044535725, - "max_document_length": 60, + "min_document_length": 29, + "average_document_length": 320.25953950924225, + "max_document_length": 5016, "unique_documents": 9251, - "min_query_length": 29, - "average_query_length": 5004.596283783784, - "max_query_length": 5016, + "num_queries": 592, + "min_query_length": 10, + "average_query_length": 22.75337837837838, + "max_query_length": 60, "unique_queries": 592, + "none_queries": 0, + "num_relevant_docs": 1145, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9341216216216217, "max_relevant_docs_per_query": 7, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LEMBNarrativeQARetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBNarrativeQARetrieval.json index 6b41aa6a11..acf2e59c8c 100644 --- a/mteb/descriptive_stats/Retrieval/LEMBNarrativeQARetrieval.json +++ b/mteb/descriptive_stats/Retrieval/LEMBNarrativeQARetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 116497954, "num_samples": 10804, - "num_queries": 10449, + "number_of_characters": 116497954, "num_documents": 355, - "min_document_length": 10, - "average_document_length": 1409.718309859155, - "max_document_length": 1220, + "min_document_length": 21216, + "average_document_length": 
326753.5323943662, + "max_document_length": 1874086, "unique_documents": 355, - "min_query_length": 21216, - "average_query_length": 11101.301942769644, - "max_query_length": 1874086, + "num_queries": 10449, + "min_query_length": 10, + "average_query_length": 47.89453536223562, + "max_query_length": 1220, "unique_queries": 10449, + "none_queries": 0, + "num_relevant_docs": 10449, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LEMBNeedleRetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBNeedleRetrieval.json index c71b8e094d..9c0a0955d8 100644 --- a/mteb/descriptive_stats/Retrieval/LEMBNeedleRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/LEMBNeedleRetrieval.json @@ -1,17 +1,19 @@ { "test_256": { - "number_of_characters": 104346, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 104346, "num_documents": 100, - "min_document_length": 25, - "average_document_length": 30.24, - "max_document_length": 83, + "min_document_length": 954, + "average_document_length": 1013.22, + "max_document_length": 1092, "unique_documents": 100, - "min_query_length": 954, - "average_query_length": 2026.44, - "max_query_length": 1092, + "num_queries": 50, + "min_query_length": 25, + "average_query_length": 60.48, + "max_query_length": 83, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": 
null, "max_top_ranked_per_query": null }, "test_512": { - "number_of_characters": 203861, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 203861, "num_documents": 100, - "min_document_length": 25, - "average_document_length": 28.65, - "max_document_length": 82, + "min_document_length": 1951, + "average_document_length": 2009.96, + "max_document_length": 2089, "unique_documents": 100, - "min_query_length": 1951, - "average_query_length": 4019.92, - "max_query_length": 2089, + "num_queries": 50, + "min_query_length": 25, + "average_query_length": 57.3, + "max_query_length": 82, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test_1024": { - "number_of_characters": 409904, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 409904, "num_documents": 100, - "min_document_length": 26, - "average_document_length": 29.14, - "max_document_length": 82, + "min_document_length": 4011, + "average_document_length": 4069.9, + "max_document_length": 4149, "unique_documents": 100, - "min_query_length": 4011, - "average_query_length": 8139.8, - "max_query_length": 4149, + "num_queries": 50, + "min_query_length": 26, + "average_query_length": 58.28, + "max_query_length": 82, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -73,23 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, 
"max_top_ranked_per_query": null }, "test_2048": { - "number_of_characters": 848378, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 848378, "num_documents": 100, - "min_document_length": 25, - "average_document_length": 29.96, - "max_document_length": 83, + "min_document_length": 8395, + "average_document_length": 8453.82, + "max_document_length": 8533, "unique_documents": 100, - "min_query_length": 8395, - "average_query_length": 16907.64, - "max_query_length": 8533, + "num_queries": 50, + "min_query_length": 25, + "average_query_length": 59.92, + "max_query_length": 83, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -99,23 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test_4096": { - "number_of_characters": 1742373, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 1742373, "num_documents": 100, - "min_document_length": 25, - "average_document_length": 27.93, - "max_document_length": 83, + "min_document_length": 17337, + "average_document_length": 17395.8, + "max_document_length": 17475, "unique_documents": 100, - "min_query_length": 17337, - "average_query_length": 34791.6, - "max_query_length": 17475, + "num_queries": 50, + "min_query_length": 25, + "average_query_length": 55.86, + "max_query_length": 83, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -125,23 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, 
"max_top_ranked_per_query": null }, "test_8192": { - "number_of_characters": 3523362, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 3523362, "num_documents": 100, - "min_document_length": 27, - "average_document_length": 29.8, - "max_document_length": 82, + "min_document_length": 35145, + "average_document_length": 35203.82, + "max_document_length": 35283, "unique_documents": 100, - "min_query_length": 35145, - "average_query_length": 70407.64, - "max_query_length": 35283, + "num_queries": 50, + "min_query_length": 27, + "average_query_length": 59.6, + "max_query_length": 82, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -151,23 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test_16384": { - "number_of_characters": 7208436, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 7208436, "num_documents": 100, - "min_document_length": 26, - "average_document_length": 29.56, - "max_document_length": 81, + "min_document_length": 71996, + "average_document_length": 72054.8, + "max_document_length": 72134, "unique_documents": 100, - "min_query_length": 71996, - "average_query_length": 144109.6, - "max_query_length": 72134, + "num_queries": 50, + "min_query_length": 26, + "average_query_length": 59.12, + "max_query_length": 81, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -177,23 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, 
"max_top_ranked_per_query": null }, "test_32768": { - "number_of_characters": 14179897, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 14179897, "num_documents": 100, - "min_document_length": 26, - "average_document_length": 29.17, - "max_document_length": 82, + "min_document_length": 141711, + "average_document_length": 141769.8, + "max_document_length": 141849, "unique_documents": 100, - "min_query_length": 141711, - "average_query_length": 283539.6, - "max_query_length": 141849, + "num_queries": 50, + "min_query_length": 26, + "average_query_length": 58.34, + "max_query_length": 82, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -203,6 +226,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LEMBPasskeyRetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBPasskeyRetrieval.json index b91392a9de..c993242945 100644 --- a/mteb/descriptive_stats/Retrieval/LEMBPasskeyRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/LEMBPasskeyRetrieval.json @@ -1,17 +1,19 @@ { "test_256": { - "number_of_characters": 89529, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 89529, "num_documents": 100, - "min_document_length": 35, - "average_document_length": 19.05, - "max_document_length": 45, + "min_document_length": 867, + "average_document_length": 876.24, + "max_document_length": 891, "unique_documents": 100, - "min_query_length": 867, - "average_query_length": 1752.48, - "max_query_length": 891, + "num_queries": 50, + "min_query_length": 35, + "average_query_length": 38.1, + "max_query_length": 45, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, 
"min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test_512": { - "number_of_characters": 180408, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 180408, "num_documents": 100, - "min_document_length": 34, - "average_document_length": 18.88, - "max_document_length": 42, + "min_document_length": 1776, + "average_document_length": 1785.2, + "max_document_length": 1800, "unique_documents": 100, - "min_query_length": 1776, - "average_query_length": 3570.4, - "max_query_length": 1800, + "num_queries": 50, + "min_query_length": 34, + "average_query_length": 37.76, + "max_query_length": 42, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test_1024": { - "number_of_characters": 362602, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 362602, "num_documents": 100, - "min_document_length": 33, - "average_document_length": 18.84, - "max_document_length": 42, + "min_document_length": 3598, + "average_document_length": 3607.18, + "max_document_length": 3622, "unique_documents": 100, - "min_query_length": 3598, - "average_query_length": 7214.36, - "max_query_length": 3622, + "num_queries": 50, + "min_query_length": 33, + "average_query_length": 37.68, + "max_query_length": 42, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, 
"min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -73,23 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test_2048": { - "number_of_characters": 726110, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 726110, "num_documents": 100, - "min_document_length": 35, - "average_document_length": 18.9, - "max_document_length": 42, + "min_document_length": 7233, + "average_document_length": 7242.2, + "max_document_length": 7257, "unique_documents": 100, - "min_query_length": 7233, - "average_query_length": 14484.4, - "max_query_length": 7257, + "num_queries": 50, + "min_query_length": 35, + "average_query_length": 37.8, + "max_query_length": 42, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -99,23 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test_4096": { - "number_of_characters": 1453698, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 1453698, "num_documents": 100, - "min_document_length": 34, - "average_document_length": 18.82, - "max_document_length": 42, + "min_document_length": 14509, + "average_document_length": 14518.16, + "max_document_length": 14533, "unique_documents": 100, - "min_query_length": 14509, - "average_query_length": 29036.32, - "max_query_length": 14533, + "num_queries": 50, + "min_query_length": 34, + "average_query_length": 37.64, + "max_query_length": 42, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, 
"min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -125,23 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test_8192": { - "number_of_characters": 2908993, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 2908993, "num_documents": 100, - "min_document_length": 33, - "average_document_length": 18.77, - "max_document_length": 41, + "min_document_length": 29062, + "average_document_length": 29071.16, + "max_document_length": 29086, "unique_documents": 100, - "min_query_length": 29062, - "average_query_length": 58142.32, - "max_query_length": 29086, + "num_queries": 50, + "min_query_length": 33, + "average_query_length": 37.54, + "max_query_length": 41, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -151,23 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test_16384": { - "number_of_characters": 5819422, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 5819422, "num_documents": 100, - "min_document_length": 34, - "average_document_length": 19.06, - "max_document_length": 45, + "min_document_length": 58166, + "average_document_length": 58175.16, + "max_document_length": 58190, "unique_documents": 100, - "min_query_length": 58166, - "average_query_length": 116350.32, - "max_query_length": 58190, + "num_queries": 50, + "min_query_length": 34, + "average_query_length": 38.12, + "max_query_length": 45, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 
50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -177,23 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test_32768": { - "number_of_characters": 11639903, "num_samples": 150, - "num_queries": 50, + "number_of_characters": 11639903, "num_documents": 100, - "min_document_length": 33, - "average_document_length": 18.87, - "max_document_length": 45, + "min_document_length": 116371, + "average_document_length": 116380.16, + "max_document_length": 116395, "unique_documents": 100, - "min_query_length": 116371, - "average_query_length": 232760.32, - "max_query_length": 116395, + "num_queries": 50, + "min_query_length": 33, + "average_query_length": 37.74, + "max_query_length": 45, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -203,6 +226,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LEMBQMSumRetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBQMSumRetrieval.json index 5ebf7b80c9..7c59b15926 100644 --- a/mteb/descriptive_stats/Retrieval/LEMBQMSumRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/LEMBQMSumRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 11169115, "num_samples": 1724, - "num_queries": 1527, + "number_of_characters": 11169115, "num_documents": 197, - "min_document_length": 84, - "average_document_length": 3360.1979695431473, - "max_document_length": 1574, + "min_document_length": 6428, + "average_document_length": 
53335.817258883246, + "max_document_length": 147260, "unique_documents": 197, - "min_query_length": 6428, - "average_query_length": 6880.914210870988, - "max_query_length": 147260, + "num_queries": 1527, + "min_query_length": 84, + "average_query_length": 433.50294695481335, + "max_query_length": 1574, "unique_queries": 1527, + "none_queries": 0, + "num_relevant_docs": 1527, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LEMBSummScreenFDRetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBSummScreenFDRetrieval.json index 2b1e0b5102..1c891f988b 100644 --- a/mteb/descriptive_stats/Retrieval/LEMBSummScreenFDRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/LEMBSummScreenFDRetrieval.json @@ -1,17 +1,19 @@ { "validation": { - "number_of_characters": 10565795, "num_samples": 672, - "num_queries": 336, + "number_of_characters": 10565795, "num_documents": 336, - "min_document_length": 151, - "average_document_length": 591.4910714285714, - "max_document_length": 2495, + "min_document_length": 8768, + "average_document_length": 30854.32738095238, + "max_document_length": 91515, "unique_documents": 336, - "min_query_length": 8768, - "average_query_length": 30854.32738095238, - "max_query_length": 91515, + "num_queries": 336, + "min_query_length": 151, + "average_query_length": 591.4910714285714, + "max_query_length": 2495, "unique_queries": 336, + "none_queries": 0, + "num_relevant_docs": 336, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + 
"num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LEMBWikimQARetrieval.json b/mteb/descriptive_stats/Retrieval/LEMBWikimQARetrieval.json index 52ef998c12..888557eaed 100644 --- a/mteb/descriptive_stats/Retrieval/LEMBWikimQARetrieval.json +++ b/mteb/descriptive_stats/Retrieval/LEMBWikimQARetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 11253952, "num_samples": 600, - "num_queries": 300, + "number_of_characters": 11253952, "num_documents": 300, - "min_document_length": 33, - "average_document_length": 67.57, - "max_document_length": 129, + "min_document_length": 5796, + "average_document_length": 37445.60333333333, + "max_document_length": 75837, "unique_documents": 300, - "min_query_length": 5796, - "average_query_length": 37445.60333333333, - "max_query_length": 75837, + "num_queries": 300, + "min_query_length": 33, + "average_query_length": 67.57, + "max_query_length": 129, "unique_queries": 300, + "none_queries": 0, + "num_relevant_docs": 300, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LeCaRDv2.json b/mteb/descriptive_stats/Retrieval/LeCaRDv2.json index 4a440f1f0a..21c2e0e913 100644 --- a/mteb/descriptive_stats/Retrieval/LeCaRDv2.json +++ b/mteb/descriptive_stats/Retrieval/LeCaRDv2.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 28129613, "num_samples": 3954, - "num_queries": 159, + "number_of_characters": 28129613, "num_documents": 3795, - "min_document_length": 556, - "average_document_length": 178.45876152832673, - "max_document_length": 34790, + 
"min_document_length": 967, + "average_document_length": 7233.823978919631, + "max_document_length": 168523, "unique_documents": 3795, - "min_query_length": 967, - "average_query_length": 172656.36477987422, - "max_query_length": 168523, + "num_queries": 159, + "min_query_length": 556, + "average_query_length": 4259.440251572327, + "max_query_length": 34790, "unique_queries": 159, + "none_queries": 0, + "num_relevant_docs": 3896, "min_relevant_docs_per_query": 4, "average_relevant_docs_per_query": 24.50314465408805, "max_relevant_docs_per_query": 30, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LegalBenchConsumerContractsQA.json b/mteb/descriptive_stats/Retrieval/LegalBenchConsumerContractsQA.json index f02b1ea887..ab7d595c5c 100644 --- a/mteb/descriptive_stats/Retrieval/LegalBenchConsumerContractsQA.json +++ b/mteb/descriptive_stats/Retrieval/LegalBenchConsumerContractsQA.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 459605, "num_samples": 550, - "num_queries": 396, + "number_of_characters": 459605, "num_documents": 154, - "min_document_length": 24, - "average_document_length": 237.62337662337663, - "max_document_length": 258, + "min_document_length": 613, + "average_document_length": 2746.8246753246754, + "max_document_length": 8095, "unique_documents": 154, - "min_query_length": 613, - "average_query_length": 1068.209595959596, - "max_query_length": 8095, + "num_queries": 396, + "min_query_length": 24, + "average_query_length": 92.4090909090909, + "max_query_length": 258, "unique_queries": 396, + "none_queries": 0, + "num_relevant_docs": 396, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LegalBenchCorporateLobbying.json b/mteb/descriptive_stats/Retrieval/LegalBenchCorporateLobbying.json index 03d2936b45..0d4e4be493 100644 --- a/mteb/descriptive_stats/Retrieval/LegalBenchCorporateLobbying.json +++ b/mteb/descriptive_stats/Retrieval/LegalBenchCorporateLobbying.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 429952, "num_samples": 659, - "num_queries": 340, + "number_of_characters": 429952, "num_documents": 319, - "min_document_length": 41, - "average_document_length": 189.58934169278996, - "max_document_length": 733, + "min_document_length": 137, + "average_document_length": 1158.2225705329154, + "max_document_length": 11451, "unique_documents": 319, - "min_query_length": 137, - "average_query_length": 1086.6852941176471, - "max_query_length": 11451, + "num_queries": 340, + "min_query_length": 41, + "average_query_length": 177.87941176470588, + "max_query_length": 733, "unique_queries": 340, + "none_queries": 0, + "num_relevant_docs": 340, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LegalQuAD.json b/mteb/descriptive_stats/Retrieval/LegalQuAD.json index a31b9a3adf..535380da81 100644 --- a/mteb/descriptive_stats/Retrieval/LegalQuAD.json +++ b/mteb/descriptive_stats/Retrieval/LegalQuAD.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 3910984, "num_samples": 400, - "num_queries": 200, + "number_of_characters": 3910984, "num_documents": 200, - 
"min_document_length": 22, - "average_document_length": 71.965, - "max_document_length": 119, + "min_document_length": 769, + "average_document_length": 19482.955, + "max_document_length": 94998, "unique_documents": 200, - "min_query_length": 769, - "average_query_length": 19482.955, - "max_query_length": 94998, + "num_queries": 200, + "min_query_length": 22, + "average_query_length": 71.965, + "max_query_length": 119, "unique_queries": 200, + "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LegalSummarization.json b/mteb/descriptive_stats/Retrieval/LegalSummarization.json index 573c1e4f61..937493bfc9 100644 --- a/mteb/descriptive_stats/Retrieval/LegalSummarization.json +++ b/mteb/descriptive_stats/Retrieval/LegalSummarization.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 295244, "num_samples": 722, - "num_queries": 284, + "number_of_characters": 295244, "num_documents": 438, - "min_document_length": 17, - "average_document_length": 66.90867579908675, - "max_document_length": 466, + "min_document_length": 45, + "average_document_length": 607.1643835616438, + "max_document_length": 6497, "unique_documents": 438, - "min_query_length": 45, - "average_query_length": 936.4014084507043, - "max_query_length": 6497, + "num_queries": 284, + "min_query_length": 17, + "average_query_length": 103.19014084507042, + "max_query_length": 466, "unique_queries": 284, + "none_queries": 0, + "num_relevant_docs": 439, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.545774647887324, "max_relevant_docs_per_query": 11, @@ -21,6 +23,7 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/LitSearchRetrieval.json b/mteb/descriptive_stats/Retrieval/LitSearchRetrieval.json index cfa8267140..42e181152a 100644 --- a/mteb/descriptive_stats/Retrieval/LitSearchRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/LitSearchRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 54079974, "num_samples": 64780, - "num_queries": 597, + "number_of_characters": 58371129, "num_documents": 64183, - "min_document_length": 37, - "average_document_length": 1.3134007447454934, - "max_document_length": 327, + "min_document_length": 0, + "average_document_length": 908.135035757132, + "max_document_length": 18451, "unique_documents": 64183, - "min_query_length": 0, - "average_query_length": 90445.01842546064, - "max_query_length": 18356, + "num_queries": 597, + "min_query_length": 37, + "average_query_length": 141.20268006700167, + "max_query_length": 327, "unique_queries": 597, + "none_queries": 0, + "num_relevant_docs": 639, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.07035175879397, "max_relevant_docs_per_query": 5, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json b/mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json index a8c8e7075b..d8bbfea436 100644 --- a/mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json @@ -1,19 +1,19 @@ { "dev": { - "number_of_characters": 35274535649, "num_samples": 106345647, - "num_queries": 13495, + 
"number_of_characters": 37176781172, "num_documents": 106332152, - "num_relevant_docs": 130408, - "min_document_length": 5, - "average_document_length": 0.004631364932781573, - "max_document_length": 176, + "min_document_length": 2, + "average_document_length": 349.6241542163089, + "max_document_length": 84930, "unique_documents": 106332152, - "min_query_length": 1, - "average_query_length": 2613860.1842163764, - "max_query_length": 84925, + "num_queries": 13495, + "min_query_length": 5, + "average_query_length": 36.49225639125602, + "max_query_length": 176, "unique_queries": 13495, "none_queries": 0, + "num_relevant_docs": 130408, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.3059651722860317, "max_relevant_docs_per_query": 20, @@ -23,25 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "ar": { - "number_of_characters": 624607465, "num_samples": 2064310, - "num_queries": 2896, + "number_of_characters": 656963110, "num_documents": 2061414, - "num_relevant_docs": 29197, - "min_document_length": 12, - "average_document_length": 0.041416231771007665, - "max_document_length": 101, + "min_document_length": 4, + "average_document_length": 318.6539598547405, + "max_document_length": 48550, "unique_documents": 2061414, - "min_query_length": 1, - "average_query_length": 215649.89261049725, - "max_query_length": 48538, + "num_queries": 2896, + "min_query_length": 12, + "average_query_length": 29.480662983425415, + "max_query_length": 101, "unique_queries": 2896, "none_queries": 0, + "num_relevant_docs": 29197, "min_relevant_docs_per_query": 7, "average_relevant_docs_per_query": 1.953729281767956, "max_relevant_docs_per_query": 17, @@ -51,25 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, 
"unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "bn": { - "number_of_characters": 109132820, "num_samples": 297676, - "num_queries": 411, + "number_of_characters": 113943984, "num_documents": 297265, - "num_relevant_docs": 4206, - "min_document_length": 16, - "average_document_length": 0.06495551107597598, - "max_document_length": 112, + "min_document_length": 3, + "average_document_length": 383.2428136511194, + "max_document_length": 17108, "unique_documents": 297265, - "min_query_length": 1, - "average_query_length": 265482.99513381993, - "max_query_length": 17102, + "num_queries": 411, + "min_query_length": 16, + "average_query_length": 46.98053527980535, + "max_query_length": 112, "unique_queries": 411, "none_queries": 0, + "num_relevant_docs": 4206, "min_relevant_docs_per_query": 7, "average_relevant_docs_per_query": 2.099756690997567, "max_relevant_docs_per_query": 13, @@ -79,25 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "de": { - "number_of_characters": 6274005668, "num_samples": 15866527, - "num_queries": 305, + "number_of_characters": 6573073185, "num_documents": 15866222, - "num_relevant_docs": 3144, - "min_document_length": 15, - "average_document_length": 0.0008842684792888944, - "max_document_length": 87, + "min_document_length": 4, + "average_document_length": 414.28004442393404, + "max_document_length": 64968, "unique_documents": 15866222, - "min_query_length": 1, - "average_query_length": 20570464.386885244, - "max_query_length": 64939, + "num_queries": 305, + "min_query_length": 15, + "average_query_length": 46.0, + "max_query_length": 87, "unique_queries": 305, "none_queries": 0, + "num_relevant_docs": 3144, "min_relevant_docs_per_query": 
9, "average_relevant_docs_per_query": 2.6590163934426227, "max_relevant_docs_per_query": 20, @@ -107,25 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "en": { - "number_of_characters": 12534362069, "num_samples": 32894020, - "num_queries": 799, + "number_of_characters": 13190354940, "num_documents": 32893221, - "num_relevant_docs": 8350, - "min_document_length": 16, - "average_document_length": 0.0009776482515956707, - "max_document_length": 122, + "min_document_length": 3, + "average_document_length": 401.0042914921588, + "max_document_length": 36471, "unique_documents": 32893221, - "min_query_length": 1, - "average_query_length": 15687521.790988736, - "max_query_length": 36444, + "num_queries": 799, + "min_query_length": 16, + "average_query_length": 40.247809762202756, + "max_query_length": 122, "unique_queries": 799, "none_queries": 0, + "num_relevant_docs": 8350, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.911138923654568, "max_relevant_docs_per_query": 16, @@ -135,25 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "es": { - "number_of_characters": 3984898056, "num_samples": 10374601, - "num_queries": 648, + "number_of_characters": 4188115187, "num_documents": 10373953, - "num_relevant_docs": 6443, - "min_document_length": 19, - "average_document_length": 0.0029591419972695076, - "max_document_length": 88, + "min_document_length": 2, + "average_document_length": 403.71153493754986, + "max_document_length": 57012, "unique_documents": 10373953, - "min_query_length": 1, - "average_query_length": 6149486.663580247, - "max_query_length": 56999, + 
"num_queries": 648, + "min_query_length": 19, + "average_query_length": 47.373456790123456, + "max_query_length": 88, "unique_queries": 648, "none_queries": 0, + "num_relevant_docs": 6443, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 4.609567901234568, "max_relevant_docs_per_query": 10, @@ -163,25 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fa": { - "number_of_characters": 548173051, "num_samples": 2207804, - "num_queries": 632, + "number_of_characters": 579734962, "num_documents": 2207172, - "num_relevant_docs": 6571, - "min_document_length": 18, - "average_document_length": 0.011782951215401427, - "max_document_length": 82, + "min_document_length": 4, + "average_document_length": 262.6478385010321, + "max_document_length": 36495, "unique_documents": 2207172, - "min_query_length": 1, - "average_query_length": 867321.2721518987, - "max_query_length": 36480, + "num_queries": 632, + "min_query_length": 18, + "average_query_length": 41.1503164556962, + "max_query_length": 82, "unique_queries": 632, "none_queries": 0, + "num_relevant_docs": 6571, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.079113924050633, "max_relevant_docs_per_query": 20, @@ -191,25 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fi": { - "number_of_characters": 647319788, "num_samples": 1884780, - "num_queries": 1271, + "number_of_characters": 677881948, "num_documents": 1883509, - "num_relevant_docs": 12008, - "min_document_length": 14, - "average_document_length": 0.026071019570386975, - "max_document_length": 130, + "min_document_length": 4, + 
"average_document_length": 359.87767671935734, + "max_document_length": 11578, "unique_documents": 1883509, - "min_query_length": 1, - "average_query_length": 509260.96223446104, - "max_query_length": 11549, + "num_queries": 1271, + "min_query_length": 14, + "average_query_length": 38.63493312352478, + "max_query_length": 130, "unique_queries": 1271, "none_queries": 0, + "num_relevant_docs": 12008, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.925255704169945, "max_relevant_docs_per_query": 16, @@ -219,25 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fr": { - "number_of_characters": 4741061206, "num_samples": 14637296, - "num_queries": 343, + "number_of_characters": 5029687134, "num_documents": 14636953, - "num_relevant_docs": 3429, - "min_document_length": 16, - "average_document_length": 0.0010283561066295698, - "max_document_length": 83, + "min_document_length": 3, + "average_document_length": 343.6283550271699, + "max_document_length": 52638, "unique_documents": 14636953, - "min_query_length": 1, - "average_query_length": 13822291.994169096, - "max_query_length": 52598, + "num_queries": 343, + "min_query_length": 16, + "average_query_length": 43.883381924198254, + "max_query_length": 83, "unique_queries": 343, "none_queries": 0, + "num_relevant_docs": 3429, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.131195335276968, "max_relevant_docs_per_query": 10, @@ -247,25 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hi": { - "number_of_characters": 179956335, "num_samples": 506614, - "num_queries": 350, + "number_of_characters": 
187823359, "num_documents": 506264, - "num_relevant_docs": 3494, - "min_document_length": 24, - "average_document_length": 0.0368760172558191, - "max_document_length": 120, + "min_document_length": 2, + "average_document_length": 370.96196845914386, + "max_document_length": 44769, "unique_documents": 506264, - "min_query_length": 1, - "average_query_length": 514107.61714285717, - "max_query_length": 44761, + "num_queries": 350, + "min_query_length": 24, + "average_query_length": 53.34, + "max_query_length": 120, "unique_queries": 350, "none_queries": 0, + "num_relevant_docs": 3494, "min_relevant_docs_per_query": 6, "average_relevant_docs_per_query": 2.1485714285714286, "max_relevant_docs_per_query": 10, @@ -275,25 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "id": { - "number_of_characters": 479789527, "num_samples": 1447275, - "num_queries": 960, + "number_of_characters": 506649583, "num_documents": 1446315, - "num_relevant_docs": 9668, - "min_document_length": 13, - "average_document_length": 0.025195064698907223, - "max_document_length": 93, + "min_document_length": 4, + "average_document_length": 350.2785651811673, + "max_document_length": 39539, "unique_documents": 1446315, - "min_query_length": 1, - "average_query_length": 499742.7989583333, - "max_query_length": 39510, + "num_queries": 960, + "min_query_length": 13, + "average_query_length": 37.958333333333336, + "max_query_length": 93, "unique_queries": 960, "none_queries": 0, + "num_relevant_docs": 9668, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 3.216666666666667, "max_relevant_docs_per_query": 17, @@ -303,25 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, 
"average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ja": { - "number_of_characters": 956943052, "num_samples": 6954474, - "num_queries": 860, + "number_of_characters": 1014226413, "num_documents": 6953614, - "num_relevant_docs": 8354, - "min_document_length": 7, - "average_document_length": 0.0021908032283644158, - "max_document_length": 48, + "min_document_length": 2, + "average_document_length": 145.8538220556965, + "max_document_length": 25236, "unique_documents": 6953614, - "min_query_length": 1, - "average_query_length": 1112706.765116279, - "max_query_length": 25232, + "num_queries": 860, + "min_query_length": 7, + "average_query_length": 17.71395348837209, + "max_query_length": 48, "unique_queries": 860, "none_queries": 0, + "num_relevant_docs": 8354, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.0813953488372094, "max_relevant_docs_per_query": 16, @@ -331,25 +342,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ko": { - "number_of_characters": 247737159, "num_samples": 1486965, - "num_queries": 213, + "number_of_characters": 258664503, "num_documents": 1486752, - "num_relevant_docs": 3057, - "min_document_length": 5, - "average_document_length": 0.0030980284539721486, - "max_document_length": 92, + "min_document_length": 3, + "average_document_length": 173.97649170809927, + "max_document_length": 25246, "unique_documents": 1486752, - "min_query_length": 1, - "average_query_length": 1163063.6291079812, - "max_query_length": 25243, + "num_queries": 213, + "min_query_length": 5, + "average_query_length": 21.624413145539908, + "max_query_length": 92, "unique_queries": 213, "none_queries": 0, + "num_relevant_docs": 3057, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.568075117370892, 
"max_relevant_docs_per_query": 20, @@ -359,25 +371,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ru": { - "number_of_characters": 2969123834, "num_samples": 9545170, - "num_queries": 1252, + "number_of_characters": 3170998510, "num_documents": 9543918, - "num_relevant_docs": 13100, - "min_document_length": 15, - "average_document_length": 0.00578944622114314, - "max_document_length": 108, + "min_document_length": 3, + "average_document_length": 332.2475377512674, + "max_document_length": 61659, "unique_documents": 9543918, - "min_query_length": 1, - "average_query_length": 2371460.5271565495, - "max_query_length": 61639, + "num_queries": 1252, + "min_query_length": 15, + "average_query_length": 44.13258785942492, + "max_query_length": 108, "unique_queries": 1252, "none_queries": 0, + "num_relevant_docs": 13100, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.8434504792332267, "max_relevant_docs_per_query": 18, @@ -387,25 +400,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "sw": { - "number_of_characters": 28413887, "num_samples": 132406, - "num_queries": 482, + "number_of_characters": 30191582, "num_documents": 131924, - "num_relevant_docs": 5092, - "min_document_length": 13, - "average_document_length": 0.14238500955095357, - "max_document_length": 80, + "min_document_length": 6, + "average_document_length": 228.71348655286377, + "max_document_length": 11203, "unique_documents": 131924, - "min_query_length": 1, - "average_query_length": 58911.0020746888, - "max_query_length": 11185, + "num_queries": 482, + "min_query_length": 13, + "average_query_length": 
38.97095435684647, + "max_query_length": 80, "unique_queries": 482, "none_queries": 0, + "num_relevant_docs": 5092, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.887966804979253, "max_relevant_docs_per_query": 17, @@ -415,25 +429,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "te": { - "number_of_characters": 197801286, "num_samples": 518907, - "num_queries": 828, + "number_of_characters": 205300087, "num_documents": 518079, - "num_relevant_docs": 1606, - "min_document_length": 14, - "average_document_length": 0.060911559820027446, - "max_document_length": 111, + "min_document_length": 5, + "average_document_length": 396.2108674545774, + "max_document_length": 17850, "unique_documents": 518079, - "min_query_length": 1, - "average_query_length": 238852.32971014493, - "max_query_length": 17811, + "num_queries": 828, + "min_query_length": 14, + "average_query_length": 38.11231884057971, + "max_query_length": 111, "unique_queries": 828, "none_queries": 0, + "num_relevant_docs": 1606, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0314009661835748, "max_relevant_docs_per_query": 11, @@ -443,25 +458,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "th": { - "number_of_characters": 183360331, "num_samples": 542899, - "num_queries": 733, + "number_of_characters": 193491627, "num_documents": 542166, - "num_relevant_docs": 7573, - "min_document_length": 14, - "average_document_length": 0.0579674859729308, - "max_document_length": 176, + "min_document_length": 5, + "average_document_length": 356.8283496198581, + "max_document_length": 31250, 
"unique_documents": 542166, - "min_query_length": 1, - "average_query_length": 250107.64392905866, - "max_query_length": 31243, + "num_queries": 733, + "min_query_length": 14, + "average_query_length": 42.87585266030014, + "max_query_length": 176, "unique_queries": 733, "none_queries": 0, + "num_relevant_docs": 7573, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.8321964529331514, "max_relevant_docs_per_query": 15, @@ -471,25 +487,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "yo": { - "number_of_characters": 7047836, "num_samples": 49162, - "num_queries": 119, + "number_of_characters": 7819610, "num_documents": 49043, - "num_relevant_docs": 1188, - "min_document_length": 25, - "average_document_length": 0.09145035988826132, - "max_document_length": 56, + "min_document_length": 2, + "average_document_length": 159.35250698366738, + "max_document_length": 10469, "unique_documents": 49043, - "min_query_length": 1, - "average_query_length": 59187.82352941176, - "max_query_length": 10457, + "num_queries": 119, + "min_query_length": 25, + "average_query_length": 37.6890756302521, + "max_query_length": 56, "unique_queries": 119, "none_queries": 0, + "num_relevant_docs": 1188, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.2100840336134453, "max_relevant_docs_per_query": 10, @@ -499,25 +516,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zh": { - "number_of_characters": 560802279, "num_samples": 4934761, - "num_queries": 393, + "number_of_characters": 591861448, "num_documents": 4934368, - "num_relevant_docs": 3928, - "min_document_length": 7, - 
"average_document_length": 0.0008655617092199042, - "max_document_length": 22, + "min_document_length": 2, + "average_document_length": 119.9458931721347, + "max_document_length": 84930, "unique_documents": 4934368, - "min_query_length": 1, - "average_query_length": 1426966.941475827, - "max_query_length": 84925, + "num_queries": 393, + "min_query_length": 7, + "average_query_length": 10.867684478371501, + "max_query_length": 22, "unique_queries": 393, "none_queries": 0, + "num_relevant_docs": 3928, "min_relevant_docs_per_query": 8, "average_relevant_docs_per_query": 2.5292620865139948, "max_relevant_docs_per_query": 10, @@ -527,6 +545,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MIRACLRetrievalHardNegatives.json b/mteb/descriptive_stats/Retrieval/MIRACLRetrievalHardNegatives.json index cc6c3af951..fe2ab24008 100644 --- a/mteb/descriptive_stats/Retrieval/MIRACLRetrievalHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/MIRACLRetrievalHardNegatives.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 983901912, "num_samples": 2460458, - "num_queries": 11076, + "number_of_characters": 1023437450, "num_documents": 2449382, - "min_document_length": 5, - "average_document_length": 0.1694358005407078, - "max_document_length": 176, + "min_document_length": 2, + "average_document_length": 417.6655323669399, + "max_document_length": 48550, "unique_documents": 2449382, - "min_query_length": 1, - "average_query_length": 88794.41124954858, - "max_query_length": 48538, + "num_queries": 11076, + "min_query_length": 5, + "average_query_length": 37.46957385337667, + "max_query_length": 176, "unique_queries": 11076, + "none_queries": 0, + "num_relevant_docs": 106090, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 
2.3643011917659806, "max_relevant_docs_per_query": 20, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "ar": { - "number_of_characters": 81272741, "num_samples": 193103, - "num_queries": 1000, + "number_of_characters": 84206668, "num_documents": 192103, - "min_document_length": 12, - "average_document_length": 0.1540007183646273, - "max_document_length": 83, + "min_document_length": 4, + "average_document_length": 438.1872433017704, + "max_document_length": 48550, "unique_documents": 192103, - "min_query_length": 1, - "average_query_length": 81243.157, - "max_query_length": 48538, + "num_queries": 1000, + "min_query_length": 12, + "average_query_length": 29.584, + "max_query_length": 83, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 10081, "min_relevant_docs_per_query": 7, "average_relevant_docs_per_query": 1.982, "max_relevant_docs_per_query": 17, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "bn": { - "number_of_characters": 109132820, "num_samples": 297676, - "num_queries": 411, + "number_of_characters": 113943984, "num_documents": 297265, - "min_document_length": 16, - "average_document_length": 0.06495551107597598, - "max_document_length": 112, + "min_document_length": 3, + "average_document_length": 383.2428136511194, + "max_document_length": 17108, "unique_documents": 297265, - "min_query_length": 1, - "average_query_length": 265482.99513381993, - "max_query_length": 17102, + "num_queries": 411, + "min_query_length": 16, + "average_query_length": 46.98053527980535, + "max_query_length": 112, 
"unique_queries": 411, + "none_queries": 0, + "num_relevant_docs": 4206, "min_relevant_docs_per_query": 7, "average_relevant_docs_per_query": 2.099756690997567, "max_relevant_docs_per_query": 13, @@ -73,23 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "de": { - "number_of_characters": 35275409, "num_samples": 71582, - "num_queries": 305, + "number_of_characters": 36634702, "num_documents": 71277, - "min_document_length": 15, - "average_document_length": 0.19683768957728298, - "max_document_length": 87, + "min_document_length": 6, + "average_document_length": 513.7796484139344, + "max_document_length": 7667, "unique_documents": 71277, - "min_query_length": 1, - "average_query_length": 115611.07868852459, - "max_query_length": 7635, + "num_queries": 305, + "min_query_length": 15, + "average_query_length": 46.0, + "max_query_length": 87, "unique_queries": 305, + "none_queries": 0, + "num_relevant_docs": 3144, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.6590163934426227, "max_relevant_docs_per_query": 20, @@ -99,23 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "en": { - "number_of_characters": 90963438, "num_samples": 179567, - "num_queries": 799, + "number_of_characters": 94644879, "num_documents": 178768, - "min_document_length": 16, - "average_document_length": 0.17988678063188043, - "max_document_length": 122, + "min_document_length": 4, + "average_document_length": 529.2486406963214, + "max_document_length": 8699, "unique_documents": 178768, - "min_query_length": 1, - "average_query_length": 113806.3579474343, - "max_query_length": 8675, + "num_queries": 
799, + "min_query_length": 16, + "average_query_length": 40.247809762202756, + "max_query_length": 122, "unique_queries": 799, + "none_queries": 0, + "num_relevant_docs": 8350, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.911138923654568, "max_relevant_docs_per_query": 16, @@ -125,23 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "es": { - "number_of_characters": 75804446, "num_samples": 147398, - "num_queries": 648, + "number_of_characters": 78659695, "num_documents": 146750, - "min_document_length": 19, - "average_document_length": 0.20918568994889267, - "max_document_length": 88, + "min_document_length": 4, + "average_document_length": 535.8023645655877, + "max_document_length": 21549, "unique_documents": 146750, - "min_query_length": 1, - "average_query_length": 116934.79629629629, - "max_query_length": 21535, + "num_queries": 648, + "min_query_length": 19, + "average_query_length": 47.373456790123456, + "max_query_length": 88, "unique_queries": 648, + "none_queries": 0, + "num_relevant_docs": 6443, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 4.609567901234568, "max_relevant_docs_per_query": 10, @@ -151,23 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fa": { - "number_of_characters": 53033692, "num_samples": 134228, - "num_queries": 632, + "number_of_characters": 54969343, "num_documents": 133596, - "min_document_length": 18, - "average_document_length": 0.19466900206593013, - "max_document_length": 82, + "min_document_length": 4, + "average_document_length": 411.2648282882721, + "max_document_length": 13668, 
"unique_documents": 133596, - "min_query_length": 1, - "average_query_length": 83872.91930379746, - "max_query_length": 13646, + "num_queries": 632, + "min_query_length": 18, + "average_query_length": 41.1503164556962, + "max_query_length": 82, "unique_queries": 632, + "none_queries": 0, + "num_relevant_docs": 6571, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.079113924050633, "max_relevant_docs_per_query": 20, @@ -177,23 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fi": { - "number_of_characters": 86890075, "num_samples": 195415, - "num_queries": 1000, + "number_of_characters": 90042007, "num_documents": 194415, - "min_document_length": 14, - "average_document_length": 0.19878095825939357, - "max_document_length": 130, + "min_document_length": 4, + "average_document_length": 462.9445310289844, + "max_document_length": 10097, "unique_documents": 194415, - "min_query_length": 1, - "average_query_length": 86851.429, - "max_query_length": 10055, + "num_queries": 1000, + "min_query_length": 14, + "average_query_length": 38.646, + "max_query_length": 130, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 9436, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.918, "max_relevant_docs_per_query": 16, @@ -203,23 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fr": { - "number_of_characters": 33185924, "num_samples": 75700, - "num_queries": 343, + "number_of_characters": 34710100, "num_documents": 75357, - "min_document_length": 16, - "average_document_length": 0.1997425587536659, - "max_document_length": 83, + 
"min_document_length": 4, + "average_document_length": 460.40909271865917, + "max_document_length": 9705, "unique_documents": 75357, - "min_query_length": 1, - "average_query_length": 96708.08163265306, - "max_query_length": 9670, + "num_queries": 343, + "min_query_length": 16, + "average_query_length": 43.883381924198254, + "max_query_length": 83, "unique_queries": 343, + "none_queries": 0, + "num_relevant_docs": 3429, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.131195335276968, "max_relevant_docs_per_query": 10, @@ -229,23 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hi": { - "number_of_characters": 30538763, "num_samples": 63416, - "num_queries": 350, + "number_of_characters": 31468166, "num_documents": 63066, - "min_document_length": 24, - "average_document_length": 0.2960232137760441, - "max_document_length": 120, + "min_document_length": 2, + "average_document_length": 498.6759426632417, + "max_document_length": 29680, "unique_documents": 63066, - "min_query_length": 1, - "average_query_length": 87200.26857142858, - "max_query_length": 29655, + "num_queries": 350, + "min_query_length": 24, + "average_query_length": 53.34, + "max_query_length": 120, "unique_queries": 350, + "none_queries": 0, + "num_relevant_docs": 3494, "min_relevant_docs_per_query": 6, "average_relevant_docs_per_query": 2.1485714285714286, "max_relevant_docs_per_query": 10, @@ -255,23 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "id": { - "number_of_characters": 80132242, "num_samples": 169133, - "num_queries": 960, + "number_of_characters": 83142320, "num_documents": 168173, - 
"min_document_length": 13, - "average_document_length": 0.2166816314152688, - "max_document_length": 93, + "min_document_length": 6, + "average_document_length": 494.1689807519638, + "max_document_length": 13960, "unique_documents": 168173, - "min_query_length": 1, - "average_query_length": 83433.12708333334, - "max_query_length": 13952, + "num_queries": 960, + "min_query_length": 13, + "average_query_length": 37.958333333333336, + "max_query_length": 93, "unique_queries": 960, + "none_queries": 0, + "num_relevant_docs": 9668, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 3.216666666666667, "max_relevant_docs_per_query": 17, @@ -281,23 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ja": { - "number_of_characters": 36602824, "num_samples": 186179, - "num_queries": 860, + "number_of_characters": 38216252, "num_documents": 185319, - "min_document_length": 7, - "average_document_length": 0.08220419924562511, - "max_document_length": 48, + "min_document_length": 3, + "average_document_length": 206.13654293407583, + "max_document_length": 13229, "unique_documents": 185319, - "min_query_length": 1, - "average_query_length": 42543.70930232558, - "max_query_length": 13222, + "num_queries": 860, + "min_query_length": 7, + "average_query_length": 17.71395348837209, + "max_query_length": 48, "unique_queries": 860, + "none_queries": 0, + "num_relevant_docs": 8354, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.0813953488372094, "max_relevant_docs_per_query": 16, @@ -307,23 +342,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ko": { - 
"number_of_characters": 10865749, "num_samples": 43506, - "num_queries": 213, + "number_of_characters": 11166687, "num_documents": 43293, - "min_document_length": 5, - "average_document_length": 0.10639133347192387, - "max_document_length": 92, + "min_document_length": 3, + "average_document_length": 257.82646155267594, + "max_document_length": 7852, "unique_documents": 43293, - "min_query_length": 1, - "average_query_length": 50991.281690140844, - "max_query_length": 7849, + "num_queries": 213, + "min_query_length": 5, + "average_query_length": 21.624413145539908, + "max_query_length": 92, "unique_queries": 213, + "none_queries": 0, + "num_relevant_docs": 3057, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.568075117370892, "max_relevant_docs_per_query": 20, @@ -333,23 +371,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ru": { - "number_of_characters": 99786144, "num_samples": 220114, - "num_queries": 1000, + "number_of_characters": 104360294, "num_documents": 219114, - "min_document_length": 16, - "average_document_length": 0.20105972233631808, - "max_document_length": 108, + "min_document_length": 3, + "average_document_length": 476.0820349224605, + "max_document_length": 12426, "unique_documents": 219114, - "min_query_length": 1, - "average_query_length": 99742.089, - "max_query_length": 12411, + "num_queries": 1000, + "min_query_length": 16, + "average_query_length": 44.055, + "max_query_length": 108, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 10470, "min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 2.833, "max_relevant_docs_per_query": 18, @@ -359,23 +400,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, 
"min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "sw": { - "number_of_characters": 28413887, "num_samples": 132406, - "num_queries": 482, + "number_of_characters": 30191582, "num_documents": 131924, - "min_document_length": 13, - "average_document_length": 0.14238500955095357, - "max_document_length": 80, + "min_document_length": 6, + "average_document_length": 228.71348655286377, + "max_document_length": 11203, "unique_documents": 131924, - "min_query_length": 1, - "average_query_length": 58911.0020746888, - "max_query_length": 11185, + "num_queries": 482, + "min_query_length": 13, + "average_query_length": 38.97095435684647, + "max_query_length": 80, "unique_queries": 482, + "none_queries": 0, + "num_relevant_docs": 5092, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.887966804979253, "max_relevant_docs_per_query": 17, @@ -385,23 +429,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "te": { - "number_of_characters": 59846144, "num_samples": 102789, - "num_queries": 828, + "number_of_characters": 61382503, "num_documents": 101961, - "min_document_length": 14, - "average_document_length": 0.309500691440845, - "max_document_length": 111, + "min_document_length": 5, + "average_document_length": 601.7099283059209, + "max_document_length": 17850, "unique_documents": 101961, - "min_query_length": 1, - "average_query_length": 72239.83937198068, - "max_query_length": 17811, + "num_queries": 828, + "min_query_length": 14, + "average_query_length": 38.11231884057971, + "max_query_length": 111, "unique_queries": 828, + "none_queries": 0, + "num_relevant_docs": 1606, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0314009661835748, "max_relevant_docs_per_query": 11, @@ -411,23 +458,26 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "th": { - "number_of_characters": 53618254, "num_samples": 117382, - "num_queries": 733, + "number_of_characters": 55892521, "num_documents": 116649, - "min_document_length": 14, - "average_document_length": 0.269423655582131, - "max_document_length": 176, + "min_document_length": 6, + "average_document_length": 478.8818849711528, + "max_document_length": 14613, "unique_documents": 116649, - "min_query_length": 1, - "average_query_length": 73106.17462482947, - "max_query_length": 14607, + "num_queries": 733, + "min_query_length": 14, + "average_query_length": 42.87585266030014, + "max_query_length": 176, "unique_queries": 733, + "none_queries": 0, + "num_relevant_docs": 7573, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.8321964529331514, "max_relevant_docs_per_query": 15, @@ -437,23 +487,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "yo": { - "number_of_characters": 7047836, "num_samples": 49162, - "num_queries": 119, + "number_of_characters": 7819610, "num_documents": 49043, - "min_document_length": 25, - "average_document_length": 0.09145035988826132, - "max_document_length": 56, + "min_document_length": 2, + "average_document_length": 159.35250698366738, + "max_document_length": 10469, "unique_documents": 49043, - "min_query_length": 1, - "average_query_length": 59187.82352941176, - "max_query_length": 10457, + "num_queries": 119, + "min_query_length": 25, + "average_query_length": 37.6890756302521, + "max_query_length": 56, "unique_queries": 119, + "none_queries": 0, + "num_relevant_docs": 1188, 
"min_relevant_docs_per_query": 9, "average_relevant_docs_per_query": 1.2100840336134453, "max_relevant_docs_per_query": 10, @@ -463,23 +516,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zh": { - "number_of_characters": 11491524, "num_samples": 81702, - "num_queries": 393, + "number_of_characters": 11986137, "num_documents": 81309, - "min_document_length": 7, - "average_document_length": 0.052528010429349764, - "max_document_length": 22, + "min_document_length": 3, + "average_document_length": 147.36211243527777, + "max_document_length": 8288, "unique_documents": 81309, - "min_query_length": 1, - "average_query_length": 29229.651399491093, - "max_query_length": 8284, + "num_queries": 393, + "min_query_length": 7, + "average_query_length": 10.867684478371501, + "max_query_length": 22, "unique_queries": 393, + "none_queries": 0, + "num_relevant_docs": 3928, "min_relevant_docs_per_query": 8, "average_relevant_docs_per_query": 2.5292620865139948, "max_relevant_docs_per_query": 10, @@ -489,6 +545,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MLQARetrieval.json b/mteb/descriptive_stats/Retrieval/MLQARetrieval.json index b19c2b832e..2e09aaf6fd 100644 --- a/mteb/descriptive_stats/Retrieval/MLQARetrieval.json +++ b/mteb/descriptive_stats/Retrieval/MLQARetrieval.json @@ -1,17 +1,19 @@ { "validation": { - "number_of_characters": 10812098, "num_samples": 29490, - "num_queries": 15747, + "number_of_characters": 10812098, "num_documents": 13743, - "min_document_length": 5, - "average_document_length": 51.51284290184094, - "max_document_length": 182, + 
"min_document_length": 51, + "average_document_length": 735.2220766935894, + "max_document_length": 12791, "unique_documents": 13743, - "min_query_length": 51, - "average_query_length": 641.6559979678669, - "max_query_length": 12791, + "num_queries": 15747, + "min_query_length": 5, + "average_query_length": 44.95719819648187, + "max_query_length": 182, "unique_queries": 15747, + "none_queries": 0, + "num_relevant_docs": 15747, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "ara-ara": { - "number_of_characters": 326497, "num_samples": 956, - "num_queries": 517, + "number_of_characters": 326497, "num_documents": 439, - "min_document_length": 12, - "average_document_length": 49.840546697038725, - "max_document_length": 119, + "min_document_length": 56, + "average_document_length": 693.8883826879271, + "max_document_length": 3967, "unique_documents": 439, - "min_query_length": 56, - "average_query_length": 589.2011605415861, - "max_query_length": 3967, + "num_queries": 517, + "min_query_length": 12, + "average_query_length": 42.321083172147, + "max_query_length": 119, "unique_queries": 517, + "none_queries": 0, + "num_relevant_docs": 517, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-deu": { - "number_of_characters": 140511, "num_samples": 377, - "num_queries": 207, + "number_of_characters": 140511, "num_documents": 170, 
- "min_document_length": 18, - "average_document_length": 67.1470588235294, - "max_document_length": 172, + "min_document_length": 56, + "average_document_length": 759.3882352941176, + "max_document_length": 3967, "unique_documents": 170, - "min_query_length": 56, - "average_query_length": 623.6521739130435, - "max_query_length": 3967, + "num_queries": 207, + "min_query_length": 18, + "average_query_length": 55.14492753623188, + "max_query_length": 172, "unique_queries": 207, + "none_queries": 0, + "num_relevant_docs": 207, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -73,23 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-eng": { - "number_of_characters": 330482, "num_samples": 956, - "num_queries": 517, + "number_of_characters": 330482, "num_documents": 439, - "min_document_length": 17, - "average_document_length": 58.91799544419134, - "max_document_length": 139, + "min_document_length": 56, + "average_document_length": 693.8883826879271, + "max_document_length": 3967, "unique_documents": 439, - "min_query_length": 56, - "average_query_length": 589.2011605415861, - "max_query_length": 3967, + "num_queries": 517, + "min_query_length": 17, + "average_query_length": 50.029013539651835, + "max_query_length": 139, "unique_queries": 517, + "none_queries": 0, + "num_relevant_docs": 517, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -99,23 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-spa": { - "number_of_characters": 100247, "num_samples": 301, - 
"num_queries": 161, + "number_of_characters": 100247, "num_documents": 140, - "min_document_length": 19, - "average_document_length": 61.74285714285714, - "max_document_length": 136, + "min_document_length": 56, + "average_document_length": 654.3071428571428, + "max_document_length": 3338, "unique_documents": 140, - "min_query_length": 56, - "average_query_length": 568.9627329192547, - "max_query_length": 3338, + "num_queries": 161, + "min_query_length": 19, + "average_query_length": 53.68944099378882, + "max_query_length": 136, "unique_queries": 161, + "none_queries": 0, + "num_relevant_docs": 161, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -125,23 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-hin": { - "number_of_characters": 106786, "num_samples": 341, - "num_queries": 186, + "number_of_characters": 106786, "num_documents": 155, - "min_document_length": 18, - "average_document_length": 62.348387096774196, - "max_document_length": 123, + "min_document_length": 73, + "average_document_length": 626.5935483870968, + "max_document_length": 2860, "unique_documents": 155, - "min_query_length": 73, - "average_query_length": 522.1612903225806, - "max_query_length": 2860, + "num_queries": 186, + "min_query_length": 18, + "average_query_length": 51.956989247311824, + "max_query_length": 123, "unique_queries": 186, + "none_queries": 0, + "num_relevant_docs": 186, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -151,23 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": 
null }, "ara-vie": { - "number_of_characters": 127164, "num_samples": 311, - "num_queries": 163, + "number_of_characters": 127164, "num_documents": 148, - "min_document_length": 15, - "average_document_length": 54.5945945945946, - "max_document_length": 133, + "min_document_length": 78, + "average_document_length": 804.6216216216217, + "max_document_length": 3787, "unique_documents": 148, - "min_query_length": 78, - "average_query_length": 730.5766871165645, - "max_query_length": 3787, + "num_queries": 163, + "min_query_length": 15, + "average_query_length": 49.57055214723926, + "max_query_length": 133, "unique_queries": 163, + "none_queries": 0, + "num_relevant_docs": 163, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -177,23 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-zho": { - "number_of_characters": 124970, "num_samples": 343, - "num_queries": 188, + "number_of_characters": 124970, "num_documents": 155, - "min_document_length": 7, - "average_document_length": 18.941935483870967, - "max_document_length": 36, + "min_document_length": 60, + "average_document_length": 787.3161290322581, + "max_document_length": 3967, "unique_documents": 155, - "min_query_length": 60, - "average_query_length": 649.1170212765958, - "max_query_length": 3967, + "num_queries": 188, + "min_query_length": 7, + "average_query_length": 15.617021276595745, + "max_query_length": 36, "unique_queries": 188, + "none_queries": 0, + "num_relevant_docs": 188, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -203,23 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": 
null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-ara": { - "number_of_characters": 134602, "num_samples": 386, - "num_queries": 207, + "number_of_characters": 134602, "num_documents": 179, - "min_document_length": 15, - "average_document_length": 49.798882681564244, - "max_document_length": 115, + "min_document_length": 62, + "average_document_length": 702.1675977653631, + "max_document_length": 3009, "unique_documents": 179, - "min_query_length": 62, - "average_query_length": 607.1884057971015, - "max_query_length": 3009, + "num_queries": 207, + "min_query_length": 15, + "average_query_length": 43.06280193236715, + "max_query_length": 115, "unique_queries": 207, + "none_queries": 0, + "num_relevant_docs": 207, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -229,23 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-deu": { - "number_of_characters": 355878, "num_samples": 968, - "num_queries": 512, + "number_of_characters": 355878, "num_documents": 456, - "min_document_length": 16, - "average_document_length": 59.02850877192982, - "max_document_length": 172, + "min_document_length": 55, + "average_document_length": 721.405701754386, + "max_document_length": 5536, "unique_documents": 456, - "min_query_length": 55, - "average_query_length": 642.501953125, - "max_query_length": 5536, + "num_queries": 512, + "min_query_length": 16, + "average_query_length": 52.572265625, + "max_query_length": 172, "unique_queries": 512, + "none_queries": 0, + "num_relevant_docs": 512, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -255,23 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, 
"unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-eng": { - "number_of_characters": 353711, "num_samples": 968, - "num_queries": 512, + "number_of_characters": 353711, "num_documents": 456, - "min_document_length": 15, - "average_document_length": 54.276315789473685, - "max_document_length": 162, + "min_document_length": 55, + "average_document_length": 721.405701754386, + "max_document_length": 5536, "unique_documents": 456, - "min_query_length": 55, - "average_query_length": 642.501953125, - "max_query_length": 5536, + "num_queries": 512, + "min_query_length": 15, + "average_query_length": 48.33984375, + "max_query_length": 162, "unique_queries": 512, + "none_queries": 0, + "num_relevant_docs": 512, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -281,23 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-spa": { - "number_of_characters": 132505, "num_samples": 377, - "num_queries": 196, + "number_of_characters": 132505, "num_documents": 181, - "min_document_length": 14, - "average_document_length": 54.79558011049724, - "max_document_length": 182, + "min_document_length": 55, + "average_document_length": 677.2762430939226, + "max_document_length": 5536, "unique_documents": 181, - "min_query_length": 55, - "average_query_length": 625.4438775510204, - "max_query_length": 5536, + "num_queries": 196, + "min_query_length": 14, + "average_query_length": 50.60204081632653, + "max_query_length": 182, "unique_queries": 196, + "none_queries": 0, + "num_relevant_docs": 196, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -307,23 +342,26 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-hin": { - "number_of_characters": 107808, "num_samples": 309, - "num_queries": 163, + "number_of_characters": 107808, "num_documents": 146, - "min_document_length": 17, - "average_document_length": 52.49315068493151, - "max_document_length": 117, + "min_document_length": 58, + "average_document_length": 685.917808219178, + "max_document_length": 4604, "unique_documents": 146, - "min_query_length": 58, - "average_query_length": 614.3803680981595, - "max_query_length": 4604, + "num_queries": 163, + "min_query_length": 17, + "average_query_length": 47.01840490797546, + "max_query_length": 117, "unique_queries": 163, + "none_queries": 0, + "num_relevant_docs": 163, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -333,23 +371,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-vie": { - "number_of_characters": 158745, "num_samples": 345, - "num_queries": 182, + "number_of_characters": 158745, "num_documents": 163, - "min_document_length": 17, - "average_document_length": 52.2760736196319, - "max_document_length": 171, + "min_document_length": 60, + "average_document_length": 921.6196319018405, + "max_document_length": 5536, "unique_documents": 163, - "min_query_length": 60, - "average_query_length": 825.4065934065934, - "max_query_length": 5536, + "num_queries": 182, + "min_query_length": 17, + "average_query_length": 46.81868131868132, + "max_query_length": 171, "unique_queries": 182, + "none_queries": 0, + "num_relevant_docs": 182, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -359,23 +400,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-zho": { - "number_of_characters": 125856, "num_samples": 357, - "num_queries": 190, + "number_of_characters": 125856, "num_documents": 167, - "min_document_length": 5, - "average_document_length": 16.994011976047904, - "max_document_length": 38, + "min_document_length": 55, + "average_document_length": 736.6347305389221, + "max_document_length": 4781, "unique_documents": 167, - "min_query_length": 55, - "average_query_length": 647.4631578947368, - "max_query_length": 4781, + "num_queries": 190, + "min_query_length": 5, + "average_query_length": 14.936842105263159, + "max_query_length": 38, "unique_queries": 190, + "none_queries": 0, + "num_relevant_docs": 190, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -385,23 +429,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-ara": { - "number_of_characters": 450833, "num_samples": 955, - "num_queries": 517, + "number_of_characters": 450833, "num_documents": 438, - "min_document_length": 12, - "average_document_length": 49.954337899543376, - "max_document_length": 119, + "min_document_length": 63, + "average_document_length": 979.3447488584475, + "max_document_length": 4923, "unique_documents": 438, - "min_query_length": 63, - "average_query_length": 829.6963249516441, - "max_query_length": 4923, + "num_queries": 517, + "min_query_length": 12, + "average_query_length": 42.321083172147, + "max_query_length": 119, "unique_queries": 517, + 
"none_queries": 0, + "num_relevant_docs": 517, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -411,23 +458,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-deu": { - "number_of_characters": 450365, "num_samples": 959, - "num_queries": 512, + "number_of_characters": 450365, "num_documents": 447, - "min_document_length": 16, - "average_document_length": 60.21700223713646, - "max_document_length": 172, + "min_document_length": 73, + "average_document_length": 947.3109619686801, + "max_document_length": 4993, "unique_documents": 447, - "min_query_length": 73, - "average_query_length": 827.046875, - "max_query_length": 4993, + "num_queries": 512, + "min_query_length": 16, + "average_query_length": 52.572265625, + "max_query_length": 172, "unique_queries": 512, + "none_queries": 0, + "num_relevant_docs": 512, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -437,23 +487,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-eng": { - "number_of_characters": 975867, "num_samples": 2126, - "num_queries": 1148, + "number_of_characters": 975867, "num_documents": 978, - "min_document_length": 11, - "average_document_length": 57.534764826175866, - "max_document_length": 162, + "min_document_length": 52, + "average_document_length": 940.2842535787321, + "max_document_length": 4993, "unique_documents": 978, - "min_query_length": 52, - "average_query_length": 801.0435540069686, - "max_query_length": 4993, + "num_queries": 1148, + "min_query_length": 11, + "average_query_length": 
49.01480836236934, + "max_query_length": 162, "unique_queries": 1148, + "none_queries": 0, + "num_relevant_docs": 1148, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -463,23 +516,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-spa": { - "number_of_characters": 423068, "num_samples": 939, - "num_queries": 500, + "number_of_characters": 423068, "num_documents": 439, - "min_document_length": 14, - "average_document_length": 59.391799544419136, - "max_document_length": 182, + "min_document_length": 73, + "average_document_length": 904.3166287015945, + "max_document_length": 4956, "unique_documents": 439, - "min_query_length": 73, - "average_query_length": 793.99, - "max_query_length": 4956, + "num_queries": 500, + "min_query_length": 14, + "average_query_length": 52.146, + "max_query_length": 182, "unique_queries": 500, + "none_queries": 0, + "num_relevant_docs": 500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -489,23 +545,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-hin": { - "number_of_characters": 417146, "num_samples": 930, - "num_queries": 507, + "number_of_characters": 417146, "num_documents": 423, - "min_document_length": 12, - "average_document_length": 59.198581560283685, - "max_document_length": 148, + "min_document_length": 98, + "average_document_length": 926.9621749408983, + "max_document_length": 4993, "unique_documents": 423, - "min_query_length": 98, - "average_query_length": 773.3826429980276, - "max_query_length": 4993, + "num_queries": 
507, + "min_query_length": 12, + "average_query_length": 49.3905325443787, + "max_query_length": 148, "unique_queries": 507, + "none_queries": 0, + "num_relevant_docs": 507, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -515,23 +574,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-vie": { - "number_of_characters": 481917, "num_samples": 963, - "num_queries": 511, + "number_of_characters": 481917, "num_documents": 452, - "min_document_length": 8, - "average_document_length": 54.35840707964602, - "max_document_length": 171, + "min_document_length": 52, + "average_document_length": 1011.8296460176991, + "max_document_length": 4993, "unique_documents": 452, - "min_query_length": 52, - "average_query_length": 895.0039138943249, - "max_query_length": 4993, + "num_queries": 511, + "min_query_length": 8, + "average_query_length": 48.082191780821915, + "max_query_length": 171, "unique_queries": 511, + "none_queries": 0, + "num_relevant_docs": 511, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -541,23 +603,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-zho": { - "number_of_characters": 438406, "num_samples": 934, - "num_queries": 504, + "number_of_characters": 438406, "num_documents": 430, - "min_document_length": 5, - "average_document_length": 18.044186046511626, - "max_document_length": 51, + "min_document_length": 75, + "average_document_length": 1001.5046511627907, + "max_document_length": 4993, "unique_documents": 430, - "min_query_length": 75, - 
"average_query_length": 854.4583333333334, - "max_query_length": 4993, + "num_queries": 504, + "min_query_length": 5, + "average_query_length": 15.39484126984127, + "max_query_length": 51, "unique_queries": 504, + "none_queries": 0, + "num_relevant_docs": 504, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -567,23 +632,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-ara": { - "number_of_characters": 104441, "num_samples": 306, - "num_queries": 161, + "number_of_characters": 104441, "num_documents": 145, - "min_document_length": 12, - "average_document_length": 45.92413793103448, - "max_document_length": 119, + "min_document_length": 71, + "average_document_length": 674.3586206896551, + "max_document_length": 7789, "unique_documents": 145, - "min_query_length": 71, - "average_query_length": 607.3416149068323, - "max_query_length": 7789, + "num_queries": 161, + "min_query_length": 12, + "average_query_length": 41.36024844720497, + "max_query_length": 119, "unique_queries": 161, + "none_queries": 0, + "num_relevant_docs": 161, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -593,23 +661,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-deu": { - "number_of_characters": 110271, "num_samples": 380, - "num_queries": 196, + "number_of_characters": 110271, "num_documents": 184, - "min_document_length": 18, - "average_document_length": 55.25, - "max_document_length": 150, + "min_document_length": 52, + "average_document_length": 544.0489130434783, + 
"max_document_length": 2044, "unique_documents": 184, - "min_query_length": 52, - "average_query_length": 510.73979591836735, - "max_query_length": 2044, + "num_queries": 196, + "min_query_length": 18, + "average_query_length": 51.86734693877551, + "max_query_length": 150, "unique_queries": 196, + "none_queries": 0, + "num_relevant_docs": 196, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -619,23 +690,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-eng": { - "number_of_characters": 315965, "num_samples": 954, - "num_queries": 500, + "number_of_characters": 315965, "num_documents": 454, - "min_document_length": 14, - "average_document_length": 54.136563876651984, - "max_document_length": 162, + "min_document_length": 52, + "average_document_length": 641.8215859030837, + "max_document_length": 12791, "unique_documents": 454, - "min_query_length": 52, - "average_query_length": 582.774, - "max_query_length": 12791, + "num_queries": 500, + "min_query_length": 14, + "average_query_length": 49.156, + "max_query_length": 162, "unique_queries": 500, + "none_queries": 0, + "num_relevant_docs": 500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -645,23 +719,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-spa": { - "number_of_characters": 317460, "num_samples": 954, - "num_queries": 500, + "number_of_characters": 317460, "num_documents": 454, - "min_document_length": 14, - "average_document_length": 57.429515418502206, - "max_document_length": 182, + 
"min_document_length": 52, + "average_document_length": 641.8215859030837, + "max_document_length": 12791, "unique_documents": 454, - "min_query_length": 52, - "average_query_length": 582.774, - "max_query_length": 12791, + "num_queries": 500, + "min_query_length": 14, + "average_query_length": 52.146, + "max_query_length": 182, "unique_queries": 500, + "none_queries": 0, + "num_relevant_docs": 500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -671,23 +748,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-hin": { - "number_of_characters": 125039, "num_samples": 352, - "num_queries": 187, + "number_of_characters": 125039, "num_documents": 165, - "min_document_length": 17, - "average_document_length": 54.49090909090909, - "max_document_length": 129, + "min_document_length": 52, + "average_document_length": 703.3212121212122, + "max_document_length": 12791, "unique_documents": 165, - "min_query_length": 52, - "average_query_length": 620.5775401069519, - "max_query_length": 12791, + "num_queries": 187, + "min_query_length": 17, + "average_query_length": 48.080213903743314, + "max_query_length": 129, "unique_queries": 187, + "none_queries": 0, + "num_relevant_docs": 187, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -697,23 +777,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-vie": { - "number_of_characters": 139091, "num_samples": 365, - "num_queries": 189, + "number_of_characters": 139091, "num_documents": 176, - "min_document_length": 11, - 
"average_document_length": 52.43181818181818, - "max_document_length": 171, + "min_document_length": 65, + "average_document_length": 737.8579545454545, + "max_document_length": 12791, "unique_documents": 176, - "min_query_length": 65, - "average_query_length": 687.1058201058202, - "max_query_length": 12791, + "num_queries": 189, + "min_query_length": 11, + "average_query_length": 48.82539682539682, + "max_query_length": 171, "unique_queries": 189, + "none_queries": 0, + "num_relevant_docs": 189, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -723,23 +806,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-zho": { - "number_of_characters": 93338, "num_samples": 311, - "num_queries": 161, + "number_of_characters": 93338, "num_documents": 150, - "min_document_length": 5, - "average_document_length": 16.733333333333334, - "max_document_length": 51, + "min_document_length": 90, + "average_document_length": 605.52, + "max_document_length": 2037, "unique_documents": 150, - "min_query_length": 90, - "average_query_length": 564.1490683229814, - "max_query_length": 2037, + "num_queries": 161, + "min_query_length": 5, + "average_query_length": 15.590062111801242, + "max_query_length": 51, "unique_queries": 161, + "none_queries": 0, + "num_relevant_docs": 161, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -749,23 +835,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-ara": { - "number_of_characters": 109960, "num_samples": 338, - "num_queries": 186, + "number_of_characters": 
109960, "num_documents": 152, - "min_document_length": 12, - "average_document_length": 53.38157894736842, - "max_document_length": 115, + "min_document_length": 65, + "average_document_length": 670.0394736842105, + "max_document_length": 3913, "unique_documents": 152, - "min_query_length": 65, - "average_query_length": 547.5591397849462, - "max_query_length": 3913, + "num_queries": 186, + "min_query_length": 12, + "average_query_length": 43.623655913978496, + "max_query_length": 115, "unique_queries": 186, + "none_queries": 0, + "num_relevant_docs": 186, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -775,23 +864,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-deu": { - "number_of_characters": 93151, "num_samples": 305, - "num_queries": 163, + "number_of_characters": 93151, "num_documents": 142, - "min_document_length": 17, - "average_document_length": 59.021126760563384, - "max_document_length": 150, + "min_document_length": 65, + "average_document_length": 596.9718309859155, + "max_document_length": 2375, "unique_documents": 142, - "min_query_length": 65, - "average_query_length": 520.0613496932515, - "max_query_length": 2375, + "num_queries": 163, + "min_query_length": 17, + "average_query_length": 51.41717791411043, + "max_query_length": 150, "unique_queries": 163, + "none_queries": 0, + "num_relevant_docs": 163, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -801,23 +893,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-eng": { - "number_of_characters": 
319136, "num_samples": 932, - "num_queries": 507, + "number_of_characters": 319136, "num_documents": 425, - "min_document_length": 11, - "average_document_length": 59.36, - "max_document_length": 139, + "min_document_length": 65, + "average_document_length": 691.5482352941176, + "max_document_length": 3916, "unique_documents": 425, - "min_query_length": 65, - "average_query_length": 579.7001972386588, - "max_query_length": 3916, + "num_queries": 507, + "min_query_length": 11, + "average_query_length": 49.75936883629191, + "max_query_length": 139, "unique_queries": 507, + "none_queries": 0, + "num_relevant_docs": 507, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -827,23 +922,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-spa": { - "number_of_characters": 122669, "num_samples": 344, - "num_queries": 187, + "number_of_characters": 122669, "num_documents": 157, - "min_document_length": 14, - "average_document_length": 62.84076433121019, - "max_document_length": 136, + "min_document_length": 70, + "average_document_length": 718.4904458598726, + "max_document_length": 3916, "unique_documents": 157, - "min_query_length": 70, - "average_query_length": 603.2245989304813, - "max_query_length": 3916, + "num_queries": 187, + "min_query_length": 14, + "average_query_length": 52.75935828877005, + "max_query_length": 136, "unique_queries": 187, + "none_queries": 0, + "num_relevant_docs": 187, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -853,23 +951,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, 
"max_top_ranked_per_query": null }, "hin-hin": { - "number_of_characters": 318949, "num_samples": 932, - "num_queries": 507, + "number_of_characters": 318949, "num_documents": 425, - "min_document_length": 12, - "average_document_length": 58.92, - "max_document_length": 148, + "min_document_length": 65, + "average_document_length": 691.5482352941176, + "max_document_length": 3916, "unique_documents": 425, - "min_query_length": 65, - "average_query_length": 579.7001972386588, - "max_query_length": 3916, + "num_queries": 507, + "min_query_length": 12, + "average_query_length": 49.3905325443787, + "max_query_length": 148, "unique_queries": 507, + "none_queries": 0, + "num_relevant_docs": 507, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -879,23 +980,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-vie": { - "number_of_characters": 130780, "num_samples": 334, - "num_queries": 177, + "number_of_characters": 130780, "num_documents": 157, - "min_document_length": 8, - "average_document_length": 54.50955414012739, - "max_document_length": 152, + "min_document_length": 75, + "average_document_length": 778.484076433121, + "max_document_length": 3916, "unique_documents": 157, - "min_query_length": 75, - "average_query_length": 690.5197740112994, - "max_query_length": 3916, + "num_queries": 177, + "min_query_length": 8, + "average_query_length": 48.35028248587571, + "max_query_length": 152, "unique_queries": 177, + "none_queries": 0, + "num_relevant_docs": 177, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -905,23 +1009,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, 
"min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-zho": { - "number_of_characters": 114001, "num_samples": 351, - "num_queries": 189, + "number_of_characters": 114001, "num_documents": 162, - "min_document_length": 5, - "average_document_length": 18.641975308641975, - "max_document_length": 51, + "min_document_length": 70, + "average_document_length": 685.0679012345679, + "max_document_length": 3308, "unique_documents": 162, - "min_query_length": 70, - "average_query_length": 587.2010582010582, - "max_query_length": 3308, + "num_queries": 189, + "min_query_length": 5, + "average_query_length": 15.97883597883598, + "max_query_length": 51, "unique_queries": 189, + "none_queries": 0, + "num_relevant_docs": 189, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -931,23 +1038,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-ara": { - "number_of_characters": 141482, "num_samples": 315, - "num_queries": 163, + "number_of_characters": 141482, "num_documents": 152, - "min_document_length": 12, - "average_document_length": 44.19736842105263, - "max_document_length": 119, + "min_document_length": 59, + "average_document_length": 886.6052631578947, + "max_document_length": 5540, "unique_documents": 152, - "min_query_length": 59, - "average_query_length": 826.7730061349694, - "max_query_length": 5540, + "num_queries": 163, + "min_query_length": 12, + "average_query_length": 41.214723926380366, + "max_query_length": 119, "unique_queries": 163, + "none_queries": 0, + "num_relevant_docs": 163, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -957,23 +1067,26 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-deu": { - "number_of_characters": 167346, "num_samples": 343, - "num_queries": 182, + "number_of_characters": 167346, "num_documents": 161, - "min_document_length": 18, - "average_document_length": 57.962732919254655, - "max_document_length": 148, + "min_document_length": 71, + "average_document_length": 981.4534161490683, + "max_document_length": 4601, "unique_documents": 161, - "min_query_length": 71, - "average_query_length": 868.2087912087912, - "max_query_length": 4601, + "num_queries": 182, + "min_query_length": 18, + "average_query_length": 51.27472527472528, + "max_query_length": 148, "unique_queries": 182, + "none_queries": 0, + "num_relevant_docs": 182, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -983,23 +1096,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-eng": { - "number_of_characters": 427193, "num_samples": 962, - "num_queries": 511, + "number_of_characters": 427193, "num_documents": 451, - "min_document_length": 11, - "average_document_length": 54.48780487804878, - "max_document_length": 162, + "min_document_length": 51, + "average_document_length": 892.7250554323725, + "max_document_length": 8272, "unique_documents": 451, - "min_query_length": 51, - "average_query_length": 787.9041095890411, - "max_query_length": 8272, + "num_queries": 511, + "min_query_length": 11, + "average_query_length": 48.09001956947162, + "max_query_length": 162, "unique_queries": 511, + "none_queries": 0, + "num_relevant_docs": 511, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, 
"max_relevant_docs_per_query": 1, @@ -1009,23 +1125,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-spa": { - "number_of_characters": 165288, "num_samples": 355, - "num_queries": 189, + "number_of_characters": 165288, "num_documents": 166, - "min_document_length": 14, - "average_document_length": 59.036144578313255, - "max_document_length": 182, + "min_document_length": 65, + "average_document_length": 936.6746987951807, + "max_document_length": 8272, "unique_documents": 166, - "min_query_length": 65, - "average_query_length": 822.6878306878307, - "max_query_length": 8272, + "num_queries": 189, + "min_query_length": 14, + "average_query_length": 51.851851851851855, + "max_query_length": 182, "unique_queries": 189, + "none_queries": 0, + "num_relevant_docs": 189, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1035,23 +1154,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-hin": { - "number_of_characters": 144662, "num_samples": 334, - "num_queries": 177, + "number_of_characters": 144662, "num_documents": 157, - "min_document_length": 12, - "average_document_length": 52.36305732484077, - "max_document_length": 148, + "min_document_length": 65, + "average_document_length": 869.0509554140127, + "max_document_length": 8272, "unique_documents": 157, - "min_query_length": 65, - "average_query_length": 770.8531073446328, - "max_query_length": 8272, + "num_queries": 177, + "min_query_length": 12, + "average_query_length": 46.44632768361582, + "max_query_length": 148, "unique_queries": 177, + "none_queries": 0, + 
"num_relevant_docs": 177, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1061,23 +1183,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-vie": { - "number_of_characters": 427189, "num_samples": 962, - "num_queries": 511, + "number_of_characters": 427189, "num_documents": 451, - "min_document_length": 8, - "average_document_length": 54.47893569844789, - "max_document_length": 171, + "min_document_length": 51, + "average_document_length": 892.7250554323725, + "max_document_length": 8272, "unique_documents": 451, - "min_query_length": 51, - "average_query_length": 787.9041095890411, - "max_query_length": 8272, + "num_queries": 511, + "min_query_length": 8, + "average_query_length": 48.082191780821915, + "max_query_length": 171, "unique_queries": 511, + "none_queries": 0, + "num_relevant_docs": 511, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1087,23 +1212,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-zho": { - "number_of_characters": 162251, "num_samples": 350, - "num_queries": 184, + "number_of_characters": 162251, "num_documents": 166, - "min_document_length": 5, - "average_document_length": 16.680722891566266, - "max_document_length": 32, + "min_document_length": 83, + "average_document_length": 960.7349397590361, + "max_document_length": 5540, "unique_documents": 166, - "min_query_length": 83, - "average_query_length": 866.75, - "max_query_length": 5540, + "num_queries": 184, + "min_query_length": 5, + "average_query_length": 15.048913043478262, + 
"max_query_length": 32, "unique_queries": 184, + "none_queries": 0, + "num_relevant_docs": 184, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1113,23 +1241,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-ara": { - "number_of_characters": 46776, "num_samples": 349, - "num_queries": 188, + "number_of_characters": 46776, "num_documents": 161, - "min_document_length": 12, - "average_document_length": 51.78260869565217, - "max_document_length": 103, + "min_document_length": 52, + "average_document_length": 238.75155279503105, + "max_document_length": 1039, "unique_documents": 161, - "min_query_length": 52, - "average_query_length": 204.4627659574468, - "max_query_length": 1039, + "num_queries": 188, + "min_query_length": 12, + "average_query_length": 44.34574468085106, + "max_query_length": 103, "unique_queries": 188, + "none_queries": 0, + "num_relevant_docs": 188, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1139,23 +1270,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-deu": { - "number_of_characters": 52397, "num_samples": 354, - "num_queries": 190, + "number_of_characters": 52397, "num_documents": 164, - "min_document_length": 16, - "average_document_length": 62.38414634146341, - "max_document_length": 172, + "min_document_length": 52, + "average_document_length": 257.109756097561, + "max_document_length": 1399, "unique_documents": 164, - "min_query_length": 52, - "average_query_length": 221.92631578947368, - "max_query_length": 1399, + "num_queries": 
190, + "min_query_length": 16, + "average_query_length": 53.84736842105263, + "max_query_length": 172, "unique_queries": 190, + "none_queries": 0, + "num_relevant_docs": 190, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1165,23 +1299,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-eng": { - "number_of_characters": 134543, "num_samples": 947, - "num_queries": 504, + "number_of_characters": 134543, "num_documents": 443, - "min_document_length": 11, - "average_document_length": 57.05643340857788, - "max_document_length": 125, + "min_document_length": 51, + "average_document_length": 246.65237020316027, + "max_document_length": 1399, "unique_documents": 443, - "min_query_length": 51, - "average_query_length": 216.79960317460316, - "max_query_length": 1399, + "num_queries": 504, + "min_query_length": 11, + "average_query_length": 50.15079365079365, + "max_query_length": 125, "unique_queries": 504, + "none_queries": 0, + "num_relevant_docs": 504, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1191,23 +1328,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-spa": { - "number_of_characters": 45452, "num_samples": 309, - "num_queries": 161, + "number_of_characters": 45452, "num_documents": 148, - "min_document_length": 14, - "average_document_length": 57.5, - "max_document_length": 136, + "min_document_length": 53, + "average_document_length": 249.6081081081081, + "max_document_length": 1052, "unique_documents": 148, - "min_query_length": 53, - 
"average_query_length": 229.45341614906832, - "max_query_length": 1052, + "num_queries": 161, + "min_query_length": 14, + "average_query_length": 52.857142857142854, + "max_query_length": 136, "unique_queries": 161, + "none_queries": 0, + "num_relevant_docs": 161, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1217,23 +1357,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-hin": { - "number_of_characters": 48722, "num_samples": 352, - "num_queries": 189, + "number_of_characters": 48722, "num_documents": 163, - "min_document_length": 12, - "average_document_length": 60.355828220858896, - "max_document_length": 148, + "min_document_length": 51, + "average_document_length": 238.5521472392638, + "max_document_length": 873, "unique_documents": 163, - "min_query_length": 51, - "average_query_length": 205.73544973544975, - "max_query_length": 873, + "num_queries": 189, + "min_query_length": 12, + "average_query_length": 52.05291005291005, + "max_query_length": 148, "unique_queries": 189, + "none_queries": 0, + "num_relevant_docs": 189, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1243,23 +1386,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-vie": { - "number_of_characters": 54156, "num_samples": 352, - "num_queries": 184, + "number_of_characters": 54156, "num_documents": 168, - "min_document_length": 8, - "average_document_length": 54.035714285714285, - "max_document_length": 152, + "min_document_length": 53, + "average_document_length": 268.32142857142856, + 
"max_document_length": 1399, "unique_documents": 168, - "min_query_length": 53, - "average_query_length": 244.9891304347826, - "max_query_length": 1399, + "num_queries": 184, + "min_query_length": 8, + "average_query_length": 49.33695652173913, + "max_query_length": 152, "unique_queries": 184, + "none_queries": 0, + "num_relevant_docs": 184, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1269,23 +1415,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-zho": { - "number_of_characters": 117026, "num_samples": 947, - "num_queries": 504, + "number_of_characters": 117026, "num_documents": 443, - "min_document_length": 5, - "average_document_length": 17.51467268623025, - "max_document_length": 51, + "min_document_length": 51, + "average_document_length": 246.65237020316027, + "max_document_length": 1399, "unique_documents": 443, - "min_query_length": 51, - "average_query_length": 216.79960317460316, - "max_query_length": 1399, + "num_queries": 504, + "min_query_length": 5, + "average_query_length": 15.39484126984127, + "max_query_length": 51, "unique_queries": 504, + "none_queries": 0, + "num_relevant_docs": 504, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1295,6 +1444,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null @@ -1302,18 +1452,20 @@ } }, "test": { - "number_of_characters": 111293089, "num_samples": 296665, - "num_queries": 158029, + "number_of_characters": 111293089, "num_documents": 138636, - "min_document_length": 5, - "average_document_length": 
49.93927262760033, - "max_document_length": 190, + "min_document_length": 51, + "average_document_length": 752.832655298768, + "max_document_length": 10727, "unique_documents": 138636, - "min_query_length": 51, - "average_query_length": 660.4465509495093, - "max_query_length": 10727, + "num_queries": 158029, + "min_query_length": 5, + "average_query_length": 43.81082586107613, + "max_query_length": 190, "unique_queries": 158029, + "none_queries": 0, + "num_relevant_docs": 158083, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0003417094330787, "max_relevant_docs_per_query": 3, @@ -1323,23 +1475,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "ara-ara": { - "number_of_characters": 3465612, "num_samples": 9979, - "num_queries": 5333, + "number_of_characters": 3465612, "num_documents": 4646, - "min_document_length": 8, - "average_document_length": 47.36310804993543, - "max_document_length": 148, + "min_document_length": 51, + "average_document_length": 698.5714593198451, + "max_document_length": 6491, "unique_documents": 4646, - "min_query_length": 51, - "average_query_length": 608.5810988186762, - "max_query_length": 6491, + "num_queries": 5333, + "min_query_length": 8, + "average_query_length": 41.26176636039752, + "max_query_length": 148, "unique_queries": 5333, + "none_queries": 0, + "num_relevant_docs": 5335, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000375023438965, "max_relevant_docs_per_query": 2, @@ -1349,23 +1504,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-deu": { - "number_of_characters": 
975142, "num_samples": 3151, - "num_queries": 1648, + "number_of_characters": 975142, "num_documents": 1503, - "min_document_length": 12, - "average_document_length": 56.224218230206255, - "max_document_length": 153, + "min_document_length": 51, + "average_document_length": 592.5728542914171, + "max_document_length": 4845, "unique_documents": 1503, - "min_query_length": 51, - "average_query_length": 540.435072815534, - "max_query_length": 4845, + "num_queries": 1648, + "min_query_length": 12, + "average_query_length": 51.27730582524272, + "max_query_length": 153, "unique_queries": 1648, + "none_queries": 0, + "num_relevant_docs": 1649, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0006067961165048, "max_relevant_docs_per_query": 2, @@ -1375,23 +1533,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-eng": { - "number_of_characters": 3504466, "num_samples": 9978, - "num_queries": 5332, + "number_of_characters": 3504466, "num_documents": 4646, - "min_document_length": 13, - "average_document_length": 55.72600086095566, - "max_document_length": 167, + "min_document_length": 51, + "average_document_length": 698.5714593198451, + "max_document_length": 6491, "unique_documents": 4646, - "min_query_length": 51, - "average_query_length": 608.6952363090772, - "max_query_length": 6491, + "num_queries": 5332, + "min_query_length": 13, + "average_query_length": 48.556451612903224, + "max_query_length": 167, "unique_queries": 5332, + "none_queries": 0, + "num_relevant_docs": 5335, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000562640660165, "max_relevant_docs_per_query": 2, @@ -1401,23 +1562,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, 
"min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-spa": { - "number_of_characters": 1363834, "num_samples": 3747, - "num_queries": 1978, + "number_of_characters": 1363834, "num_documents": 1769, - "min_document_length": 12, - "average_document_length": 57.47993216506501, - "max_document_length": 181, + "min_document_length": 52, + "average_document_length": 713.4833239118146, + "max_document_length": 6491, "unique_documents": 1769, - "min_query_length": 52, - "average_query_length": 638.0950455005055, - "max_query_length": 6491, + "num_queries": 1978, + "min_query_length": 12, + "average_query_length": 51.406471183013146, + "max_query_length": 181, "unique_queries": 1978, + "none_queries": 0, + "num_relevant_docs": 1978, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1427,23 +1591,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-hin": { - "number_of_characters": 1150837, "num_samples": 3343, - "num_queries": 1831, + "number_of_characters": 1150837, "num_documents": 1512, - "min_document_length": 14, - "average_document_length": 58.99669312169312, - "max_document_length": 147, + "min_document_length": 51, + "average_document_length": 702.1388888888889, + "max_document_length": 4682, "unique_documents": 1512, - "min_query_length": 51, - "average_query_length": 579.811032222829, - "max_query_length": 4682, + "num_queries": 1831, + "min_query_length": 14, + "average_query_length": 48.71818678317859, + "max_query_length": 147, "unique_queries": 1831, + "none_queries": 0, + "num_relevant_docs": 1831, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1453,23 +1620,26 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-vie": { - "number_of_characters": 1466341, "num_samples": 3880, - "num_queries": 2047, + "number_of_characters": 1466341, "num_documents": 1833, - "min_document_length": 12, - "average_document_length": 54.51500272776868, - "max_document_length": 152, + "min_document_length": 54, + "average_document_length": 745.4528096017458, + "max_document_length": 4615, "unique_documents": 1833, - "min_query_length": 54, - "average_query_length": 667.5207620908647, - "max_query_length": 4615, + "num_queries": 2047, + "min_query_length": 12, + "average_query_length": 48.815828041035665, + "max_query_length": 152, "unique_queries": 2047, + "none_queries": 0, + "num_relevant_docs": 2047, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1479,23 +1649,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-zho": { - "number_of_characters": 1343684, "num_samples": 3610, - "num_queries": 1912, + "number_of_characters": 1343684, "num_documents": 1698, - "min_document_length": 5, - "average_document_length": 16.87396937573616, - "max_document_length": 69, + "min_document_length": 53, + "average_document_length": 774.4593639575971, + "max_document_length": 5425, "unique_documents": 1698, - "min_query_length": 53, - "average_query_length": 687.7782426778243, - "max_query_length": 5425, + "num_queries": 1912, + "min_query_length": 5, + "average_query_length": 14.985355648535565, + "max_query_length": 69, "unique_queries": 1912, + "none_queries": 0, + "num_relevant_docs": 1912, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1505,23 +1678,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-ara": { - "number_of_characters": 1142572, "num_samples": 3146, - "num_queries": 1649, + "number_of_characters": 1142572, "num_documents": 1497, - "min_document_length": 8, - "average_document_length": 43.56112224448898, - "max_document_length": 139, + "min_document_length": 53, + "average_document_length": 719.6800267201069, + "max_document_length": 6774, "unique_documents": 1497, - "min_query_length": 53, - "average_query_length": 653.3420254699818, - "max_query_length": 6774, + "num_queries": 1649, + "min_query_length": 8, + "average_query_length": 39.54578532443905, + "max_query_length": 139, "unique_queries": 1649, + "none_queries": 0, + "num_relevant_docs": 1649, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1531,23 +1707,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-deu": { - "number_of_characters": 3173494, "num_samples": 8566, - "num_queries": 4513, + "number_of_characters": 3173494, "num_documents": 4053, - "min_document_length": 10, - "average_document_length": 57.46829509005675, - "max_document_length": 190, + "min_document_length": 51, + "average_document_length": 725.5304712558599, + "max_document_length": 6774, "unique_documents": 4053, - "min_query_length": 51, - "average_query_length": 651.5787724351873, - "max_query_length": 6774, + "num_queries": 4513, + "min_query_length": 10, + "average_query_length": 51.610680257035234, + "max_query_length": 190, 
"unique_queries": 4513, + "none_queries": 0, + "num_relevant_docs": 4517, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0008863283846665, "max_relevant_docs_per_query": 3, @@ -1557,23 +1736,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-eng": { - "number_of_characters": 3153037, "num_samples": 8566, - "num_queries": 4513, + "number_of_characters": 3153037, "num_documents": 4053, - "min_document_length": 9, - "average_document_length": 52.42092277325438, - "max_document_length": 158, + "min_document_length": 51, + "average_document_length": 725.5304712558599, + "max_document_length": 6774, "unique_documents": 4053, - "min_query_length": 51, - "average_query_length": 651.5787724351873, - "max_query_length": 6774, + "num_queries": 4513, + "min_query_length": 9, + "average_query_length": 47.07777531575449, + "max_query_length": 158, "unique_queries": 4513, + "none_queries": 0, + "num_relevant_docs": 4517, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0008863283846665, "max_relevant_docs_per_query": 2, @@ -1583,23 +1765,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-spa": { - "number_of_characters": 1269348, "num_samples": 3369, - "num_queries": 1775, + "number_of_characters": 1269348, "num_documents": 1594, - "min_document_length": 13, - "average_document_length": 55.78732747804266, - "max_document_length": 155, + "min_document_length": 52, + "average_document_length": 740.5414052697616, + "max_document_length": 5662, "unique_documents": 1594, - "min_query_length": 52, - "average_query_length": 665.0270422535211, - "max_query_length": 
5662, + "num_queries": 1775, + "min_query_length": 13, + "average_query_length": 50.098591549295776, + "max_query_length": 155, "unique_queries": 1775, + "none_queries": 0, + "num_relevant_docs": 1776, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0005633802816902, "max_relevant_docs_per_query": 2, @@ -1609,23 +1794,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-hin": { - "number_of_characters": 932475, "num_samples": 2717, - "num_queries": 1430, + "number_of_characters": 932475, "num_documents": 1287, - "min_document_length": 13, - "average_document_length": 50.162393162393165, - "max_document_length": 130, + "min_document_length": 51, + "average_document_length": 674.3714063714064, + "max_document_length": 4818, "unique_documents": 1287, - "min_query_length": 51, - "average_query_length": 606.9342657342658, - "max_query_length": 4818, + "num_queries": 1430, + "min_query_length": 13, + "average_query_length": 45.146153846153844, + "max_query_length": 130, "unique_queries": 1430, + "none_queries": 0, + "num_relevant_docs": 1430, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1635,23 +1823,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-vie": { - "number_of_characters": 1231990, "num_samples": 3193, - "num_queries": 1675, + "number_of_characters": 1231990, "num_documents": 1518, - "min_document_length": 8, - "average_document_length": 51.46772068511199, - "max_document_length": 176, + "min_document_length": 51, + "average_document_length": 760.1198945981555, + "max_document_length": 6558, 
"unique_documents": 1518, - "min_query_length": 51, - "average_query_length": 688.8728358208955, - "max_query_length": 6558, + "num_queries": 1675, + "min_query_length": 8, + "average_query_length": 46.64358208955224, + "max_query_length": 176, "unique_queries": 1675, + "none_queries": 0, + "num_relevant_docs": 1675, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1661,23 +1852,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-zho": { - "number_of_characters": 1146502, "num_samples": 3075, - "num_queries": 1620, + "number_of_characters": 1146502, "num_documents": 1455, - "min_document_length": 5, - "average_document_length": 16.637113402061857, - "max_document_length": 69, + "min_document_length": 58, + "average_document_length": 771.3367697594501, + "max_document_length": 6774, "unique_documents": 1455, - "min_query_length": 58, - "average_query_length": 692.7746913580247, - "max_query_length": 6774, + "num_queries": 1620, + "min_query_length": 5, + "average_query_length": 14.942592592592593, + "max_query_length": 69, "unique_queries": 1620, + "none_queries": 0, + "num_relevant_docs": 1621, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0006172839506173, "max_relevant_docs_per_query": 2, @@ -1687,23 +1881,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-ara": { - "number_of_characters": 4864548, "num_samples": 9939, - "num_queries": 5333, + "number_of_characters": 4864548, "num_documents": 4606, - "min_document_length": 8, - "average_document_length": 47.774424663482414, - "max_document_length": 148, 
+ "min_document_length": 52, + "average_document_length": 1008.3584455058619, + "max_document_length": 9610, "unique_documents": 4606, - "min_query_length": 52, - "average_query_length": 870.8979936246016, - "max_query_length": 9610, + "num_queries": 5333, + "min_query_length": 8, + "average_query_length": 41.26176636039752, + "max_query_length": 148, "unique_queries": 5333, + "none_queries": 0, + "num_relevant_docs": 5335, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000375023438965, "max_relevant_docs_per_query": 2, @@ -1713,23 +1910,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-deu": { - "number_of_characters": 3903340, "num_samples": 8545, - "num_queries": 4513, + "number_of_characters": 3903340, "num_documents": 4032, - "min_document_length": 10, - "average_document_length": 57.76760912698413, - "max_document_length": 190, + "min_document_length": 51, + "average_document_length": 910.3226686507936, + "max_document_length": 7552, "unique_documents": 4032, - "min_query_length": 51, - "average_query_length": 813.2995789940173, - "max_query_length": 7552, + "num_queries": 4513, + "min_query_length": 10, + "average_query_length": 51.610680257035234, + "max_query_length": 190, "unique_queries": 4513, + "none_queries": 0, + "num_relevant_docs": 4517, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0008863283846665, "max_relevant_docs_per_query": 3, @@ -1739,23 +1939,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-eng": { - "number_of_characters": 10303894, "num_samples": 21498, - "num_queries": 11582, + "number_of_characters": 10303894, 
"num_documents": 9916, - "min_document_length": 9, - "average_document_length": 56.01865671641791, - "max_document_length": 167, + "min_document_length": 51, + "average_document_length": 983.0993344090359, + "max_document_length": 10727, "unique_documents": 9916, - "min_query_length": 51, - "average_query_length": 841.6864962873424, - "max_query_length": 10727, + "num_queries": 11582, + "min_query_length": 9, + "average_query_length": 47.960714902434816, + "max_query_length": 167, "unique_queries": 11582, + "none_queries": 0, + "num_relevant_docs": 11590, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000690726990157, "max_relevant_docs_per_query": 3, @@ -1765,23 +1968,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-spa": { - "number_of_characters": 4738041, "num_samples": 9872, - "num_queries": 5251, + "number_of_characters": 4738041, "num_documents": 4621, - "min_document_length": 12, - "average_document_length": 57.86582990694655, - "max_document_length": 181, + "min_document_length": 51, + "average_document_length": 967.4622376109068, + "max_document_length": 10727, "unique_documents": 4621, - "min_query_length": 51, - "average_query_length": 851.3888783088936, - "max_query_length": 10727, + "num_queries": 5251, + "min_query_length": 12, + "average_query_length": 50.923252713768804, + "max_query_length": 181, "unique_queries": 5251, + "none_queries": 0, + "num_relevant_docs": 5253, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000380879832413, "max_relevant_docs_per_query": 2, @@ -1791,23 +1997,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": 
null }, "eng-hin": { - "number_of_characters": 4235029, "num_samples": 8975, - "num_queries": 4916, + "number_of_characters": 4235029, "num_documents": 4059, - "min_document_length": 8, - "average_document_length": 57.3210150283321, - "max_document_length": 147, + "min_document_length": 51, + "average_document_length": 986.0465631929046, + "max_document_length": 10727, "unique_documents": 4059, - "min_query_length": 51, - "average_query_length": 814.1503254678601, - "max_query_length": 10727, + "num_queries": 4916, + "min_query_length": 8, + "average_query_length": 47.328315703824245, + "max_query_length": 147, "unique_queries": 4916, + "none_queries": 0, + "num_relevant_docs": 4918, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000406834825061, "max_relevant_docs_per_query": 2, @@ -1817,23 +2026,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-vie": { - "number_of_characters": 5254594, "num_samples": 10254, - "num_queries": 5495, + "number_of_characters": 5254594, "num_documents": 4759, - "min_document_length": 8, - "average_document_length": 55.532044547173776, - "max_document_length": 176, + "min_document_length": 51, + "average_document_length": 1048.6062197940744, + "max_document_length": 9610, "unique_documents": 4759, - "min_query_length": 51, - "average_query_length": 908.1559599636033, - "max_query_length": 9610, + "num_queries": 5495, + "min_query_length": 8, + "average_query_length": 48.094085532302095, + "max_query_length": 176, "unique_queries": 5495, + "none_queries": 0, + "num_relevant_docs": 5495, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1843,23 +2055,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + 
"num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-zho": { - "number_of_characters": 4830436, "num_samples": 9604, - "num_queries": 5136, + "number_of_characters": 4830436, "num_documents": 4468, - "min_document_length": 5, - "average_document_length": 17.264547896150404, - "max_document_length": 69, + "min_document_length": 51, + "average_document_length": 1063.8536257833482, + "max_document_length": 10727, "unique_documents": 4468, - "min_query_length": 51, - "average_query_length": 925.4863707165109, - "max_query_length": 10727, + "num_queries": 5136, + "min_query_length": 5, + "average_query_length": 15.019080996884735, + "max_query_length": 69, "unique_queries": 5136, + "none_queries": 0, + "num_relevant_docs": 5137, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0001947040498442, "max_relevant_docs_per_query": 2, @@ -1869,23 +2084,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-ara": { - "number_of_characters": 1249059, "num_samples": 3788, - "num_queries": 1978, + "number_of_characters": 1249059, "num_documents": 1810, - "min_document_length": 9, - "average_document_length": 44.56961325966851, - "max_document_length": 140, + "min_document_length": 58, + "average_document_length": 645.5182320441988, + "max_document_length": 10458, "unique_documents": 1810, - "min_query_length": 58, - "average_query_length": 590.6916076845298, - "max_query_length": 10458, + "num_queries": 1978, + "min_query_length": 9, + "average_query_length": 40.78412537917088, + "max_query_length": 140, "unique_queries": 1978, + "none_queries": 0, + "num_relevant_docs": 1978, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ 
-1895,23 +2113,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-deu": { - "number_of_characters": 1045840, "num_samples": 3400, - "num_queries": 1774, + "number_of_characters": 1045840, "num_documents": 1626, - "min_document_length": 10, - "average_document_length": 56.59225092250922, - "max_document_length": 165, + "min_document_length": 51, + "average_document_length": 586.6057810578105, + "max_document_length": 6250, "unique_documents": 1626, - "min_query_length": 51, - "average_query_length": 537.6668545659527, - "max_query_length": 6250, + "num_queries": 1774, + "min_query_length": 10, + "average_query_length": 51.870913190529876, + "max_query_length": 165, "unique_queries": 1774, + "none_queries": 0, + "num_relevant_docs": 1776, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0011273957158964, "max_relevant_docs_per_query": 3, @@ -1921,23 +2142,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-eng": { - "number_of_characters": 3253877, "num_samples": 10014, - "num_queries": 5253, + "number_of_characters": 3253877, "num_documents": 4761, - "min_document_length": 9, - "average_document_length": 52.77042638101239, - "max_document_length": 167, + "min_document_length": 51, + "average_document_length": 630.6735979836169, + "max_document_length": 10458, "unique_documents": 4761, - "min_query_length": 51, - "average_query_length": 571.604226156482, - "max_query_length": 10458, + "num_queries": 5253, + "min_query_length": 9, + "average_query_length": 47.827907862173994, + "max_query_length": 167, "unique_queries": 5253, + "none_queries": 0, + "num_relevant_docs": 
5253, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1947,23 +2171,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-spa": { - "number_of_characters": 3270035, "num_samples": 10012, - "num_queries": 5251, + "number_of_characters": 3270035, "num_documents": 4761, - "min_document_length": 12, - "average_document_length": 56.16425120772947, - "max_document_length": 181, + "min_document_length": 51, + "average_document_length": 630.6735979836169, + "max_document_length": 10458, "unique_documents": 4761, - "min_query_length": 51, - "average_query_length": 571.821938678347, - "max_query_length": 10458, + "num_queries": 5251, + "min_query_length": 12, + "average_query_length": 50.923252713768804, + "max_query_length": 181, "unique_queries": 5251, + "none_queries": 0, + "num_relevant_docs": 5253, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000380879832413, "max_relevant_docs_per_query": 2, @@ -1973,23 +2200,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-hin": { - "number_of_characters": 1010952, "num_samples": 3241, - "num_queries": 1723, + "number_of_characters": 1010952, "num_documents": 1518, - "min_document_length": 12, - "average_document_length": 52.62845849802372, - "max_document_length": 147, + "min_document_length": 52, + "average_document_length": 613.3478260869565, + "max_document_length": 8825, "unique_documents": 1518, - "min_query_length": 52, - "average_query_length": 540.3726059199072, - "max_query_length": 8825, + "num_queries": 1723, + "min_query_length": 12, + "average_query_length": 
46.36680208937899, + "max_query_length": 147, "unique_queries": 1723, + "none_queries": 0, + "num_relevant_docs": 1723, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -1999,23 +2229,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-vie": { - "number_of_characters": 1333310, "num_samples": 3892, - "num_queries": 2018, + "number_of_characters": 1333310, "num_documents": 1874, - "min_document_length": 8, - "average_document_length": 51.86019210245464, - "max_document_length": 152, + "min_document_length": 51, + "average_document_length": 659.6179295624333, + "max_document_length": 10458, "unique_documents": 1874, - "min_query_length": 51, - "average_query_length": 612.5490584737364, - "max_query_length": 10458, + "num_queries": 2018, + "min_query_length": 8, + "average_query_length": 48.1595639246779, + "max_query_length": 152, "unique_queries": 2018, + "none_queries": 0, + "num_relevant_docs": 2018, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2025,23 +2258,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-zho": { - "number_of_characters": 1225671, "num_samples": 3736, - "num_queries": 1947, + "number_of_characters": 1225671, "num_documents": 1789, - "min_document_length": 5, - "average_document_length": 16.450531022917833, - "max_document_length": 46, + "min_document_length": 53, + "average_document_length": 668.6646171045277, + "max_document_length": 6250, "unique_documents": 1789, - "min_query_length": 53, - "average_query_length": 614.402157164869, - 
"max_query_length": 6250, + "num_queries": 1947, + "min_query_length": 5, + "average_query_length": 15.115562403697997, + "max_query_length": 46, "unique_queries": 1947, + "none_queries": 0, + "num_relevant_docs": 1947, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2051,23 +2287,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-ara": { - "number_of_characters": 1226070, "num_samples": 3333, - "num_queries": 1831, + "number_of_characters": 1226070, "num_documents": 1502, - "min_document_length": 11, - "average_document_length": 51.25632490013316, - "max_document_length": 148, + "min_document_length": 56, + "average_document_length": 765.0352862849534, + "max_document_length": 5572, "unique_documents": 1502, - "min_query_length": 56, - "average_query_length": 627.5712725286728, - "max_query_length": 5572, + "num_queries": 1831, + "min_query_length": 11, + "average_query_length": 42.04642271982523, + "max_query_length": 148, "unique_queries": 1831, + "none_queries": 0, + "num_relevant_docs": 1831, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2077,23 +2316,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-deu": { - "number_of_characters": 990471, "num_samples": 2704, - "num_queries": 1429, + "number_of_characters": 990471, "num_documents": 1275, - "min_document_length": 11, - "average_document_length": 57.16313725490196, - "max_document_length": 174, + "min_document_length": 51, + "average_document_length": 719.676862745098, + "max_document_length": 5103, 
"unique_documents": 1275, - "min_query_length": 51, - "average_query_length": 642.1189643107068, - "max_query_length": 5103, + "num_queries": 1429, + "min_query_length": 11, + "average_query_length": 51.002799160251925, + "max_query_length": 174, "unique_queries": 1429, + "none_queries": 0, + "num_relevant_docs": 1430, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000699790062981, "max_relevant_docs_per_query": 2, @@ -2103,23 +2345,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-eng": { - "number_of_characters": 3354858, "num_samples": 9015, - "num_queries": 4916, + "number_of_characters": 3354858, "num_documents": 4099, - "min_document_length": 9, - "average_document_length": 57.46206391802879, - "max_document_length": 144, + "min_document_length": 51, + "average_document_length": 760.9956086850451, + "max_document_length": 9013, "unique_documents": 4099, - "min_query_length": 51, - "average_query_length": 634.5242066720912, - "max_query_length": 9013, + "num_queries": 4916, + "min_query_length": 9, + "average_query_length": 47.91232709519935, + "max_query_length": 144, "unique_queries": 4916, + "none_queries": 0, + "num_relevant_docs": 4918, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000406834825061, "max_relevant_docs_per_query": 2, @@ -2129,23 +2374,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-spa": { - "number_of_characters": 1186262, "num_samples": 3181, - "num_queries": 1722, + "number_of_characters": 1186262, "num_documents": 1459, - "min_document_length": 13, - "average_document_length": 59.564084989718985, - 
"max_document_length": 154, + "min_document_length": 51, + "average_document_length": 753.5010281014394, + "max_document_length": 6119, "unique_documents": 1459, - "min_query_length": 51, - "average_query_length": 638.4192799070847, - "max_query_length": 6119, + "num_queries": 1722, + "min_query_length": 13, + "average_query_length": 50.46689895470383, + "max_query_length": 154, "unique_queries": 1722, + "none_queries": 0, + "num_relevant_docs": 1723, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0005807200929153, "max_relevant_docs_per_query": 2, @@ -2155,23 +2403,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-hin": { - "number_of_characters": 3351987, "num_samples": 9015, - "num_queries": 4916, + "number_of_characters": 3351987, "num_documents": 4099, - "min_document_length": 8, - "average_document_length": 56.761649182727496, - "max_document_length": 147, + "min_document_length": 51, + "average_document_length": 760.9956086850451, + "max_document_length": 9013, "unique_documents": 4099, - "min_query_length": 51, - "average_query_length": 634.5242066720912, - "max_query_length": 9013, + "num_queries": 4916, + "min_query_length": 8, + "average_query_length": 47.328315703824245, + "max_query_length": 147, "unique_queries": 4916, + "none_queries": 0, + "num_relevant_docs": 4918, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000406834825061, "max_relevant_docs_per_query": 2, @@ -2181,23 +2432,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-vie": { - "number_of_characters": 1385396, "num_samples": 3582, - "num_queries": 1947, + 
"number_of_characters": 1385396, "num_documents": 1635, - "min_document_length": 12, - "average_document_length": 57.411620795107034, - "max_document_length": 138, + "min_document_length": 51, + "average_document_length": 789.9253822629969, + "max_document_length": 9013, "unique_documents": 1635, - "min_query_length": 51, - "average_query_length": 663.3425783256291, - "max_query_length": 9013, + "num_queries": 1947, + "min_query_length": 12, + "average_query_length": 48.21160760143811, + "max_query_length": 138, "unique_queries": 1947, + "none_queries": 0, + "num_relevant_docs": 1947, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2207,23 +2461,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-zho": { - "number_of_characters": 1275490, "num_samples": 3264, - "num_queries": 1767, + "number_of_characters": 1275490, "num_documents": 1497, - "min_document_length": 5, - "average_document_length": 17.8249832999332, - "max_document_length": 56, + "min_document_length": 68, + "average_document_length": 834.2057448229793, + "max_document_length": 9013, "unique_documents": 1497, - "min_query_length": 68, - "average_query_length": 706.737973967176, - "max_query_length": 9013, + "num_queries": 1767, + "min_query_length": 5, + "average_query_length": 15.101301641199774, + "max_query_length": 56, "unique_queries": 1767, + "none_queries": 0, + "num_relevant_docs": 1767, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2233,23 +2490,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, 
"vie-ara": { - "number_of_characters": 1893427, "num_samples": 3869, - "num_queries": 2047, + "number_of_characters": 1893427, "num_documents": 1822, - "min_document_length": 8, - "average_document_length": 46.98957189901208, - "max_document_length": 140, + "min_document_length": 53, + "average_document_length": 992.2129527991218, + "max_document_length": 10556, "unique_documents": 1822, - "min_query_length": 53, - "average_query_length": 883.151929653151, - "max_query_length": 10556, + "num_queries": 2047, + "min_query_length": 8, + "average_query_length": 41.82462139716659, + "max_query_length": 140, "unique_queries": 2047, + "none_queries": 0, + "num_relevant_docs": 2047, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2259,23 +2519,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-deu": { - "number_of_characters": 1384837, "num_samples": 3182, - "num_queries": 1674, + "number_of_characters": 1384837, "num_documents": 1508, - "min_document_length": 10, - "average_document_length": 57.26591511936339, - "max_document_length": 157, + "min_document_length": 51, + "average_document_length": 861.0610079575597, + "max_document_length": 5614, "unique_documents": 1508, - "min_query_length": 51, - "average_query_length": 775.6750298685782, - "max_query_length": 5614, + "num_queries": 1674, + "min_query_length": 10, + "average_query_length": 51.58721624850657, + "max_query_length": 157, "unique_queries": 1674, + "none_queries": 0, + "num_relevant_docs": 1675, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0005973715651135, "max_relevant_docs_per_query": 2, @@ -2285,23 +2548,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 
null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-eng": { - "number_of_characters": 4646248, "num_samples": 10288, - "num_queries": 5493, + "number_of_characters": 4646248, "num_documents": 4795, - "min_document_length": 9, - "average_document_length": 55.114285714285714, - "max_document_length": 167, + "min_document_length": 51, + "average_document_length": 913.8633993743483, + "max_document_length": 10556, "unique_documents": 4795, - "min_query_length": 51, - "average_query_length": 797.7380302202804, - "max_query_length": 10556, + "num_queries": 5493, + "min_query_length": 9, + "average_query_length": 48.11086837793555, + "max_query_length": 167, "unique_queries": 5493, + "none_queries": 0, + "num_relevant_docs": 5495, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0003640997633352, "max_relevant_docs_per_query": 2, @@ -2311,23 +2577,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-spa": { - "number_of_characters": 1822456, "num_samples": 3846, - "num_queries": 2017, + "number_of_characters": 1822456, "num_documents": 1829, - "min_document_length": 12, - "average_document_length": 56.389830508474574, - "max_document_length": 181, + "min_document_length": 51, + "average_document_length": 940.0322580645161, + "max_document_length": 10556, "unique_documents": 1829, - "min_query_length": 51, - "average_query_length": 852.4139811601389, - "max_query_length": 10556, + "num_queries": 2017, + "min_query_length": 12, + "average_query_length": 51.13386217154189, + "max_query_length": 181, "unique_queries": 2017, + "none_queries": 0, + "num_relevant_docs": 2018, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0004957858205255, "max_relevant_docs_per_query": 2, @@ 
-2337,23 +2606,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-hin": { - "number_of_characters": 1467053, "num_samples": 3587, - "num_queries": 1947, + "number_of_characters": 1467053, "num_documents": 1640, - "min_document_length": 15, - "average_document_length": 56.37317073170732, - "max_document_length": 130, + "min_document_length": 54, + "average_document_length": 838.1713414634146, + "max_document_length": 5381, "unique_documents": 1640, - "min_query_length": 54, - "average_query_length": 706.009758602979, - "max_query_length": 5381, + "num_queries": 1947, + "min_query_length": 15, + "average_query_length": 47.484334874165384, + "max_query_length": 130, "unique_queries": 1947, + "none_queries": 0, + "num_relevant_docs": 1947, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2363,23 +2635,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-vie": { - "number_of_characters": 4646252, "num_samples": 10290, - "num_queries": 5495, + "number_of_characters": 4646252, "num_documents": 4795, - "min_document_length": 8, - "average_document_length": 55.11511991657977, - "max_document_length": 176, + "min_document_length": 51, + "average_document_length": 913.8633993743483, + "max_document_length": 10556, "unique_documents": 4795, - "min_query_length": 51, - "average_query_length": 797.4476797088262, - "max_query_length": 10556, + "num_queries": 5495, + "min_query_length": 8, + "average_query_length": 48.094085532302095, + "max_query_length": 176, "unique_queries": 5495, + "none_queries": 0, + "num_relevant_docs": 5495, 
"min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2389,23 +2664,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vie-zho": { - "number_of_characters": 1747625, "num_samples": 3663, - "num_queries": 1943, + "number_of_characters": 1747625, "num_documents": 1720, - "min_document_length": 5, - "average_document_length": 16.996511627906976, - "max_document_length": 55, + "min_document_length": 55, + "average_document_length": 999.064534883721, + "max_document_length": 8544, "unique_documents": 1720, - "min_query_length": 55, - "average_query_length": 884.4009264024704, - "max_query_length": 8544, + "num_queries": 1943, + "min_query_length": 5, + "average_query_length": 15.045805455481215, + "max_query_length": 55, "unique_queries": 1943, + "none_queries": 0, + "num_relevant_docs": 1943, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2415,23 +2693,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-ara": { - "number_of_characters": 516234, "num_samples": 3629, - "num_queries": 1911, + "number_of_characters": 516234, "num_documents": 1718, - "min_document_length": 8, - "average_document_length": 46.77240977881257, - "max_document_length": 148, + "min_document_length": 51, + "average_document_length": 253.71303841676368, + "max_document_length": 2435, "unique_documents": 1718, - "min_query_length": 51, - "average_query_length": 228.0894819466248, - "max_query_length": 2435, + "num_queries": 1911, + "min_query_length": 8, + "average_query_length": 42.04866562009419, + 
"max_query_length": 148, "unique_queries": 1911, + "none_queries": 0, + "num_relevant_docs": 1912, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000523286237572, "max_relevant_docs_per_query": 2, @@ -2441,23 +2722,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-deu": { - "number_of_characters": 438762, "num_samples": 3085, - "num_queries": 1621, + "number_of_characters": 438762, "num_documents": 1464, - "min_document_length": 12, - "average_document_length": 57.85450819672131, - "max_document_length": 190, + "min_document_length": 51, + "average_document_length": 241.84631147540983, + "max_document_length": 2658, "unique_documents": 1464, - "min_query_length": 51, - "average_query_length": 218.42257865515114, - "max_query_length": 2658, + "num_queries": 1621, + "min_query_length": 12, + "average_query_length": 52.25107958050586, + "max_query_length": 190, "unique_queries": 1621, + "none_queries": 0, + "num_relevant_docs": 1621, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2467,23 +2751,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-eng": { - "number_of_characters": 1375165, "num_samples": 9681, - "num_queries": 5135, + "number_of_characters": 1375165, "num_documents": 4546, - "min_document_length": 12, - "average_document_length": 54.94390673119226, - "max_document_length": 158, + "min_document_length": 51, + "average_document_length": 247.55609326880776, + "max_document_length": 2658, "unique_documents": 4546, - "min_query_length": 51, - "average_query_length": 219.16066212268743, - 
"max_query_length": 2658, + "num_queries": 5135, + "min_query_length": 12, + "average_query_length": 48.64167478091529, + "max_query_length": 158, "unique_queries": 5135, + "none_queries": 0, + "num_relevant_docs": 5137, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0003894839337877, "max_relevant_docs_per_query": 2, @@ -2493,23 +2780,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-spa": { - "number_of_characters": 547101, "num_samples": 3700, - "num_queries": 1947, + "number_of_characters": 547101, "num_documents": 1753, - "min_document_length": 12, - "average_document_length": 57.64860239589275, - "max_document_length": 162, + "min_document_length": 51, + "average_document_length": 254.44552196235026, + "max_document_length": 2658, "unique_documents": 1753, - "min_query_length": 51, - "average_query_length": 229.0924499229584, - "max_query_length": 2658, + "num_queries": 1947, + "min_query_length": 12, + "average_query_length": 51.90446841294299, + "max_query_length": 162, "unique_queries": 1947, + "none_queries": 0, + "num_relevant_docs": 1947, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2519,23 +2809,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-hin": { - "number_of_characters": 436800, "num_samples": 3291, - "num_queries": 1766, + "number_of_characters": 436800, "num_documents": 1525, - "min_document_length": 12, - "average_document_length": 56.82032786885246, - "max_document_length": 130, + "min_document_length": 51, + "average_document_length": 229.60590163934427, + 
"max_document_length": 2570, "unique_documents": 1525, - "min_query_length": 51, - "average_query_length": 198.27236693091731, - "max_query_length": 2570, + "num_queries": 1766, + "min_query_length": 12, + "average_query_length": 49.06625141562854, + "max_query_length": 130, "unique_queries": 1766, + "none_queries": 0, + "num_relevant_docs": 1767, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0005662514156286, "max_relevant_docs_per_query": 2, @@ -2545,23 +2838,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-vie": { - "number_of_characters": 560107, "num_samples": 3688, - "num_queries": 1943, + "number_of_characters": 560107, "num_documents": 1745, - "min_document_length": 10, - "average_document_length": 54.86418338108883, - "max_document_length": 176, + "min_document_length": 51, + "average_document_length": 266.1140401146132, + "max_document_length": 2435, "unique_documents": 1745, - "min_query_length": 51, - "average_query_length": 238.99588265568707, - "max_query_length": 2435, + "num_queries": 1943, + "min_query_length": 10, + "average_query_length": 49.27328872876994, + "max_query_length": 176, "unique_queries": 1943, + "none_queries": 0, + "num_relevant_docs": 1943, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -2571,23 +2867,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho-zho": { - "number_of_characters": 1202528, "num_samples": 9682, - "num_queries": 5136, + "number_of_characters": 1202528, "num_documents": 4546, - "min_document_length": 5, - "average_document_length": 
16.968323801143864, - "max_document_length": 69, + "min_document_length": 51, + "average_document_length": 247.55609326880776, + "max_document_length": 2658, "unique_documents": 4546, - "min_query_length": 51, - "average_query_length": 219.1179906542056, - "max_query_length": 2658, + "num_queries": 5136, + "min_query_length": 5, + "average_query_length": 15.019080996884735, + "max_query_length": 69, "unique_queries": 5136, + "none_queries": 0, + "num_relevant_docs": 5137, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0001947040498442, "max_relevant_docs_per_query": 2, @@ -2597,6 +2896,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MLQuestions.json b/mteb/descriptive_stats/Retrieval/MLQuestions.json index 1c34a0fc82..2230f3c6a6 100644 --- a/mteb/descriptive_stats/Retrieval/MLQuestions.json +++ b/mteb/descriptive_stats/Retrieval/MLQuestions.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 2915233, "num_samples": 12500, - "num_queries": 1500, + "number_of_characters": 2915233, "num_documents": 11000, - "min_document_length": 14, - "average_document_length": 6.143909090909091, - "max_document_length": 160, + "min_document_length": 3, + "average_document_length": 258.8772727272727, + "max_document_length": 395, "unique_documents": 11000, - "min_query_length": 3, - "average_query_length": 1898.4333333333334, - "max_query_length": 395, + "num_queries": 1500, + "min_query_length": 14, + "average_query_length": 45.05533333333333, + "max_query_length": 160, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,23 +23,26 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test": { - "number_of_characters": 2916280, "num_samples": 12500, - "num_queries": 1500, + "number_of_characters": 2916280, "num_documents": 11000, - "min_document_length": 12, - "average_document_length": 6.239090909090909, - "max_document_length": 165, + "min_document_length": 3, + "average_document_length": 258.8772727272727, + "max_document_length": 395, "unique_documents": 11000, - "min_query_length": 3, - "average_query_length": 1898.4333333333334, - "max_query_length": 395, + "num_queries": 1500, + "min_query_length": 12, + "average_query_length": 45.75333333333333, + "max_query_length": 165, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -47,6 +52,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MMarcoRetrieval.json b/mteb/descriptive_stats/Retrieval/MMarcoRetrieval.json index c43b47c217..1d26ef1dbf 100644 --- a/mteb/descriptive_stats/Retrieval/MMarcoRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/MMarcoRetrieval.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 12294685, "num_samples": 113793, - "num_queries": 6980, + "number_of_characters": 12294685, "num_documents": 106813, - "min_document_length": 2, - "average_document_length": 0.6868920449757988, - "max_document_length": 61, + "min_document_length": 13, + "average_document_length": 114.41787048392986, + "max_document_length": 1709, "unique_documents": 106813, - "min_query_length": 13, - "average_query_length": 1750.904871060172, - 
"max_query_length": 1709, + "num_queries": 6980, + "min_query_length": 2, + "average_query_length": 10.51131805157593, + "max_query_length": 61, "unique_queries": 6980, + "none_queries": 0, + "num_relevant_docs": 7437, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0654727793696275, "max_relevant_docs_per_query": 4, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MSMARCO-PL.json b/mteb/descriptive_stats/Retrieval/MSMARCO-PL.json index f82cbce0ae..0f0102975d 100644 --- a/mteb/descriptive_stats/Retrieval/MSMARCO-PL.json +++ b/mteb/descriptive_stats/Retrieval/MSMARCO-PL.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 3097800368, "num_samples": 8841866, - "num_queries": 43, + "number_of_characters": 3097800368, "num_documents": 8841823, - "min_document_length": 16, - "average_document_length": 0.00016060036487950506, - "max_document_length": 55, + "min_document_length": 2, + "average_document_length": 350.3574939240471, + "max_document_length": 1727, "unique_documents": 8841823, - "min_query_length": 2, - "average_query_length": 72041836.0, - "max_query_length": 1727, + "num_queries": 43, + "min_query_length": 16, + "average_query_length": 33.02325581395349, + "max_query_length": 55, "unique_queries": 43, + "none_queries": 0, + "num_relevant_docs": 9260, "min_relevant_docs_per_query": 132, "average_relevant_docs_per_query": 95.3953488372093, "max_relevant_docs_per_query": 582, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MSMARCO-PLHardNegatives.json 
b/mteb/descriptive_stats/Retrieval/MSMARCO-PLHardNegatives.json index f4f404fe90..2399e06f64 100644 --- a/mteb/descriptive_stats/Retrieval/MSMARCO-PLHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/MSMARCO-PLHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 3635939, "num_samples": 9524, - "num_queries": 43, + "number_of_characters": 3635939, "num_documents": 9481, - "min_document_length": 16, - "average_document_length": 0.14977323067187007, - "max_document_length": 55, + "min_document_length": 10, + "average_document_length": 383.3476426537285, + "max_document_length": 1619, "unique_documents": 9481, - "min_query_length": 10, - "average_query_length": 84523.69767441861, - "max_query_length": 1619, + "num_queries": 43, + "min_query_length": 16, + "average_query_length": 33.02325581395349, + "max_query_length": 55, "unique_queries": 43, + "none_queries": 0, + "num_relevant_docs": 9260, "min_relevant_docs_per_query": 132, "average_relevant_docs_per_query": 95.3953488372093, "max_relevant_docs_per_query": 582, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MSMARCO.json b/mteb/descriptive_stats/Retrieval/MSMARCO.json index 3a6908af62..ce787dd3b6 100644 --- a/mteb/descriptive_stats/Retrieval/MSMARCO.json +++ b/mteb/descriptive_stats/Retrieval/MSMARCO.json @@ -1,19 +1,19 @@ { "train": { - "number_of_characters": 2994608051, "num_samples": 9344762, - "num_queries": 502939, + "number_of_characters": 2994608051, "num_documents": 8841823, - "num_relevant_docs": 532751, - "min_document_length": 5, - "average_document_length": 1.8895562600608495, - "max_document_length": 215, + "min_document_length": 4, + "average_document_length": 336.79716603691344, + "max_document_length": 1670, "unique_documents": 
8841823, - "min_query_length": 4, - "average_query_length": 5920.9982304016985, - "max_query_length": 1670, + "num_queries": 502939, + "min_query_length": 5, + "average_query_length": 33.21898281898998, + "max_query_length": 215, "unique_queries": 502939, "none_queries": 0, + "num_relevant_docs": 532751, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0592755781516248, "max_relevant_docs_per_query": 7, @@ -23,25 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "dev": { - "number_of_characters": 2978133099, "num_samples": 8848803, - "num_queries": 6980, + "number_of_characters": 2978133099, "num_documents": 8841823, - "num_relevant_docs": 7437, - "min_document_length": 9, - "average_document_length": 0.026258159657799075, - "max_document_length": 186, + "min_document_length": 4, + "average_document_length": 336.79716603691344, + "max_document_length": 1670, "unique_documents": 8841823, - "min_query_length": 4, - "average_query_length": 426633.37091690546, - "max_query_length": 1670, + "num_queries": 6980, + "min_query_length": 9, + "average_query_length": 33.2621776504298, + "max_query_length": 186, "unique_queries": 6980, "none_queries": 0, + "num_relevant_docs": 7437, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0654727793696275, "max_relevant_docs_per_query": 4, @@ -51,25 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test": { - "number_of_characters": 2977902337, "num_samples": 8841866, - "num_queries": 43, + "number_of_characters": 2977902337, "num_documents": 8841823, - "num_relevant_docs": 9260, - "min_document_length": 16, - 
"average_document_length": 0.00015924317869742472, - "max_document_length": 55, + "min_document_length": 4, + "average_document_length": 336.79716603691344, + "max_document_length": 1670, "unique_documents": 8841823, - "min_query_length": 4, - "average_query_length": 69253509.97674419, - "max_query_length": 1670, + "num_queries": 43, + "min_query_length": 16, + "average_query_length": 32.74418604651163, + "max_query_length": 55, "unique_queries": 43, "none_queries": 0, + "num_relevant_docs": 9260, "min_relevant_docs_per_query": 132, "average_relevant_docs_per_query": 95.3953488372093, "max_relevant_docs_per_query": 582, @@ -79,6 +81,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MSMARCOHardNegatives.json b/mteb/descriptive_stats/Retrieval/MSMARCOHardNegatives.json index 1d6d7b2bd5..4d2541fad0 100644 --- a/mteb/descriptive_stats/Retrieval/MSMARCOHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/MSMARCOHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 3141044, "num_samples": 8855, - "num_queries": 43, + "number_of_characters": 3141044, "num_documents": 8812, - "min_document_length": 16, - "average_document_length": 0.15978211529732184, - "max_document_length": 55, + "min_document_length": 65, + "average_document_length": 356.2909668633681, + "max_document_length": 1111, "unique_documents": 8812, - "min_query_length": 65, - "average_query_length": 73014.79069767441, - "max_query_length": 1111, + "num_queries": 43, + "min_query_length": 16, + "average_query_length": 32.74418604651163, + "max_query_length": 55, "unique_queries": 43, + "none_queries": 0, + "num_relevant_docs": 9260, "min_relevant_docs_per_query": 132, "average_relevant_docs_per_query": 95.3953488372093, "max_relevant_docs_per_query": 582, @@ 
-21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MSMARCOv2.json b/mteb/descriptive_stats/Retrieval/MSMARCOv2.json new file mode 100644 index 0000000000..feeab3fefb --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/MSMARCOv2.json @@ -0,0 +1,89 @@ +{ + "train": { + "num_samples": 138641342, + "number_of_characters": 47326141477, + "num_documents": 138364198, + "min_document_length": 24, + "average_document_length": 341.97456860914264, + "max_document_length": 1032556, + "unique_documents": 138364198, + "num_queries": 277144, + "min_query_length": 6, + "average_query_length": 32.851351643910746, + "max_query_length": 215, + "unique_queries": 277144, + "none_queries": 0, + "num_relevant_docs": 284212, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.025502987616546, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 245838, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "dev": { + "num_samples": 138368101, + "number_of_characters": 47317165079, + "num_documents": 138364198, + "min_document_length": 24, + "average_document_length": 341.97456860914264, + "max_document_length": 1032556, + "unique_documents": 138364198, + "num_queries": 3903, + "min_query_length": 9, + "average_query_length": 32.83551114527287, + "max_query_length": 153, + "unique_queries": 3903, + "none_queries": 0, + "num_relevant_docs": 4009, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.027158595951832, + "max_relevant_docs_per_query": 3, + 
"unique_relevant_docs": 4003, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "dev2": { + "num_samples": 138368479, + "number_of_characters": 47317176644, + "num_documents": 138364198, + "min_document_length": 24, + "average_document_length": 341.97456860914264, + "max_document_length": 1032556, + "unique_documents": 138364198, + "num_queries": 4281, + "min_query_length": 10, + "average_query_length": 32.63770147161878, + "max_query_length": 199, + "unique_queries": 4281, + "none_queries": 0, + "num_relevant_docs": 4411, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0303667367437515, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 4400, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/MedicalQARetrieval.json b/mteb/descriptive_stats/Retrieval/MedicalQARetrieval.json index b513e5de41..5007daa4cf 100644 --- a/mteb/descriptive_stats/Retrieval/MedicalQARetrieval.json +++ b/mteb/descriptive_stats/Retrieval/MedicalQARetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 2471858, "num_samples": 4096, - "num_queries": 2048, + "number_of_characters": 2471858, "num_documents": 2048, - "min_document_length": 16, - "average_document_length": 52.4794921875, - "max_document_length": 191, + "min_document_length": 7, + "average_document_length": 1154.482421875, + "max_document_length": 14442, "unique_documents": 2048, - "min_query_length": 
7, - "average_query_length": 1154.482421875, - "max_query_length": 14442, + "num_queries": 2048, + "min_query_length": 16, + "average_query_length": 52.4794921875, + "max_query_length": 191, "unique_queries": 2048, + "none_queries": 0, + "num_relevant_docs": 2048, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MedicalRetrieval.json b/mteb/descriptive_stats/Retrieval/MedicalRetrieval.json index 909d60c736..a3270c1160 100644 --- a/mteb/descriptive_stats/Retrieval/MedicalRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/MedicalRetrieval.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 12344090, "num_samples": 101999, - "num_queries": 1000, + "number_of_characters": 12344090, "num_documents": 100999, - "min_document_length": 2, - "average_document_length": 0.17760571886850365, - "max_document_length": 110, + "min_document_length": 7, + "average_document_length": 122.04231725066585, + "max_document_length": 512, "unique_documents": 100999, - "min_query_length": 7, - "average_query_length": 12326.152, - "max_query_length": 512, + "num_queries": 1000, + "min_query_length": 2, + "average_query_length": 17.938, + "max_query_length": 110, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git 
a/mteb/descriptive_stats/Retrieval/MintakaRetrieval.json b/mteb/descriptive_stats/Retrieval/MintakaRetrieval.json index 3a3f9ea336..904e7f0ca9 100644 --- a/mteb/descriptive_stats/Retrieval/MintakaRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/MintakaRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1209570, "num_samples": 30048, - "num_queries": 17841, + "number_of_characters": 1209570, "num_documents": 12207, - "min_document_length": 8, - "average_document_length": 85.71966904235275, - "max_document_length": 222, + "min_document_length": 1, + "average_document_length": 13.3685590235111, + "max_document_length": 86, "unique_documents": 12207, - "min_query_length": 1, - "average_query_length": 9.146908805560226, - "max_query_length": 86, + "num_queries": 17841, + "min_query_length": 8, + "average_query_length": 58.65029987108346, + "max_query_length": 222, "unique_queries": 17841, + "none_queries": 0, + "num_relevant_docs": 17843, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.000112101339611, "max_relevant_docs_per_query": 2, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "ar": { - "number_of_characters": 140762, "num_samples": 3694, - "num_queries": 2203, + "number_of_characters": 140762, "num_documents": 1491, - "min_document_length": 15, - "average_document_length": 81.67136150234742, - "max_document_length": 182, + "min_document_length": 2, + "average_document_length": 12.736418511066399, + "max_document_length": 71, "unique_documents": 1491, - "min_query_length": 2, - "average_query_length": 8.620063549704948, - "max_query_length": 71, + "num_queries": 2203, + "min_query_length": 15, + "average_query_length": 55.275533363595095, + "max_query_length": 182, "unique_queries": 2203, 
+ "none_queries": 0, + "num_relevant_docs": 2203, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "de": { - "number_of_characters": 179124, "num_samples": 4029, - "num_queries": 2374, + "number_of_characters": 179124, "num_documents": 1655, - "min_document_length": 17, - "average_document_length": 93.83141993957705, - "max_document_length": 200, + "min_document_length": 1, + "average_document_length": 14.40060422960725, + "max_document_length": 71, "unique_documents": 1655, - "min_query_length": 1, - "average_query_length": 10.039174389216512, - "max_query_length": 71, + "num_queries": 2374, + "min_query_length": 17, + "average_query_length": 65.41322662173546, + "max_query_length": 200, "unique_queries": 2374, + "none_queries": 0, + "num_relevant_docs": 2374, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -73,23 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "es": { - "number_of_characters": 181473, "num_samples": 4117, - "num_queries": 2424, + "number_of_characters": 181473, "num_documents": 1693, - "min_document_length": 20, - "average_document_length": 92.8984051978736, - "max_document_length": 181, + "min_document_length": 1, + "average_document_length": 14.291789722386296, + "max_document_length": 71, "unique_documents": 1693, - "min_query_length": 1, - "average_query_length": 9.981848184818482, - "max_query_length": 71, + "num_queries": 2424, + "min_query_length": 20, + "average_query_length": 
64.88325082508251, + "max_query_length": 181, "unique_queries": 2424, + "none_queries": 0, + "num_relevant_docs": 2424, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -99,23 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fr": { - "number_of_characters": 192910, "num_samples": 4156, - "num_queries": 2442, + "number_of_characters": 192910, "num_documents": 1714, - "min_document_length": 18, - "average_document_length": 98.14235705950992, - "max_document_length": 222, + "min_document_length": 1, + "average_document_length": 14.407234539089849, + "max_document_length": 71, "unique_documents": 1714, - "min_query_length": 1, - "average_query_length": 10.112203112203112, - "max_query_length": 71, + "num_queries": 2442, + "min_query_length": 18, + "average_query_length": 68.88452088452088, + "max_query_length": 222, "unique_queries": 2442, + "none_queries": 0, + "num_relevant_docs": 2442, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -125,23 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hi": { - "number_of_characters": 87874, "num_samples": 2107, - "num_queries": 1337, + "number_of_characters": 87874, "num_documents": 770, - "min_document_length": 20, - "average_document_length": 101.41168831168831, - "max_document_length": 187, + "min_document_length": 2, + "average_document_length": 12.71038961038961, + "max_document_length": 71, "unique_documents": 770, - "min_query_length": 2, - "average_query_length": 7.320119670905012, - "max_query_length": 71, + 
"num_queries": 1337, + "min_query_length": 20, + "average_query_length": 58.404637247569184, + "max_query_length": 187, "unique_queries": 1337, + "none_queries": 0, + "num_relevant_docs": 1337, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -151,23 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "it": { - "number_of_characters": 178136, "num_samples": 4059, - "num_queries": 2395, + "number_of_characters": 178136, "num_documents": 1664, - "min_document_length": 20, - "average_document_length": 92.68689903846153, - "max_document_length": 185, + "min_document_length": 1, + "average_document_length": 14.365985576923077, + "max_document_length": 86, "unique_documents": 1664, - "min_query_length": 1, - "average_query_length": 9.981210855949895, - "max_query_length": 86, + "num_queries": 2395, + "min_query_length": 20, + "average_query_length": 64.39707724425887, + "max_query_length": 185, "unique_queries": 2395, + "none_queries": 0, + "num_relevant_docs": 2396, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0004175365344468, "max_relevant_docs_per_query": 2, @@ -177,23 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ja": { - "number_of_characters": 83867, "num_samples": 3904, - "num_queries": 2312, + "number_of_characters": 83867, "num_documents": 1592, - "min_document_length": 8, - "average_document_length": 43.51256281407035, - "max_document_length": 68, + "min_document_length": 1, + "average_document_length": 9.167713567839195, + "max_document_length": 71, "unique_documents": 1592, - 
"min_query_length": 1, - "average_query_length": 6.312716262975779, - "max_query_length": 71, + "num_queries": 2312, + "min_query_length": 8, + "average_query_length": 29.961937716262977, + "max_query_length": 68, "unique_queries": 2312, + "none_queries": 0, + "num_relevant_docs": 2312, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -203,23 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "pt": { - "number_of_characters": 165424, "num_samples": 3982, - "num_queries": 2354, + "number_of_characters": 165424, "num_documents": 1628, - "min_document_length": 18, - "average_document_length": 87.36732186732186, - "max_document_length": 165, + "min_document_length": 1, + "average_document_length": 14.244471744471744, + "max_document_length": 80, "unique_documents": 1628, - "min_query_length": 1, - "average_query_length": 9.851316907391674, - "max_query_length": 80, + "num_queries": 2354, + "min_query_length": 18, + "average_query_length": 60.42225998300765, + "max_query_length": 165, "unique_queries": 2354, + "none_queries": 0, + "num_relevant_docs": 2355, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0004248088360237, "max_relevant_docs_per_query": 2, @@ -229,6 +255,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MrTidyRetrieval.json b/mteb/descriptive_stats/Retrieval/MrTidyRetrieval.json index fd9d85feb0..dfe4cd242e 100644 --- a/mteb/descriptive_stats/Retrieval/MrTidyRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/MrTidyRetrieval.json @@ -1,19 +1,19 @@ { "test": 
{ - "number_of_characters": 19085636965, "num_samples": 58051987, - "num_queries": 8661, + "number_of_characters": 20129435459, "num_documents": 58043326, - "num_relevant_docs": 10105, - "min_document_length": 6, - "average_document_length": 0.005544547877907617, - "max_document_length": 144, + "min_document_length": 2, + "average_document_length": 346.79462777512094, + "max_document_length": 61659, "unique_documents": 58043326, - "min_query_length": 1, - "average_query_length": 2203592.557556864, - "max_query_length": 61639, + "num_queries": 8661, + "min_query_length": 6, + "average_query_length": 37.15783396836393, + "max_query_length": 144, "unique_queries": 8661, "none_queries": 0, + "num_relevant_docs": 10105, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1667243967209329, "max_relevant_docs_per_query": 3, @@ -23,25 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "bengali": { - "number_of_characters": 110965082, "num_samples": 304170, - "num_queries": 111, + "number_of_characters": 115881122, "num_documents": 304059, - "num_relevant_docs": 130, - "min_document_length": 20, - "average_document_length": 0.018641118993353262, - "max_document_length": 122, + "min_document_length": 3, + "average_document_length": 381.0952940054397, + "max_document_length": 16797, "unique_documents": 304059, - "min_query_length": 1, - "average_query_length": 999634.3603603604, - "max_query_length": 16791, + "num_queries": 111, + "min_query_length": 20, + "average_query_length": 51.06306306306306, + "max_query_length": 122, "unique_queries": 111, "none_queries": 0, + "num_relevant_docs": 130, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1711711711711712, "max_relevant_docs_per_query": 2, @@ -51,25 +52,26 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "english": { - "number_of_characters": 12550942597, "num_samples": 32907844, - "num_queries": 744, + "number_of_characters": 13206799486, "num_documents": 32907100, - "num_relevant_docs": 935, - "min_document_length": 16, - "average_document_length": 0.0009153647693051043, - "max_document_length": 108, + "min_document_length": 3, + "average_document_length": 401.3349509376396, + "max_document_length": 36471, "unique_documents": 32907100, - "min_query_length": 1, - "average_query_length": 16869506.014784947, - "max_query_length": 36444, + "num_queries": 744, + "min_query_length": 16, + "average_query_length": 40.486559139784944, + "max_query_length": 108, "unique_queries": 744, "none_queries": 0, + "num_relevant_docs": 935, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.256720430107527, "max_relevant_docs_per_query": 3, @@ -79,25 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "finnish": { - "number_of_characters": 656119952, "num_samples": 1910011, - "num_queries": 1254, + "number_of_characters": 687045646, "num_documents": 1908757, - "num_relevant_docs": 1451, - "min_document_length": 13, - "average_document_length": 0.024742803824688003, - "max_document_length": 89, + "min_document_length": 4, + "average_document_length": 359.91926578396306, + "max_document_length": 29389, "unique_documents": 1908757, - "min_query_length": 1, - "average_query_length": 523183.990430622, - "max_query_length": 29374, + "num_queries": 1254, + "min_query_length": 13, + "average_query_length": 37.66188197767145, + "max_query_length": 89, 
"unique_queries": 1254, "none_queries": 0, + "num_relevant_docs": 1451, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.157097288676236, "max_relevant_docs_per_query": 3, @@ -107,25 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "russian": { - "number_of_characters": 2994189913, "num_samples": 9598499, - "num_queries": 995, + "number_of_characters": 3197142093, "num_documents": 9597504, - "num_relevant_docs": 1168, - "min_document_length": 14, - "average_document_length": 0.004856262628283353, - "max_document_length": 138, + "min_document_length": 4, + "average_document_length": 333.1173902089543, + "max_document_length": 61659, "unique_documents": 9597504, - "min_query_length": 1, - "average_query_length": 3009189.2512562815, - "max_query_length": 61639, + "num_queries": 995, + "min_query_length": 14, + "average_query_length": 46.84221105527638, + "max_query_length": 138, "unique_queries": 995, "none_queries": 0, + "num_relevant_docs": 1168, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1738693467336683, "max_relevant_docs_per_query": 3, @@ -135,25 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "korean": { - "number_of_characters": 249708451, "num_samples": 1496547, - "num_queries": 421, + "number_of_characters": 260689346, "num_documents": 1496126, - "num_relevant_docs": 492, - "min_document_length": 6, - "average_document_length": 0.006410556330148664, - "max_document_length": 122, + "min_document_length": 3, + "average_document_length": 174.23649812916827, + "max_document_length": 25246, "unique_documents": 1496126, - 
"min_query_length": 1, - "average_query_length": 593108.9311163896, - "max_query_length": 25243, + "num_queries": 421, + "min_query_length": 6, + "average_query_length": 22.78147268408551, + "max_query_length": 122, "unique_queries": 421, "none_queries": 0, + "num_relevant_docs": 492, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.168646080760095, "max_relevant_docs_per_query": 3, @@ -163,25 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "japanese": { - "number_of_characters": 971241388, "num_samples": 7000747, - "num_queries": 720, + "number_of_characters": 1028855660, "num_documents": 7000027, - "num_relevant_docs": 923, - "min_document_length": 6, - "average_document_length": 0.0018598499691501189, - "max_document_length": 44, + "min_document_length": 2, + "average_document_length": 146.97695323175182, + "max_document_length": 25236, "unique_documents": 7000027, - "min_query_length": 1, - "average_query_length": 1348928.2902777777, - "max_query_length": 25232, + "num_queries": 720, + "min_query_length": 6, + "average_query_length": 18.081944444444446, + "max_query_length": 44, "unique_queries": 720, "none_queries": 0, + "num_relevant_docs": 923, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.2819444444444446, "max_relevant_docs_per_query": 3, @@ -191,25 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "telugu": { - "number_of_characters": 202749454, "num_samples": 548870, - "num_queries": 646, + "number_of_characters": 210635377, "num_documents": 548224, - "num_relevant_docs": 677, - "min_document_length": 13, - 
"average_document_length": 0.04451647501751109, - "max_document_length": 119, + "min_document_length": 5, + "average_document_length": 384.1695584286715, + "max_document_length": 17850, "unique_documents": 548224, - "min_query_length": 1, - "average_query_length": 313815.8653250774, - "max_query_length": 17811, + "num_queries": 646, + "min_query_length": 13, + "average_query_length": 37.77863777089783, + "max_query_length": 119, "unique_queries": 646, "none_queries": 0, + "num_relevant_docs": 677, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0479876160990713, "max_relevant_docs_per_query": 2, @@ -219,25 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "thai": { - "number_of_characters": 192485963, "num_samples": 570045, - "num_queries": 1190, + "number_of_characters": 203126081, "num_documents": 568855, - "num_relevant_docs": 1368, - "min_document_length": 13, - "average_document_length": 0.08903850717669705, - "max_document_length": 144, + "min_document_length": 5, + "average_document_length": 356.989797048457, + "max_document_length": 31251, "unique_documents": 568855, - "min_query_length": 1, - "average_query_length": 161710.34705882354, - "max_query_length": 31244, + "num_queries": 1190, + "min_query_length": 13, + "average_query_length": 42.563025210084035, + "max_query_length": 144, "unique_queries": 1190, "none_queries": 0, + "num_relevant_docs": 1368, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.149579831932773, "max_relevant_docs_per_query": 3, @@ -247,25 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "swahili": { - 
"number_of_characters": 29636822, "num_samples": 137359, - "num_queries": 670, + "number_of_characters": 31479914, "num_documents": 136689, - "num_relevant_docs": 743, - "min_document_length": 15, - "average_document_length": 0.2054664237795287, - "max_document_length": 98, + "min_document_length": 6, + "average_document_length": 230.0977328095165, + "max_document_length": 11203, "unique_documents": 136689, - "min_query_length": 1, - "average_query_length": 44192.1447761194, - "max_query_length": 11185, + "num_queries": 670, + "min_query_length": 15, + "average_query_length": 41.917910447761194, + "max_query_length": 98, "unique_queries": 670, "none_queries": 0, + "num_relevant_docs": 743, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.108955223880597, "max_relevant_docs_per_query": 3, @@ -275,25 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "arabic": { - "number_of_characters": 640057511, "num_samples": 2107667, - "num_queries": 1081, + "number_of_characters": 673016175, "num_documents": 2106586, - "num_relevant_docs": 1257, - "min_document_length": 12, - "average_document_length": 0.015663257991840828, - "max_document_length": 93, + "min_document_length": 5, + "average_document_length": 319.4662733921141, + "max_document_length": 48550, "unique_documents": 2106586, - "min_query_length": 1, - "average_query_length": 592067.0814061054, - "max_query_length": 48538, + "num_queries": 1081, + "min_query_length": 12, + "average_query_length": 30.523589269195188, + "max_query_length": 93, "unique_queries": 1081, "none_queries": 0, + "num_relevant_docs": 1257, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1628122109158188, "max_relevant_docs_per_query": 3, @@ -303,25 +313,26 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "indonesian": { - "number_of_characters": 487539832, "num_samples": 1470228, - "num_queries": 829, + "number_of_characters": 514764559, "num_documents": 1469399, - "num_relevant_docs": 961, - "min_document_length": 17, - "average_document_length": 0.02276577022306399, - "max_document_length": 128, + "min_document_length": 4, + "average_document_length": 350.3004337147364, + "max_document_length": 39539, "unique_documents": 1469399, - "min_query_length": 1, - "average_query_length": 588065.5971049457, - "max_query_length": 39510, + "num_queries": 829, + "min_query_length": 17, + "average_query_length": 40.352231604342585, + "max_query_length": 128, "unique_queries": 829, "none_queries": 0, + "num_relevant_docs": 961, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1592279855247285, "max_relevant_docs_per_query": 3, @@ -331,6 +342,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/MultiLongDocRetrieval.json b/mteb/descriptive_stats/Retrieval/MultiLongDocRetrieval.json index a7de31f155..502e80eb89 100644 --- a/mteb/descriptive_stats/Retrieval/MultiLongDocRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/MultiLongDocRetrieval.json @@ -1,19 +1,19 @@ { "dev": { - "number_of_characters": 6641969996, "num_samples": 496309, - "num_queries": 2600, + "number_of_characters": 6641969996, "num_documents": 493709, - "num_relevant_docs": 2600, - "min_document_length": 3, - "average_document_length": 0.49106862544535346, - "max_document_length": 2041, + "min_document_length": 36, + "average_document_length": 13452.717189680561, + 
"max_document_length": 471024, "unique_documents": 493709, - "min_query_length": 36, - "average_query_length": 2554510.5965384617, - "max_query_length": 471024, + "num_queries": 2600, + "min_query_length": 3, + "average_query_length": 93.24807692307692, + "max_query_length": 2041, "unique_queries": 2600, "none_queries": 0, + "num_relevant_docs": 2600, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -23,25 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "ar": { - "number_of_characters": 222400555, "num_samples": 7807, - "num_queries": 200, + "number_of_characters": 222400555, "num_documents": 7607, - "num_relevant_docs": 200, - "min_document_length": 6, - "average_document_length": 1.8212172998553964, - "max_document_length": 194, + "min_document_length": 2173, + "average_document_length": 29234.48153016958, + "max_document_length": 276627, "unique_documents": 7607, - "min_query_length": 2173, - "average_query_length": 1111933.505, - "max_query_length": 276627, + "num_queries": 200, + "min_query_length": 6, + "average_query_length": 69.27, + "max_query_length": 194, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -51,25 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "de": { - "number_of_characters": 337742837, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 337742837, "num_documents": 10000, - "num_relevant_docs": 200, - 
"min_document_length": 7, - "average_document_length": 3.0726, - "max_document_length": 2041, + "min_document_length": 104, + "average_document_length": 33771.2111, + "max_document_length": 186335, "unique_documents": 10000, - "min_query_length": 104, - "average_query_length": 1688560.555, - "max_query_length": 186335, + "num_queries": 200, + "min_query_length": 7, + "average_query_length": 153.63, + "max_query_length": 2041, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -79,25 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "en": { - "number_of_characters": 2666569772, "num_samples": 200200, - "num_queries": 200, + "number_of_characters": 2666569772, "num_documents": 200000, - "num_relevant_docs": 200, - "min_document_length": 16, - "average_document_length": 0.08122, - "max_document_length": 180, + "min_document_length": 2137, + "average_document_length": 13332.76764, + "max_document_length": 382998, "unique_documents": 200000, - "min_query_length": 2137, - "average_query_length": 13332767.64, - "max_query_length": 382998, + "num_queries": 200, + "min_query_length": 16, + "average_query_length": 81.22, + "max_query_length": 180, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -107,25 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "es": { - "number_of_characters": 349277698, "num_samples": 9751, - "num_queries": 200, + 
"number_of_characters": 349277698, "num_documents": 9551, - "num_relevant_docs": 200, - "min_document_length": 19, - "average_document_length": 2.5779499528845147, - "max_document_length": 305, + "min_document_length": 2657, + "average_document_length": 36567.1736990891, + "max_document_length": 471024, "unique_documents": 9551, - "min_query_length": 2657, - "average_query_length": 1746265.38, - "max_query_length": 471024, + "num_queries": 200, + "min_query_length": 19, + "average_query_length": 123.11, + "max_query_length": 305, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -135,25 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fr": { - "number_of_characters": 360123367, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 360123367, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 13, - "average_document_length": 2.8433, - "max_document_length": 1590, + "min_document_length": 2093, + "average_document_length": 36009.4934, + "max_document_length": 425370, "unique_documents": 10000, - "min_query_length": 2093, - "average_query_length": 1800474.67, - "max_query_length": 425370, + "num_queries": 200, + "min_query_length": 13, + "average_query_length": 142.165, + "max_query_length": 1590, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -163,25 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, 
"max_top_ranked_per_query": null }, "hi": { - "number_of_characters": 71144060, "num_samples": 4006, - "num_queries": 200, + "number_of_characters": 71144060, "num_documents": 3806, - "num_relevant_docs": 200, - "min_document_length": 4, - "average_document_length": 4.098528638991067, - "max_document_length": 318, + "min_document_length": 2426, + "average_document_length": 18688.50788229112, + "max_document_length": 227264, "unique_documents": 3806, - "min_query_length": 2426, - "average_query_length": 355642.305, - "max_query_length": 227264, + "num_queries": 200, + "min_query_length": 4, + "average_query_length": 77.995, + "max_query_length": 318, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -191,25 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "it": { - "number_of_characters": 366359892, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 366359892, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 9, - "average_document_length": 1.9923, - "max_document_length": 950, + "min_document_length": 2491, + "average_document_length": 36633.9969, + "max_document_length": 312623, "unique_documents": 10000, - "min_query_length": 2491, - "average_query_length": 1831699.845, - "max_query_length": 312623, + "num_queries": 200, + "min_query_length": 9, + "average_query_length": 99.615, + "max_query_length": 950, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -219,25 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + 
"num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ja": { - "number_of_characters": 144819833, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 144819833, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 3, - "average_document_length": 1.2325, - "max_document_length": 576, + "min_document_length": 1245, + "average_document_length": 14480.7508, + "max_document_length": 234888, "unique_documents": 10000, - "min_query_length": 1245, - "average_query_length": 724037.54, - "max_query_length": 234888, + "num_queries": 200, + "min_query_length": 3, + "average_query_length": 61.625, + "max_query_length": 576, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -247,25 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ko": { - "number_of_characters": 85323582, "num_samples": 6376, - "num_queries": 200, + "number_of_characters": 85323582, "num_documents": 6176, - "num_relevant_docs": 200, - "min_document_length": 8, - "average_document_length": 1.9056023316062176, - "max_document_length": 664, + "min_document_length": 1490, + "average_document_length": 13813.441224093263, + "max_document_length": 171299, "unique_documents": 6176, - "min_query_length": 1490, - "average_query_length": 426559.065, - "max_query_length": 171299, + "num_queries": 200, + "min_query_length": 8, + "average_query_length": 58.845, + "max_query_length": 664, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -275,25 +284,26 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "pt": { - "number_of_characters": 211070508, "num_samples": 6769, - "num_queries": 200, + "number_of_characters": 211070508, "num_documents": 6569, - "num_relevant_docs": 200, - "min_document_length": 7, - "average_document_length": 3.722788856751408, - "max_document_length": 506, + "min_document_length": 3078, + "average_document_length": 32127.576952351956, + "max_document_length": 400864, "unique_documents": 6569, - "min_query_length": 3078, - "average_query_length": 1055230.265, - "max_query_length": 400864, + "num_queries": 200, + "min_query_length": 7, + "average_query_length": 122.275, + "max_query_length": 506, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -303,25 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ru": { - "number_of_characters": 359366331, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 359366331, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 8, - "average_document_length": 1.7575, - "max_document_length": 216, + "min_document_length": 2901, + "average_document_length": 35934.8756, + "max_document_length": 303226, "unique_documents": 10000, - "min_query_length": 2901, - "average_query_length": 1796743.78, - "max_query_length": 303226, + "num_queries": 200, + "min_query_length": 8, + "average_query_length": 87.875, + "max_query_length": 216, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 
1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -331,25 +342,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "th": { - "number_of_characters": 259954258, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 259954258, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 30, - "average_document_length": 2.1562, - "max_document_length": 1123, + "min_document_length": 36, + "average_document_length": 25993.2696, + "max_document_length": 183497, "unique_documents": 10000, - "min_query_length": 36, - "average_query_length": 1299663.48, - "max_query_length": 183497, + "num_queries": 200, + "min_query_length": 30, + "average_query_length": 107.81, + "max_query_length": 1123, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -359,25 +371,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zh": { - "number_of_characters": 1207817303, "num_samples": 200200, - "num_queries": 200, + "number_of_characters": 1207817303, "num_documents": 200000, - "num_relevant_docs": 200, - "min_document_length": 5, - "average_document_length": 0.02679, - "max_document_length": 476, + "min_document_length": 1038, + "average_document_length": 6039.059725, + "max_document_length": 278468, "unique_documents": 200000, - "min_query_length": 1038, - "average_query_length": 6039059.725, - "max_query_length": 278468, + "num_queries": 200, + "min_query_length": 5, + "average_query_length": 26.79, + "max_query_length": 476, 
"unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -387,6 +400,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null @@ -394,20 +408,20 @@ } }, "test": { - "number_of_characters": 6642036446, "num_samples": 497509, - "num_queries": 3800, + "number_of_characters": 6642036446, "num_documents": 493709, - "num_relevant_docs": 3800, - "min_document_length": 3, - "average_document_length": 0.6256620802942624, - "max_document_length": 2589, + "min_document_length": 36, + "average_document_length": 13452.717189680561, + "max_document_length": 471024, "unique_documents": 493709, - "min_query_length": 36, - "average_query_length": 1747823.039736842, - "max_query_length": 471024, + "num_queries": 3800, + "min_query_length": 3, + "average_query_length": 81.28815789473684, + "max_query_length": 2589, "unique_queries": 3800, "none_queries": 0, + "num_relevant_docs": 3800, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -417,25 +431,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "ar": { - "number_of_characters": 222401855, "num_samples": 7807, - "num_queries": 200, + "number_of_characters": 222401855, "num_documents": 7607, - "num_relevant_docs": 200, - "min_document_length": 7, - "average_document_length": 1.9921125279347969, - "max_document_length": 695, + "min_document_length": 2173, + "average_document_length": 29234.48153016958, + "max_document_length": 276627, "unique_documents": 7607, - 
"min_query_length": 2173, - "average_query_length": 1111933.505, - "max_query_length": 276627, + "num_queries": 200, + "min_query_length": 7, + "average_query_length": 75.77, + "max_query_length": 695, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -445,25 +460,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "de": { - "number_of_characters": 337736841, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 337736841, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 10, - "average_document_length": 2.473, - "max_document_length": 957, + "min_document_length": 104, + "average_document_length": 33771.2111, + "max_document_length": 186335, "unique_documents": 10000, - "min_query_length": 104, - "average_query_length": 1688560.555, - "max_query_length": 186335, + "num_queries": 200, + "min_query_length": 10, + "average_query_length": 123.65, + "max_query_length": 957, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -473,25 +489,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "en": { - "number_of_characters": 2666618592, "num_samples": 200800, - "num_queries": 800, + "number_of_characters": 2666618592, "num_documents": 200000, - "num_relevant_docs": 800, - "min_document_length": 18, - "average_document_length": 0.32532, - "max_document_length": 255, + "min_document_length": 2137, + 
"average_document_length": 13332.76764, + "max_document_length": 382998, "unique_documents": 200000, - "min_query_length": 2137, - "average_query_length": 3333191.91, - "max_query_length": 382998, + "num_queries": 800, + "min_query_length": 18, + "average_query_length": 81.33, + "max_query_length": 255, "unique_queries": 800, "none_queries": 0, + "num_relevant_docs": 800, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -501,25 +518,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "es": { - "number_of_characters": 349279473, "num_samples": 9751, - "num_queries": 200, + "number_of_characters": 349279473, "num_documents": 9551, - "num_relevant_docs": 200, - "min_document_length": 40, - "average_document_length": 2.763794367081981, - "max_document_length": 480, + "min_document_length": 2657, + "average_document_length": 36567.1736990891, + "max_document_length": 471024, "unique_documents": 9551, - "min_query_length": 2657, - "average_query_length": 1746265.38, - "max_query_length": 471024, + "num_queries": 200, + "min_query_length": 40, + "average_query_length": 131.985, + "max_query_length": 480, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -529,25 +547,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fr": { - "number_of_characters": 360124893, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 360124893, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 33, 
- "average_document_length": 2.9959, - "max_document_length": 2589, + "min_document_length": 2093, + "average_document_length": 36009.4934, + "max_document_length": 425370, "unique_documents": 10000, - "min_query_length": 2093, - "average_query_length": 1800474.67, - "max_query_length": 425370, + "num_queries": 200, + "min_query_length": 33, + "average_query_length": 149.795, + "max_query_length": 2589, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -557,25 +576,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hi": { - "number_of_characters": 71149213, "num_samples": 4006, - "num_queries": 200, + "number_of_characters": 71149213, "num_documents": 3806, - "num_relevant_docs": 200, - "min_document_length": 6, - "average_document_length": 5.452443510246979, - "max_document_length": 2022, + "min_document_length": 2426, + "average_document_length": 18688.50788229112, + "max_document_length": 227264, "unique_documents": 3806, - "min_query_length": 2426, - "average_query_length": 355642.305, - "max_query_length": 227264, + "num_queries": 200, + "min_query_length": 6, + "average_query_length": 103.76, + "max_query_length": 2022, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -585,25 +605,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "it": { - "number_of_characters": 366362888, "num_samples": 10200, - "num_queries": 200, + 
"number_of_characters": 366362888, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 12, - "average_document_length": 2.2919, - "max_document_length": 1899, + "min_document_length": 2491, + "average_document_length": 36633.9969, + "max_document_length": 312623, "unique_documents": 10000, - "min_query_length": 2491, - "average_query_length": 1831699.845, - "max_query_length": 312623, + "num_queries": 200, + "min_query_length": 12, + "average_query_length": 114.595, + "max_query_length": 1899, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -613,25 +634,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ja": { - "number_of_characters": 144818654, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 144818654, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 6, - "average_document_length": 1.1146, - "max_document_length": 416, + "min_document_length": 1245, + "average_document_length": 14480.7508, + "max_document_length": 234888, "unique_documents": 10000, - "min_query_length": 1245, - "average_query_length": 724037.54, - "max_query_length": 234888, + "num_queries": 200, + "min_query_length": 6, + "average_query_length": 55.73, + "max_query_length": 416, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -641,25 +663,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ko": 
{ - "number_of_characters": 85323557, "num_samples": 6376, - "num_queries": 200, + "number_of_characters": 85323557, "num_documents": 6176, - "num_relevant_docs": 200, - "min_document_length": 8, - "average_document_length": 1.9015544041450778, - "max_document_length": 330, + "min_document_length": 1490, + "average_document_length": 13813.441224093263, + "max_document_length": 171299, "unique_documents": 6176, - "min_query_length": 1490, - "average_query_length": 426559.065, - "max_query_length": 171299, + "num_queries": 200, + "min_query_length": 8, + "average_query_length": 58.72, + "max_query_length": 330, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -669,25 +692,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "pt": { - "number_of_characters": 211068744, "num_samples": 6769, - "num_queries": 200, + "number_of_characters": 211068744, "num_documents": 6569, - "num_relevant_docs": 200, - "min_document_length": 4, - "average_document_length": 3.4542548333079615, - "max_document_length": 511, + "min_document_length": 3078, + "average_document_length": 32127.576952351956, + "max_document_length": 400864, "unique_documents": 6569, - "min_query_length": 3078, - "average_query_length": 1055230.265, - "max_query_length": 400864, + "num_queries": 200, + "min_query_length": 4, + "average_query_length": 113.455, + "max_query_length": 511, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -697,25 +721,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, 
"min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ru": { - "number_of_characters": 359367730, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 359367730, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 12, - "average_document_length": 1.8974, - "max_document_length": 413, + "min_document_length": 2901, + "average_document_length": 35934.8756, + "max_document_length": 303226, "unique_documents": 10000, - "min_query_length": 2901, - "average_query_length": 1796743.78, - "max_query_length": 303226, + "num_queries": 200, + "min_query_length": 12, + "average_query_length": 94.87, + "max_query_length": 413, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -725,25 +750,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "th": { - "number_of_characters": 259952294, "num_samples": 10200, - "num_queries": 200, + "number_of_characters": 259952294, "num_documents": 10000, - "num_relevant_docs": 200, - "min_document_length": 11, - "average_document_length": 1.9598, - "max_document_length": 309, + "min_document_length": 36, + "average_document_length": 25993.2696, + "max_document_length": 183497, "unique_documents": 10000, - "min_query_length": 36, - "average_query_length": 1299663.48, - "max_query_length": 183497, + "num_queries": 200, + "min_query_length": 11, + "average_query_length": 97.99, + "max_query_length": 309, "unique_queries": 200, "none_queries": 0, + "num_relevant_docs": 200, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -753,25 +779,26 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zh": { - "number_of_characters": 1207831712, "num_samples": 200800, - "num_queries": 800, + "number_of_characters": 1207831712, "num_documents": 200000, - "num_relevant_docs": 800, - "min_document_length": 3, - "average_document_length": 0.098835, - "max_document_length": 646, + "min_document_length": 1038, + "average_document_length": 6039.059725, + "max_document_length": 278468, "unique_documents": 200000, - "min_query_length": 1038, - "average_query_length": 1509764.93125, - "max_query_length": 278468, + "num_queries": 800, + "min_query_length": 3, + "average_query_length": 24.70875, + "max_query_length": 646, "unique_queries": 800, "none_queries": 0, + "num_relevant_docs": 800, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -781,6 +808,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NFCorpus-PL.json b/mteb/descriptive_stats/Retrieval/NFCorpus-PL.json index e983c08303..c732010846 100644 --- a/mteb/descriptive_stats/Retrieval/NFCorpus-PL.json +++ b/mteb/descriptive_stats/Retrieval/NFCorpus-PL.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 6013927, "num_samples": 3956, - "num_queries": 323, + "number_of_characters": 6013927, "num_documents": 3633, - "min_document_length": 3, - "average_document_length": 2.1684558216350123, - "max_document_length": 96, + "min_document_length": 110, + "average_document_length": 1653.1926782273604, + "max_document_length": 10705, "unique_documents": 3633, - "min_query_length": 110, - "average_query_length": 18594.57894736842, - 
"max_query_length": 10705, + "num_queries": 323, + "min_query_length": 3, + "average_query_length": 24.390092879256965, + "max_query_length": 96, "unique_queries": 323, + "none_queries": 0, + "num_relevant_docs": 12334, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 38.18575851393189, "max_relevant_docs_per_query": 475, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NFCorpus.json b/mteb/descriptive_stats/Retrieval/NFCorpus.json index edaaf460c7..c104d6d8b7 100644 --- a/mteb/descriptive_stats/Retrieval/NFCorpus.json +++ b/mteb/descriptive_stats/Retrieval/NFCorpus.json @@ -1,14 +1,31 @@ { "test": { - "number_of_characters": 1612.5486310130989, "num_samples": 3956, - "num_queries": 323, + "number_of_characters": 5786348, "num_documents": 3633, - "average_document_length": 0.43787060972495073, - "average_query_length": 0.06738299034784193, + "min_document_length": 123, + "average_document_length": 1590.783925130746, + "max_document_length": 10090, + "unique_documents": 3633, + "num_queries": 323, + "min_query_length": 3, + "average_query_length": 21.764705882352942, + "max_query_length": 72, + "unique_queries": 323, + "none_queries": 0, + "num_relevant_docs": 12334, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 38.18575851393189, - "average_instruction_length": 0, - "num_instructions": 0, - "average_top_ranked_per_query": 0 + "max_relevant_docs_per_query": 475, + "unique_relevant_docs": 3128, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NLPJournalAbsIntroRetrieval.json b/mteb/descriptive_stats/Retrieval/NLPJournalAbsIntroRetrieval.json index 08c175272e..f42a324859 100644 --- a/mteb/descriptive_stats/Retrieval/NLPJournalAbsIntroRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NLPJournalAbsIntroRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1214218, "num_samples": 908, - "num_queries": 404, + "number_of_characters": 1214218, "num_documents": 504, - "min_document_length": 120, - "average_document_length": 356.3015873015873, - "max_document_length": 1290, + "min_document_length": 304, + "average_document_length": 2052.8611111111113, + "max_document_length": 9565, "unique_documents": 504, - "min_query_length": 304, - "average_query_length": 2560.9950495049507, - "max_query_length": 9565, + "num_queries": 404, + "min_query_length": 120, + "average_query_length": 444.4950495049505, + "max_query_length": 1290, "unique_queries": 404, + "none_queries": 0, + "num_relevant_docs": 404, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NLPJournalTitleAbsRetrieval.json b/mteb/descriptive_stats/Retrieval/NLPJournalTitleAbsRetrieval.json index 4ac76aa5be..52ed4db6f5 100644 --- a/mteb/descriptive_stats/Retrieval/NLPJournalTitleAbsRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NLPJournalTitleAbsRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 233742, "num_samples": 908, - "num_queries": 404, + "number_of_characters": 233742, "num_documents": 504, - "min_document_length": 5, - "average_document_length": 
22.099206349206348, - "max_document_length": 71, + "min_document_length": 120, + "average_document_length": 441.6746031746032, + "max_document_length": 1290, "unique_documents": 504, - "min_query_length": 120, - "average_query_length": 551.0, - "max_query_length": 1290, + "num_queries": 404, + "min_query_length": 5, + "average_query_length": 27.56930693069307, + "max_query_length": 71, "unique_queries": 404, + "none_queries": 0, + "num_relevant_docs": 404, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NLPJournalTitleIntroRetrieval.json b/mteb/descriptive_stats/Retrieval/NLPJournalTitleIntroRetrieval.json index 1c49e377eb..57c50ef73a 100644 --- a/mteb/descriptive_stats/Retrieval/NLPJournalTitleIntroRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NLPJournalTitleIntroRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1045780, "num_samples": 908, - "num_queries": 404, + "number_of_characters": 1045780, "num_documents": 504, - "min_document_length": 5, - "average_document_length": 22.099206349206348, - "max_document_length": 71, + "min_document_length": 304, + "average_document_length": 2052.8611111111113, + "max_document_length": 9565, "unique_documents": 504, - "min_query_length": 304, - "average_query_length": 2560.9950495049507, - "max_query_length": 9565, + "num_queries": 404, + "min_query_length": 5, + "average_query_length": 27.56930693069307, + "max_query_length": 71, "unique_queries": 404, + "none_queries": 0, + "num_relevant_docs": 404, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NQ-PL.json b/mteb/descriptive_stats/Retrieval/NQ-PL.json index d44203e5ae..44b85fadd6 100644 --- a/mteb/descriptive_stats/Retrieval/NQ-PL.json +++ b/mteb/descriptive_stats/Retrieval/NQ-PL.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1349328700, "num_samples": 2684920, - "num_queries": 3452, + "number_of_characters": 1349328700, "num_documents": 2681468, - "min_document_length": 18, - "average_document_length": 0.062200630400959474, - "max_document_length": 111, + "min_document_length": 5, + "average_document_length": 503.14302128535564, + "max_document_length": 17008, "unique_documents": 2681468, - "min_query_length": 5, - "average_query_length": 390834.8525492468, - "max_query_length": 17008, + "num_queries": 3452, + "min_query_length": 18, + "average_query_length": 48.31662804171495, + "max_query_length": 111, "unique_queries": 3452, + "none_queries": 0, + "num_relevant_docs": 4201, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.2169756662804172, "max_relevant_docs_per_query": 4, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NQ-PLHardNegatives.json b/mteb/descriptive_stats/Retrieval/NQ-PLHardNegatives.json index 6c6a35f437..937ba60a5f 100644 --- a/mteb/descriptive_stats/Retrieval/NQ-PLHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/NQ-PLHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 113077430, "num_samples": 185765, - "num_queries": 1000, + "number_of_characters": 113077430, 
"num_documents": 184765, - "min_document_length": 18, - "average_document_length": 0.2618515411468622, - "max_document_length": 106, + "min_document_length": 5, + "average_document_length": 611.7449138094336, + "max_document_length": 14247, "unique_documents": 184765, - "min_query_length": 5, - "average_query_length": 113029.049, - "max_query_length": 14247, + "num_queries": 1000, + "min_query_length": 18, + "average_query_length": 48.381, + "max_query_length": 106, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1213, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.213, "max_relevant_docs_per_query": 4, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NQ.json b/mteb/descriptive_stats/Retrieval/NQ.json index 5df9862d66..f9a86075c1 100644 --- a/mteb/descriptive_stats/Retrieval/NQ.json +++ b/mteb/descriptive_stats/Retrieval/NQ.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1322743518, "num_samples": 2684920, - "num_queries": 3452, + "number_of_characters": 1322743518, "num_documents": 2681468, - "min_document_length": 25, - "average_document_length": 0.06202348862637928, - "max_document_length": 100, + "min_document_length": 5, + "average_document_length": 493.2287851281462, + "max_document_length": 17008, "unique_documents": 2681468, - "min_query_length": 5, - "average_query_length": 383133.6048667439, - "max_query_length": 17008, + "num_queries": 3452, + "min_query_length": 25, + "average_query_length": 48.17902665121669, + "max_query_length": 100, "unique_queries": 3452, + "none_queries": 0, + "num_relevant_docs": 4201, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.2169756662804172, "max_relevant_docs_per_query": 4, @@ -21,6 +23,7 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NQHardNegatives.json b/mteb/descriptive_stats/Retrieval/NQHardNegatives.json index 5d68b54792..082975d840 100644 --- a/mteb/descriptive_stats/Retrieval/NQHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/NQHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 120068721, "num_samples": 199779, - "num_queries": 1000, + "number_of_characters": 120068721, "num_documents": 198779, - "min_document_length": 29, - "average_document_length": 0.24086045306596773, - "max_document_length": 94, + "min_document_length": 5, + "average_document_length": 603.7903551179953, + "max_document_length": 17008, "unique_documents": 198779, - "min_query_length": 5, - "average_query_length": 120020.843, - "max_query_length": 17008, + "num_queries": 1000, + "min_query_length": 29, + "average_query_length": 47.878, + "max_query_length": 94, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1213, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.213, "max_relevant_docs_per_query": 4, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NarrativeQARetrieval.json b/mteb/descriptive_stats/Retrieval/NarrativeQARetrieval.json index e13d67496c..7340971b93 100644 --- a/mteb/descriptive_stats/Retrieval/NarrativeQARetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NarrativeQARetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 116501399, "num_samples": 10912, - "num_queries": 10557, + "number_of_characters": 
116501399, "num_documents": 355, - "min_document_length": 10, - "average_document_length": 1419.4225352112676, - "max_document_length": 1220, + "min_document_length": 21216, + "average_document_length": 326753.5323943662, + "max_document_length": 1874086, "unique_documents": 355, - "min_query_length": 21216, - "average_query_length": 10987.73363644975, - "max_query_length": 1874086, + "num_queries": 10557, + "min_query_length": 10, + "average_query_length": 47.730889457232166, + "max_query_length": 1220, "unique_queries": 10557, + "none_queries": 0, + "num_relevant_docs": 10557, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NeuCLIR2022Retrieval.json b/mteb/descriptive_stats/Retrieval/NeuCLIR2022Retrieval.json new file mode 100644 index 0000000000..d62f6971a4 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NeuCLIR2022Retrieval.json @@ -0,0 +1,120 @@ +{ + "test": { + "num_samples": 10039110, + "number_of_characters": 15033110515, + "num_documents": 10038768, + "min_document_length": 0, + "average_document_length": 1497.5033054852945, + "max_document_length": 24394, + "unique_documents": 10038768, + "num_queries": 342, + "min_query_length": 12, + "average_query_length": 65.06432748538012, + "max_query_length": 168, + "unique_queries": 342, + "none_queries": 0, + "num_relevant_docs": 103755, + "min_relevant_docs_per_query": 348, + "average_relevant_docs_per_query": 39.24285714285714, + "max_relevant_docs_per_query": 1288, + "unique_relevant_docs": 97926, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, 
+ "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "fas": { + "num_samples": 2232130, + "number_of_characters": 4535674160, + "num_documents": 2232016, + "min_document_length": 99, + "average_document_length": 2032.093148525817, + "max_document_length": 24394, + "unique_documents": 2232016, + "num_queries": 114, + "min_query_length": 40, + "average_query_length": 85.4298245614035, + "max_query_length": 168, + "unique_queries": 114, + "none_queries": 0, + "num_relevant_docs": 34174, + "min_relevant_docs_per_query": 348, + "average_relevant_docs_per_query": 32.0, + "max_relevant_docs_per_query": 1288, + "unique_relevant_docs": 31702, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "rus": { + "num_samples": 4627657, + "number_of_characters": 8134827747, + "num_documents": 4627543, + "min_document_length": 0, + "average_document_length": 1757.9129983233004, + "max_document_length": 24212, + "unique_documents": 4627543, + "num_queries": 114, + "min_query_length": 29, + "average_query_length": 85.58771929824562, + "max_query_length": 160, + "unique_queries": 114, + "none_queries": 0, + "num_relevant_docs": 33006, + "min_relevant_docs_per_query": 364, + "average_relevant_docs_per_query": 41.977777777777774, + "max_relevant_docs_per_query": 1080, + "unique_relevant_docs": 31673, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho": { + 
"num_samples": 3179323, + "number_of_characters": 2362608608, + "num_documents": 3179209, + "min_document_length": 0, + "average_document_length": 743.1426659901881, + "max_document_length": 23870, + "unique_documents": 3179209, + "num_queries": 114, + "min_query_length": 12, + "average_query_length": 24.17543859649123, + "max_query_length": 46, + "unique_queries": 114, + "none_queries": 0, + "num_relevant_docs": 36575, + "min_relevant_docs_per_query": 470, + "average_relevant_docs_per_query": 43.53061224489796, + "max_relevant_docs_per_query": 1201, + "unique_relevant_docs": 34551, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NeuCLIR2022RetrievalHardNegatives.json b/mteb/descriptive_stats/Retrieval/NeuCLIR2022RetrievalHardNegatives.json index 58ab161af5..7cf9329f14 100644 --- a/mteb/descriptive_stats/Retrieval/NeuCLIR2022RetrievalHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/NeuCLIR2022RetrievalHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 56422814, "num_samples": 28067, - "num_queries": 136, + "number_of_characters": 57740491, "num_documents": 27931, - "min_document_length": 12, - "average_document_length": 0.3093337152268089, - "max_document_length": 153, + "min_document_length": 0, + "average_document_length": 2066.9453653646488, + "max_document_length": 23840, "unique_documents": 27931, - "min_query_length": 0, - "average_query_length": 414810.10294117645, - "max_query_length": 23822, + "num_queries": 136, + "min_query_length": 12, + "average_query_length": 63.529411764705884, + "max_query_length": 153, "unique_queries": 136, + "none_queries": 0, + "num_relevant_docs": 100187, 
"min_relevant_docs_per_query": 348, "average_relevant_docs_per_query": 40.39705882352941, "max_relevant_docs_per_query": 1288, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "fas": { - "number_of_characters": 24554222, "num_samples": 8927, - "num_queries": 45, + "number_of_characters": 25022989, "num_documents": 8882, - "min_document_length": 40, - "average_document_length": 0.4218644449448322, - "max_document_length": 128, + "min_document_length": 99, + "average_document_length": 2816.847782031074, + "max_document_length": 23739, "unique_documents": 8882, - "min_query_length": 0, - "average_query_length": 545566.1111111111, - "max_query_length": 23692, + "num_queries": 45, + "min_query_length": 40, + "average_query_length": 83.26666666666667, + "max_query_length": 128, "unique_queries": 45, + "none_queries": 0, + "num_relevant_docs": 33602, "min_relevant_docs_per_query": 348, "average_relevant_docs_per_query": 32.71111111111111, "max_relevant_docs_per_query": 1288, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "rus": { - "number_of_characters": 20783375, "num_samples": 8768, - "num_queries": 44, + "number_of_characters": 21347532, "num_documents": 8724, - "min_document_length": 29, - "average_document_length": 0.4315680880330124, - "max_document_length": 153, + "min_document_length": 0, + "average_document_length": 2446.5574277854193, + "max_document_length": 23686, "unique_documents": 8724, - "min_query_length": 0, - "average_query_length": 472263.86363636365, - "max_query_length": 23626, + "num_queries": 44, + "min_query_length": 
29, + "average_query_length": 85.56818181818181, + "max_query_length": 153, "unique_queries": 44, + "none_queries": 0, + "num_relevant_docs": 32191, "min_relevant_docs_per_query": 364, "average_relevant_docs_per_query": 42.93181818181818, "max_relevant_docs_per_query": 1080, @@ -73,23 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho": { - "number_of_characters": 11085217, "num_samples": 10372, - "num_queries": 47, + "number_of_characters": 11369970, "num_documents": 10325, - "min_document_length": 12, - "average_document_length": 0.10924939467312349, - "max_document_length": 43, + "min_document_length": 74, + "average_document_length": 1101.0984987893462, + "max_document_length": 23840, "unique_documents": 10325, - "min_query_length": 0, - "average_query_length": 235831.68085106384, - "max_query_length": 23822, + "num_queries": 47, + "min_query_length": 12, + "average_query_length": 24.0, + "max_query_length": 43, "unique_queries": 47, + "none_queries": 0, + "num_relevant_docs": 34394, "min_relevant_docs_per_query": 470, "average_relevant_docs_per_query": 45.38297872340426, "max_relevant_docs_per_query": 1114, @@ -99,6 +110,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NeuCLIR2023Retrieval.json b/mteb/descriptive_stats/Retrieval/NeuCLIR2023Retrieval.json new file mode 100644 index 0000000000..4d2c3276a4 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NeuCLIR2023Retrieval.json @@ -0,0 +1,120 @@ +{ + "test": { + "num_samples": 10038996, + "number_of_characters": 15033100585, + "num_documents": 10038768, + "min_document_length": 0, + 
"average_document_length": 1497.5033054852945, + "max_document_length": 24394, + "unique_documents": 10038768, + "num_queries": 228, + "min_query_length": 10, + "average_query_length": 54.04385964912281, + "max_query_length": 135, + "unique_queries": 228, + "none_queries": 0, + "num_relevant_docs": 79934, + "min_relevant_docs_per_query": 94, + "average_relevant_docs_per_query": 60.73245614035088, + "max_relevant_docs_per_query": 801, + "unique_relevant_docs": 76913, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null, + "hf_subset_descriptive_stats": { + "fas": { + "num_samples": 2232092, + "number_of_characters": 4535669398, + "num_documents": 2232016, + "min_document_length": 99, + "average_document_length": 2032.093148525817, + "max_document_length": 24394, + "unique_documents": 2232016, + "num_queries": 76, + "min_query_length": 25, + "average_query_length": 65.48684210526316, + "max_query_length": 126, + "unique_queries": 76, + "none_queries": 0, + "num_relevant_docs": 26662, + "min_relevant_docs_per_query": 94, + "average_relevant_docs_per_query": 66.28947368421052, + "max_relevant_docs_per_query": 801, + "unique_relevant_docs": 25447, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "rus": { + "num_samples": 4627619, + "number_of_characters": 8134823647, + "num_documents": 4627543, + "min_document_length": 0, + "average_document_length": 1757.9129983233004, + "max_document_length": 24212, + "unique_documents": 4627543, + "num_queries": 76, + 
"min_query_length": 26, + "average_query_length": 74.4342105263158, + "max_query_length": 135, + "unique_queries": 76, + "none_queries": 0, + "num_relevant_docs": 25634, + "min_relevant_docs_per_query": 119, + "average_relevant_docs_per_query": 62.223684210526315, + "max_relevant_docs_per_query": 730, + "unique_relevant_docs": 24871, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + }, + "zho": { + "num_samples": 3179285, + "number_of_characters": 2362607540, + "num_documents": 3179209, + "min_document_length": 0, + "average_document_length": 743.1426659901881, + "max_document_length": 23870, + "unique_documents": 3179209, + "num_queries": 76, + "min_query_length": 10, + "average_query_length": 22.210526315789473, + "max_query_length": 44, + "unique_queries": 76, + "none_queries": 0, + "num_relevant_docs": 27638, + "min_relevant_docs_per_query": 136, + "average_relevant_docs_per_query": 53.68421052631579, + "max_relevant_docs_per_query": 777, + "unique_relevant_docs": 26595, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NeuCLIR2023RetrievalHardNegatives.json b/mteb/descriptive_stats/Retrieval/NeuCLIR2023RetrievalHardNegatives.json index faa1046d0e..54022d8a13 100644 --- a/mteb/descriptive_stats/Retrieval/NeuCLIR2023RetrievalHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/NeuCLIR2023RetrievalHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 
108197361, "num_samples": 49657, - "num_queries": 224, + "number_of_characters": 110553005, "num_documents": 49433, - "min_document_length": 10, - "average_document_length": 0.24516011571217608, - "max_document_length": 135, + "min_document_length": 0, + "average_document_length": 2236.175955333482, + "max_document_length": 24119, "unique_documents": 49433, - "min_query_length": 0, - "average_query_length": 482969.83035714284, - "max_query_length": 23987, + "num_queries": 224, + "min_query_length": 10, + "average_query_length": 54.10267857142857, + "max_query_length": 135, "unique_queries": 224, + "none_queries": 0, + "num_relevant_docs": 78199, "min_relevant_docs_per_query": 94, "average_relevant_docs_per_query": 61.816964285714285, "max_relevant_docs_per_query": 801, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "fas": { - "number_of_characters": 45268849, "num_samples": 15995, - "num_queries": 74, + "number_of_characters": 46110020, "num_documents": 15921, - "min_document_length": 25, - "average_document_length": 0.30626216946171725, - "max_document_length": 126, + "min_document_length": 99, + "average_document_length": 2895.869857421016, + "max_document_length": 24119, "unique_documents": 15921, - "min_query_length": 0, - "average_query_length": 611675.3108108108, - "max_query_length": 23987, + "num_queries": 74, + "min_query_length": 25, + "average_query_length": 65.89189189189189, + "max_query_length": 126, "unique_queries": 74, + "none_queries": 0, + "num_relevant_docs": 25612, "min_relevant_docs_per_query": 94, "average_relevant_docs_per_query": 68.08108108108108, "max_relevant_docs_per_query": 801, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + 
"num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "rus": { - "number_of_characters": 43207455, "num_samples": 16322, - "num_queries": 75, + "number_of_characters": 44267198, "num_documents": 16247, - "min_document_length": 26, - "average_document_length": 0.34350957099772267, - "max_document_length": 135, + "min_document_length": 0, + "average_document_length": 2724.294762109928, + "max_document_length": 23933, "unique_documents": 16247, - "min_query_length": 0, - "average_query_length": 576024.9866666667, - "max_query_length": 23814, + "num_queries": 75, + "min_query_length": 26, + "average_query_length": 74.41333333333333, + "max_query_length": 135, "unique_queries": 75, + "none_queries": 0, + "num_relevant_docs": 25381, "min_relevant_docs_per_query": 119, "average_relevant_docs_per_query": 63.053333333333335, "max_relevant_docs_per_query": 730, @@ -73,23 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zho": { - "number_of_characters": 19721057, "num_samples": 17340, - "num_queries": 75, + "number_of_characters": 20175787, "num_documents": 17265, - "min_document_length": 10, - "average_document_length": 0.09626411815812337, - "max_document_length": 44, + "min_document_length": 74, + "average_document_length": 1168.4984071821605, + "max_document_length": 21570, "unique_documents": 17265, - "min_query_length": 0, - "average_query_length": 262925.26666666666, - "max_query_length": 21556, + "num_queries": 75, + "min_query_length": 10, + "average_query_length": 22.16, + "max_query_length": 44, "unique_queries": 75, + "none_queries": 0, + "num_relevant_docs": 27206, "min_relevant_docs_per_query": 136, "average_relevant_docs_per_query": 54.4, "max_relevant_docs_per_query": 777, @@ -99,6 +110,7 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/NorQuadRetrieval.json b/mteb/descriptive_stats/Retrieval/NorQuadRetrieval.json index 236c823e12..1f1d46a88c 100644 --- a/mteb/descriptive_stats/Retrieval/NorQuadRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NorQuadRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 273854, "num_samples": 2072, - "num_queries": 1024, + "number_of_characters": 273854, "num_documents": 1048, - "min_document_length": 11, - "average_document_length": 46.79961832061068, - "max_document_length": 100, + "min_document_length": 1, + "average_document_length": 214.5114503816794, + "max_document_length": 2606, "unique_documents": 1048, - "min_query_length": 1, - "average_query_length": 219.5390625, - "max_query_length": 2606, + "num_queries": 1024, + "min_query_length": 11, + "average_query_length": 47.896484375, + "max_query_length": 100, "unique_queries": 1024, + "none_queries": 0, + "num_relevant_docs": 2048, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/PIQA.json b/mteb/descriptive_stats/Retrieval/PIQA.json index 4af121c529..b2949f9b87 100644 --- a/mteb/descriptive_stats/Retrieval/PIQA.json +++ b/mteb/descriptive_stats/Retrieval/PIQA.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 3652153, "num_samples": 37380, - "num_queries": 1838, + "number_of_characters": 3652153, "num_documents": 35542, - "min_document_length": 3, - 
"average_document_length": 1.8658488548759213, - "max_document_length": 116, + "min_document_length": 4, + "average_document_length": 100.89012998705756, + "max_document_length": 2187, "unique_documents": 35542, - "min_query_length": 4, - "average_query_length": 1950.9450489662677, - "max_query_length": 2187, + "num_queries": 1838, + "min_query_length": 3, + "average_query_length": 36.08052230685528, + "max_query_length": 116, "unique_queries": 1838, + "none_queries": 0, + "num_relevant_docs": 1838, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/PublicHealthQA.json b/mteb/descriptive_stats/Retrieval/PublicHealthQA.json index 80d408468b..de9e4e19a3 100644 --- a/mteb/descriptive_stats/Retrieval/PublicHealthQA.json +++ b/mteb/descriptive_stats/Retrieval/PublicHealthQA.json @@ -1,18 +1,19 @@ { "test": { + "num_samples": 1775, "number_of_characters": 692595, - "num_samples": 1776, - "num_queries": 888, "num_documents": 888, - "min_document_length": 7, - "average_document_length": 67.0608108108108, - "max_document_length": 310, + "min_document_length": 24, + "average_document_length": 712.8885135135135, + "max_document_length": 4976, "unique_documents": 888, - "min_query_length": 24, - "average_query_length": 712.8885135135135, - "max_query_length": 4976, + "num_queries": 887, + "min_query_length": 7, + "average_query_length": 67.13641488162345, + "max_query_length": 310, "unique_queries": 888, - "none_queries": 0, + "none_queries": 1, + "num_relevant_docs": 888, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -22,24 +23,26 @@ "average_instruction_length": null, 
"max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "arabic": { + "num_samples": 173, "number_of_characters": 79676, - "num_samples": 174, - "num_queries": 87, "num_documents": 87, - "min_document_length": 19, - "average_document_length": 78.93103448275862, - "max_document_length": 310, + "min_document_length": 181, + "average_document_length": 836.8850574712644, + "max_document_length": 3749, "unique_documents": 87, - "min_query_length": 181, - "average_query_length": 836.8850574712644, - "max_query_length": 3749, + "num_queries": 86, + "min_query_length": 19, + "average_query_length": 79.84883720930233, + "max_query_length": 310, "unique_queries": 87, - "none_queries": 0, + "none_queries": 1, + "num_relevant_docs": 87, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -49,24 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "chinese": { - "number_of_characters": 43099, "num_samples": 326, - "num_queries": 163, + "number_of_characters": 43099, "num_documents": 163, - "min_document_length": 7, - "average_document_length": 24.828220858895705, - "max_document_length": 60, + "min_document_length": 24, + "average_document_length": 239.58282208588957, + "max_document_length": 1208, "unique_documents": 163, - "min_query_length": 24, - "average_query_length": 239.58282208588957, - "max_query_length": 1208, + "num_queries": 163, + "min_query_length": 7, + "average_query_length": 24.828220858895705, + "max_query_length": 60, "unique_queries": 163, "none_queries": 0, + "num_relevant_docs": 163, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -76,24 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "english": { - "number_of_characters": 149834, "num_samples": 344, - "num_queries": 172, + "number_of_characters": 149834, "num_documents": 172, - "min_document_length": 17, - "average_document_length": 71.78488372093024, - "max_document_length": 201, + "min_document_length": 69, + "average_document_length": 799.3430232558139, + "max_document_length": 3896, "unique_documents": 172, - "min_query_length": 69, - "average_query_length": 799.3430232558139, - "max_query_length": 3896, + "num_queries": 172, + "min_query_length": 17, + "average_query_length": 71.78488372093024, + "max_query_length": 201, "unique_queries": 172, "none_queries": 0, + "num_relevant_docs": 172, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -103,24 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "french": { - "number_of_characters": 95503, "num_samples": 170, - "num_queries": 85, + "number_of_characters": 95503, "num_documents": 85, - "min_document_length": 27, - "average_document_length": 101.88235294117646, - "max_document_length": 255, + "min_document_length": 210, + "average_document_length": 1021.6823529411764, + "max_document_length": 4320, "unique_documents": 85, - "min_query_length": 210, - "average_query_length": 1021.6823529411764, - "max_query_length": 4320, + "num_queries": 85, + "min_query_length": 27, + "average_query_length": 101.88235294117646, + "max_query_length": 255, "unique_queries": 85, 
"none_queries": 0, + "num_relevant_docs": 85, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -130,24 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "korean": { - "number_of_characters": 28945, "num_samples": 154, - "num_queries": 77, + "number_of_characters": 28945, "num_documents": 77, - "min_document_length": 11, - "average_document_length": 36.90909090909091, - "max_document_length": 90, + "min_document_length": 38, + "average_document_length": 339.0, + "max_document_length": 1289, "unique_documents": 77, - "min_query_length": 38, - "average_query_length": 339.0, - "max_query_length": 1289, + "num_queries": 77, + "min_query_length": 11, + "average_query_length": 36.90909090909091, + "max_query_length": 90, "unique_queries": 77, "none_queries": 0, + "num_relevant_docs": 77, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -157,24 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "russian": { - "number_of_characters": 69570, "num_samples": 130, - "num_queries": 65, + "number_of_characters": 69570, "num_documents": 65, - "min_document_length": 15, - "average_document_length": 85.2, - "max_document_length": 275, + "min_document_length": 175, + "average_document_length": 985.1076923076923, + "max_document_length": 4559, "unique_documents": 65, - "min_query_length": 175, - "average_query_length": 985.1076923076923, - "max_query_length": 4559, + "num_queries": 65, + "min_query_length": 15, + "average_query_length": 85.2, + "max_query_length": 275, "unique_queries": 
65, "none_queries": 0, + "num_relevant_docs": 65, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -184,24 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spanish": { - "number_of_characters": 166187, "num_samples": 324, - "num_queries": 162, + "number_of_characters": 166187, "num_documents": 162, - "min_document_length": 20, - "average_document_length": 84.67901234567901, - "max_document_length": 285, + "min_document_length": 86, + "average_document_length": 941.1666666666666, + "max_document_length": 4976, "unique_documents": 162, - "min_query_length": 86, - "average_query_length": 941.1666666666666, - "max_query_length": 4976, + "num_queries": 162, + "min_query_length": 20, + "average_query_length": 84.67901234567901, + "max_query_length": 285, "unique_queries": 162, "none_queries": 0, + "num_relevant_docs": 162, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -211,24 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vietnamese": { - "number_of_characters": 59781, "num_samples": 154, - "num_queries": 77, + "number_of_characters": 59781, "num_documents": 77, - "min_document_length": 26, - "average_document_length": 71.83116883116882, - "max_document_length": 180, + "min_document_length": 69, + "average_document_length": 704.5454545454545, + "max_document_length": 2604, "unique_documents": 77, - "min_query_length": 69, - "average_query_length": 704.5454545454545, - "max_query_length": 2604, + "num_queries": 77, + "min_query_length": 26, + 
"average_query_length": 71.83116883116882, + "max_query_length": 180, "unique_queries": 77, "none_queries": 0, + "num_relevant_docs": 77, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -238,6 +255,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/Quail.json b/mteb/descriptive_stats/Retrieval/Quail.json index 41a06a2007..f82f08b03c 100644 --- a/mteb/descriptive_stats/Retrieval/Quail.json +++ b/mteb/descriptive_stats/Retrieval/Quail.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 6258716, "num_samples": 35507, - "num_queries": 2720, + "number_of_characters": 6258716, "num_documents": 32787, - "min_document_length": 1520, - "average_document_length": 162.38228566200019, - "max_document_length": 2587, + "min_document_length": 2, + "average_document_length": 28.50788422240522, + "max_document_length": 161, "unique_documents": 32787, - "min_query_length": 2, - "average_query_length": 343.63529411764705, - "max_query_length": 161, + "num_queries": 2720, + "min_query_length": 1520, + "average_query_length": 1957.3632352941177, + "max_query_length": 2587, "unique_queries": 2720, + "none_queries": 0, + "num_relevant_docs": 2720, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/Quora-PL.json b/mteb/descriptive_stats/Retrieval/Quora-PL.json index 6243b649e4..ff5a12ba9a 100644 --- a/mteb/descriptive_stats/Retrieval/Quora-PL.json +++ 
b/mteb/descriptive_stats/Retrieval/Quora-PL.json @@ -1,17 +1,19 @@ { "validation": { - "number_of_characters": 35217726, "num_samples": 527931, - "num_queries": 5000, + "number_of_characters": 35217726, "num_documents": 522931, - "min_document_length": 11, - "average_document_length": 0.5220631402613347, - "max_document_length": 317, + "min_document_length": 2, + "average_document_length": 66.82473022253414, + "max_document_length": 1266, "unique_documents": 522931, - "min_query_length": 2, - "average_query_length": 6988.9446, - "max_query_length": 1266, + "num_queries": 5000, + "min_query_length": 11, + "average_query_length": 54.6006, + "max_query_length": 317, "unique_queries": 5000, + "none_queries": 0, + "num_relevant_docs": 7626, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.5252, "max_relevant_docs_per_query": 84, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test": { - "number_of_characters": 35490077, "num_samples": 532931, - "num_queries": 10000, + "number_of_characters": 35490077, "num_documents": 522931, "min_document_length": 2, - "average_document_length": 1.0428794621087676, - "max_document_length": 270, + "average_document_length": 66.82473022253414, + "max_document_length": 1266, "unique_documents": 522931, + "num_queries": 10000, "min_query_length": 2, - "average_query_length": 3494.4723, - "max_query_length": 1266, + "average_query_length": 54.5354, + "max_query_length": 270, "unique_queries": 10000, + "none_queries": 0, + "num_relevant_docs": 15675, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.5675, "max_relevant_docs_per_query": 75, @@ -47,6 +52,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, 
"min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/Quora-PLHardNegatives.json b/mteb/descriptive_stats/Retrieval/Quora-PLHardNegatives.json index f797184aaf..2659125998 100644 --- a/mteb/descriptive_stats/Retrieval/Quora-PLHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/Quora-PLHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 11885329, "num_samples": 173031, - "num_queries": 1000, + "number_of_characters": 11885329, "num_documents": 172031, "min_document_length": 2, - "average_document_length": 0.3130017264330266, - "max_document_length": 187, + "average_document_length": 68.77529631287385, + "max_document_length": 1266, "unique_documents": 172031, + "num_queries": 1000, "min_query_length": 2, - "average_query_length": 11831.483, - "max_query_length": 1266, + "average_query_length": 53.846, + "max_query_length": 187, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1641, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.641, "max_relevant_docs_per_query": 34, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/QuoraRetrieval.json b/mteb/descriptive_stats/Retrieval/QuoraRetrieval.json index 05053f3d4c..f635a69c5c 100644 --- a/mteb/descriptive_stats/Retrieval/QuoraRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/QuoraRetrieval.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 33285028, "num_samples": 527931, - "num_queries": 5000, + "number_of_characters": 33285028, "num_documents": 522931, - "min_document_length": 12, - "average_document_length": 0.49274378455283774, - "max_document_length": 268, + "min_document_length": 2, + 
"average_document_length": 63.158154708747425, + "max_document_length": 1170, "unique_documents": 522931, - "min_query_length": 2, - "average_query_length": 6605.4714, - "max_query_length": 1170, + "num_queries": 5000, + "min_query_length": 12, + "average_query_length": 51.5342, + "max_query_length": 268, "unique_queries": 5000, + "none_queries": 0, + "num_relevant_docs": 7626, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.5252, "max_relevant_docs_per_query": 84, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test": { - "number_of_characters": 33542753, "num_samples": 532931, - "num_queries": 10000, + "number_of_characters": 33542753, "num_documents": 522931, "min_document_length": 2, - "average_document_length": 0.9855908332074403, - "max_document_length": 258, + "average_document_length": 63.158154708747425, + "max_document_length": 1170, "unique_documents": 522931, + "num_queries": 10000, "min_query_length": 2, - "average_query_length": 3302.7357, - "max_query_length": 1170, + "average_query_length": 51.5396, + "max_query_length": 258, "unique_queries": 10000, + "none_queries": 0, + "num_relevant_docs": 15675, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.5675, "max_relevant_docs_per_query": 75, @@ -47,6 +52,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.json b/mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.json index aa1cb8b4fd..df2c427c5b 100644 --- a/mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.json +++ 
b/mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 10675629, "num_samples": 178163, - "num_queries": 1000, + "number_of_characters": 10675629, "num_documents": 177163, "min_document_length": 2, - "average_document_length": 0.28915744258112586, - "max_document_length": 180, + "average_document_length": 59.96963812985781, + "max_document_length": 582, "unique_documents": 177163, + "num_queries": 1000, "min_query_length": 2, - "average_query_length": 10624.401, - "max_query_length": 582, + "average_query_length": 51.228, + "max_query_length": 180, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1641, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.641, "max_relevant_docs_per_query": 34, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/RARbCode.json b/mteb/descriptive_stats/Retrieval/RARbCode.json index 8b657358b6..28d1dc2e38 100644 --- a/mteb/descriptive_stats/Retrieval/RARbCode.json +++ b/mteb/descriptive_stats/Retrieval/RARbCode.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 240139724, "num_samples": 302966, - "num_queries": 1484, + "number_of_characters": 240139724, "num_documents": 301482, - "min_document_length": 37, - "average_document_length": 1.849576425789931, - "max_document_length": 1512, + "min_document_length": 17, + "average_document_length": 794.6813076734267, + "max_document_length": 11365, "unique_documents": 301482, - "min_query_length": 17, - "average_query_length": 161443.4703504043, - "max_query_length": 11365, + "num_queries": 1484, + "min_query_length": 37, + "average_query_length": 375.7506738544474, + "max_query_length": 1512, "unique_queries": 1484, + "none_queries": 0, + 
"num_relevant_docs": 1484, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/RARbMath.json b/mteb/descriptive_stats/Retrieval/RARbMath.json index 88a19cb11d..0cdf44614f 100644 --- a/mteb/descriptive_stats/Retrieval/RARbMath.json +++ b/mteb/descriptive_stats/Retrieval/RARbMath.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 197971515, "num_samples": 395695, - "num_queries": 6319, + "number_of_characters": 197971515, "num_documents": 389376, - "min_document_length": 25, - "average_document_length": 3.412978714661407, - "max_document_length": 2837, + "min_document_length": 16, + "average_document_length": 505.0197829347469, + "max_document_length": 5368, "unique_documents": 389376, - "min_query_length": 16, - "average_query_length": 31119.256686184523, - "max_query_length": 5368, + "num_queries": 6319, + "min_query_length": 25, + "average_query_length": 210.30732710871973, + "max_query_length": 2837, "unique_queries": 6319, + "none_queries": 0, + "num_relevant_docs": 6319, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/RiaNewsRetrieval.json b/mteb/descriptive_stats/Retrieval/RiaNewsRetrieval.json index bfbeab6b4e..3eb621ff1a 100644 --- a/mteb/descriptive_stats/Retrieval/RiaNewsRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/RiaNewsRetrieval.json @@ -1,17 +1,19 
@@ { "test": { - "number_of_characters": 822341995, "num_samples": 714344, - "num_queries": 10000, + "number_of_characters": 822341995, "num_documents": 704344, - "min_document_length": 4, - "average_document_length": 0.8859719114523585, - "max_document_length": 100, + "min_document_length": 1, + "average_document_length": 1166.6429557148213, + "max_document_length": 2001, "unique_documents": 704344, - "min_query_length": 1, - "average_query_length": 82171.7966, - "max_query_length": 2001, + "num_queries": 10000, + "min_query_length": 4, + "average_query_length": 62.4029, + "max_query_length": 100, "unique_queries": 10000, + "none_queries": 0, + "num_relevant_docs": 10000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.json b/mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.json index 3955ec2cd5..632a5c5d37 100644 --- a/mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 234657607, "num_samples": 192237, - "num_queries": 1000, + "number_of_characters": 234657607, "num_documents": 191237, - "min_document_length": 4, - "average_document_length": 0.32597248440416865, - "max_document_length": 85, + "min_document_length": 1, + "average_document_length": 1226.7253146619116, + "max_document_length": 2001, "unique_documents": 191237, - "min_query_length": 1, - "average_query_length": 234595.269, - "max_query_length": 2001, + "num_queries": 1000, + "min_query_length": 4, + "average_query_length": 62.338, + "max_query_length": 85, 
"unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/RuBQRetrieval.json b/mteb/descriptive_stats/Retrieval/RuBQRetrieval.json index 70a7bd0d69..dd0069fb65 100644 --- a/mteb/descriptive_stats/Retrieval/RuBQRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/RuBQRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 25645306, "num_samples": 58518, - "num_queries": 1692, + "number_of_characters": 25645306, "num_documents": 56826, - "min_document_length": 13, - "average_document_length": 1.3486960194277267, - "max_document_length": 152, + "min_document_length": 2, + "average_document_length": 449.94659134903037, + "max_document_length": 11011, "unique_documents": 56826, - "min_query_length": 2, - "average_query_length": 15111.504137115839, - "max_query_length": 11011, + "num_queries": 1692, + "min_query_length": 13, + "average_query_length": 45.29609929078014, + "max_query_length": 152, "unique_queries": 1692, + "none_queries": 0, + "num_relevant_docs": 2845, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6814420803782506, "max_relevant_docs_per_query": 7, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SCIDOCS-PL.json b/mteb/descriptive_stats/Retrieval/SCIDOCS-PL.json index 0d1787959c..cb596f457d 100644 --- a/mteb/descriptive_stats/Retrieval/SCIDOCS-PL.json +++ 
b/mteb/descriptive_stats/Retrieval/SCIDOCS-PL.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 32692750, "num_samples": 26657, - "num_queries": 1000, + "number_of_characters": 32692750, "num_documents": 25657, - "min_document_length": 14, - "average_document_length": 3.144210157072144, - "max_document_length": 235, + "min_document_length": 12, + "average_document_length": 1271.0791986592353, + "max_document_length": 11840, "unique_documents": 25657, - "min_query_length": 12, - "average_query_length": 32612.079, - "max_query_length": 11840, + "num_queries": 1000, + "min_query_length": 14, + "average_query_length": 80.671, + "max_query_length": 235, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 29928, "min_relevant_docs_per_query": 27, "average_relevant_docs_per_query": 4.928, "max_relevant_docs_per_query": 30, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SCIDOCS.json b/mteb/descriptive_stats/Retrieval/SCIDOCS.json index f1b0b36c97..e40a89ee0f 100644 --- a/mteb/descriptive_stats/Retrieval/SCIDOCS.json +++ b/mteb/descriptive_stats/Retrieval/SCIDOCS.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 30972050, "num_samples": 26657, - "num_queries": 1000, + "number_of_characters": 30972050, "num_documents": 25657, - "min_document_length": 16, - "average_document_length": 2.791908640916709, - "max_document_length": 206, + "min_document_length": 11, + "average_document_length": 1204.3659819932182, + "max_document_length": 10169, "unique_documents": 25657, - "min_query_length": 11, - "average_query_length": 30900.418, - "max_query_length": 10169, + "num_queries": 1000, + "min_query_length": 16, + "average_query_length": 71.632, + "max_query_length": 206, "unique_queries": 1000, + 
"none_queries": 0, + "num_relevant_docs": 29928, "min_relevant_docs_per_query": 27, "average_relevant_docs_per_query": 4.928, "max_relevant_docs_per_query": 30, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SIQA.json b/mteb/descriptive_stats/Retrieval/SIQA.json index c63d2a4ead..98e39e7c62 100644 --- a/mteb/descriptive_stats/Retrieval/SIQA.json +++ b/mteb/descriptive_stats/Retrieval/SIQA.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1957909, "num_samples": 73230, - "num_queries": 1954, + "number_of_characters": 1957909, "num_documents": 71276, - "min_document_length": 70, - "average_document_length": 3.502314944721926, - "max_document_length": 276, + "min_document_length": 3, + "average_document_length": 23.967085695044617, + "max_document_length": 170, "unique_documents": 71276, - "min_query_length": 3, - "average_query_length": 874.2466734902764, - "max_query_length": 170, + "num_queries": 1954, + "min_query_length": 70, + "average_query_length": 127.75383828045035, + "max_query_length": 276, "unique_queries": 1954, + "none_queries": 0, + "num_relevant_docs": 1954, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SKQuadRetrieval.json b/mteb/descriptive_stats/Retrieval/SKQuadRetrieval.json index 99f58b5129..212af3aceb 100644 --- a/mteb/descriptive_stats/Retrieval/SKQuadRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/SKQuadRetrieval.json @@ -1,17 +1,19 @@ { 
"test": { - "number_of_characters": 7706966, "num_samples": 7611, - "num_queries": 1134, + "number_of_characters": 7822656, "num_documents": 6477, - "min_document_length": 11, - "average_document_length": 9.390304153157325, - "max_document_length": 173, + "min_document_length": 18, + "average_document_length": 1198.3688436004322, + "max_document_length": 76907, "unique_documents": 6477, - "min_query_length": 8, - "average_query_length": 6742.632275132275, - "max_query_length": 76886, + "num_queries": 1134, + "min_query_length": 11, + "average_query_length": 53.63403880070547, + "max_query_length": 173, "unique_queries": 1134, + "none_queries": 0, + "num_relevant_docs": 12451, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.853615520282187, "max_relevant_docs_per_query": 11, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SNLRetrieval.json b/mteb/descriptive_stats/Retrieval/SNLRetrieval.json index 50c79436e7..673830901e 100644 --- a/mteb/descriptive_stats/Retrieval/SNLRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/SNLRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 2602407, "num_samples": 2600, - "num_queries": 1300, + "number_of_characters": 2602407, "num_documents": 1300, - "min_document_length": 2, - "average_document_length": 14.906153846153845, - "max_document_length": 64, + "min_document_length": 400, + "average_document_length": 1986.9453846153847, + "max_document_length": 68710, "unique_documents": 1300, - "min_query_length": 400, - "average_query_length": 1986.9453846153847, - "max_query_length": 68710, + "num_queries": 1300, + "min_query_length": 2, + "average_query_length": 14.906153846153845, + "max_query_length": 64, "unique_queries": 1300, + "none_queries": 0, + 
"num_relevant_docs": 1300, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SadeemQuestionRetrieval.json b/mteb/descriptive_stats/Retrieval/SadeemQuestionRetrieval.json index 4c4d7c5f27..659e246b5b 100644 --- a/mteb/descriptive_stats/Retrieval/SadeemQuestionRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/SadeemQuestionRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 10203228, "num_samples": 25068, - "num_queries": 2089, + "number_of_characters": 10203228, "num_documents": 22979, - "min_document_length": 12, - "average_document_length": 6.116454153792593, - "max_document_length": 163, + "min_document_length": 101, + "average_document_length": 437.90756777927675, + "max_document_length": 4944, "unique_documents": 22979, - "min_query_length": 101, - "average_query_length": 4816.983245572044, - "max_query_length": 4944, + "num_queries": 2089, + "min_query_length": 12, + "average_query_length": 67.28099569171853, + "max_query_length": 163, "unique_queries": 2089, + "none_queries": 0, + "num_relevant_docs": 2089, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SciFact-PL.json b/mteb/descriptive_stats/Retrieval/SciFact-PL.json index d8cde257d0..e495a4f670 100644 --- a/mteb/descriptive_stats/Retrieval/SciFact-PL.json +++ 
b/mteb/descriptive_stats/Retrieval/SciFact-PL.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 8085698, "num_samples": 5483, - "num_queries": 300, + "number_of_characters": 8085698, "num_documents": 5183, - "min_document_length": 27, - "average_document_length": 5.524213775805518, - "max_document_length": 227, + "min_document_length": 233, + "average_document_length": 1554.5178468068686, + "max_document_length": 10870, "unique_documents": 5183, - "min_query_length": 233, - "average_query_length": 26856.886666666665, - "max_query_length": 10870, + "num_queries": 300, + "min_query_length": 27, + "average_query_length": 95.44, + "max_query_length": 227, "unique_queries": 300, + "none_queries": 0, + "num_relevant_docs": 339, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.13, "max_relevant_docs_per_query": 5, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SciFact.json b/mteb/descriptive_stats/Retrieval/SciFact.json index 9a38e491ec..bdf6054960 100644 --- a/mteb/descriptive_stats/Retrieval/SciFact.json +++ b/mteb/descriptive_stats/Retrieval/SciFact.json @@ -1,17 +1,19 @@ { "train": { - "number_of_characters": 7843137, "num_samples": 5992, - "num_queries": 809, + "number_of_characters": 7843137, "num_documents": 5183, - "min_document_length": 26, - "average_document_length": 13.827513023345553, - "max_document_length": 249, + "min_document_length": 221, + "average_document_length": 1499.4152035500674, + "max_document_length": 10127, "unique_documents": 5183, - "min_query_length": 221, - "average_query_length": 9606.265760197775, - "max_query_length": 10127, + "num_queries": 809, + "min_query_length": 26, + "average_query_length": 88.58838071693448, + "max_query_length": 249, "unique_queries": 
809, + "none_queries": 0, + "num_relevant_docs": 919, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.1359703337453646, "max_relevant_docs_per_query": 5, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "test": { - "number_of_characters": 7798573, "num_samples": 5483, - "num_queries": 300, + "number_of_characters": 7798573, "num_documents": 5183, - "min_document_length": 28, - "average_document_length": 5.229403820181362, - "max_document_length": 204, + "min_document_length": 221, + "average_document_length": 1499.4152035500674, + "max_document_length": 10127, "unique_documents": 5183, - "min_query_length": 221, - "average_query_length": 25904.896666666667, - "max_query_length": 10127, + "num_queries": 300, + "min_query_length": 28, + "average_query_length": 90.34666666666666, + "max_query_length": 204, "unique_queries": 300, + "none_queries": 0, + "num_relevant_docs": 339, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.13, "max_relevant_docs_per_query": 5, @@ -47,6 +52,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SlovakSumRetrieval.json b/mteb/descriptive_stats/Retrieval/SlovakSumRetrieval.json index a4b9ce59c7..c199f171bc 100644 --- a/mteb/descriptive_stats/Retrieval/SlovakSumRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/SlovakSumRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 1339304, "num_samples": 1200, - "num_queries": 600, + "number_of_characters": 1380026, "num_documents": 600, - "min_document_length": 25, - "average_document_length": 
143.59833333333333, - "max_document_length": 606, + "min_document_length": 204, + "average_document_length": 2156.445, + "max_document_length": 22509, "unique_documents": 600, - "min_query_length": 177, - "average_query_length": 2088.575, - "max_query_length": 22445, + "num_queries": 600, + "min_query_length": 25, + "average_query_length": 143.59833333333333, + "max_query_length": 606, "unique_queries": 600, + "none_queries": 0, + "num_relevant_docs": 600, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2P.json b/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2P.json index 7d6b12791f..b7226cf14d 100644 --- a/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2P.json +++ b/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2P.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 26460964, "num_samples": 10204, - "num_queries": 167, + "number_of_characters": 26460964, "num_documents": 10037, - "min_document_length": 24, - "average_document_length": 1.1240410481219487, - "max_document_length": 116, + "min_document_length": 22, + "average_document_length": 2635.217893792966, + "max_document_length": 22035, "unique_documents": 10037, - "min_query_length": 22, - "average_query_length": 158381.32934131735, - "max_query_length": 22035, + "num_queries": 167, + "min_query_length": 24, + "average_query_length": 67.55688622754491, + "max_query_length": 116, "unique_queries": 167, + "none_queries": 0, + "num_relevant_docs": 1011, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 6.053892215568863, "max_relevant_docs_per_query": 19, @@ -21,6 +23,7 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2S.json b/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2S.json index f7204e72de..de07c11e26 100644 --- a/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2S.json +++ b/mteb/descriptive_stats/Retrieval/SpanishPassageRetrievalS2S.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 126449, "num_samples": 432, - "num_queries": 167, + "number_of_characters": 126449, "num_documents": 265, - "min_document_length": 24, - "average_document_length": 42.573584905660375, - "max_document_length": 116, + "min_document_length": 51, + "average_document_length": 434.5924528301887, + "max_document_length": 2084, "unique_documents": 265, - "min_query_length": 51, - "average_query_length": 689.622754491018, - "max_query_length": 2084, + "num_queries": 167, + "min_query_length": 24, + "average_query_length": 67.55688622754491, + "max_query_length": 116, "unique_queries": 167, + "none_queries": 0, + "num_relevant_docs": 1289, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 7.718562874251497, "max_relevant_docs_per_query": 24, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SpartQA.json b/mteb/descriptive_stats/Retrieval/SpartQA.json index 97df0dac4c..c2b6ff47cd 100644 --- a/mteb/descriptive_stats/Retrieval/SpartQA.json +++ b/mteb/descriptive_stats/Retrieval/SpartQA.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 2440343, "num_samples": 5186, - "num_queries": 3594, + "number_of_characters": 
2440343, "num_documents": 1592, - "min_document_length": 380, - "average_document_length": 1481.4704773869346, - "max_document_length": 1442, + "min_document_length": 8, + "average_document_length": 51.40829145728643, + "max_document_length": 91, "unique_documents": 1592, - "min_query_length": 8, - "average_query_length": 22.771841958820257, - "max_query_length": 91, + "num_queries": 3594, + "min_query_length": 380, + "average_query_length": 656.2328881469115, + "max_query_length": 1442, "unique_queries": 3594, + "none_queries": 0, + "num_relevant_docs": 6752, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.8786867000556482, "max_relevant_docs_per_query": 3, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/StackOverflowQA.json b/mteb/descriptive_stats/Retrieval/StackOverflowQA.json index 51972461e6..7178e95f94 100644 --- a/mteb/descriptive_stats/Retrieval/StackOverflowQA.json +++ b/mteb/descriptive_stats/Retrieval/StackOverflowQA.json @@ -1,20 +1,31 @@ { "test": { - "number_of_characters": 26584028, "num_samples": 21925, - "num_queries": 1994, + "number_of_characters": 26584028, "num_documents": 19931, - "min_document_length": 61, - "average_document_length": 130.32145903366614, - "max_document_length": 22234, + "min_document_length": 5, + "average_document_length": 1203.4815613867845, + "max_document_length": 46028, "unique_documents": 19931, - "min_query_length": 5, - "average_query_length": 12029.38365095286, - "max_query_length": 46028, + "num_queries": 1994, + "min_query_length": 61, + "average_query_length": 1302.6263791374122, + "max_query_length": 22234, "unique_queries": 1994, + "none_queries": 0, + "num_relevant_docs": 1994, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 
1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 1994 + "unique_relevant_docs": 1994, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/StatcanDialogueDatasetRetrieval.json b/mteb/descriptive_stats/Retrieval/StatcanDialogueDatasetRetrieval.json index 9e91322e47..8f97b3551d 100644 --- a/mteb/descriptive_stats/Retrieval/StatcanDialogueDatasetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/StatcanDialogueDatasetRetrieval.json @@ -1,17 +1,19 @@ { "dev": { + "num_samples": 16381, "number_of_characters": 80426667, - "num_samples": 12479, - "num_queries": 665, "num_documents": 11814, - "min_document_length": 2, - "average_document_length": 0.7731504994074826, - "max_document_length": 2, + "min_document_length": 257, + "average_document_length": 6806.969104452344, + "max_document_length": 398046, "unique_documents": 11814, - "min_query_length": 257, - "average_query_length": 120928.62105263158, - "max_query_length": 398046, + "num_queries": 4567, + "min_query_length": 2, + "average_query_length": 2.0, + "max_query_length": 2, "unique_queries": 665, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.5037593984962405, "max_relevant_docs_per_query": 11, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "english": { + "num_samples": 9637, "number_of_characters": 38614817, - "num_samples": 6450, - "num_queries": 543, "num_documents": 
5907, - "min_document_length": 2, - "average_document_length": 1.2629084137464026, - "max_document_length": 2, + "min_document_length": 257, + "average_document_length": 6535.865413915693, + "max_document_length": 391133, "unique_documents": 5907, - "min_query_length": 257, - "average_query_length": 71100.10497237569, - "max_query_length": 391133, + "num_queries": 3730, + "min_query_length": 2, + "average_query_length": 2.0, + "max_query_length": 2, "unique_queries": 543, + "none_queries": 0, + "num_relevant_docs": 799, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.4714548802946592, "max_relevant_docs_per_query": 11, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "french": { + "num_samples": 6744, "number_of_characters": 41811850, - "num_samples": 6029, - "num_queries": 122, "num_documents": 5907, - "min_document_length": 2, - "average_document_length": 0.28339258506856274, - "max_document_length": 2, + "min_document_length": 268, + "average_document_length": 7078.072794988996, + "max_document_length": 398046, "unique_documents": 5907, - "min_query_length": 268, - "average_query_length": 342706.3606557377, - "max_query_length": 398046, + "num_queries": 837, + "min_query_length": 2, + "average_query_length": 2.0, + "max_query_length": 2, "unique_queries": 122, + "none_queries": 0, + "num_relevant_docs": 201, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6475409836065573, "max_relevant_docs_per_query": 9, @@ -73,6 +81,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null @@ -80,18 +89,20 @@ } }, "test": { + "num_samples": 16683, 
"number_of_characters": 80427271, - "num_samples": 12475, - "num_queries": 661, "num_documents": 11814, - "min_document_length": 2, - "average_document_length": 0.824276282376841, - "max_document_length": 2, + "min_document_length": 257, + "average_document_length": 6806.969104452344, + "max_document_length": 398046, "unique_documents": 11814, - "min_query_length": 257, - "average_query_length": 121660.41301059001, - "max_query_length": 398046, + "num_queries": 4869, + "min_query_length": 2, + "average_query_length": 2.0, + "max_query_length": 2, "unique_queries": 661, + "none_queries": 0, + "num_relevant_docs": 1011, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.529500756429652, "max_relevant_docs_per_query": 11, @@ -101,23 +112,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "english": { + "num_samples": 10138, "number_of_characters": 38615819, - "num_samples": 6460, - "num_queries": 553, "num_documents": 5907, - "min_document_length": 2, - "average_document_length": 1.4325376671745387, - "max_document_length": 2, + "min_document_length": 257, + "average_document_length": 6535.865413915693, + "max_document_length": 391133, "unique_documents": 5907, - "min_query_length": 257, - "average_query_length": 69814.38878842676, - "max_query_length": 391133, + "num_queries": 4231, + "min_query_length": 2, + "average_query_length": 2.0, + "max_query_length": 2, "unique_queries": 553, + "none_queries": 0, + "num_relevant_docs": 870, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.573236889692586, "max_relevant_docs_per_query": 11, @@ -127,23 +141,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, 
"average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "french": { + "num_samples": 6545, "number_of_characters": 41811452, - "num_samples": 6015, - "num_queries": 108, "num_documents": 5907, - "min_document_length": 2, - "average_document_length": 0.21601489757914338, - "max_document_length": 2, + "min_document_length": 268, + "average_document_length": 7078.072794988996, + "max_document_length": 398046, "unique_documents": 5907, - "min_query_length": 268, - "average_query_length": 387131.25925925927, - "max_query_length": 398046, + "num_queries": 638, + "min_query_length": 2, + "average_query_length": 2.0, + "max_query_length": 2, "unique_queries": 108, + "none_queries": 0, + "num_relevant_docs": 141, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.3055555555555556, "max_relevant_docs_per_query": 4, @@ -153,6 +170,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SweFaqRetrieval.json b/mteb/descriptive_stats/Retrieval/SweFaqRetrieval.json index 94c3d40385..c435138dd2 100644 --- a/mteb/descriptive_stats/Retrieval/SweFaqRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/SweFaqRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 199616, "num_samples": 1024, - "num_queries": 513, + "number_of_characters": 199616, "num_documents": 511, - "min_document_length": 11, - "average_document_length": 70.79060665362036, - "max_document_length": 229, + "min_document_length": 31, + "average_document_length": 319.8473581213307, + "max_document_length": 1904, "unique_documents": 511, - "min_query_length": 31, - "average_query_length": 318.6003898635478, - "max_query_length": 1904, + "num_queries": 513, + "min_query_length": 11, + "average_query_length": 70.51461988304094, + "max_query_length": 
229, "unique_queries": 513, + "none_queries": 0, + "num_relevant_docs": 513, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SwednRetrieval.json b/mteb/descriptive_stats/Retrieval/SwednRetrieval.json index 9995eb778f..53b4642565 100644 --- a/mteb/descriptive_stats/Retrieval/SwednRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/SwednRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 5973257, "num_samples": 3070, - "num_queries": 1024, + "number_of_characters": 5973257, "num_documents": 2046, - "min_document_length": 6, - "average_document_length": 22.960899315738025, - "max_document_length": 122, + "min_document_length": 63, + "average_document_length": 2896.519550342131, + "max_document_length": 33779, "unique_documents": 2046, - "min_query_length": 63, - "average_query_length": 5787.3818359375, - "max_query_length": 33779, + "num_queries": 1024, + "min_query_length": 6, + "average_query_length": 45.876953125, + "max_query_length": 122, "unique_queries": 1024, + "none_queries": 0, + "num_relevant_docs": 2048, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SyntecRetrieval.json b/mteb/descriptive_stats/Retrieval/SyntecRetrieval.json index 79d46dd193..b92186fdc4 100644 --- a/mteb/descriptive_stats/Retrieval/SyntecRetrieval.json +++ 
b/mteb/descriptive_stats/Retrieval/SyntecRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 113404, "num_samples": 190, - "num_queries": 100, + "number_of_characters": 117466, "num_documents": 90, - "min_document_length": 18, - "average_document_length": 80.91111111111111, - "max_document_length": 175, + "min_document_length": 82, + "average_document_length": 1224.2666666666667, + "max_document_length": 6898, "unique_documents": 90, - "min_query_length": 51, - "average_query_length": 1061.22, - "max_query_length": 6874, + "num_queries": 100, + "min_query_length": 18, + "average_query_length": 72.82, + "max_query_length": 175, "unique_queries": 100, + "none_queries": 0, + "num_relevant_docs": 100, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json b/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json index 56c3964a58..af7cc09fb6 100644 --- a/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json +++ b/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json @@ -1,20 +1,31 @@ { "test": { - "number_of_characters": 14041553, "num_samples": 111702, - "num_queries": 5851, + "number_of_characters": 14041553, "num_documents": 105851, - "min_document_length": 13, - "average_document_length": 4.582686984534865, - "max_document_length": 281, + "min_document_length": 17, + "average_document_length": 128.07126054548374, + "max_document_length": 762, "unique_documents": 105851, - "min_query_length": 17, - "average_query_length": 2316.9494103572038, - "max_query_length": 762, + "num_queries": 5851, + "min_query_length": 13, + "average_query_length": 82.90582806357888, + "max_query_length": 
281, "unique_queries": 5851, + "none_queries": 0, + "num_relevant_docs": 5851, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 5851 + "unique_relevant_docs": 5851, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/T2Retrieval.json b/mteb/descriptive_stats/Retrieval/T2Retrieval.json index 77ad018985..81ecddc019 100644 --- a/mteb/descriptive_stats/Retrieval/T2Retrieval.json +++ b/mteb/descriptive_stats/Retrieval/T2Retrieval.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 103924352, "num_samples": 141417, - "num_queries": 22812, + "number_of_characters": 103924352, "num_documents": 118605, - "min_document_length": 4, - "average_document_length": 2.1039332237258126, - "max_document_length": 31, + "min_document_length": 1, + "average_document_length": 874.1184182791619, + "max_document_length": 42956, "unique_documents": 118605, - "min_query_length": 1, - "average_query_length": 4544.7490355953005, - "max_query_length": 42956, + "num_queries": 22812, + "min_query_length": 4, + "average_query_length": 10.938847974750132, + "max_query_length": 31, "unique_queries": 22812, + "none_queries": 0, + "num_relevant_docs": 118932, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 5.213571804313519, "max_relevant_docs_per_query": 62, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TRECCOVID-PL.json 
b/mteb/descriptive_stats/Retrieval/TRECCOVID-PL.json index a0fc70975b..5c5df2565a 100644 --- a/mteb/descriptive_stats/Retrieval/TRECCOVID-PL.json +++ b/mteb/descriptive_stats/Retrieval/TRECCOVID-PL.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 198886004, "num_samples": 171382, - "num_queries": 50, + "number_of_characters": 198886004, "num_documents": 171332, - "min_document_length": 30, - "average_document_length": 0.020258912520720006, - "max_document_length": 199, + "min_document_length": 1, + "average_document_length": 1160.8020276422385, + "max_document_length": 122472, "unique_documents": 171332, - "min_query_length": 1, - "average_query_length": 3977650.66, - "max_query_length": 122472, + "num_queries": 50, + "min_query_length": 30, + "average_query_length": 69.42, + "max_query_length": 199, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 66336, "min_relevant_docs_per_query": 631, "average_relevant_docs_per_query": 493.5, "max_relevant_docs_per_query": 1941, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TRECCOVID.json b/mteb/descriptive_stats/Retrieval/TRECCOVID.json index b9d80db016..b0e9575b40 100644 --- a/mteb/descriptive_stats/Retrieval/TRECCOVID.json +++ b/mteb/descriptive_stats/Retrieval/TRECCOVID.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 191508678, "num_samples": 171382, - "num_queries": 50, + "number_of_characters": 191508678, "num_documents": 171332, - "min_document_length": 30, - "average_document_length": 0.020206382929050033, - "max_document_length": 165, + "min_document_length": 1, + "average_document_length": 1117.7434221277986, + "max_document_length": 122459, "unique_documents": 171332, - "min_query_length": 1, - "average_query_length": 3830104.32, - 
"max_query_length": 122459, + "num_queries": 50, + "min_query_length": 30, + "average_query_length": 69.24, + "max_query_length": 165, "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 66336, "min_relevant_docs_per_query": 631, "average_relevant_docs_per_query": 493.5, "max_relevant_docs_per_query": 1941, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TV2Nordretrieval.json b/mteb/descriptive_stats/Retrieval/TV2Nordretrieval.json index e1a96bb010..4089e8960f 100644 --- a/mteb/descriptive_stats/Retrieval/TV2Nordretrieval.json +++ b/mteb/descriptive_stats/Retrieval/TV2Nordretrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 3209663, "num_samples": 4096, - "num_queries": 2048, + "number_of_characters": 3209663, "num_documents": 2048, - "min_document_length": 25, - "average_document_length": 126.552734375, - "max_document_length": 400, + "min_document_length": 28, + "average_document_length": 1440.66552734375, + "max_document_length": 15619, "unique_documents": 2048, - "min_query_length": 28, - "average_query_length": 1440.66552734375, - "max_query_length": 15619, + "num_queries": 2048, + "min_query_length": 25, + "average_query_length": 126.552734375, + "max_query_length": 400, "unique_queries": 2048, + "none_queries": 0, + "num_relevant_docs": 2048, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL1.json 
b/mteb/descriptive_stats/Retrieval/TempReasonL1.json index e5be9a73bd..267d3e1cf0 100644 --- a/mteb/descriptive_stats/Retrieval/TempReasonL1.json +++ b/mteb/descriptive_stats/Retrieval/TempReasonL1.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 325808, "num_samples": 16504, - "num_queries": 4000, + "number_of_characters": 325808, "num_documents": 12504, - "min_document_length": 39, - "average_document_length": 16.066458733205373, - "max_document_length": 54, + "min_document_length": 9, + "average_document_length": 9.989843250159948, + "max_document_length": 10, "unique_documents": 12504, - "min_query_length": 9, - "average_query_length": 31.22825, - "max_query_length": 10, + "num_queries": 4000, + "min_query_length": 39, + "average_query_length": 50.22375, + "max_query_length": 54, "unique_queries": 4000, + "none_queries": 0, + "num_relevant_docs": 4000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL2Context.json b/mteb/descriptive_stats/Retrieval/TempReasonL2Context.json index 70f2d53e95..bec131256e 100644 --- a/mteb/descriptive_stats/Retrieval/TempReasonL2Context.json +++ b/mteb/descriptive_stats/Retrieval/TempReasonL2Context.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 64656976, "num_samples": 21184, - "num_queries": 5397, + "number_of_characters": 64656976, "num_documents": 15787, - "min_document_length": 409, - "average_document_length": 4074.759929055552, - "max_document_length": 115754, + "min_document_length": 3, + "average_document_length": 20.823525685690758, + "max_document_length": 141, "unique_documents": 15787, - "min_query_length": 3, - "average_query_length": 
60.911802853437095, - "max_query_length": 141, + "num_queries": 5397, + "min_query_length": 409, + "average_query_length": 11919.25792106726, + "max_query_length": 115754, "unique_queries": 5397, + "none_queries": 0, + "num_relevant_docs": 5397, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL2Fact.json b/mteb/descriptive_stats/Retrieval/TempReasonL2Fact.json index 72dc39f22c..6868508ad8 100644 --- a/mteb/descriptive_stats/Retrieval/TempReasonL2Fact.json +++ b/mteb/descriptive_stats/Retrieval/TempReasonL2Fact.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 4812174, "num_samples": 21184, - "num_queries": 5397, + "number_of_characters": 4812174, "num_documents": 15787, - "min_document_length": 230, - "average_document_length": 283.9952492557167, - "max_document_length": 4822, + "min_document_length": 3, + "average_document_length": 20.823525685690758, + "max_document_length": 141, "unique_documents": 15787, - "min_query_length": 3, - "average_query_length": 60.911802853437095, - "max_query_length": 141, + "num_queries": 5397, + "min_query_length": 230, + "average_query_length": 830.7268853066519, + "max_query_length": 4822, "unique_queries": 5397, + "none_queries": 0, + "num_relevant_docs": 5397, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git 
a/mteb/descriptive_stats/Retrieval/TempReasonL2Pure.json b/mteb/descriptive_stats/Retrieval/TempReasonL2Pure.json index e83bde9b37..4f4056f753 100644 --- a/mteb/descriptive_stats/Retrieval/TempReasonL2Pure.json +++ b/mteb/descriptive_stats/Retrieval/TempReasonL2Pure.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 630654, "num_samples": 21184, - "num_queries": 5397, + "number_of_characters": 630654, "num_documents": 15787, - "min_document_length": 36, - "average_document_length": 19.12415278393615, - "max_document_length": 90, + "min_document_length": 3, + "average_document_length": 20.823525685690758, + "max_document_length": 141, "unique_documents": 15787, - "min_query_length": 3, - "average_query_length": 60.911802853437095, - "max_query_length": 141, + "num_queries": 5397, + "min_query_length": 36, + "average_query_length": 55.94089308875301, + "max_query_length": 90, "unique_queries": 5397, + "none_queries": 0, + "num_relevant_docs": 5397, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL3Context.json b/mteb/descriptive_stats/Retrieval/TempReasonL3Context.json index 93206cedc7..47b755d65c 100644 --- a/mteb/descriptive_stats/Retrieval/TempReasonL3Context.json +++ b/mteb/descriptive_stats/Retrieval/TempReasonL3Context.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 59743321, "num_samples": 20090, - "num_queries": 4426, + "number_of_characters": 59743321, "num_documents": 15664, - "min_document_length": 412, - "average_document_length": 3793.2473186925436, - "max_document_length": 115787, + "min_document_length": 3, + "average_document_length": 20.80534984678243, + 
"max_document_length": 141, "unique_documents": 15664, - "min_query_length": 3, - "average_query_length": 73.63194758246723, - "max_query_length": 141, + "num_queries": 4426, + "min_query_length": 412, + "average_query_length": 13424.633077270673, + "max_query_length": 115787, "unique_queries": 4426, + "none_queries": 0, + "num_relevant_docs": 4426, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL3Fact.json b/mteb/descriptive_stats/Retrieval/TempReasonL3Fact.json index 48b4e4cc37..ea9aa9125d 100644 --- a/mteb/descriptive_stats/Retrieval/TempReasonL3Fact.json +++ b/mteb/descriptive_stats/Retrieval/TempReasonL3Fact.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 4291925, "num_samples": 20090, - "num_queries": 4426, + "number_of_characters": 4291925, "num_documents": 15664, - "min_document_length": 232, - "average_document_length": 253.19394790602655, - "max_document_length": 4791, + "min_document_length": 3, + "average_document_length": 20.80534984678243, + "max_document_length": 141, "unique_documents": 15664, - "min_query_length": 3, - "average_query_length": 73.63194758246723, - "max_query_length": 141, + "num_queries": 4426, + "min_query_length": 232, + "average_query_length": 896.0754631721645, + "max_query_length": 4791, "unique_queries": 4426, + "none_queries": 0, + "num_relevant_docs": 4426, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, 
"average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TempReasonL3Pure.json b/mteb/descriptive_stats/Retrieval/TempReasonL3Pure.json index 57b125d8a3..f2e9fba639 100644 --- a/mteb/descriptive_stats/Retrieval/TempReasonL3Pure.json +++ b/mteb/descriptive_stats/Retrieval/TempReasonL3Pure.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 655367, "num_samples": 20090, - "num_queries": 4426, + "number_of_characters": 655367, "num_documents": 15664, - "min_document_length": 39, - "average_document_length": 21.03370786516854, - "max_document_length": 142, + "min_document_length": 3, + "average_document_length": 20.80534984678243, + "max_document_length": 141, "unique_documents": 15664, - "min_query_length": 3, - "average_query_length": 73.63194758246723, - "max_query_length": 141, + "num_queries": 4426, + "min_query_length": 39, + "average_query_length": 74.44012652507908, + "max_query_length": 142, "unique_queries": 4426, + "none_queries": 0, + "num_relevant_docs": 4426, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TopiOCQA.json b/mteb/descriptive_stats/Retrieval/TopiOCQA.json index bc3bca51ef..928a98b2fd 100644 --- a/mteb/descriptive_stats/Retrieval/TopiOCQA.json +++ b/mteb/descriptive_stats/Retrieval/TopiOCQA.json @@ -1,19 +1,19 @@ { "validation": { - "number_of_characters": 11369989152, "num_samples": 25703106, - "num_queries": 2514, + "number_of_characters": 12307963115, "num_documents": 25700592, - "num_relevant_docs": 2514, - "min_document_length": 1, - "average_document_length": 0.0012305553117220023, - "max_document_length": 31, + 
"min_document_length": 12, + "average_document_length": 478.8968086416064, + "max_document_length": 28110, "unique_documents": 25700592, + "num_queries": 2514, "min_query_length": 1, - "average_query_length": 4522656.136038186, - "max_query_length": 28038, + "average_query_length": 12.579952267303103, + "max_query_length": 31, "unique_queries": 2514, "none_queries": 0, + "num_relevant_docs": 2514, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -23,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TopiOCQAHardNegatives.json b/mteb/descriptive_stats/Retrieval/TopiOCQAHardNegatives.json index 767d034174..d7781a3068 100644 --- a/mteb/descriptive_stats/Retrieval/TopiOCQAHardNegatives.json +++ b/mteb/descriptive_stats/Retrieval/TopiOCQAHardNegatives.json @@ -1,17 +1,19 @@ { "validation": { - "number_of_characters": 48554965, "num_samples": 90933, - "num_queries": 1000, + "number_of_characters": 48554965, "num_documents": 89933, - "min_document_length": 1, - "average_document_length": 0.1428841470872761, - "max_document_length": 31, + "min_document_length": 14, + "average_document_length": 539.7586536643946, + "max_document_length": 6136, "unique_documents": 89933, - "min_query_length": 14, - "average_query_length": 48542.115, - "max_query_length": 6136, + "num_queries": 1000, + "min_query_length": 1, + "average_query_length": 12.85, + "max_query_length": 31, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, 
"min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/Touche2020.json b/mteb/descriptive_stats/Retrieval/Touche2020.json deleted file mode 100644 index a3c37a54ee..0000000000 --- a/mteb/descriptive_stats/Retrieval/Touche2020.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "test": { - "number_of_characters": 658107591, - "num_samples": 382594, - "num_queries": 49, - "num_documents": 382545, - "min_document_length": 16, - "average_document_length": 0.0055627442523101854, - "max_document_length": 83, - "unique_documents": 382545, - "min_query_length": 3, - "average_query_length": 13430723.734693877, - "max_query_length": 106072, - "unique_queries": 49, - "min_relevant_docs_per_query": 40, - "average_relevant_docs_per_query": 45.183673469387756, - "max_relevant_docs_per_query": 52, - "unique_relevant_docs": 2099 - } -} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json b/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json index 1b436abd75..4b277612c7 100644 --- a/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json +++ b/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json @@ -1,20 +1,31 @@ { "test": { - "number_of_characters": 637047138, "num_samples": 303781, - "num_queries": 49, + "number_of_characters": 637047138, "num_documents": 303732, - "min_document_length": 16, - "average_document_length": 0.007006176497701922, - "max_document_length": 83, + "min_document_length": 41, + "average_document_length": 2097.391812518931, + "max_document_length": 105983, "unique_documents": 303732, - "min_query_length": 41, - "average_query_length": 13000918.57142857, - "max_query_length": 105983, + "num_queries": 49, + "min_query_length": 16, + "average_query_length": 43.42857142857143, + "max_query_length": 83, "unique_queries": 49, + "none_queries": 0, + "num_relevant_docs": 2849, "min_relevant_docs_per_query": 40, - 
"average_relevant_docs_per_query": 58.142857142857146, + "average_relevant_docs_per_query": 34.93877551020408, "max_relevant_docs_per_query": 87, - "unique_relevant_docs": 2732 + "unique_relevant_docs": 2732, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/TurHistQuadRetrieval.json b/mteb/descriptive_stats/Retrieval/TurHistQuadRetrieval.json index 37c1ca2ad2..51561352f3 100644 --- a/mteb/descriptive_stats/Retrieval/TurHistQuadRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/TurHistQuadRetrieval.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 272814, "num_samples": 2237, - "num_queries": 1024, + "number_of_characters": 272814, "num_documents": 1213, - "min_document_length": 13, - "average_document_length": 52.78730420445177, - "max_document_length": 199, + "min_document_length": 1, + "average_document_length": 172.12118713932398, + "max_document_length": 10521, "unique_documents": 1213, - "min_query_length": 1, - "average_query_length": 203.8896484375, - "max_query_length": 10521, + "num_queries": 1024, + "min_query_length": 13, + "average_query_length": 62.5302734375, + "max_query_length": 199, "unique_queries": 1024, + "none_queries": 0, + "num_relevant_docs": 2048, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/TwitterHjerneRetrieval.json 
b/mteb/descriptive_stats/Retrieval/TwitterHjerneRetrieval.json index f60a7414ba..6571a2c4d5 100644 --- a/mteb/descriptive_stats/Retrieval/TwitterHjerneRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/TwitterHjerneRetrieval.json @@ -1,17 +1,19 @@ { "train": { - "number_of_characters": 46737, "num_samples": 340, - "num_queries": 78, + "number_of_characters": 46737, "num_documents": 262, - "min_document_length": 48, - "average_document_length": 49.534351145038165, - "max_document_length": 383, + "min_document_length": 16, + "average_document_length": 128.85114503816794, + "max_document_length": 595, "unique_documents": 262, - "min_query_length": 16, - "average_query_length": 432.8076923076923, - "max_query_length": 595, + "num_queries": 78, + "min_query_length": 48, + "average_query_length": 166.3846153846154, + "max_query_length": 383, "unique_queries": 78, + "none_queries": 0, + "num_relevant_docs": 262, "min_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 3.358974358974359, "max_relevant_docs_per_query": 6, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/VideoRetrieval.json b/mteb/descriptive_stats/Retrieval/VideoRetrieval.json index 2498aaa578..b322e84d94 100644 --- a/mteb/descriptive_stats/Retrieval/VideoRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/VideoRetrieval.json @@ -1,17 +1,19 @@ { "dev": { - "number_of_characters": 3141126, "num_samples": 101930, - "num_queries": 1000, + "number_of_characters": 3141126, "num_documents": 100930, - "min_document_length": 2, - "average_document_length": 0.07297136629347072, - "max_document_length": 19, + "min_document_length": 1, + "average_document_length": 31.048855642524522, + "max_document_length": 5869, "unique_documents": 100930, - "min_query_length": 
1, - "average_query_length": 3133.761, - "max_query_length": 5869, + "num_queries": 1000, + "min_query_length": 2, + "average_query_length": 7.365, + "max_query_length": 19, "unique_queries": 1000, + "none_queries": 0, + "num_relevant_docs": 1000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/VieQuADRetrieval.json b/mteb/descriptive_stats/Retrieval/VieQuADRetrieval.json index 043e2d1f72..f3313f9233 100644 --- a/mteb/descriptive_stats/Retrieval/VieQuADRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/VieQuADRetrieval.json @@ -1,17 +1,19 @@ { "validation": { - "number_of_characters": 657355, "num_samples": 4538, - "num_queries": 2048, + "number_of_characters": 688480, "num_documents": 2490, - "min_document_length": 10, - "average_document_length": 53.8855421686747, - "max_document_length": 245, + "min_document_length": 6, + "average_document_length": 222.61244979919678, + "max_document_length": 2871, "unique_documents": 2490, - "min_query_length": 1, - "average_query_length": 255.458984375, - "max_query_length": 2852, + "num_queries": 2048, + "min_query_length": 10, + "average_query_length": 65.51513671875, + "max_query_length": 245, "unique_queries": 2048, + "none_queries": 0, + "num_relevant_docs": 4096, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git 
a/mteb/descriptive_stats/Retrieval/WikipediaRetrievalMultilingual.json b/mteb/descriptive_stats/Retrieval/WikipediaRetrievalMultilingual.json index 495ee557ea..cc5e0d6569 100644 --- a/mteb/descriptive_stats/Retrieval/WikipediaRetrievalMultilingual.json +++ b/mteb/descriptive_stats/Retrieval/WikipediaRetrievalMultilingual.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 83866932, "num_samples": 240000, - "num_queries": 24000, + "number_of_characters": 83866932, "num_documents": 216000, - "min_document_length": 7, - "average_document_length": 6.565689814814815, - "max_document_length": 180, + "min_document_length": 100, + "average_document_length": 381.70714351851854, + "max_document_length": 9461, "unique_documents": 216000, - "min_query_length": 100, - "average_query_length": 3435.3642916666668, - "max_query_length": 9461, + "num_queries": 24000, + "min_query_length": 7, + "average_query_length": 59.091208333333334, + "max_query_length": 180, "unique_queries": 24000, + "none_queries": 0, + "num_relevant_docs": 24000, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "bg": { - "number_of_characters": 5145316, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5145316, "num_documents": 13500, - "min_document_length": 18, - "average_document_length": 6.758518518518518, - "max_document_length": 166, + "min_document_length": 100, + "average_document_length": 374.376, + "max_document_length": 4869, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3369.384, - "max_query_length": 4869, + "num_queries": 1500, + "min_query_length": 18, + "average_query_length": 60.82666666666667, 
+ "max_query_length": 166, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "bn": { - "number_of_characters": 5390581, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5390581, "num_documents": 13500, - "min_document_length": 7, - "average_document_length": 5.2518518518518515, - "max_document_length": 123, + "min_document_length": 100, + "average_document_length": 394.05044444444445, + "max_document_length": 5104, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3546.454, - "max_query_length": 5104, + "num_queries": 1500, + "min_query_length": 7, + "average_query_length": 47.266666666666666, + "max_query_length": 123, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -73,23 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "cs": { - "number_of_characters": 5079180, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5079180, "num_documents": 13500, - "min_document_length": 17, - "average_document_length": 6.2524444444444445, - "max_document_length": 137, + "min_document_length": 100, + "average_document_length": 369.9831111111111, + "max_document_length": 3487, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3329.848, - "max_query_length": 3487, + "num_queries": 
1500, + "min_query_length": 17, + "average_query_length": 56.272, + "max_query_length": 137, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -99,23 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "da": { - "number_of_characters": 4746132, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 4746132, "num_documents": 13500, - "min_document_length": 17, - "average_document_length": 6.30562962962963, - "max_document_length": 137, + "min_document_length": 100, + "average_document_length": 345.2597037037037, + "max_document_length": 2563, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3107.3373333333334, - "max_query_length": 2563, + "num_queries": 1500, + "min_query_length": 17, + "average_query_length": 56.75066666666667, + "max_query_length": 137, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -125,23 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "de": { - "number_of_characters": 5483592, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5483592, "num_documents": 13500, - "min_document_length": 20, - "average_document_length": 7.778222222222222, - "max_document_length": 180, + "min_document_length": 100, + "average_document_length": 398.4137777777778, + "max_document_length": 3083, "unique_documents": 13500, - "min_query_length": 100, - 
"average_query_length": 3585.724, - "max_query_length": 3083, + "num_queries": 1500, + "min_query_length": 20, + "average_query_length": 70.004, + "max_query_length": 180, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -151,23 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "en": { - "number_of_characters": 6217884, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 6217884, "num_documents": 13500, - "min_document_length": 18, - "average_document_length": 7.596888888888889, - "max_document_length": 162, + "min_document_length": 100, + "average_document_length": 452.9871111111111, + "max_document_length": 3662, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 4076.884, - "max_query_length": 3662, + "num_queries": 1500, + "min_query_length": 18, + "average_query_length": 68.372, + "max_query_length": 162, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -177,23 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fa": { - "number_of_characters": 4732619, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 4732619, "num_documents": 13500, - "min_document_length": 12, - "average_document_length": 5.407481481481481, - "max_document_length": 119, + "min_document_length": 100, + "average_document_length": 345.1568888888889, + "max_document_length": 4707, 
"unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3106.412, - "max_query_length": 4707, + "num_queries": 1500, + "min_query_length": 12, + "average_query_length": 48.66733333333333, + "max_query_length": 119, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -203,23 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fi": { - "number_of_characters": 5209132, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5209132, "num_documents": 13500, - "min_document_length": 14, - "average_document_length": 6.149259259259259, - "max_document_length": 132, + "min_document_length": 100, + "average_document_length": 379.71237037037037, + "max_document_length": 2574, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3417.4113333333335, - "max_query_length": 2574, + "num_queries": 1500, + "min_query_length": 14, + "average_query_length": 55.343333333333334, + "max_query_length": 132, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -229,23 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hi": { - "number_of_characters": 5620959, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5620959, "num_documents": 13500, - "min_document_length": 13, - "average_document_length": 5.641925925925926, - "max_document_length": 125, + 
"min_document_length": 100, + "average_document_length": 410.72540740740743, + "max_document_length": 5912, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3696.5286666666666, - "max_query_length": 5912, + "num_queries": 1500, + "min_query_length": 13, + "average_query_length": 50.77733333333333, + "max_query_length": 125, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -255,23 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "it": { - "number_of_characters": 5420496, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5420496, "num_documents": 13500, - "min_document_length": 23, - "average_document_length": 7.783851851851852, - "max_document_length": 156, + "min_document_length": 100, + "average_document_length": 393.73437037037036, + "max_document_length": 9461, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3543.6093333333333, - "max_query_length": 9461, + "num_queries": 1500, + "min_query_length": 23, + "average_query_length": 70.05466666666666, + "max_query_length": 156, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -281,23 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "nl": { - "number_of_characters": 5169556, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5169556, "num_documents": 13500, - 
"min_document_length": 18, - "average_document_length": 7.260518518518518, - "max_document_length": 136, + "min_document_length": 100, + "average_document_length": 375.6695555555556, + "max_document_length": 3641, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3381.026, - "max_query_length": 3641, + "num_queries": 1500, + "min_query_length": 18, + "average_query_length": 65.34466666666667, + "max_query_length": 136, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -307,23 +342,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "pt": { - "number_of_characters": 5474356, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5474356, "num_documents": 13500, - "min_document_length": 18, - "average_document_length": 7.235481481481481, - "max_document_length": 176, + "min_document_length": 100, + "average_document_length": 398.27237037037037, + "max_document_length": 3057, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3584.4513333333334, - "max_query_length": 3057, + "num_queries": 1500, + "min_query_length": 18, + "average_query_length": 65.11933333333333, + "max_query_length": 176, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -333,23 +371,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ro": { - "number_of_characters": 4796113, "num_samples": 
15000, - "num_queries": 1500, + "number_of_characters": 4796113, "num_documents": 13500, - "min_document_length": 14, - "average_document_length": 6.885925925925926, - "max_document_length": 169, + "min_document_length": 100, + "average_document_length": 348.3817037037037, + "max_document_length": 4213, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3135.4353333333333, - "max_query_length": 4213, + "num_queries": 1500, + "min_query_length": 14, + "average_query_length": 61.973333333333336, + "max_query_length": 169, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -359,23 +400,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "sr": { - "number_of_characters": 5271732, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5271732, "num_documents": 13500, - "min_document_length": 15, - "average_document_length": 6.185481481481482, - "max_document_length": 146, + "min_document_length": 100, + "average_document_length": 384.3131851851852, + "max_document_length": 3668, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3458.8186666666666, - "max_query_length": 3668, + "num_queries": 1500, + "min_query_length": 15, + "average_query_length": 55.669333333333334, + "max_query_length": 146, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -385,23 +429,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": 
null, "max_top_ranked_per_query": null }, "no": { - "number_of_characters": 5036586, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5036586, "num_documents": 13500, - "min_document_length": 14, - "average_document_length": 6.143111111111111, - "max_document_length": 129, + "min_document_length": 100, + "average_document_length": 366.93733333333336, + "max_document_length": 2841, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3302.436, - "max_query_length": 2841, + "num_queries": 1500, + "min_query_length": 14, + "average_query_length": 55.288, + "max_query_length": 129, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -411,23 +458,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "sv": { - "number_of_characters": 5072698, "num_samples": 15000, - "num_queries": 1500, + "number_of_characters": 5072698, "num_documents": 13500, - "min_document_length": 17, - "average_document_length": 6.414444444444444, - "max_document_length": 133, + "min_document_length": 100, + "average_document_length": 369.340962962963, + "max_document_length": 3680, "unique_documents": 13500, - "min_query_length": 100, - "average_query_length": 3324.0686666666666, - "max_query_length": 3680, + "num_queries": 1500, + "min_query_length": 17, + "average_query_length": 57.73, + "max_query_length": 133, "unique_queries": 1500, + "none_queries": 0, + "num_relevant_docs": 1500, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -437,6 +487,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, 
"min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/WinoGrande.json b/mteb/descriptive_stats/Retrieval/WinoGrande.json index 39d09c1855..a9c69d435d 100644 --- a/mteb/descriptive_stats/Retrieval/WinoGrande.json +++ b/mteb/descriptive_stats/Retrieval/WinoGrande.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 185865, "num_samples": 6362, - "num_queries": 1267, + "number_of_characters": 185865, "num_documents": 5095, - "min_document_length": 79, - "average_document_length": 27.797448478900883, - "max_document_length": 185, + "min_document_length": 3, + "average_document_length": 8.68243375858685, + "max_document_length": 32, "unique_documents": 5095, - "min_query_length": 3, - "average_query_length": 34.9147592738753, - "max_query_length": 32, + "num_queries": 1267, + "min_query_length": 79, + "average_query_length": 111.78216258879242, + "max_query_length": 185, "unique_queries": 1267, + "none_queries": 0, + "num_relevant_docs": 1267, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,6 +23,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/XMarket.json b/mteb/descriptive_stats/Retrieval/XMarket.json index 477dbdf9d5..1900559343 100644 --- a/mteb/descriptive_stats/Retrieval/XMarket.json +++ b/mteb/descriptive_stats/Retrieval/XMarket.json @@ -1,18 +1,19 @@ { "test": { - "number_of_characters": 98558149, - "num_samples": 345689, - "num_queries": 16711, + "num_samples": 345688, + "number_of_characters": 123653121, "num_documents": 328978, "min_document_length": 1, - "average_document_length": 0.8491570864921059, - "max_document_length": 88, + "average_document_length": 
375.0213296937789, + "max_document_length": 152114, "unique_documents": 328978, - "min_query_length": 0, - "average_query_length": 5881.084016516067, - "max_query_length": 151924, + "num_queries": 16710, + "min_query_length": 1, + "average_query_length": 16.717773788150808, + "max_query_length": 88, "unique_queries": 16711, "none_queries": 1, + "num_relevant_docs": 1125549, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 67.35377894799832, "max_relevant_docs_per_query": 81770, @@ -22,24 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "de": { - "number_of_characters": 9005388, "num_samples": 74563, - "num_queries": 4037, + "number_of_characters": 13280456, "num_documents": 70526, - "min_document_length": 2, - "average_document_length": 0.8996965658055185, - "max_document_length": 51, + "min_document_length": 1, + "average_document_length": 187.4061197288943, + "max_document_length": 152114, "unique_documents": 70526, - "min_query_length": 0, - "average_query_length": 2214.9952935348033, - "max_query_length": 151924, + "num_queries": 4037, + "min_query_length": 2, + "average_query_length": 15.717612088184294, + "max_query_length": 51, "unique_queries": 4037, "none_queries": 0, + "num_relevant_docs": 219420, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 54.3522417636859, "max_relevant_docs_per_query": 41933, @@ -49,24 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "en": { - "number_of_characters": 81219963, - "num_samples": 227876, - "num_queries": 9099, + "num_samples": 227875, + "number_of_characters": 99205002, 
"num_documents": 218777, "min_document_length": 1, - "average_document_length": 0.6605219012967543, - "max_document_length": 66, + "average_document_length": 452.792089662076, + "max_document_length": 35870, "unique_documents": 218777, - "min_query_length": 0, - "average_query_length": 8910.369930761623, - "max_query_length": 35724, + "num_queries": 9098, + "min_query_length": 1, + "average_query_length": 15.883380962848978, + "max_query_length": 66, "unique_queries": 9099, "none_queries": 1, + "num_relevant_docs": 777393, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 85.43719090009891, "max_relevant_docs_per_query": 81770, @@ -76,24 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "es": { - "number_of_characters": 8332798, "num_samples": 43250, - "num_queries": 3575, + "number_of_characters": 11167663, "num_documents": 39675, - "min_document_length": 2, - "average_document_length": 1.7994959042218022, - "max_document_length": 88, + "min_document_length": 1, + "average_document_length": 279.67909262759923, + "max_document_length": 29187, "unique_documents": 39675, - "min_query_length": 0, - "average_query_length": 2310.881958041958, - "max_query_length": 29074, + "num_queries": 3575, + "min_query_length": 2, + "average_query_length": 19.97062937062937, + "max_query_length": 88, "unique_queries": 3575, "none_queries": 0, + "num_relevant_docs": 128736, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 36.01006993006993, "max_relevant_docs_per_query": 17788, @@ -103,6 +110,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git 
a/mteb/descriptive_stats/Retrieval/XPQARetrieval.json b/mteb/descriptive_stats/Retrieval/XPQARetrieval.json index 9b33a6cb7d..d00b0e60f0 100644 --- a/mteb/descriptive_stats/Retrieval/XPQARetrieval.json +++ b/mteb/descriptive_stats/Retrieval/XPQARetrieval.json @@ -1,19 +1,19 @@ { "test": { - "number_of_characters": 5308501, "num_samples": 81710, - "num_queries": 27856, + "number_of_characters": 5308501, "num_documents": 53854, - "num_relevant_docs": 55424, "min_document_length": 3, - "average_document_length": 20.861588739926468, - "max_document_length": 298, + "average_document_length": 77.710495042151, + "max_document_length": 4229, "unique_documents": 53854, + "num_queries": 27856, "min_query_length": 3, - "average_query_length": 150.2376866743251, - "max_query_length": 4229, + "average_query_length": 40.33170591614015, + "max_query_length": 298, "unique_queries": 27856, "none_queries": 0, + "num_relevant_docs": 55424, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9896611143021252, "max_relevant_docs_per_query": 17, @@ -23,25 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "ara-ara": { - "number_of_characters": 114782, "num_samples": 2245, - "num_queries": 750, + "number_of_characters": 114782, "num_documents": 1495, - "num_relevant_docs": 1503, - "min_document_length": 8, - "average_document_length": 14.893645484949833, - "max_document_length": 111, + "min_document_length": 10, + "average_document_length": 61.88361204013378, + "max_document_length": 1200, "unique_documents": 1495, - "min_query_length": 10, - "average_query_length": 123.35466666666666, - "max_query_length": 1200, + "num_queries": 750, + "min_query_length": 8, + "average_query_length": 29.688, + "max_query_length": 111, "unique_queries": 750, 
"none_queries": 0, + "num_relevant_docs": 1503, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.004, "max_relevant_docs_per_query": 5, @@ -51,25 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-ara": { - "number_of_characters": 214304, "num_samples": 2283, - "num_queries": 750, + "number_of_characters": 214304, "num_documents": 1533, - "num_relevant_docs": 1544, - "min_document_length": 8, - "average_document_length": 14.524461839530332, - "max_document_length": 111, + "min_document_length": 9, + "average_document_length": 125.26940639269407, + "max_document_length": 4229, "unique_documents": 1533, - "min_query_length": 9, - "average_query_length": 256.05066666666664, - "max_query_length": 4229, + "num_queries": 750, + "min_query_length": 8, + "average_query_length": 29.688, + "max_query_length": 111, "unique_queries": 750, "none_queries": 0, + "num_relevant_docs": 1544, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.058666666666667, "max_relevant_docs_per_query": 5, @@ -79,25 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ara-eng": { - "number_of_characters": 121839, "num_samples": 2237, - "num_queries": 742, + "number_of_characters": 121839, "num_documents": 1495, - "num_relevant_docs": 1502, - "min_document_length": 11, - "average_document_length": 19.614046822742473, - "max_document_length": 162, + "min_document_length": 10, + "average_document_length": 61.88361204013378, + "max_document_length": 1200, "unique_documents": 1495, - "min_query_length": 10, - "average_query_length": 124.68463611859838, - "max_query_length": 1200, + 
"num_queries": 742, + "min_query_length": 11, + "average_query_length": 39.5188679245283, + "max_query_length": 162, "unique_queries": 742, "none_queries": 0, + "num_relevant_docs": 1502, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.024258760107817, "max_relevant_docs_per_query": 5, @@ -107,25 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-deu": { - "number_of_characters": 129323, "num_samples": 2014, - "num_queries": 766, + "number_of_characters": 129323, "num_documents": 1248, - "num_relevant_docs": 1250, - "min_document_length": 17, - "average_document_length": 34.076121794871796, - "max_document_length": 144, + "min_document_length": 13, + "average_document_length": 69.54807692307692, + "max_document_length": 383, "unique_documents": 1248, - "min_query_length": 13, - "average_query_length": 113.31070496083551, - "max_query_length": 383, + "num_queries": 766, + "min_query_length": 17, + "average_query_length": 55.51827676240209, + "max_query_length": 144, "unique_queries": 766, "none_queries": 0, + "num_relevant_docs": 1250, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6318537859007833, "max_relevant_docs_per_query": 5, @@ -135,25 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-deu": { - "number_of_characters": 216068, "num_samples": 2265, - "num_queries": 766, + "number_of_characters": 216068, "num_documents": 1499, - "num_relevant_docs": 1504, - "min_document_length": 17, - "average_document_length": 28.370246831220815, - "max_document_length": 144, + "min_document_length": 3, + "average_document_length": 
115.77118078719145, + "max_document_length": 1130, "unique_documents": 1499, - "min_query_length": 3, - "average_query_length": 226.55483028720627, - "max_query_length": 1130, + "num_queries": 766, + "min_query_length": 17, + "average_query_length": 55.51827676240209, + "max_query_length": 144, "unique_queries": 766, "none_queries": 0, + "num_relevant_docs": 1504, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9634464751958225, "max_relevant_docs_per_query": 5, @@ -163,25 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "deu-eng": { - "number_of_characters": 126543, "num_samples": 2014, - "num_queries": 766, + "number_of_characters": 126543, "num_documents": 1248, - "num_relevant_docs": 1250, - "min_document_length": 15, - "average_document_length": 31.848557692307693, - "max_document_length": 144, + "min_document_length": 13, + "average_document_length": 69.54807692307692, + "max_document_length": 383, "unique_documents": 1248, - "min_query_length": 13, - "average_query_length": 113.31070496083551, - "max_query_length": 383, + "num_queries": 766, + "min_query_length": 15, + "average_query_length": 51.88903394255875, + "max_query_length": 144, "unique_queries": 766, "none_queries": 0, + "num_relevant_docs": 1250, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6318537859007833, "max_relevant_docs_per_query": 5, @@ -191,25 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "spa-spa": { - "number_of_characters": 169564, "num_samples": 2734, - "num_queries": 793, + "number_of_characters": 169564, "num_documents": 1941, - "num_relevant_docs": 
1942, - "min_document_length": 12, - "average_document_length": 19.08397733127254, - "max_document_length": 140, + "min_document_length": 11, + "average_document_length": 68.27511591962906, + "max_document_length": 266, "unique_documents": 1941, - "min_query_length": 11, - "average_query_length": 167.11475409836066, - "max_query_length": 266, + "num_queries": 793, + "min_query_length": 12, + "average_query_length": 46.711223203026485, + "max_query_length": 140, "unique_queries": 793, "none_queries": 0, + "num_relevant_docs": 1942, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.4489281210592684, "max_relevant_docs_per_query": 5, @@ -219,25 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-spa": { - "number_of_characters": 276016, "num_samples": 2729, - "num_queries": 793, + "number_of_characters": 276016, "num_documents": 1936, - "num_relevant_docs": 1961, - "min_document_length": 12, - "average_document_length": 19.13326446280992, - "max_document_length": 140, + "min_document_length": 13, + "average_document_length": 123.43698347107438, + "max_document_length": 1401, "unique_documents": 1936, - "min_query_length": 13, - "average_query_length": 301.3543505674653, - "max_query_length": 1401, + "num_queries": 793, + "min_query_length": 12, + "average_query_length": 46.711223203026485, + "max_query_length": 140, "unique_queries": 793, "none_queries": 0, + "num_relevant_docs": 1961, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.472887767969735, "max_relevant_docs_per_query": 5, @@ -247,25 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, 
"spa-eng": { - "number_of_characters": 169960, "num_samples": 2734, - "num_queries": 793, + "number_of_characters": 169960, "num_documents": 1941, - "num_relevant_docs": 1942, - "min_document_length": 12, - "average_document_length": 19.287995878413188, - "max_document_length": 133, + "min_document_length": 11, + "average_document_length": 68.27511591962906, + "max_document_length": 266, "unique_documents": 1941, - "min_query_length": 11, - "average_query_length": 167.11475409836066, - "max_query_length": 266, + "num_queries": 793, + "min_query_length": 12, + "average_query_length": 47.21059268600252, + "max_query_length": 133, "unique_queries": 793, "none_queries": 0, + "num_relevant_docs": 1942, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.4489281210592684, "max_relevant_docs_per_query": 5, @@ -275,25 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fra-fra": { - "number_of_characters": 161169, "num_samples": 2297, - "num_queries": 749, + "number_of_characters": 161169, "num_documents": 1548, - "num_relevant_docs": 1550, - "min_document_length": 12, - "average_document_length": 27.120801033591732, - "max_document_length": 110, + "min_document_length": 16, + "average_document_length": 76.99354005167959, + "max_document_length": 359, "unique_documents": 1548, - "min_query_length": 16, - "average_query_length": 159.1268357810414, - "max_query_length": 359, + "num_queries": 749, + "min_query_length": 12, + "average_query_length": 56.0520694259012, + "max_query_length": 110, "unique_queries": 749, "none_queries": 0, + "num_relevant_docs": 1550, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.069425901201602, "max_relevant_docs_per_query": 5, @@ -303,25 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, 
"unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-fra": { - "number_of_characters": 271844, "num_samples": 2423, - "num_queries": 749, + "number_of_characters": 271844, "num_documents": 1674, - "num_relevant_docs": 1684, - "min_document_length": 12, - "average_document_length": 25.079450418160096, - "max_document_length": 110, + "min_document_length": 7, + "average_document_length": 137.31242532855435, + "max_document_length": 1798, "unique_documents": 1674, - "min_query_length": 7, - "average_query_length": 306.890520694259, - "max_query_length": 1798, + "num_queries": 749, + "min_query_length": 12, + "average_query_length": 56.0520694259012, + "max_query_length": 110, "unique_queries": 749, "none_queries": 0, + "num_relevant_docs": 1684, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.248331108144192, "max_relevant_docs_per_query": 5, @@ -331,25 +342,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "fra-eng": { - "number_of_characters": 156327, "num_samples": 2297, - "num_queries": 749, + "number_of_characters": 156327, "num_documents": 1548, - "num_relevant_docs": 1550, - "min_document_length": 11, - "average_document_length": 23.992894056847547, - "max_document_length": 110, + "min_document_length": 16, + "average_document_length": 76.99354005167959, + "max_document_length": 359, "unique_documents": 1548, - "min_query_length": 16, - "average_query_length": 159.1268357810414, - "max_query_length": 359, + "num_queries": 749, + "min_query_length": 11, + "average_query_length": 49.58744993324433, + "max_query_length": 110, "unique_queries": 749, "none_queries": 0, + "num_relevant_docs": 1550, "min_relevant_docs_per_query": 1, 
"average_relevant_docs_per_query": 2.069425901201602, "max_relevant_docs_per_query": 5, @@ -359,25 +371,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-hin": { - "number_of_characters": 90024, "num_samples": 2176, - "num_queries": 925, + "number_of_characters": 90024, "num_documents": 1251, - "num_relevant_docs": 1286, - "min_document_length": 8, - "average_document_length": 24.753796962430055, - "max_document_length": 97, + "min_document_length": 11, + "average_document_length": 47.20783373301359, + "max_document_length": 246, "unique_documents": 1251, - "min_query_length": 11, - "average_query_length": 63.84540540540541, - "max_query_length": 246, + "num_queries": 925, + "min_query_length": 8, + "average_query_length": 33.47783783783784, + "max_query_length": 97, "unique_queries": 925, "none_queries": 0, + "num_relevant_docs": 1286, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.3902702702702703, "max_relevant_docs_per_query": 5, @@ -387,25 +400,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-hin": { - "number_of_characters": 191622, "num_samples": 2431, - "num_queries": 925, + "number_of_characters": 191622, "num_documents": 1506, - "num_relevant_docs": 1670, - "min_document_length": 8, - "average_document_length": 20.562416998671978, - "max_document_length": 97, + "min_document_length": 9, + "average_document_length": 106.67662682602922, + "max_document_length": 2000, "unique_documents": 1506, - "min_query_length": 9, - "average_query_length": 173.6810810810811, - "max_query_length": 2000, + "num_queries": 925, + "min_query_length": 8, + 
"average_query_length": 33.47783783783784, + "max_query_length": 97, "unique_queries": 925, "none_queries": 0, + "num_relevant_docs": 1670, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.8054054054054054, "max_relevant_docs_per_query": 5, @@ -415,25 +429,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hin-eng": { - "number_of_characters": 90964, "num_samples": 2163, - "num_queries": 912, + "number_of_characters": 90964, "num_documents": 1251, - "num_relevant_docs": 1286, - "min_document_length": 8, - "average_document_length": 25.50519584332534, - "max_document_length": 118, + "min_document_length": 11, + "average_document_length": 47.20783373301359, + "max_document_length": 246, "unique_documents": 1251, - "min_query_length": 11, - "average_query_length": 64.75548245614036, - "max_query_length": 246, + "num_queries": 912, + "min_query_length": 8, + "average_query_length": 34.98574561403509, + "max_query_length": 118, "unique_queries": 912, "none_queries": 0, + "num_relevant_docs": 1286, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.4100877192982457, "max_relevant_docs_per_query": 8, @@ -443,25 +458,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ita-ita": { - "number_of_characters": 108624, "num_samples": 1935, - "num_queries": 663, + "number_of_characters": 108624, "num_documents": 1272, - "num_relevant_docs": 1276, - "min_document_length": 13, - "average_document_length": 25.617924528301888, - "max_document_length": 134, + "min_document_length": 17, + "average_document_length": 59.778301886792455, + "max_document_length": 293, 
"unique_documents": 1272, - "min_query_length": 17, - "average_query_length": 114.68778280542986, - "max_query_length": 293, + "num_queries": 663, + "min_query_length": 13, + "average_query_length": 49.14932126696833, + "max_query_length": 134, "unique_queries": 663, "none_queries": 0, + "num_relevant_docs": 1276, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9245852187028658, "max_relevant_docs_per_query": 5, @@ -471,25 +487,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-ita": { - "number_of_characters": 192704, "num_samples": 1964, - "num_queries": 663, + "number_of_characters": 192704, "num_documents": 1301, - "num_relevant_docs": 1316, - "min_document_length": 13, - "average_document_length": 25.046887009992314, - "max_document_length": 134, + "min_document_length": 6, + "average_document_length": 123.07302075326672, + "max_document_length": 1561, "unique_documents": 1301, - "min_query_length": 6, - "average_query_length": 241.5052790346908, - "max_query_length": 1561, + "num_queries": 663, + "min_query_length": 13, + "average_query_length": 49.14932126696833, + "max_query_length": 134, "unique_queries": 663, "none_queries": 0, + "num_relevant_docs": 1316, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9849170437405732, "max_relevant_docs_per_query": 5, @@ -499,25 +516,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ita-eng": { - "number_of_characters": 108552, "num_samples": 1935, - "num_queries": 663, + "number_of_characters": 108552, "num_documents": 1272, - "num_relevant_docs": 1276, - "min_document_length": 11, - 
"average_document_length": 25.56132075471698, - "max_document_length": 131, + "min_document_length": 17, + "average_document_length": 59.778301886792455, + "max_document_length": 293, "unique_documents": 1272, - "min_query_length": 17, - "average_query_length": 114.68778280542986, - "max_query_length": 293, + "num_queries": 663, + "min_query_length": 11, + "average_query_length": 49.040723981900456, + "max_query_length": 131, "unique_queries": 663, "none_queries": 0, + "num_relevant_docs": 1276, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9245852187028658, "max_relevant_docs_per_query": 5, @@ -527,25 +545,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "jpn-jpn": { - "number_of_characters": 84910, "num_samples": 2426, - "num_queries": 825, + "number_of_characters": 84910, "num_documents": 1601, - "num_relevant_docs": 1601, - "min_document_length": 5, - "average_document_length": 12.004996876951905, - "max_document_length": 49, + "min_document_length": 8, + "average_document_length": 41.030605871330415, + "max_document_length": 368, "unique_documents": 1601, - "min_query_length": 8, - "average_query_length": 79.62424242424242, - "max_query_length": 368, + "num_queries": 825, + "min_query_length": 5, + "average_query_length": 23.296969696969697, + "max_query_length": 49, "unique_queries": 825, "none_queries": 0, + "num_relevant_docs": 1601, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9406060606060607, "max_relevant_docs_per_query": 5, @@ -555,25 +574,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-jpn": { - "number_of_characters": 239552, 
"num_samples": 2570, - "num_queries": 825, + "number_of_characters": 239552, "num_documents": 1745, - "num_relevant_docs": 1748, - "min_document_length": 5, - "average_document_length": 11.01432664756447, - "max_document_length": 49, + "min_document_length": 3, + "average_document_length": 126.2647564469914, + "max_document_length": 1116, "unique_documents": 1745, - "min_query_length": 3, - "average_query_length": 267.0690909090909, - "max_query_length": 1116, + "num_queries": 825, + "min_query_length": 5, + "average_query_length": 23.296969696969697, + "max_query_length": 49, "unique_queries": 825, "none_queries": 0, + "num_relevant_docs": 1748, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.1187878787878787, "max_relevant_docs_per_query": 5, @@ -583,25 +603,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "jpn-eng": { - "number_of_characters": 107954, "num_samples": 2423, - "num_queries": 822, + "number_of_characters": 107954, "num_documents": 1601, - "num_relevant_docs": 1601, - "min_document_length": 13, - "average_document_length": 26.398500936914427, - "max_document_length": 154, + "min_document_length": 8, + "average_document_length": 41.030605871330415, + "max_document_length": 368, "unique_documents": 1601, - "min_query_length": 8, - "average_query_length": 79.91484184914842, - "max_query_length": 368, + "num_queries": 822, + "min_query_length": 13, + "average_query_length": 51.416058394160586, + "max_query_length": 154, "unique_queries": 822, "none_queries": 0, + "num_relevant_docs": 1601, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.9476885644768855, "max_relevant_docs_per_query": 6, @@ -611,25 +632,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 
null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "kor-kor": { - "number_of_characters": 42030, "num_samples": 1543, - "num_queries": 654, + "number_of_characters": 42030, "num_documents": 889, - "num_relevant_docs": 1023, "min_document_length": 4, - "average_document_length": 16.050618672665916, - "max_document_length": 149, + "average_document_length": 31.22722159730034, + "max_document_length": 231, "unique_documents": 889, + "num_queries": 654, "min_query_length": 4, - "average_query_length": 42.448012232415905, - "max_query_length": 231, + "average_query_length": 21.81804281345566, + "max_query_length": 149, "unique_queries": 654, "none_queries": 0, + "num_relevant_docs": 1023, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.5642201834862386, "max_relevant_docs_per_query": 5, @@ -639,25 +661,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-kor": { - "number_of_characters": 145679, "num_samples": 1823, - "num_queries": 654, + "number_of_characters": 145679, "num_documents": 1169, - "num_relevant_docs": 1277, - "min_document_length": 4, - "average_document_length": 12.206159110350727, - "max_document_length": 149, + "min_document_length": 5, + "average_document_length": 112.41231822070145, + "max_document_length": 1948, "unique_documents": 1169, - "min_query_length": 5, - "average_query_length": 200.93272171253824, - "max_query_length": 1948, + "num_queries": 654, + "min_query_length": 4, + "average_query_length": 21.81804281345566, + "max_query_length": 149, "unique_queries": 654, "none_queries": 0, + "num_relevant_docs": 1277, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.952599388379205, "max_relevant_docs_per_query": 5, @@ -667,25 +690,26 @@ 
"average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "kor-eng": { - "number_of_characters": 54748, "num_samples": 1503, - "num_queries": 614, + "number_of_characters": 54748, "num_documents": 889, - "num_relevant_docs": 1023, - "min_document_length": 5, - "average_document_length": 30.35658042744657, - "max_document_length": 298, + "min_document_length": 4, + "average_document_length": 31.22722159730034, + "max_document_length": 231, "unique_documents": 889, - "min_query_length": 4, - "average_query_length": 45.21335504885994, - "max_query_length": 231, + "num_queries": 614, + "min_query_length": 5, + "average_query_length": 43.9527687296417, + "max_query_length": 298, "unique_queries": 614, "none_queries": 0, + "num_relevant_docs": 1023, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6661237785016287, "max_relevant_docs_per_query": 9, @@ -695,25 +719,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "pol-pol": { - "number_of_characters": 122176, "num_samples": 2364, - "num_queries": 785, + "number_of_characters": 122176, "num_documents": 1579, - "num_relevant_docs": 1633, - "min_document_length": 8, - "average_document_length": 26.707409753008232, - "max_document_length": 150, + "min_document_length": 18, + "average_document_length": 50.66814439518683, + "max_document_length": 219, "unique_documents": 1579, - "min_query_length": 18, - "average_query_length": 101.9171974522293, - "max_query_length": 219, + "num_queries": 785, + "min_query_length": 8, + "average_query_length": 53.72101910828025, + "max_query_length": 150, "unique_queries": 785, "none_queries": 0, + 
"num_relevant_docs": 1633, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.080254777070064, "max_relevant_docs_per_query": 5, @@ -723,25 +748,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-pol": { - "number_of_characters": 240206, "num_samples": 2538, - "num_queries": 785, + "number_of_characters": 240206, "num_documents": 1753, - "num_relevant_docs": 1873, - "min_document_length": 8, - "average_document_length": 24.056474614945806, - "max_document_length": 150, + "min_document_length": 5, + "average_document_length": 112.96919566457501, + "max_document_length": 1459, "unique_documents": 1753, - "min_query_length": 5, - "average_query_length": 252.27388535031847, - "max_query_length": 1459, + "num_queries": 785, + "min_query_length": 8, + "average_query_length": 53.72101910828025, + "max_query_length": 150, "unique_queries": 785, "none_queries": 0, + "num_relevant_docs": 1873, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.385987261146497, "max_relevant_docs_per_query": 5, @@ -751,25 +777,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "pol-eng": { - "number_of_characters": 122118, "num_samples": 2356, - "num_queries": 777, + "number_of_characters": 122118, "num_documents": 1579, - "num_relevant_docs": 1633, - "min_document_length": 5, - "average_document_length": 26.67067764407853, - "max_document_length": 180, + "min_document_length": 18, + "average_document_length": 50.66814439518683, + "max_document_length": 219, "unique_documents": 1579, - "min_query_length": 18, - "average_query_length": 102.96653796653797, - "max_query_length": 219, + 
"num_queries": 777, + "min_query_length": 5, + "average_query_length": 54.1994851994852, + "max_query_length": 180, "unique_queries": 777, "none_queries": 0, + "num_relevant_docs": 1633, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.101673101673102, "max_relevant_docs_per_query": 6, @@ -779,25 +806,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "por-por": { - "number_of_characters": 157318, "num_samples": 2422, - "num_queries": 800, + "number_of_characters": 157318, "num_documents": 1622, - "num_relevant_docs": 1712, - "min_document_length": 9, - "average_document_length": 21.005548705302097, - "max_document_length": 126, + "min_document_length": 7, + "average_document_length": 75.9845869297164, + "max_document_length": 500, "unique_documents": 1622, - "min_query_length": 7, - "average_query_length": 154.05875, - "max_query_length": 500, + "num_queries": 800, + "min_query_length": 9, + "average_query_length": 42.58875, + "max_query_length": 126, "unique_queries": 800, "none_queries": 0, + "num_relevant_docs": 1712, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.14, "max_relevant_docs_per_query": 5, @@ -807,25 +835,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-por": { - "number_of_characters": 216697, "num_samples": 2439, - "num_queries": 800, + "number_of_characters": 216697, "num_documents": 1639, - "num_relevant_docs": 1775, "min_document_length": 9, - "average_document_length": 20.787675411836485, - "max_document_length": 126, + "average_document_length": 111.42525930445393, + "max_document_length": 1206, "unique_documents": 1639, + 
"num_queries": 800, "min_query_length": 9, - "average_query_length": 228.2825, - "max_query_length": 1206, + "average_query_length": 42.58875, + "max_query_length": 126, "unique_queries": 800, "none_queries": 0, + "num_relevant_docs": 1775, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.21875, "max_relevant_docs_per_query": 5, @@ -835,25 +864,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "por-eng": { - "number_of_characters": 160371, "num_samples": 2419, - "num_queries": 797, + "number_of_characters": 160371, "num_documents": 1622, - "num_relevant_docs": 1712, - "min_document_length": 9, - "average_document_length": 22.887792848335387, - "max_document_length": 136, + "min_document_length": 7, + "average_document_length": 75.9845869297164, + "max_document_length": 500, "unique_documents": 1622, - "min_query_length": 7, - "average_query_length": 154.63864491844416, - "max_query_length": 500, + "num_queries": 797, + "min_query_length": 9, + "average_query_length": 46.57967377666248, + "max_query_length": 136, "unique_queries": 797, "none_queries": 0, + "num_relevant_docs": 1712, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.148055207026349, "max_relevant_docs_per_query": 6, @@ -863,25 +893,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "tam-tam": { - "number_of_characters": 108750, "num_samples": 2057, - "num_queries": 782, + "number_of_characters": 108750, "num_documents": 1275, - "num_relevant_docs": 1329, - "min_document_length": 3, - "average_document_length": 20.40392156862745, - "max_document_length": 146, + "min_document_length": 4, + 
"average_document_length": 64.89019607843137, + "max_document_length": 441, "unique_documents": 1275, - "min_query_length": 4, - "average_query_length": 105.79923273657289, - "max_query_length": 441, + "num_queries": 782, + "min_query_length": 3, + "average_query_length": 33.267263427109974, + "max_query_length": 146, "unique_queries": 782, "none_queries": 0, + "num_relevant_docs": 1329, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.6994884910485935, "max_relevant_docs_per_query": 5, @@ -891,25 +922,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-tam": { - "number_of_characters": 169909, "num_samples": 2266, - "num_queries": 782, + "number_of_characters": 169909, "num_documents": 1484, - "num_relevant_docs": 1584, - "min_document_length": 3, - "average_document_length": 17.53032345013477, - "max_document_length": 146, + "min_document_length": 5, + "average_document_length": 96.96361185983828, + "max_document_length": 1240, "unique_documents": 1484, - "min_query_length": 5, - "average_query_length": 184.0076726342711, - "max_query_length": 1240, + "num_queries": 782, + "min_query_length": 3, + "average_query_length": 33.267263427109974, + "max_query_length": 146, "unique_queries": 782, "none_queries": 0, + "num_relevant_docs": 1584, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.0255754475703327, "max_relevant_docs_per_query": 5, @@ -919,25 +951,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "tam-eng": { - "number_of_characters": 109479, "num_samples": 2044, - "num_queries": 769, + "number_of_characters": 109479, "num_documents": 1275, - 
"num_relevant_docs": 1329, - "min_document_length": 6, - "average_document_length": 20.975686274509805, - "max_document_length": 162, + "min_document_length": 4, + "average_document_length": 64.89019607843137, + "max_document_length": 441, "unique_documents": 1275, - "min_query_length": 4, - "average_query_length": 107.58777633289986, - "max_query_length": 441, + "num_queries": 769, + "min_query_length": 6, + "average_query_length": 34.777633289986994, + "max_query_length": 162, "unique_queries": 769, "none_queries": 0, + "num_relevant_docs": 1329, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.728218465539662, "max_relevant_docs_per_query": 17, @@ -947,25 +980,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "cmn-cmn": { - "number_of_characters": 45797, "num_samples": 2529, - "num_queries": 824, + "number_of_characters": 45797, "num_documents": 1705, - "num_relevant_docs": 1707, "min_document_length": 5, - "average_document_length": 5.901466275659824, - "max_document_length": 29, + "average_document_length": 20.958944281524925, + "max_document_length": 236, "unique_documents": 1705, + "num_queries": 824, "min_query_length": 5, - "average_query_length": 43.36771844660194, - "max_query_length": 236, + "average_query_length": 12.21116504854369, + "max_query_length": 29, "unique_queries": 824, "none_queries": 0, + "num_relevant_docs": 1707, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.0716019417475726, "max_relevant_docs_per_query": 5, @@ -975,25 +1009,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "eng-cmn": { - "number_of_characters": 201023, 
"num_samples": 2587, - "num_queries": 824, + "number_of_characters": 201023, "num_documents": 1763, - "num_relevant_docs": 1865, - "min_document_length": 5, - "average_document_length": 5.7073170731707314, - "max_document_length": 29, + "min_document_length": 15, + "average_document_length": 108.31593874078276, + "max_document_length": 965, "unique_documents": 1763, - "min_query_length": 15, - "average_query_length": 231.748786407767, - "max_query_length": 965, + "num_queries": 824, + "min_query_length": 5, + "average_query_length": 12.21116504854369, + "max_query_length": 29, "unique_queries": 824, "none_queries": 0, + "num_relevant_docs": 1865, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.2633495145631066, "max_relevant_docs_per_query": 5, @@ -1003,25 +1038,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "cmn-eng": { - "number_of_characters": 69555, "num_samples": 2525, - "num_queries": 820, + "number_of_characters": 69555, "num_documents": 1705, - "num_relevant_docs": 1707, - "min_document_length": 10, - "average_document_length": 19.835777126099707, - "max_document_length": 130, + "min_document_length": 5, + "average_document_length": 20.958944281524925, + "max_document_length": 236, "unique_documents": 1705, - "min_query_length": 5, - "average_query_length": 43.579268292682926, - "max_query_length": 236, + "num_queries": 820, + "min_query_length": 10, + "average_query_length": 41.24390243902439, + "max_query_length": 130, "unique_queries": 820, "none_queries": 0, + "num_relevant_docs": 1707, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 2.0817073170731706, "max_relevant_docs_per_query": 6, @@ -1031,6 +1067,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": 
null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/XQuADRetrieval.json b/mteb/descriptive_stats/Retrieval/XQuADRetrieval.json index cad459ef82..f3c9b4c4aa 100644 --- a/mteb/descriptive_stats/Retrieval/XQuADRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/XQuADRetrieval.json @@ -1,17 +1,19 @@ { "validation": { - "number_of_characters": 3049962, "num_samples": 17079, - "num_queries": 14199, + "number_of_characters": 3049962, "num_documents": 2880, - "min_document_length": 6, - "average_document_length": 291.05694444444447, - "max_document_length": 307, + "min_document_length": 56, + "average_document_length": 767.9576388888889, + "max_document_length": 3884, "unique_documents": 2880, - "min_query_length": 56, - "average_query_length": 155.76575815198254, - "max_query_length": 3884, + "num_queries": 14199, + "min_query_length": 6, + "average_query_length": 59.03542502993169, + "max_query_length": 307, "unique_queries": 14199, + "none_queries": 0, + "num_relevant_docs": 14199, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -21,23 +23,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null, "hf_subset_descriptive_stats": { "ar": { - "number_of_characters": 227279, "num_samples": 1426, - "num_queries": 1186, + "number_of_characters": 227279, "num_documents": 240, - "min_document_length": 14, - "average_document_length": 263.52916666666664, - "max_document_length": 221, + "min_document_length": 146, + "average_document_length": 683.4666666666667, + "max_document_length": 2772, "unique_documents": 240, - "min_query_length": 146, - "average_query_length": 138.30691399662732, - "max_query_length": 2772, + "num_queries": 1186, + 
"min_query_length": 14, + "average_query_length": 53.327993254637434, + "max_query_length": 221, "unique_queries": 1186, + "none_queries": 0, + "num_relevant_docs": 1186, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -47,23 +52,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "de": { - "number_of_characters": 296116, "num_samples": 1421, - "num_queries": 1181, + "number_of_characters": 296116, "num_documents": 240, - "min_document_length": 15, - "average_document_length": 339.75, - "max_document_length": 197, + "min_document_length": 186, + "average_document_length": 894.0666666666667, + "max_document_length": 3884, "unique_documents": 240, - "min_query_length": 186, - "average_query_length": 181.69009314140558, - "max_query_length": 3884, + "num_queries": 1181, + "min_query_length": 15, + "average_query_length": 69.04318374259103, + "max_query_length": 197, "unique_queries": 1181, + "none_queries": 0, + "num_relevant_docs": 1181, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -73,23 +81,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "el": { - "number_of_characters": 295889, "num_samples": 1424, - "num_queries": 1184, + "number_of_characters": 295889, "num_documents": 240, - "min_document_length": 18, - "average_document_length": 338.4916666666667, - "max_document_length": 198, + "min_document_length": 176, + "average_document_length": 894.3791666666667, + "max_document_length": 3745, "unique_documents": 240, - "min_query_length": 176, - "average_query_length": 
181.29307432432432, - "max_query_length": 3745, + "num_queries": 1184, + "min_query_length": 18, + "average_query_length": 68.61317567567568, + "max_query_length": 198, "unique_queries": 1184, + "none_queries": 0, + "num_relevant_docs": 1184, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -99,23 +110,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "en": { - "number_of_characters": 260942, "num_samples": 1425, - "num_queries": 1185, + "number_of_characters": 260942, "num_documents": 240, - "min_document_length": 15, - "average_document_length": 302.425, - "max_document_length": 197, + "min_document_length": 158, + "average_document_length": 784.8333333333334, + "max_document_length": 3326, "unique_documents": 240, - "min_query_length": 158, - "average_query_length": 158.95358649789029, - "max_query_length": 3326, + "num_queries": 1185, + "min_query_length": 15, + "average_query_length": 61.25063291139241, + "max_query_length": 197, "unique_queries": 1185, + "none_queries": 0, + "num_relevant_docs": 1185, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -125,23 +139,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "es": { - "number_of_characters": 292907, "num_samples": 1424, - "num_queries": 1184, + "number_of_characters": 292907, "num_documents": 240, - "min_document_length": 15, - "average_document_length": 336.64166666666665, - "max_document_length": 226, + "min_document_length": 173, + "average_document_length": 883.8041666666667, + "max_document_length": 3734, 
"unique_documents": 240, - "min_query_length": 173, - "average_query_length": 179.14949324324326, - "max_query_length": 3734, + "num_queries": 1184, + "min_query_length": 15, + "average_query_length": 68.23817567567568, + "max_query_length": 226, "unique_queries": 1184, + "none_queries": 0, + "num_relevant_docs": 1184, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -151,23 +168,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "hi": { - "number_of_characters": 254193, "num_samples": 1423, - "num_queries": 1183, + "number_of_characters": 254193, "num_documents": 240, - "min_document_length": 14, - "average_document_length": 294.1958333333333, - "max_document_length": 307, + "min_document_length": 132, + "average_document_length": 764.9416666666667, + "max_document_length": 3044, "unique_documents": 240, - "min_query_length": 132, - "average_query_length": 155.1868131868132, - "max_query_length": 3044, + "num_queries": 1183, + "min_query_length": 14, + "average_query_length": 59.684699915469146, + "max_query_length": 307, "unique_queries": 1183, + "none_queries": 0, + "num_relevant_docs": 1183, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -177,23 +197,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ro": { - "number_of_characters": 290359, "num_samples": 1424, - "num_queries": 1184, + "number_of_characters": 290359, "num_documents": 240, - "min_document_length": 14, - "average_document_length": 331.3833333333333, - "max_document_length": 211, + "min_document_length": 
184, + "average_document_length": 878.4458333333333, + "max_document_length": 3732, "unique_documents": 240, - "min_query_length": 184, - "average_query_length": 178.06334459459458, - "max_query_length": 3732, + "num_queries": 1184, + "min_query_length": 14, + "average_query_length": 67.17229729729729, + "max_query_length": 211, "unique_queries": 1184, + "none_queries": 0, + "num_relevant_docs": 1184, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -203,23 +226,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "ru": { - "number_of_characters": 281002, "num_samples": 1425, - "num_queries": 1185, + "number_of_characters": 281002, "num_documents": 240, - "min_document_length": 16, - "average_document_length": 320.65416666666664, - "max_document_length": 210, + "min_document_length": 182, + "average_document_length": 850.1875, + "max_document_length": 3691, "unique_documents": 240, - "min_query_length": 182, - "average_query_length": 172.18987341772151, - "max_query_length": 3691, + "num_queries": 1185, + "min_query_length": 16, + "average_query_length": 64.94261603375527, + "max_query_length": 210, "unique_queries": 1185, + "none_queries": 0, + "num_relevant_docs": 1185, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -229,23 +255,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "th": { - "number_of_characters": 241844, "num_samples": 1420, - "num_queries": 1180, + "number_of_characters": 241844, "num_documents": 240, - "min_document_length": 11, - "average_document_length": 
270.925, - "max_document_length": 161, + "min_document_length": 154, + "average_document_length": 736.7583333333333, + "max_document_length": 2891, "unique_documents": 240, - "min_query_length": 154, - "average_query_length": 149.84915254237288, - "max_query_length": 2891, + "num_queries": 1180, + "min_query_length": 11, + "average_query_length": 55.103389830508476, + "max_query_length": 161, "unique_queries": 1180, + "none_queries": 0, + "num_relevant_docs": 1180, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -255,23 +284,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "tr": { - "number_of_characters": 261270, "num_samples": 1424, - "num_queries": 1184, + "number_of_characters": 261270, "num_documents": 240, - "min_document_length": 13, - "average_document_length": 300.325, - "max_document_length": 164, + "min_document_length": 140, + "average_document_length": 788.3, + "max_document_length": 3266, "unique_documents": 240, - "min_query_length": 140, - "average_query_length": 159.79054054054055, - "max_query_length": 3266, + "num_queries": 1184, + "min_query_length": 13, + "average_query_length": 60.876689189189186, + "max_query_length": 164, "unique_queries": 1184, + "none_queries": 0, + "num_relevant_docs": 1184, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -281,23 +313,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "vi": { - "number_of_characters": 265783, "num_samples": 1422, - "num_queries": 1182, + "number_of_characters": 265783, "num_documents": 240, - 
"min_document_length": 12, - "average_document_length": 303.5208333333333, - "max_document_length": 177, + "min_document_length": 184, + "average_document_length": 803.9083333333333, + "max_document_length": 3412, "unique_documents": 240, - "min_query_length": 184, - "average_query_length": 163.2301184433164, - "max_query_length": 3412, + "num_queries": 1182, + "min_query_length": 12, + "average_query_length": 61.62859560067682, + "max_query_length": 177, "unique_queries": 1182, + "none_queries": 0, + "num_relevant_docs": 1182, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -307,23 +342,26 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null }, "zh": { - "number_of_characters": 82378, "num_samples": 1421, - "num_queries": 1181, + "number_of_characters": 82378, "num_documents": 240, - "min_document_length": 6, - "average_document_length": 90.84166666666667, - "max_document_length": 52, + "min_document_length": 56, + "average_document_length": 252.4, + "max_document_length": 974, "unique_documents": 240, - "min_query_length": 56, - "average_query_length": 51.29212531752752, - "max_query_length": 974, + "num_queries": 1181, + "min_query_length": 6, + "average_query_length": 18.460626587637595, + "max_query_length": 52, "unique_queries": 1181, + "none_queries": 0, + "num_relevant_docs": 1181, "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 1, @@ -333,6 +371,7 @@ "average_instruction_length": null, "max_instruction_length": null, "unique_instructions": null, + "num_top_ranked": null, "min_top_ranked_per_query": null, "average_top_ranked_per_query": null, "max_top_ranked_per_query": null diff --git a/mteb/descriptive_stats/Retrieval/mFollowIR.json 
b/mteb/descriptive_stats/Retrieval/mFollowIR.json index e1f65148e8..196b75f007 100644 --- a/mteb/descriptive_stats/Retrieval/mFollowIR.json +++ b/mteb/descriptive_stats/Retrieval/mFollowIR.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 277808433, "num_samples": 121881, - "num_queries": 246, + "number_of_characters": 283554696, "num_documents": 121635, - "min_document_length": 10, - "average_document_length": 0.11550951617544292, - "max_document_length": 136, + "min_document_length": 74, + "average_document_length": 2331.0777818884367, + "max_document_length": 24179, "unique_documents": 121635, - "min_query_length": 0, - "average_query_length": 1129245.4593495934, - "max_query_length": 24117, + "num_queries": 246, + "min_query_length": 10, + "average_query_length": 57.113821138211385, + "max_query_length": 136, "unique_queries": 246, + "none_queries": 0, + "num_relevant_docs": 73924, "min_relevant_docs_per_query": 123, "average_relevant_docs_per_query": 7.865853658536586, "max_relevant_docs_per_query": 450, @@ -21,23 +23,26 @@ "average_instruction_length": 74785, "max_instruction_length": 1083, "unique_instructions": 246, + "num_top_ranked": 246, "min_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, "max_top_ranked_per_query": 1000, "hf_subset_descriptive_stats": { "fas": { - "number_of_characters": 127465295, "num_samples": 41269, - "num_queries": 80, + "number_of_characters": 129565774, "num_documents": 41189, - "min_document_length": 34, - "average_document_length": 0.14110563499963583, - "max_document_length": 124, + "min_document_length": 99, + "average_document_length": 3145.4990895627475, + "max_document_length": 24179, "unique_documents": 41189, - "min_query_length": 0, - "average_query_length": 1593243.5375, - "max_query_length": 24117, + "num_queries": 80, + "min_query_length": 34, + "average_query_length": 72.65, + "max_query_length": 124, "unique_queries": 80, + "none_queries": 0, + "num_relevant_docs": 24326, 
"min_relevant_docs_per_query": 151, "average_relevant_docs_per_query": 8.075, "max_relevant_docs_per_query": 450, @@ -47,23 +52,26 @@ "average_instruction_length": 30970, "max_instruction_length": 842, "unique_instructions": 80, + "num_top_ranked": 80, "min_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, "max_top_ranked_per_query": 1000 }, "rus": { - "number_of_characters": 106937404, "num_samples": 39406, - "num_queries": 80, + "number_of_characters": 109492983, "num_documents": 39326, - "min_document_length": 26, - "average_document_length": 0.15765651223109392, - "max_document_length": 136, + "min_document_length": 75, + "average_document_length": 2784.0813456746173, + "max_document_length": 24061, "unique_documents": 39326, - "min_query_length": 0, - "average_query_length": 1336640.05, - "max_query_length": 24033, + "num_queries": 80, + "min_query_length": 26, + "average_query_length": 77.5, + "max_query_length": 136, "unique_queries": 80, + "none_queries": 0, + "num_relevant_docs": 24134, "min_relevant_docs_per_query": 168, "average_relevant_docs_per_query": 7.35, "max_relevant_docs_per_query": 443, @@ -73,23 +81,26 @@ "average_instruction_length": 33800, "max_instruction_length": 1083, "unique_instructions": 80, + "num_top_ranked": 80, "min_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, "max_top_ranked_per_query": 1000 }, "zho": { - "number_of_characters": 43405734, "num_samples": 41206, - "num_queries": 86, + "number_of_characters": 44495939, "num_documents": 41120, - "min_document_length": 10, - "average_document_length": 0.04956225680933852, - "max_document_length": 44, + "min_document_length": 74, + "average_document_length": 1082.0501215953307, + "max_document_length": 23840, "unique_documents": 41120, - "min_query_length": 0, - "average_query_length": 504694.1395348837, - "max_query_length": 23822, + "num_queries": 86, + "min_query_length": 10, + "average_query_length": 23.697674418604652, + "max_query_length": 
44, "unique_queries": 86, + "none_queries": 0, + "num_relevant_docs": 25464, "min_relevant_docs_per_query": 123, "average_relevant_docs_per_query": 8.151162790697674, "max_relevant_docs_per_query": 429, @@ -99,6 +110,7 @@ "average_instruction_length": 10015, "max_instruction_length": 229, "unique_instructions": 86, + "num_top_ranked": 86, "min_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, "max_top_ranked_per_query": 1000 diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingual.json b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingual.json index 482a144707..bc39b75088 100644 --- a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingual.json +++ b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingual.json @@ -1,17 +1,19 @@ { "test": { - "number_of_characters": 277814525, "num_samples": 121881, - "num_queries": 246, + "number_of_characters": 283560788, "num_documents": 121635, - "min_document_length": 32, - "average_document_length": 0.1655937846836848, - "max_document_length": 173, + "min_document_length": 74, + "average_document_length": 2331.0777818884367, + "max_document_length": 24179, "unique_documents": 121635, - "min_query_length": 0, - "average_query_length": 1129245.4593495934, - "max_query_length": 24117, + "num_queries": 246, + "min_query_length": 32, + "average_query_length": 81.8780487804878, + "max_query_length": 173, "unique_queries": 246, + "none_queries": 0, + "num_relevant_docs": 73924, "min_relevant_docs_per_query": 123, "average_relevant_docs_per_query": 7.865853658536586, "max_relevant_docs_per_query": 450, @@ -21,23 +23,26 @@ "average_instruction_length": 103382, "max_instruction_length": 974, "unique_instructions": 246, + "num_top_ranked": 246, "min_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, "max_top_ranked_per_query": 1000, "hf_subset_descriptive_stats": { "eng-fas": { - "number_of_characters": 127465889, "num_samples": 41269, - "num_queries": 80, + "number_of_characters": 
129566368, "num_documents": 41189, - "min_document_length": 34, - "average_document_length": 0.15552696108184225, - "max_document_length": 124, + "min_document_length": 99, + "average_document_length": 3145.4990895627475, + "max_document_length": 24179, "unique_documents": 41189, - "min_query_length": 0, - "average_query_length": 1593243.5375, - "max_query_length": 24117, + "num_queries": 80, + "min_query_length": 34, + "average_query_length": 80.075, + "max_query_length": 124, "unique_queries": 80, + "none_queries": 0, + "num_relevant_docs": 24326, "min_relevant_docs_per_query": 151, "average_relevant_docs_per_query": 8.075, "max_relevant_docs_per_query": 450, @@ -47,23 +52,26 @@ "average_instruction_length": 34402, "max_instruction_length": 974, "unique_instructions": 80, + "num_top_ranked": 80, "min_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, "max_top_ranked_per_query": 1000 }, "eng-rus": { - "number_of_characters": 106937754, "num_samples": 39406, - "num_queries": 80, + "number_of_characters": 109493333, "num_documents": 39326, - "min_document_length": 32, - "average_document_length": 0.16655647663123632, - "max_document_length": 173, + "min_document_length": 75, + "average_document_length": 2784.0813456746173, + "max_document_length": 24061, "unique_documents": 39326, - "min_query_length": 0, - "average_query_length": 1336640.05, - "max_query_length": 24033, + "num_queries": 80, + "min_query_length": 32, + "average_query_length": 81.875, + "max_query_length": 173, "unique_queries": 80, + "none_queries": 0, + "num_relevant_docs": 24134, "min_relevant_docs_per_query": 168, "average_relevant_docs_per_query": 7.35, "max_relevant_docs_per_query": 443, @@ -73,23 +81,26 @@ "average_instruction_length": 32117, "max_instruction_length": 957, "unique_instructions": 80, + "num_top_ranked": 80, "min_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, "max_top_ranked_per_query": 1000 }, "eng-zho": { - "number_of_characters": 
43410882, "num_samples": 41206, - "num_queries": 86, + "number_of_characters": 44501087, "num_documents": 41120, - "min_document_length": 32, - "average_document_length": 0.1747568093385214, - "max_document_length": 159, + "min_document_length": 74, + "average_document_length": 1082.0501215953307, + "max_document_length": 23840, "unique_documents": 41120, - "min_query_length": 0, - "average_query_length": 504694.1395348837, - "max_query_length": 23822, + "num_queries": 86, + "min_query_length": 32, + "average_query_length": 83.55813953488372, + "max_query_length": 159, "unique_queries": 86, + "none_queries": 0, + "num_relevant_docs": 25464, "min_relevant_docs_per_query": 123, "average_relevant_docs_per_query": 8.151162790697674, "max_relevant_docs_per_query": 429, @@ -99,6 +110,7 @@ "average_instruction_length": 36863, "max_instruction_length": 822, "unique_instructions": 86, + "num_top_ranked": 86, "min_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, "max_top_ranked_per_query": 1000 diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json deleted file mode 100644 index f23a5ea1be..0000000000 --- a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json +++ /dev/null @@ -1,116 +0,0 @@ -{ - "test": { - "num_samples": 121758, - "num_docs": 121635, - "num_queries": 123, - "number_of_characters": 283654099, - "min_document_length": 74, - "average_document_length": 2331.0777818884367, - "max_document_length": 24179, - "unique_docs": 121635, - "min_query_length": 32, - "average_query_length": 81.8780487804878, - "max_query_length": 173, - "unique_queries": 75, - "min_instruction_length": 93, - "average_instruction_length": 389.9512195121951, - "max_instruction_length": 887, - "unique_instructions": 75, - "min_changed_instruction_length": 180, - "average_changed_instruction_length": 450.5528455284553, - 
"max_changed_instruction_length": 974, - "unique_changed_instructions": 123, - "min_average_relevant_docs_per_query": 0, - "average_relevant_docs_per_query": 10.43089430894309, - "max_average_relevant_docs_per_query": 24, - "min_average_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_average_top_ranked_per_query": 1000, - "hf_subset_descriptive_stats": { - "eng-fas": { - "num_samples": 41229, - "num_docs": 41189, - "num_queries": 40, - "number_of_characters": 129597567, - "min_document_length": 99, - "average_document_length": 3145.4990895627475, - "max_document_length": 24179, - "unique_docs": 41189, - "min_query_length": 34, - "average_query_length": 80.075, - "max_query_length": 124, - "unique_queries": 40, - "min_instruction_length": 150, - "average_instruction_length": 396.875, - "max_instruction_length": 887, - "unique_instructions": 40, - "min_changed_instruction_length": 205, - "average_changed_instruction_length": 463.175, - "max_changed_instruction_length": 974, - "unique_changed_instructions": 40, - "min_average_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 10.85, - "max_average_relevant_docs_per_query": 22, - "min_average_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_average_top_ranked_per_query": 1000 - }, - "eng-rus": { - "num_samples": 39366, - "num_docs": 39326, - "num_queries": 40, - "number_of_characters": 109522175, - "min_document_length": 75, - "average_document_length": 2784.0813456746173, - "max_document_length": 24061, - "unique_docs": 39326, - "min_query_length": 32, - "average_query_length": 81.875, - "max_query_length": 173, - "unique_queries": 40, - "min_instruction_length": 93, - "average_instruction_length": 371.125, - "max_instruction_length": 887, - "unique_instructions": 40, - "min_changed_instruction_length": 180, - "average_changed_instruction_length": 431.8, - "max_changed_instruction_length": 957, - "unique_changed_instructions": 40, - 
"min_average_relevant_docs_per_query": 0, - "average_relevant_docs_per_query": 9.775, - "max_average_relevant_docs_per_query": 24, - "min_average_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_average_top_ranked_per_query": 1000 - }, - "eng-zho": { - "num_samples": 41163, - "num_docs": 41120, - "num_queries": 43, - "number_of_characters": 44534357, - "min_document_length": 74, - "average_document_length": 1082.0501215953307, - "max_document_length": 23840, - "unique_docs": 41120, - "min_query_length": 32, - "average_query_length": 83.55813953488372, - "max_query_length": 159, - "unique_queries": 43, - "min_instruction_length": 157, - "average_instruction_length": 401.0232558139535, - "max_instruction_length": 731, - "unique_instructions": 43, - "min_changed_instruction_length": 209, - "average_changed_instruction_length": 456.25581395348837, - "max_changed_instruction_length": 822, - "unique_changed_instructions": 43, - "min_average_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 10.651162790697674, - "max_average_relevant_docs_per_query": 24, - "min_average_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_average_top_ranked_per_query": 1000 - } - } - } -} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json b/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json deleted file mode 100644 index 54ae5d1ec2..0000000000 --- a/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json +++ /dev/null @@ -1,116 +0,0 @@ -{ - "test": { - "num_samples": 121758, - "num_docs": 121635, - "num_queries": 123, - "number_of_characters": 283622456, - "min_document_length": 74, - "average_document_length": 2331.0777818884367, - "max_document_length": 24179, - "unique_docs": 121635, - "min_query_length": 10, - "average_query_length": 57.113821138211385, - "max_query_length": 136, - "unique_queries": 123, - "min_instruction_length": 
37, - "average_instruction_length": 281.0650406504065, - "max_instruction_length": 1009, - "unique_instructions": 123, - "min_changed_instruction_length": 44, - "average_changed_instruction_length": 326.9430894308943, - "max_changed_instruction_length": 1083, - "unique_changed_instructions": 123, - "min_average_relevant_docs_per_query": 0, - "average_relevant_docs_per_query": 10.43089430894309, - "max_average_relevant_docs_per_query": 24, - "min_average_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_average_top_ranked_per_query": 1000, - "hf_subset_descriptive_stats": { - "fas": { - "num_samples": 41229, - "num_docs": 41189, - "num_queries": 40, - "number_of_characters": 129593838, - "min_document_length": 99, - "average_document_length": 3145.4990895627475, - "max_document_length": 24179, - "unique_docs": 41189, - "min_query_length": 34, - "average_query_length": 72.65, - "max_query_length": 124, - "unique_queries": 40, - "min_instruction_length": 121, - "average_instruction_length": 358.925, - "max_instruction_length": 759, - "unique_instructions": 40, - "min_changed_instruction_length": 163, - "average_changed_instruction_length": 415.325, - "max_changed_instruction_length": 842, - "unique_changed_instructions": 40, - "min_average_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 10.85, - "max_average_relevant_docs_per_query": 22, - "min_average_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_average_top_ranked_per_query": 1000 - }, - "rus": { - "num_samples": 39366, - "num_docs": 39326, - "num_queries": 40, - "number_of_characters": 109523683, - "min_document_length": 75, - "average_document_length": 2784.0813456746173, - "max_document_length": 24061, - "unique_docs": 39326, - "min_query_length": 26, - "average_query_length": 77.5, - "max_query_length": 136, - "unique_queries": 40, - "min_instruction_length": 78, - "average_instruction_length": 387.0, - "max_instruction_length": 1009, 
- "unique_instructions": 40, - "min_changed_instruction_length": 187, - "average_changed_instruction_length": 458.0, - "max_changed_instruction_length": 1083, - "unique_changed_instructions": 40, - "min_average_relevant_docs_per_query": 0, - "average_relevant_docs_per_query": 9.775, - "max_average_relevant_docs_per_query": 24, - "min_average_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_average_top_ranked_per_query": 1000 - }, - "zho": { - "num_samples": 41163, - "num_docs": 41120, - "num_queries": 43, - "number_of_characters": 44504935, - "min_document_length": 74, - "average_document_length": 1082.0501215953307, - "max_document_length": 23840, - "unique_docs": 41120, - "min_query_length": 10, - "average_query_length": 23.697674418604652, - "max_query_length": 44, - "unique_queries": 43, - "min_instruction_length": 37, - "average_instruction_length": 110.09302325581395, - "max_instruction_length": 209, - "unique_instructions": 43, - "min_changed_instruction_length": 44, - "average_changed_instruction_length": 122.81395348837209, - "max_changed_instruction_length": 229, - "unique_changed_instructions": 43, - "min_average_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 10.651162790697674, - "max_average_relevant_docs_per_query": 24, - "min_average_top_ranked_per_query": 1000, - "average_top_ranked_per_query": 1000.0, - "max_average_top_ranked_per_query": 1000 - } - } - } -} \ No newline at end of file diff --git a/tests/test_TaskMetadata.py b/tests/test_TaskMetadata.py index c935bb6590..692f4dd9fb 100644 --- a/tests/test_TaskMetadata.py +++ b/tests/test_TaskMetadata.py @@ -531,10 +531,6 @@ def test_empty_descriptive_stat_in_new_datasets(task: AbsTask): # DON'T ADD NEW DATASETS TO THIS LIST # THIS IS ONLY INTENDED FOR HISTORIC DATASETS exceptions = [ - "MSMARCOv2", - "NeuCLIR2022Retrieval", - "NeuCLIR2023Retrieval", - "FloresBitextMining", "FilipinoHateSpeechClassification", ] @@ -549,3 +545,4 @@ def 
test_empty_descriptive_stat_in_new_datasets(task: AbsTask): assert ( task.metadata.descriptive_stats is not None ), f"Dataset {task.metadata.name} should have descriptive stats. You can add metadata to your task by running `YorTask().calculate_metadata_metrics()`" + assert task.metadata.n_samples is not None diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index ad011dfcef..329b110d75 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -1126,20 +1126,20 @@ def metadata_dict(self) -> dict[str, str]: class MockRerankingTask(AbsTaskReranking): expected_stats = { "test": { - "number_of_characters": 106, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 106, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 27.0, + "max_document_length": 27, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 27.0, - "max_query_length": 27, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1149,6 +1149,7 @@ class MockRerankingTask(AbsTaskReranking): "average_instruction_length": None, "max_instruction_length": None, "unique_instructions": None, + "num_top_ranked": 2, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, "max_top_ranked_per_query": 2, @@ -1196,20 +1197,20 @@ def load_data(self, **kwargs): class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): expected_stats = { "test": { - "number_of_characters": 224, "num_samples": 8, - "num_queries": 4, + "number_of_characters": 224, "num_documents": 4, - "num_relevant_docs": 8, - "min_document_length": 23, - 
"average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 4, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 4, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 4, "none_queries": 0, + "num_relevant_docs": 8, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1219,25 +1220,26 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "average_instruction_length": None, "max_instruction_length": None, "unique_instructions": None, + "num_top_ranked": 4, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, "max_top_ranked_per_query": 2, "hf_subset_descriptive_stats": { "eng": { - "number_of_characters": 112, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 112, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1247,25 +1249,26 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "average_instruction_length": None, "max_instruction_length": None, "unique_instructions": None, + "num_top_ranked": 2, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, "max_top_ranked_per_query": 2, }, "fra": { - "number_of_characters": 112, "num_samples": 4, - 
"num_queries": 2, + "number_of_characters": 112, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1275,6 +1278,7 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "average_instruction_length": None, "max_instruction_length": None, "unique_instructions": None, + "num_top_ranked": 2, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, "max_top_ranked_per_query": 2, @@ -1334,20 +1338,20 @@ def load_data(self, **kwargs): class MockRetrievalTask(AbsTaskRetrieval): expected_stats = { "test": { - "number_of_characters": 112, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 154, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 51, + "average_document_length": 51.0, + "max_document_length": 51, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1357,6 +1361,7 @@ class MockRetrievalTask(AbsTaskRetrieval): "average_instruction_length": None, "max_instruction_length": None, "unique_instructions": None, + 
"num_top_ranked": None, "min_top_ranked_per_query": None, "average_top_ranked_per_query": None, "max_top_ranked_per_query": None, @@ -1377,10 +1382,17 @@ def load_data(self, **kwargs): "q2": "This is another test sentence", } } + self.corpus = { "test": { - "d1": "This is a positive sentence", - "d2": "This is another positive sentence", + "d1": { + "title": "This is a positive title", + "text": "This is a positive sentence", + }, + "d2": { + "title": "This is a negative title", + "text": "This is a negative sentence", + }, } } @@ -1398,20 +1410,20 @@ def load_data(self, **kwargs): class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): expected_stats = { "test": { - "number_of_characters": 224, "num_samples": 8, - "num_queries": 4, + "number_of_characters": 224, "num_documents": 4, - "num_relevant_docs": 8, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 4, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 4, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 4, "none_queries": 0, + "num_relevant_docs": 8, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1421,25 +1433,26 @@ class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): "average_instruction_length": None, "max_instruction_length": None, "unique_instructions": None, + "num_top_ranked": None, "min_top_ranked_per_query": None, "average_top_ranked_per_query": None, "max_top_ranked_per_query": None, "hf_subset_descriptive_stats": { "eng": { - "number_of_characters": 112, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 112, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - 
"max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1449,25 +1462,26 @@ class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): "average_instruction_length": None, "max_instruction_length": None, "unique_instructions": None, + "num_top_ranked": None, "min_top_ranked_per_query": None, "average_top_ranked_per_query": None, "max_top_ranked_per_query": None, }, "fra": { - "number_of_characters": 112, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 112, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1477,6 +1491,7 @@ class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): "average_instruction_length": None, "max_instruction_length": None, "unique_instructions": None, + "num_top_ranked": None, "min_top_ranked_per_query": None, "average_top_ranked_per_query": None, "max_top_ranked_per_query": None, @@ -1719,20 +1734,20 @@ def load_data(self, **kwargs): class MockInstructionRetrieval(AbsTaskRetrieval): 
expected_stats = { "test": { - "number_of_characters": 112, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 112, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1742,6 +1757,7 @@ class MockInstructionRetrieval(AbsTaskRetrieval): "average_instruction_length": 58, "max_instruction_length": 32, "unique_instructions": 2, + "num_top_ranked": None, "min_top_ranked_per_query": None, "average_top_ranked_per_query": None, "max_top_ranked_per_query": None, @@ -1788,20 +1804,20 @@ def load_data(self, **kwargs): class MockInstructionReranking(AbsTaskReranking): expected_stats = { "test": { - "number_of_characters": 112, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 112, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1811,6 +1827,7 @@ class MockInstructionReranking(AbsTaskReranking): "average_instruction_length": 58, 
"max_instruction_length": 32, "unique_instructions": 2, + "num_top_ranked": 2, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, "max_top_ranked_per_query": 2, @@ -1862,20 +1879,20 @@ def load_data(self, **kwargs): class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): expected_stats = { "test": { - "number_of_characters": 224, "num_samples": 8, - "num_queries": 4, + "number_of_characters": 224, "num_documents": 4, - "num_relevant_docs": 8, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 4, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 4, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 4, "none_queries": 0, + "num_relevant_docs": 8, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1885,25 +1902,26 @@ class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): "average_instruction_length": 116, "max_instruction_length": 32, "unique_instructions": 4, + "num_top_ranked": None, "min_top_ranked_per_query": None, "average_top_ranked_per_query": None, "max_top_ranked_per_query": None, "hf_subset_descriptive_stats": { "eng": { - "number_of_characters": 112, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 112, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, 
"none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1913,25 +1931,26 @@ class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): "average_instruction_length": 58, "max_instruction_length": 32, "unique_instructions": 2, + "num_top_ranked": None, "min_top_ranked_per_query": None, "average_top_ranked_per_query": None, "max_top_ranked_per_query": None, }, "fra": { - "number_of_characters": 112, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 112, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -1941,6 +1960,7 @@ class MockMultilingualInstructionRetrieval(AbsTaskRetrieval, MultilingualTask): "average_instruction_length": 58, "max_instruction_length": 32, "unique_instructions": 2, + "num_top_ranked": None, "min_top_ranked_per_query": None, "average_top_ranked_per_query": None, "max_top_ranked_per_query": None, @@ -2006,20 +2026,20 @@ def load_data(self, **kwargs): class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): expected_stats = { "test": { - "number_of_characters": 224, "num_samples": 8, - "num_queries": 4, + "number_of_characters": 224, "num_documents": 4, - "num_relevant_docs": 8, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, 
+ "max_document_length": 33, "unique_documents": 4, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 4, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 4, "none_queries": 0, + "num_relevant_docs": 8, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -2029,25 +2049,26 @@ class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): "average_instruction_length": 116, "max_instruction_length": 32, "unique_instructions": 4, + "num_top_ranked": 4, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, "max_top_ranked_per_query": 2, "hf_subset_descriptive_stats": { "eng": { - "number_of_characters": 112, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 112, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - "average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -2057,25 +2078,26 @@ class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): "average_instruction_length": 58, "max_instruction_length": 32, "unique_instructions": 2, + "num_top_ranked": 2, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, "max_top_ranked_per_query": 2, }, "fra": { - "number_of_characters": 112, "num_samples": 4, - "num_queries": 2, + "number_of_characters": 112, "num_documents": 2, - "num_relevant_docs": 4, - "min_document_length": 23, - 
"average_document_length": 26.0, - "max_document_length": 29, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, - "min_query_length": 27, - "average_query_length": 30.0, - "max_query_length": 33, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, "unique_queries": 2, "none_queries": 0, + "num_relevant_docs": 4, "min_relevant_docs_per_query": 2, "average_relevant_docs_per_query": 1.0, "max_relevant_docs_per_query": 2, @@ -2085,6 +2107,7 @@ class MockMultilingualInstructionReranking(AbsTaskReranking, MultilingualTask): "average_instruction_length": 58, "max_instruction_length": 32, "unique_instructions": 2, + "num_top_ranked": 2, "min_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, "max_top_ranked_per_query": 2, From d54fb75b71c12f6d5878f8485480773db4389057 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Fri, 29 Nov 2024 00:45:14 +0500 Subject: [PATCH 17/40] fix: hatespeech filipino (#1522) * fix FilipinoHateSpeechClassification * update tests --- .../FilipinoHateSpeechClassification.json | 56 +++++++++++++++++++ .../fil/FilipinoHateSpeechClassification.py | 4 +- tests/test_TaskMetadata.py | 19 ++----- 3 files changed, 62 insertions(+), 17 deletions(-) create mode 100644 mteb/descriptive_stats/Classification/FilipinoHateSpeechClassification.json diff --git a/mteb/descriptive_stats/Classification/FilipinoHateSpeechClassification.json b/mteb/descriptive_stats/Classification/FilipinoHateSpeechClassification.json new file mode 100644 index 0000000000..efce6d00ae --- /dev/null +++ b/mteb/descriptive_stats/Classification/FilipinoHateSpeechClassification.json @@ -0,0 +1,56 @@ +{ + "validation": { + "num_samples": 2048, + "number_of_characters": 181717, + "number_texts_intersect_with_train": 43, + "min_text_length": 3, + "average_text_length": 88.72900390625, + "max_text_length": 181, + "unique_text": 2029, + "unique_labels": 2, + 
"labels": { + "0": { + "count": 1113 + }, + "1": { + "count": 935 + } + } + }, + "test": { + "num_samples": 2048, + "number_of_characters": 179449, + "number_texts_intersect_with_train": 49, + "min_text_length": 2, + "average_text_length": 87.62158203125, + "max_text_length": 148, + "unique_text": 2012, + "unique_labels": 2, + "labels": { + "1": { + "count": 971 + }, + "0": { + "count": 1077 + } + } + }, + "train": { + "num_samples": 10000, + "number_of_characters": 874151, + "number_texts_intersect_with_train": null, + "min_text_length": 2, + "average_text_length": 87.4151, + "max_text_length": 251, + "unique_text": 9685, + "unique_labels": 2, + "labels": { + "0": { + "count": 5340 + }, + "1": { + "count": 4660 + } + } + } +} \ No newline at end of file diff --git a/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py b/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py index 3715103ca2..df6205d427 100644 --- a/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py +++ b/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py @@ -12,8 +12,8 @@ class FilipinoHateSpeechClassification(AbsTaskClassification): description="Filipino Twitter dataset for sentiment classification.", reference="https://pcj.csp.org.ph/index.php/pcj/issue/download/29/PCJ%20V14%20N1%20pp1-14%202019", dataset={ - "path": "legacy-datasets/hate_speech_filipino", - "revision": "1994e9bb7f3ec07518e3f0d9e870cb293e234686", + "path": "jcblaise/hatespeech_filipino", + "revision": "b01711587b073e55569de75ef04d7da4592a3618", "trust_remote_code": True, }, type="Classification", diff --git a/tests/test_TaskMetadata.py b/tests/test_TaskMetadata.py index 692f4dd9fb..19cfd0be75 100644 --- a/tests/test_TaskMetadata.py +++ b/tests/test_TaskMetadata.py @@ -528,21 +528,10 @@ def test_disallow_trust_remote_code_in_new_datasets(): @pytest.mark.parametrize("task", get_tasks()) def test_empty_descriptive_stat_in_new_datasets(task: AbsTask): - # DON'T ADD NEW DATASETS 
TO THIS LIST - # THIS IS ONLY INTENDED FOR HISTORIC DATASETS - exceptions = [ - "FilipinoHateSpeechClassification", - ] - if task.metadata.name.startswith("Mock"): return - if task.metadata.name in exceptions: - assert ( - task.metadata.descriptive_stats is None - ), f"Dataset {task.metadata.name} should not have descriptive stats" - else: - assert ( - task.metadata.descriptive_stats is not None - ), f"Dataset {task.metadata.name} should have descriptive stats. You can add metadata to your task by running `YorTask().calculate_metadata_metrics()`" - assert task.metadata.n_samples is not None + assert ( + task.metadata.descriptive_stats is not None + ), f"Dataset {task.metadata.name} should have descriptive stats. You can add metadata to your task by running `YorTask().calculate_metadata_metrics()`" + assert task.metadata.n_samples is not None From dec5d6a94b0b7adfa9d3b47f1fd9eb6041845008 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Thu, 5 Dec 2024 03:19:31 +0500 Subject: [PATCH 18/40] feat: Forbid task metadata and add upload functions (#1362) * init * find all weird repos * move to mteb WikipediaRetrievalMultilingual * add base upload utils * retrieval, classification, bitextmining * test retrieval * test retrieval * test task uploaded * update tasks * working version * remove comments * lint * move upload * fix tests * fix test * move upload to task * Update mteb/tasks/Retrieval/multilingual/WikipediaRetrievalMultilingual.py Co-authored-by: Kenneth Enevoldsen * fix: hatespeech filipino (#1522) * fix FilipinoHateSpeechClassification * update tests * lint --------- Co-authored-by: Kenneth Enevoldsen --- mteb/abstasks/AbsTask.py | 36 +++ mteb/abstasks/AbsTaskBitextMining.py | 45 ++- mteb/abstasks/AbsTaskClassification.py | 3 + mteb/abstasks/AbsTaskClustering.py | 3 + mteb/abstasks/AbsTaskClusteringFast.py | 3 + .../AbsTaskMultilabelClassification.py | 3 + mteb/abstasks/AbsTaskPairClassification.py | 5 +- mteb/abstasks/AbsTaskReranking.py | 1 -
mteb/abstasks/AbsTaskRetrieval.py | 174 +++++++++++- mteb/abstasks/AbsTaskSTS.py | 3 + mteb/abstasks/TaskMetadata.py | 23 +- mteb/abstasks/dataloaders.py | 6 +- .../IndicXnliPairClassification.json | 268 ++++++++++++++++++ .../multilingual/IWSLT2017BitextMining.py | 39 +-- .../AmazonCounterfactualClassification.py | 3 +- .../AmazonReviewsClassification.py | 5 +- .../Clustering/deu/BlurbsClusteringP2P.py | 1 - .../Clustering/deu/BlurbsClusteringS2S.py | 1 - .../Clustering/deu/TenKGnadClusteringS2S.py | 1 - .../Clustering/eng/BigPatentClustering.py | 1 - mteb/tasks/Clustering/eng/RedditClustering.py | 1 - .../Clustering/eng/RedditClusteringP2P.py | 1 - .../Clustering/eng/StackExchangeClustering.py | 1 - .../eng/StackExchangeClusteringP2P.py | 1 - .../Clustering/fra/AlloProfClusteringP2P.py | 1 - .../Clustering/fra/AlloProfClusteringS2S.py | 1 - mteb/tasks/Clustering/fra/HALClusteringS2S.py | 1 - .../multilingual/IndicReviewsClusteringP2P.py | 36 +-- mteb/tasks/Clustering/zho/CMTEBClustering.py | 4 - .../multilingual/mFollowIR.py | 8 +- .../InstructionRetrieval/eng/InstructIR.py | 11 - mteb/tasks/PairClassification/__init__.py | 3 + .../IndicXnliPairClassification.py | 6 +- .../PairClassification/multilingual/XNLI.py | 30 +- .../Reranking/eng/AskUbuntuDupQuestions.py | 4 +- mteb/tasks/Reranking/zho/CMTEBReranking.py | 3 - mteb/tasks/Retrieval/eng/BrightRetrieval.py | 6 +- .../WikipediaRetrievalMultilingual.py | 86 +----- mteb/tasks/Retrieval/zho/CMTEBRetrieval.py | 157 ++-------- .../STS/multilingual/IndicCrosslingualSTS.py | 31 +- tests/test_TaskMetadata.py | 23 +- tests/test_load_results/test_mteb_results.py | 1 - 42 files changed, 618 insertions(+), 422 deletions(-) create mode 100644 mteb/descriptive_stats/PairClassification/IndicXnliPairClassification.json diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index 8b9edfd52c..e82878c803 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -300,6 +300,42 @@ def filter_languages( 
self.hf_subsets = subsets_to_keep return self + def _upload_dataset_to_hub(self, repo_name: str, fields: list[str]) -> None: + if self.is_multilingual: + for config in self.metadata.eval_langs: + logger.info(f"Converting {config} of {self.metadata.name}") + sentences = {} + for split in self.dataset[config]: + sentences[split] = Dataset.from_dict( + {field: self.dataset[config][split][field] for field in fields} + ) + sentences = DatasetDict(sentences) + sentences.push_to_hub( + repo_name, config, commit_message=f"Add {config} dataset" + ) + else: + sentences = {} + for split in self.dataset: + sentences[split] = Dataset.from_dict( + {field: self.dataset[split][field] for field in fields} + ) + sentences = DatasetDict(sentences) + sentences.push_to_hub(repo_name, commit_message="Add dataset") + + def _push_dataset_to_hub(self, repo_name: str) -> None: + raise NotImplementedError + + def push_dataset_to_hub(self, repo_name: str) -> None: + """Push the dataset to the HuggingFace Hub. + + Args: + repo_name: The name of the repository to push the dataset to. 
+ """ + if not self.data_loaded: + self.load_data() + + self._push_dataset_to_hub(repo_name) + @property def eval_splits(self) -> list[str]: if self._eval_splits: diff --git a/mteb/abstasks/AbsTaskBitextMining.py b/mteb/abstasks/AbsTaskBitextMining.py index 59d64039fd..4be4ec1562 100644 --- a/mteb/abstasks/AbsTaskBitextMining.py +++ b/mteb/abstasks/AbsTaskBitextMining.py @@ -3,7 +3,7 @@ import logging from typing import Any -from datasets import Dataset +from datasets import Dataset, DatasetDict from mteb.encoder_interface import Encoder @@ -191,3 +191,46 @@ def _calculate_metrics_from_split( max_sentence2_length=max(s2_len), unique_sentence2=unique_sentence2, ) + + def _push_dataset_to_hub(self, repo_name: str) -> None: + if self.is_multilingual: + for config in self.metadata.eval_langs: + logger.info(f"Converting {config} of {self.metadata.name}") + + sentences = {} + if self.parallel_subsets: + # If there are parallel subsets, process them + for split in self.dataset: + sent_1, sent_2 = config.split("-") + sentences[split] = Dataset.from_dict( + { + "sentence1": self.dataset[split][sent_1], + "sentence2": self.dataset[split][sent_2], + } + ) + else: + # Handle the non-parallel subset case + sent_1, sent_2 = self.get_pairs(self.parallel_subsets)[0] + for split in self.dataset[config]: + sentences[split] = Dataset.from_dict( + { + "sentence1": self.dataset[config][split][sent_1], + "sentence2": self.dataset[config][split][sent_2], + } + ) + sentences = DatasetDict(sentences) + sentences.push_to_hub( + repo_name, config, commit_message=f"Add {config} subset" + ) + else: + sentences = {} + for split in self.dataset: + sent_1, sent_2 = self.get_pairs(self.parallel_subsets)[0] + sentences[split] = Dataset.from_dict( + { + "sentence1": self.dataset[split][sent_1], + "sentence2": self.dataset[split][sent_2], + } + ) + sentences = DatasetDict(sentences) + sentences.push_to_hub(repo_name) diff --git a/mteb/abstasks/AbsTaskClassification.py 
b/mteb/abstasks/AbsTaskClassification.py index 55766190fe..5e48dfab49 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskClassification.py @@ -257,3 +257,6 @@ def _calculate_metrics_from_split( str(label): {"count": count} for label, count in label_count.items() }, ) + + def _push_dataset_to_hub(self, repo_name: str) -> None: + self._upload_dataset_to_hub(repo_name, ["text", "label"]) diff --git a/mteb/abstasks/AbsTaskClustering.py b/mteb/abstasks/AbsTaskClustering.py index 3b5d0f492d..095c44435c 100644 --- a/mteb/abstasks/AbsTaskClustering.py +++ b/mteb/abstasks/AbsTaskClustering.py @@ -141,3 +141,6 @@ def _calculate_metrics_from_split( for label, value in label_counter.items() }, ) + + def _push_dataset_to_hub(self, repo_name: str) -> None: + self._upload_dataset_to_hub(repo_name, ["sentences", "labels"]) diff --git a/mteb/abstasks/AbsTaskClusteringFast.py b/mteb/abstasks/AbsTaskClusteringFast.py index 61c82e9535..af600eb7e0 100644 --- a/mteb/abstasks/AbsTaskClusteringFast.py +++ b/mteb/abstasks/AbsTaskClusteringFast.py @@ -268,6 +268,9 @@ def _calculate_metrics_from_split( }, ) + def _push_dataset_to_hub(self, repo_name: str) -> None: + self._upload_dataset_to_hub(repo_name, ["sentences", "labels"]) + def clustering_downsample( dataset: DatasetDict, seed: int, max_samples_in_cluster: int = 2048 diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py index 16c7ac86ff..1c3cba33e5 100644 --- a/mteb/abstasks/AbsTaskMultilabelClassification.py +++ b/mteb/abstasks/AbsTaskMultilabelClassification.py @@ -297,3 +297,6 @@ def _calculate_metrics_from_split( for label, value in label_count.items() }, ) + + def _push_dataset_to_hub(self, repo_name: str) -> None: + self._upload_dataset_to_hub(repo_name, ["text", "label"]) diff --git a/mteb/abstasks/AbsTaskPairClassification.py b/mteb/abstasks/AbsTaskPairClassification.py index 2b4c82c01f..4d39fd2c3f 100644 --- 
a/mteb/abstasks/AbsTaskPairClassification.py +++ b/mteb/abstasks/AbsTaskPairClassification.py @@ -81,7 +81,7 @@ def _evaluate_subset( encode_kwargs: dict[str, str] = {}, **kwargs, ) -> ScoresDict: - data_split = dataset[0] + data_split = dataset[0] if len(dataset) == 1 else dataset logging.getLogger( "sentence_transformers.evaluation.PairClassificationEvaluator" ).setLevel(logging.WARN) @@ -152,3 +152,6 @@ def _calculate_metrics_from_split( str(label): {"count": count} for label, count in label_count.items() }, ) + + def _push_dataset_to_hub(self, repo_name: str) -> None: + self._upload_dataset_to_hub(repo_name, ["sentence1", "sentence2", "labels"]) diff --git a/mteb/abstasks/AbsTaskReranking.py b/mteb/abstasks/AbsTaskReranking.py index b4a5cffd25..2ae1f5c359 100644 --- a/mteb/abstasks/AbsTaskReranking.py +++ b/mteb/abstasks/AbsTaskReranking.py @@ -11,7 +11,6 @@ logger = logging.getLogger(__name__) OLD_FORMAT_RERANKING_TASKS = [ - "AskUbuntuDupQuestions", "MindSmallReranking", "SciDocsRR", "StackOverflowDupQuestions", diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index 573a0efd12..b8f255b356 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -7,6 +7,8 @@ from time import time from typing import Any +from datasets import Dataset, DatasetDict + from mteb.abstasks.TaskMetadata import HFSubset from ..evaluation.evaluators import RetrievalEvaluator @@ -223,7 +225,7 @@ def load_data(self, **kwargs): if top_ranked: if self.top_ranked is None: self.top_ranked = {} - self.top_ranked = { + self.top_ranked[lang] = { split: { tr["query-id"]: tr["corpus-ids"] for tr in top_ranked } @@ -515,6 +517,176 @@ def _calculate_metrics_from_split( max_top_ranked_per_query=max_top_ranked_per_query, ) + def _push_dataset_to_hub(self, repo_name: str) -> None: + def format_text_field(text): + """Formats the text field to match loader expectations.""" + if isinstance(text, str): + return text + return 
f"{text.get('title', '')} {text.get('text', '')}".strip() + + if self.is_multilingual: + for config in self.queries: + logger.info(f"Converting {config} of {self.metadata.name}") + + queries_dataset = {} + for split in self.queries[config]: + queries_dataset[split] = Dataset.from_list( + [ + { + "_id": idx, + "text": text, + } + for idx, text in self.queries[config][split].items() + ] + ) + queries_dataset = DatasetDict(queries_dataset) + queries_dataset.push_to_hub(repo_name, f"{config}-queries") + + corpus_dataset = {} + for split in self.corpus[config]: + corpus_dataset[split] = Dataset.from_list( + [ + { + "_id": idx, + "text": format_text_field(text), + "title": text.get("title", "") + if isinstance(text, dict) + else "", + } + for idx, text in self.corpus[config][split].items() + ] + ) + + corpus_dataset = DatasetDict(corpus_dataset) + corpus_dataset.push_to_hub(repo_name, f"{config}-corpus") + + relevant_docs_dataset = {} + for split in self.relevant_docs[config]: + relevant_docs_dataset[split] = Dataset.from_list( + [ + {"query-id": query_id, "corpus-id": doc_id, "score": score} + for query_id, docs in self.relevant_docs[config][ + split + ].items() + for doc_id, score in docs.items() + ] + ) + relevant_docs_dataset = DatasetDict(relevant_docs_dataset) + relevant_docs_dataset.push_to_hub(repo_name, f"{config}-qrels") + + if self.instructions: + instructions_dataset = {} + for split in self.instructions[config]: + instructions_dataset[split] = Dataset.from_list( + [ + { + "query-id": idx, + "instruction": text, + } + for idx, text in self.instructions[config][ + split + ].items() + ] + ) + instructions_dataset = DatasetDict(instructions_dataset) + instructions_dataset.push_to_hub(repo_name, f"{config}-instruction") + if self.top_ranked: + top_ranked_dataset = {} + for split in self.top_ranked[config]: + top_ranked_dataset[split] = Dataset.from_list( + [ + { + "query-id": query_id, + "corpus-ids": docs, + } + for query_id, docs in self.top_ranked[config][ + 
split + ].items() + ] + ) + top_ranked_dataset = DatasetDict(top_ranked_dataset) + top_ranked_dataset.push_to_hub(repo_name, f"{config}-top_ranked") + else: + if "default" in self.queries: + # old rerankers have additional default split + self.queries = self.queries["default"] + self.corpus = self.corpus["default"] + self.relevant_docs = self.relevant_docs["default"] + if self.instructions: + self.instructions = self.instructions["default"] + if self.top_ranked: + self.top_ranked = self.top_ranked["default"] + + queries_dataset = {} + for split in self.queries: + queries_dataset[split] = Dataset.from_list( + [ + { + "_id": idx, + "text": text, + } + for idx, text in self.queries[split].items() + ] + ) + queries_dataset = DatasetDict(queries_dataset) + queries_dataset.push_to_hub(repo_name, "queries") + corpus_dataset = {} + for split in self.corpus: + corpus_dataset[split] = Dataset.from_list( + [ + { + "_id": idx, + "text": format_text_field(text), + "title": text.get("title", "") + if isinstance(text, dict) + else "", + } + for idx, text in self.corpus[split].items() + ] + ) + + corpus_dataset = DatasetDict(corpus_dataset) + corpus_dataset.push_to_hub(repo_name, "corpus") + relevant_docs_dataset = {} + for split in self.relevant_docs: + relevant_docs_dataset[split] = Dataset.from_list( + [ + {"query-id": query_id, "corpus-id": doc_id, "score": score} + for query_id, docs in self.relevant_docs[split].items() + for doc_id, score in docs.items() + ] + ) + relevant_docs_dataset = DatasetDict(relevant_docs_dataset) + relevant_docs_dataset.push_to_hub(repo_name, "default") + if self.instructions: + instructions_dataset = {} + for split in self.instructions: + instructions_dataset[split] = Dataset.from_list( + [ + { + "query-id": idx, + "instruction": text, + } + for idx, text in self.instructions[split].items() + ] + ) + instructions_dataset = DatasetDict(instructions_dataset) + instructions_dataset.push_to_hub(repo_name, "instruction") + if self.top_ranked: + 
top_ranked_dataset = {} + for split in self.top_ranked: + top_ranked_dataset[split] = Dataset.from_list( + [ + { + "query-id": query_id, + "corpus-ids": docs, + } + for query_id, docs in self.top_ranked[split].items() + ] + ) + top_ranked_dataset = DatasetDict(top_ranked_dataset) + top_ranked_dataset.push_to_hub(repo_name, "top_ranked") + def calculate_queries_length(queries: dict[str, str]) -> list[int] | None: queries_lens = [] diff --git a/mteb/abstasks/AbsTaskSTS.py b/mteb/abstasks/AbsTaskSTS.py index 0a7cb820ea..2183cc08af 100644 --- a/mteb/abstasks/AbsTaskSTS.py +++ b/mteb/abstasks/AbsTaskSTS.py @@ -136,3 +136,6 @@ def _calculate_metrics_from_split( avg_score=avg_score, max_score=max(score), ) + + def _push_dataset_to_hub(self, repo_name: str) -> None: + self._upload_dataset_to_hub(repo_name, ["sentence1", "sentence2", "score"]) diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index d1fd1fd4df..65e7ddbb86 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -11,6 +11,7 @@ AnyUrl, BaseModel, BeforeValidator, + ConfigDict, TypeAdapter, field_validator, ) @@ -197,6 +198,24 @@ class DescriptiveStatistics(TypedDict): logger = logging.getLogger(__name__) +class MetadataDatasetDict(TypedDict, total=False): + """A dictionary containing the dataset path and revision. + + Args: + path: The path to the dataset. + revision: The revision of the dataset. + name: The name the dataset config. + split: The split of the dataset. + trust_remote_code: Whether to trust the remote code. + """ + + path: str + revision: str + name: str + split: str + trust_remote_code: bool + + class TaskMetadata(BaseModel): """Metadata for a task. @@ -228,7 +247,9 @@ class TaskMetadata(BaseModel): bibtex_citation: The BibTeX citation for the dataset. Should be an empty string if no citation is available. 
""" - dataset: dict + model_config = ConfigDict(extra="forbid") + + dataset: MetadataDatasetDict name: str description: str diff --git a/mteb/abstasks/dataloaders.py b/mteb/abstasks/dataloaders.py index ba5d180ca5..2c2c1a33af 100644 --- a/mteb/abstasks/dataloaders.py +++ b/mteb/abstasks/dataloaders.py @@ -126,7 +126,7 @@ def load( logger.info("Loading Queries...") self._load_queries(config) - if any(c.endswith("top_ranked") for c in configs) in configs or ( + if any(c.endswith("top_ranked") for c in configs) or ( not self.hf_repo and self.top_ranked_file ): logger.info("Loading Top Ranked") @@ -258,7 +258,7 @@ def _load_qrels(self, split: str, config: str | None = None): self.qrels = qrels_ds def _load_top_ranked(self, config: str | None = None): - config = f"top_ranked-{config}" if config is not None else "top_ranked" + config = f"{config}-top_ranked" if config is not None else "top_ranked" if self.hf_repo: top_ranked_ds = load_dataset( self.hf_repo, @@ -303,7 +303,7 @@ def _load_top_ranked(self, config: str | None = None): self.top_ranked = top_ranked_ds def _load_instructions(self, config: str | None = None): - config = f"instruction-{config}" if config is not None else "instruction" + config = f"{config}-instruction" if config is not None else "instruction" if self.hf_repo: instructions_ds = load_dataset( self.hf_repo, diff --git a/mteb/descriptive_stats/PairClassification/IndicXnliPairClassification.json b/mteb/descriptive_stats/PairClassification/IndicXnliPairClassification.json new file mode 100644 index 0000000000..c8b78b1d60 --- /dev/null +++ b/mteb/descriptive_stats/PairClassification/IndicXnliPairClassification.json @@ -0,0 +1,268 @@ +{ + "test": { + "num_samples": 36740, + "number_of_characters": 5676171, + "unique_pairs": 36736, + "min_sentence1_length": 6, + "avg_sentence1_length": 103.40577027762656, + "max_sentence1_length": 795, + "unique_sentence1": 18367, + "min_sentence2_length": 6, + "avg_sentence2_length": 51.08990201415351, + 
"max_sentence2_length": 529, + "unique_sentence2": 36730, + "unique_labels": 2, + "labels": { + "0": { + "count": 18370 + }, + "1": { + "count": 18370 + } + }, + "hf_subset_descriptive_stats": { + "as": { + "num_samples": 3340, + "number_of_characters": 497266, + "unique_pairs": 3339, + "min_sentence1_length": 14, + "avg_sentence1_length": 99.08083832335329, + "max_sentence1_length": 399, + "unique_sentence1": 1670, + "min_sentence2_length": 11, + "avg_sentence2_length": 49.80119760479042, + "max_sentence2_length": 268, + "unique_sentence2": 3338, + "unique_labels": 2, + "labels": { + "0": { + "count": 1670 + }, + "1": { + "count": 1670 + } + } + }, + "bn": { + "num_samples": 3340, + "number_of_characters": 497318, + "unique_pairs": 3340, + "min_sentence1_length": 11, + "avg_sentence1_length": 100.6119760479042, + "max_sentence1_length": 433, + "unique_sentence1": 1670, + "min_sentence2_length": 10, + "avg_sentence2_length": 48.28562874251497, + "max_sentence2_length": 183, + "unique_sentence2": 3340, + "unique_labels": 2, + "labels": { + "0": { + "count": 1670 + }, + "1": { + "count": 1670 + } + } + }, + "gu": { + "num_samples": 3340, + "number_of_characters": 487756, + "unique_pairs": 3340, + "min_sentence1_length": 15, + "avg_sentence1_length": 98.30059880239521, + "max_sentence1_length": 266, + "unique_sentence1": 1670, + "min_sentence2_length": 9, + "avg_sentence2_length": 47.73413173652695, + "max_sentence2_length": 160, + "unique_sentence2": 3340, + "unique_labels": 2, + "labels": { + "0": { + "count": 1670 + }, + "1": { + "count": 1670 + } + } + }, + "hi": { + "num_samples": 3340, + "number_of_characters": 516591, + "unique_pairs": 3340, + "min_sentence1_length": 15, + "avg_sentence1_length": 104.58203592814371, + "max_sentence1_length": 281, + "unique_sentence1": 1669, + "min_sentence2_length": 10, + "avg_sentence2_length": 50.08592814371257, + "max_sentence2_length": 173, + "unique_sentence2": 3339, + "unique_labels": 2, + "labels": { + "0": { + "count": 
1670 + }, + "1": { + "count": 1670 + } + } + }, + "kn": { + "num_samples": 3340, + "number_of_characters": 536983, + "unique_pairs": 3340, + "min_sentence1_length": 8, + "avg_sentence1_length": 107.52874251497006, + "max_sentence1_length": 281, + "unique_sentence1": 1669, + "min_sentence2_length": 8, + "avg_sentence2_length": 53.24461077844311, + "max_sentence2_length": 178, + "unique_sentence2": 3339, + "unique_labels": 2, + "labels": { + "0": { + "count": 1670 + }, + "1": { + "count": 1670 + } + } + }, + "ml": { + "num_samples": 3340, + "number_of_characters": 544776, + "unique_pairs": 3339, + "min_sentence1_length": 8, + "avg_sentence1_length": 107.6185628742515, + "max_sentence1_length": 316, + "unique_sentence1": 1670, + "min_sentence2_length": 9, + "avg_sentence2_length": 55.48802395209581, + "max_sentence2_length": 194, + "unique_sentence2": 3338, + "unique_labels": 2, + "labels": { + "0": { + "count": 1670 + }, + "1": { + "count": 1670 + } + } + }, + "mr": { + "num_samples": 3340, + "number_of_characters": 491967, + "unique_pairs": 3340, + "min_sentence1_length": 15, + "avg_sentence1_length": 98.40059880239521, + "max_sentence1_length": 477, + "unique_sentence1": 1670, + "min_sentence2_length": 12, + "avg_sentence2_length": 48.89491017964072, + "max_sentence2_length": 173, + "unique_sentence2": 3340, + "unique_labels": 2, + "labels": { + "0": { + "count": 1670 + }, + "1": { + "count": 1670 + } + } + }, + "or": { + "num_samples": 3340, + "number_of_characters": 500985, + "unique_pairs": 3340, + "min_sentence1_length": 15, + "avg_sentence1_length": 99.90838323353293, + "max_sentence1_length": 304, + "unique_sentence1": 1670, + "min_sentence2_length": 10, + "avg_sentence2_length": 50.08712574850299, + "max_sentence2_length": 529, + "unique_sentence2": 3339, + "unique_labels": 2, + "labels": { + "0": { + "count": 1670 + }, + "1": { + "count": 1670 + } + } + }, + "pa": { + "num_samples": 3340, + "number_of_characters": 510238, + "unique_pairs": 3340, + 
"min_sentence1_length": 15, + "avg_sentence1_length": 102.82035928143712, + "max_sentence1_length": 246, + "unique_sentence1": 1669, + "min_sentence2_length": 10, + "avg_sentence2_length": 49.945508982035925, + "max_sentence2_length": 169, + "unique_sentence2": 3340, + "unique_labels": 2, + "labels": { + "0": { + "count": 1670 + }, + "1": { + "count": 1670 + } + } + }, + "ta": { + "num_samples": 3340, + "number_of_characters": 588359, + "unique_pairs": 3339, + "min_sentence1_length": 6, + "avg_sentence1_length": 117.50419161676646, + "max_sentence1_length": 795, + "unique_sentence1": 1670, + "min_sentence2_length": 6, + "avg_sentence2_length": 58.65119760479042, + "max_sentence2_length": 208, + "unique_sentence2": 3339, + "unique_labels": 2, + "labels": { + "0": { + "count": 1670 + }, + "1": { + "count": 1670 + } + } + }, + "te": { + "num_samples": 3340, + "number_of_characters": 503932, + "unique_pairs": 3339, + "min_sentence1_length": 15, + "avg_sentence1_length": 101.10718562874251, + "max_sentence1_length": 681, + "unique_sentence1": 1670, + "min_sentence2_length": 11, + "avg_sentence2_length": 49.77065868263473, + "max_sentence2_length": 183, + "unique_sentence2": 3339, + "unique_labels": 2, + "labels": { + "0": { + "count": 1670 + }, + "1": { + "count": 1670 + } + } + } + } + } +} \ No newline at end of file diff --git a/mteb/tasks/BitextMining/multilingual/IWSLT2017BitextMining.py b/mteb/tasks/BitextMining/multilingual/IWSLT2017BitextMining.py index ee83b6f5ca..dfe4e4bfa7 100644 --- a/mteb/tasks/BitextMining/multilingual/IWSLT2017BitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/IWSLT2017BitextMining.py @@ -1,7 +1,5 @@ from __future__ import annotations -import datasets - from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining from mteb.abstasks.MultilingualTask import MultilingualTask from mteb.abstasks.TaskMetadata import TaskMetadata @@ -40,9 +38,8 @@ class IWSLT2017BitextMining(AbsTaskBitextMining, MultilingualTask): metadata = 
TaskMetadata( name="IWSLT2017BitextMining", dataset={ - "path": "IWSLT/iwslt2017", - "revision": "c18a4f81a47ae6fa079fe9d32db288ddde38451d", - "trust_remote_code": True, + "path": "mteb/IWSLT2017BitextMining", + "revision": "14034eed1824a54d866c93a988319b77b2e90217", }, description="The IWSLT 2017 Multilingual Task addresses text translation, including zero-shot translation, with a single MT system across all directions including English, German, Dutch, Italian and Romanian.", reference="https://aclanthology.org/2017.iwslt-1.1/", @@ -82,35 +79,3 @@ class IWSLT2017BitextMining(AbsTaskBitextMining, MultilingualTask): } """, ) - - def load_data(self, **kwargs): - """Load dataset from HuggingFace hub and convert it to the standard format.""" - if self.data_loaded: - return - - self.dataset = {} - for lang in self.hf_subsets: - self.dataset[lang] = datasets.load_dataset( - name=f"iwslt2017-{lang}", - **self.metadata_dict["dataset"], - ) - - self.dataset_transform() - self.data_loaded = True - - def dataset_transform(self): - def create_columns(row, lang): - l1, l2 = lang.split("-") - row["sentence1"] = row["translation"][l1] - row["sentence2"] = row["translation"][l2] - return row - - # Convert to standard format - dataset = {} - for lang in self.hf_subsets: - dataset[lang] = {} - for split in _SPLITS: - dataset[lang][split] = self.dataset[lang][split].map( - lambda x: create_columns(x, lang=lang) - ) - self.dataset = dataset diff --git a/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py b/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py index 112d4e0b27..168f990d8d 100644 --- a/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py @@ -10,8 +10,7 @@ class AmazonCounterfactualClassification(MultilingualTask, AbsTaskClassification name="AmazonCounterfactualClassification", dataset={ "path": 
"mteb/amazon_counterfactual", - "revision": "e8379541af4e31359cca9fbcf4b00f2671dba205", - "trust_remote_code": True, + "revision": "1f7e6a9d6fa6e64c53d146e428565640410c0df1", }, description=( "A collection of Amazon customer reviews annotated for counterfactual detection pair classification." diff --git a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py index 774ad9f01d..a3db9785bb 100644 --- a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py @@ -9,9 +9,8 @@ class AmazonReviewsClassification(MultilingualTask, AbsTaskClassification): metadata = TaskMetadata( name="AmazonReviewsClassification", dataset={ - "path": "mteb/amazon_reviews_multi", - "revision": "1399c76144fd37290681b995c656ef9b2e06e26d", - "trust_remote_code": True, + "path": "mteb/AmazonReviewsClassification", + "revision": "6b5d328eaae8ef408dd7d775040245cf86f92e9d", }, description="A collection of Amazon reviews specifically designed to aid research in multilingual text classification.", reference="https://arxiv.org/abs/2010.02573", diff --git a/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py b/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py index 17ac058740..79f9f1badd 100644 --- a/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py +++ b/mteb/tasks/Clustering/deu/BlurbsClusteringP2P.py @@ -25,7 +25,6 @@ class BlurbsClusteringP2P(AbsTaskClustering): eval_langs=["deu-Latn"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py b/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py index 67366ed13d..79596448fa 100644 --- a/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py +++ b/mteb/tasks/Clustering/deu/BlurbsClusteringS2S.py @@ -33,7 +33,6 @@ class BlurbsClusteringS2S(AbsTaskClustering): eval_langs=["deu-Latn"], 
main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/deu/TenKGnadClusteringS2S.py b/mteb/tasks/Clustering/deu/TenKGnadClusteringS2S.py index 66b8bc0f1d..037f3a35a6 100644 --- a/mteb/tasks/Clustering/deu/TenKGnadClusteringS2S.py +++ b/mteb/tasks/Clustering/deu/TenKGnadClusteringS2S.py @@ -23,7 +23,6 @@ class TenKGnadClusteringS2S(AbsTaskClustering): eval_langs=["deu-Latn"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/eng/BigPatentClustering.py b/mteb/tasks/Clustering/eng/BigPatentClustering.py index 7df254ab51..a107d98204 100644 --- a/mteb/tasks/Clustering/eng/BigPatentClustering.py +++ b/mteb/tasks/Clustering/eng/BigPatentClustering.py @@ -29,7 +29,6 @@ class BigPatentClustering(AbsTaskClustering): eval_langs=["eng-Latn"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/eng/RedditClustering.py b/mteb/tasks/Clustering/eng/RedditClustering.py index c9efbe954a..07d9575d11 100644 --- a/mteb/tasks/Clustering/eng/RedditClustering.py +++ b/mteb/tasks/Clustering/eng/RedditClustering.py @@ -86,7 +86,6 @@ class RedditClustering(AbsTaskClustering): eval_langs=["eng-Latn"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/eng/RedditClusteringP2P.py b/mteb/tasks/Clustering/eng/RedditClusteringP2P.py index 1e8d51cdfa..b0bfbb041f 100644 --- a/mteb/tasks/Clustering/eng/RedditClusteringP2P.py +++ b/mteb/tasks/Clustering/eng/RedditClusteringP2P.py @@ -30,7 +30,6 @@ class RedditClusteringP2P(AbsTaskClustering): eval_langs=["eng-Latn"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/eng/StackExchangeClustering.py b/mteb/tasks/Clustering/eng/StackExchangeClustering.py index 
b123ab5bd1..dea016d854 100644 --- a/mteb/tasks/Clustering/eng/StackExchangeClustering.py +++ b/mteb/tasks/Clustering/eng/StackExchangeClustering.py @@ -88,7 +88,6 @@ class StackExchangeClustering(AbsTaskClustering): eval_langs=["eng-Latn"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/eng/StackExchangeClusteringP2P.py b/mteb/tasks/Clustering/eng/StackExchangeClusteringP2P.py index d6bb252304..c411138e9f 100644 --- a/mteb/tasks/Clustering/eng/StackExchangeClusteringP2P.py +++ b/mteb/tasks/Clustering/eng/StackExchangeClusteringP2P.py @@ -92,7 +92,6 @@ class StackExchangeClusteringP2P(AbsTaskClustering): eval_langs=["eng-Latn"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py b/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py index d48175172c..a1d3a3ac8e 100644 --- a/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py +++ b/mteb/tasks/Clustering/fra/AlloProfClusteringP2P.py @@ -31,7 +31,6 @@ class AlloProfClusteringP2P(AbsTaskClustering): eval_langs=["fra-Latn"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py b/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py index 74f5bddcaa..1a51cd86f7 100644 --- a/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py +++ b/mteb/tasks/Clustering/fra/AlloProfClusteringS2S.py @@ -31,7 +31,6 @@ class AlloProfClusteringS2S(AbsTaskClustering): eval_langs=["fra-Latn"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/fra/HALClusteringS2S.py b/mteb/tasks/Clustering/fra/HALClusteringS2S.py index 7b1f40e3e6..ef066fd3af 100644 --- a/mteb/tasks/Clustering/fra/HALClusteringS2S.py +++ b/mteb/tasks/Clustering/fra/HALClusteringS2S.py @@ -33,7 +33,6 @@ class 
HALClusteringS2S(AbsTaskClustering): eval_langs=["fra-Latn"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, diff --git a/mteb/tasks/Clustering/multilingual/IndicReviewsClusteringP2P.py b/mteb/tasks/Clustering/multilingual/IndicReviewsClusteringP2P.py index 8f649a745b..8310f023df 100644 --- a/mteb/tasks/Clustering/multilingual/IndicReviewsClusteringP2P.py +++ b/mteb/tasks/Clustering/multilingual/IndicReviewsClusteringP2P.py @@ -1,10 +1,5 @@ from __future__ import annotations -from typing import Any - -import datasets -import numpy as np - from mteb.abstasks.AbsTaskClustering import AbsTaskClustering from mteb.abstasks.MultilingualTask import MultilingualTask from mteb.abstasks.TaskMetadata import TaskMetadata @@ -30,9 +25,8 @@ class IndicReviewsClusteringP2P(AbsTaskClustering, MultilingualTask): metadata = TaskMetadata( name="IndicReviewsClusteringP2P", dataset={ - "path": "ai4bharat/IndicSentiment", - "revision": "ccb472517ce32d103bba9d4f5df121ed5a6592a4", - "trust_remote_code": True, + "path": "mteb/IndicReviewsClusteringP2P", + "revision": "add94d3b9154cc561bbad0e16ee66ebf5941f8a4", }, description="Clustering of reviews from IndicSentiment dataset. 
Clustering of 14 sets on the generic categories label.", reference="https://arxiv.org/abs/2212.05409", @@ -57,29 +51,3 @@ class IndicReviewsClusteringP2P(AbsTaskClustering, MultilingualTask): doi = {10.18653/v1/2023.acl-long.693} }""", ) - - def load_data(self, **kwargs: Any) -> None: - """Load dataset from HuggingFace hub""" - if self.data_loaded: - return - self.dataset = {} - for lang in self.hf_subsets: - self.dataset[lang] = datasets.load_dataset( - name=f"translation-{lang}", - **self.metadata_dict["dataset"], - ) - self.dataset_transform() - self.data_loaded = True - - def dataset_transform(self) -> None: - for lang in self.hf_subsets: - self.dataset[lang].pop("validation") - - texts = self.dataset[lang]["test"]["INDIC REVIEW"] - labels = self.dataset[lang]["test"]["GENERIC CATEGORIES"] - - new_format = { - "sentences": [split.tolist() for split in np.array_split(texts, 5)], - "labels": [split.tolist() for split in np.array_split(labels, 5)], - } - self.dataset[lang]["test"] = datasets.Dataset.from_dict(new_format) diff --git a/mteb/tasks/Clustering/zho/CMTEBClustering.py b/mteb/tasks/Clustering/zho/CMTEBClustering.py index fa0704b098..7b79fa616b 100644 --- a/mteb/tasks/Clustering/zho/CMTEBClustering.py +++ b/mteb/tasks/Clustering/zho/CMTEBClustering.py @@ -143,7 +143,6 @@ class CLSClusteringS2S(AbsTaskClustering): eval_langs=["cmn-Hans"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, @@ -179,7 +178,6 @@ class CLSClusteringP2P(AbsTaskClustering): eval_langs=["cmn-Hans"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, @@ -325,7 +323,6 @@ class ThuNewsClusteringS2S(AbsTaskClustering): eval_langs=["cmn-Hans"], main_score="v_measure", date=None, - form=None, domains=None, task_subtypes=None, license=None, @@ -368,7 +365,6 @@ class ThuNewsClusteringP2P(AbsTaskClustering): eval_langs=["cmn-Hans"], main_score="v_measure", date=None, - form=None, domains=None, 
task_subtypes=None, license=None, diff --git a/mteb/tasks/InstructionReranking/multilingual/mFollowIR.py b/mteb/tasks/InstructionReranking/multilingual/mFollowIR.py index 7a2974dea2..b42f86b7c5 100644 --- a/mteb/tasks/InstructionReranking/multilingual/mFollowIR.py +++ b/mteb/tasks/InstructionReranking/multilingual/mFollowIR.py @@ -1,5 +1,7 @@ from __future__ import annotations +from logging import getLogger + import datasets from mteb.abstasks.MultilingualTask import MultilingualTask @@ -7,6 +9,8 @@ from ....abstasks.AbsTaskReranking import AbsTaskReranking +logger = getLogger(__name__) + _LANGUAGES = { "fas": ["fas-Arab"], "rus": ["rus-Cyrl"], @@ -58,7 +62,7 @@ def load_data( loading_lang = lang.split("-")[1] # don't care about the eng part else: loading_lang = lang - print(f"Loading data for {lang} from {loading_lang}") + logger.info(f"Loading data for {lang} from {loading_lang}") # Load corpus data corpus_data = datasets.load_dataset( @@ -176,7 +180,7 @@ def load_data(self, **kwargs): self.data_loaded = True -class mFollowIR(MultilingualTask, AbsTaskReranking): +class mFollowIR(AbsTaskReranking, MultilingualTask): metadata = TaskMetadata( name="mFollowIR", description="This tasks measures retrieval instruction following ability on NeuCLIR narratives for the mFollowIR benchmark on the Farsi, Russian, and Chinese languages.", diff --git a/mteb/tasks/InstructionRetrieval/eng/InstructIR.py b/mteb/tasks/InstructionRetrieval/eng/InstructIR.py index 910a3a5bae..1e2f40cd25 100644 --- a/mteb/tasks/InstructionRetrieval/eng/InstructIR.py +++ b/mteb/tasks/InstructionRetrieval/eng/InstructIR.py @@ -35,15 +35,4 @@ class InstructIR(AbsTaskRetrieval): archivePrefix={{arXiv}}, primaryClass={{cs.CL}} }""", - descriptive_stats={ - "n_samples": {"test": 2255}, - "test": { - "num_samples": 375, - "num_positive": 375, - "num_negative": 375, - "avg_query_len": 50.205333333333336, - "avg_positive_len": 6.013333333333334, - "avg_negative_len": 13.986666666666666, - }, - }, ) diff --git 
a/mteb/tasks/PairClassification/__init__.py b/mteb/tasks/PairClassification/__init__.py index c2057a4952..1193728659 100644 --- a/mteb/tasks/PairClassification/__init__.py +++ b/mteb/tasks/PairClassification/__init__.py @@ -11,6 +11,9 @@ from .hye.ArmenianParaphrasePC import * from .ind.IndoNLI import * from .kor.KlueNLI import * +from .multilingual.IndicXnliPairClassification import ( + IndicXnliPairClassification as IndicXnliPairClassification, +) from .multilingual.OpusparcusPC import * from .multilingual.PawsXPairClassification import * from .multilingual.RTE3 import * diff --git a/mteb/tasks/PairClassification/multilingual/IndicXnliPairClassification.py b/mteb/tasks/PairClassification/multilingual/IndicXnliPairClassification.py index 33fa179737..2c6ef5f2c1 100644 --- a/mteb/tasks/PairClassification/multilingual/IndicXnliPairClassification.py +++ b/mteb/tasks/PairClassification/multilingual/IndicXnliPairClassification.py @@ -1,8 +1,10 @@ from __future__ import annotations -from mteb.abstasks import AbsTaskPairClassification, MultilingualTask +from mteb.abstasks.MultilingualTask import MultilingualTask from mteb.abstasks.TaskMetadata import TaskMetadata +from ....abstasks.AbsTaskPairClassification import AbsTaskPairClassification + _LANGUAGES = { "as": ["asm-Beng"], "bn": ["ben-Beng"], @@ -56,7 +58,7 @@ class IndicXnliPairClassification(AbsTaskPairClassification, MultilingualTask): author = {Aggarwal, Divyanshu and Gupta, Vivek and Kunchukuttan, Anoop}, title = {IndicXNLI: Evaluating Multilingual Inference for Indian Languages}, publisher = {arXiv}, - year = {2022}, + year = {2022}, copyright = {Creative Commons Attribution 4.0 International} } """, diff --git a/mteb/tasks/PairClassification/multilingual/XNLI.py b/mteb/tasks/PairClassification/multilingual/XNLI.py index 8f3f795bad..c72d1694a3 100644 --- a/mteb/tasks/PairClassification/multilingual/XNLI.py +++ b/mteb/tasks/PairClassification/multilingual/XNLI.py @@ -110,8 +110,8 @@ class XNLIV2(MultilingualTask, 
AbsTaskPairClassification): metadata = TaskMetadata( name="XNLIV2", dataset={ - "path": "mteb/xnli2.0-multi-pair", - "revision": "5b7d477a8c62cdd18e2fed7e015497c20b4371ad", + "path": "mteb/XNLIV2", + "revision": "06108371a8bceee5024a527c4330baa29eb5a013", }, description=""" This is subset of 'XNLI 2.0: Improving XNLI dataset and performance on Cross Lingual Understanding' @@ -140,30 +140,4 @@ class XNLIV2(MultilingualTask, AbsTaskPairClassification): organization={IEEE} } """, - # average of premise and hypothesis ) - - def dataset_transform(self): - _dataset = {} - for lang in self.hf_subsets: - _dataset[lang] = {} - self.dataset[lang] = self.stratified_subsampling( - self.dataset[lang], seed=self.seed, splits=self.metadata.eval_splits - ) - for split in self.metadata.eval_splits: - # 0=entailment, 2=contradiction. Filter out neutral to match the task. - # Then map entailment as positive (1) and contradiction as negative (0). - hf_dataset = self.dataset[lang][split].filter( - lambda x: x["label"] in [0, 2] - ) - hf_dataset = hf_dataset.map( - lambda example: {"label": 0 if example["label"] == 2 else 1} - ) - _dataset[lang][split] = [ - { - "sentence1": hf_dataset["premise"], - "sentence2": hf_dataset["hypothesis"], - "labels": hf_dataset["label"], - } - ] - self.dataset = _dataset diff --git a/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py b/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py index 51d39c770c..a6e9dac45a 100644 --- a/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py +++ b/mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py @@ -11,8 +11,8 @@ class AskUbuntuDupQuestions(AbsTaskReranking): description="AskUbuntu Question Dataset - Questions from AskUbuntu with manual annotations marking pairs of questions as similar or non-similar", reference="https://github.com/taolei87/askubuntu", dataset={ - "path": "mteb/askubuntudupquestions-reranking", - "revision": "2000358ca161889fa9c082cb41daa8dcfb161a54", + "path": "mteb/AskUbuntuDupQuestions", + "revision": 
"c5691e3c48741d5f83b5cc8e630653d7a8cfc048", }, type="Reranking", category="s2s", diff --git a/mteb/tasks/Reranking/zho/CMTEBReranking.py b/mteb/tasks/Reranking/zho/CMTEBReranking.py index d6ff57a2a9..34ed14e342 100644 --- a/mteb/tasks/Reranking/zho/CMTEBReranking.py +++ b/mteb/tasks/Reranking/zho/CMTEBReranking.py @@ -20,7 +20,6 @@ class T2Reranking(AbsTaskReranking): eval_langs=["cmn-Hans"], main_score="map_at_1000", date=None, - form=None, domains=None, task_subtypes=None, license=None, @@ -55,7 +54,6 @@ class MMarcoReranking(AbsTaskReranking): eval_langs=["cmn-Hans"], main_score="map_at_1000", date=None, - form=None, domains=None, task_subtypes=None, license=None, @@ -127,7 +125,6 @@ class CMedQAv2(AbsTaskReranking): eval_langs=["cmn-Hans"], main_score="map_at_1000", date=None, - form=None, domains=["Medical", "Written"], task_subtypes=None, license=None, diff --git a/mteb/tasks/Retrieval/eng/BrightRetrieval.py b/mteb/tasks/Retrieval/eng/BrightRetrieval.py index 4a9b2e743d..37256918d2 100644 --- a/mteb/tasks/Retrieval/eng/BrightRetrieval.py +++ b/mteb/tasks/Retrieval/eng/BrightRetrieval.py @@ -40,18 +40,16 @@ class BrightRetrieval(MultilingualTask, AbsTaskRetrieval): "revision": "a75a0eb", }, reference="https://huggingface.co/datasets/xlangai/BRIGHT", - description=("Bright retrieval dataset."), + description="Bright retrieval dataset.", type="Retrieval", category="s2p", eval_splits=EVAL_SPLITS, eval_langs=DOMAINS_langs, main_score="ndcg_at_10", date=("2024-03-01", "2024-06-01"), - form=["written"], - domains=["Non-fiction"], + domains=["Written", "Non-fiction"], task_subtypes=["Article retrieval"], license="cc-by-4.0", - socioeconomic_status="low", annotations_creators="derived", dialect=[], sample_creation="found", diff --git a/mteb/tasks/Retrieval/multilingual/WikipediaRetrievalMultilingual.py b/mteb/tasks/Retrieval/multilingual/WikipediaRetrievalMultilingual.py index a78fa4110d..f51ebac2f2 100644 --- 
a/mteb/tasks/Retrieval/multilingual/WikipediaRetrievalMultilingual.py +++ b/mteb/tasks/Retrieval/multilingual/WikipediaRetrievalMultilingual.py @@ -1,7 +1,5 @@ from __future__ import annotations -from datasets import load_dataset - from mteb.abstasks.MultilingualTask import MultilingualTask from mteb.abstasks.TaskMetadata import TaskMetadata @@ -27,76 +25,14 @@ } -# adapted from MIRACLRetrieval -def _load_data( - path: str, - langs: list, - split: str, - cache_dir: str = None, - revision_queries: str = None, - revision_corpus: str = None, - revision_qrels: str = None, -): - queries = {lang: {split: {}} for lang in langs} - corpus = {lang: {split: {}} for lang in langs} - qrels = {lang: {split: {}} for lang in langs} - - for lang in langs: - queries_path = path - corpus_path = path.replace("queries", "corpus") - qrels_path = path.replace("queries", "qrels") - queries_lang = load_dataset( - queries_path, - lang, - split=split, - cache_dir=cache_dir, - revision=revision_queries, - ) - corpus_lang = load_dataset( - corpus_path, - lang, - split=split, - cache_dir=cache_dir, - revision=revision_corpus, - ) - qrels_lang = load_dataset( - qrels_path, - lang, - split=split, - cache_dir=cache_dir, - revision=revision_qrels, - ) - # don't pass on titles to make task harder - corpus_lang_dict = {doc["_id"]: {"text": doc["text"]} for doc in corpus_lang} - queries_lang_dict = {query["_id"]: query["text"] for query in queries_lang} - # qrels_lang_dict = {qrel["query-id"]: {qrel["corpus-id"]: qrel["score"]} for qrel in qrels_lang} - - qrels_lang_dict = {} - for qrel in qrels_lang: - if qrel["score"] == 0.5: - continue - # score = 0 if qrel["score"] == 0.5 else qrel["score"] - # score = int(score) - score = int(qrel["score"]) - qrels_lang_dict[qrel["query-id"]] = {qrel["corpus-id"]: score} - - corpus[lang][split] = corpus_lang_dict - queries[lang][split] = queries_lang_dict - qrels[lang][split] = qrels_lang_dict - - return corpus, queries, qrels - - -class 
WikipediaRetrievalMultilingual(MultilingualTask, AbsTaskRetrieval): +class WikipediaRetrievalMultilingual(AbsTaskRetrieval, MultilingualTask): metadata = TaskMetadata( name="WikipediaRetrievalMultilingual", description="The dataset is derived from Cohere's wikipedia-2023-11 dataset and contains synthetically generated queries.", reference="https://huggingface.co/datasets/ellamind/wikipedia-2023-11-retrieval-multilingual-queries", dataset={ - "path": "ellamind/wikipedia-2023-11-retrieval-multilingual-queries", - "revision": "3b6ea595c94bac3448a2ad167ca2e06abd340d6e", # avoid validation error - "revision_corpus": "f20ac0c449c85358d3d5c72a95f92f1eddc98aa5", - "revision_qrels": "ec88a7bb2da034d538e98e3122d2c98530ca1c8d", + "path": "mteb/WikipediaRetrievalMultilingual", + "revision": "5f6c91d21f2f5b9afb663858d19848fbd223c775", }, type="Retrieval", category="s2p", @@ -113,19 +49,3 @@ class WikipediaRetrievalMultilingual(MultilingualTask, AbsTaskRetrieval): sample_creation="LM-generated and verified", bibtex_citation="", ) - - def load_data(self, **kwargs): - if self.data_loaded: - return - - self.corpus, self.queries, self.relevant_docs = _load_data( - path=self.metadata_dict["dataset"]["path"], - langs=self.hf_subsets, - split=self.metadata_dict["eval_splits"][0], - cache_dir=kwargs.get("cache_dir", None), - revision_queries=self.metadata_dict["dataset"]["revision"], - revision_corpus=self.metadata_dict["dataset"]["revision_corpus"], - revision_qrels=self.metadata_dict["dataset"]["revision_qrels"], - ) - - self.data_loaded = True diff --git a/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py b/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py index ad26652ccd..d579245439 100644 --- a/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py +++ b/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py @@ -1,31 +1,10 @@ from __future__ import annotations -from collections import defaultdict - -from datasets import DatasetDict, load_dataset - from mteb.abstasks.TaskMetadata import TaskMetadata from 
....abstasks.AbsTaskRetrieval import AbsTaskRetrieval -def load_retrieval_data(dataset_path, dataset_revision, qrel_revision, eval_splits): - eval_split = eval_splits[0] - dataset = load_dataset(dataset_path, revision=dataset_revision) - qrels = load_dataset(dataset_path + "-qrels", revision=qrel_revision)[eval_split] - - corpus = {e["id"]: {"text": e["text"]} for e in dataset["corpus"]} - queries = {e["id"]: e["text"] for e in dataset["queries"]} - relevant_docs = defaultdict(dict) - for e in qrels: - relevant_docs[e["qid"]][e["pid"]] = e["score"] - - corpus = DatasetDict({eval_split: corpus}) - queries = DatasetDict({eval_split: queries}) - relevant_docs = DatasetDict({eval_split: relevant_docs}) - return corpus, queries, relevant_docs - - class T2Retrieval(AbsTaskRetrieval): ignore_identical_ids = True @@ -34,9 +13,8 @@ class T2Retrieval(AbsTaskRetrieval): description="T2Ranking: A large-scale Chinese Benchmark for Passage Ranking", reference="https://arxiv.org/abs/2304.03679", dataset={ - "path": "C-MTEB/T2Retrieval", - "revision": "8731a845f1bf500a4f111cf1070785c793d10e64", - "qrel_revision": "1c83b8d1544e529875e3f6930f3a1fcf749a8e97", + "path": "mteb/T2Retrieval", + "revision": "cf778c0ea4168ec5174a34d888d6453e4cde9222", }, type="Retrieval", category="s2p", @@ -64,18 +42,6 @@ class T2Retrieval(AbsTaskRetrieval): }, ) - def load_data(self, **kwargs): - if self.data_loaded: - return - - self.corpus, self.queries, self.relevant_docs = load_retrieval_data( - self.metadata_dict["dataset"]["path"], - self.metadata_dict["dataset"]["revision"], - self.metadata_dict["dataset"]["qrel_revision"], - self.metadata_dict["eval_splits"], - ) - self.data_loaded = True - class MMarcoRetrieval(AbsTaskRetrieval): ignore_identical_ids = True @@ -85,9 +51,8 @@ class MMarcoRetrieval(AbsTaskRetrieval): description="MMarcoRetrieval", reference="https://arxiv.org/abs/2309.07597", dataset={ - "path": "C-MTEB/MMarcoRetrieval", - "revision": "539bbde593d947e2a124ba72651aafc09eb33fc2", - 
"qrel_revision": "bae08bb7bddbedb96c7e7db52018a55167b67f89", + "path": "mteb/MMarcoRetrieval", + "revision": "4940a7b26bf53463cfe3435bb8e201963e9c31ae", }, type="Retrieval", category="s2p", @@ -115,18 +80,6 @@ class MMarcoRetrieval(AbsTaskRetrieval): }, ) - def load_data(self, **kwargs): - if self.data_loaded: - return - - self.corpus, self.queries, self.relevant_docs = load_retrieval_data( - self.metadata_dict["dataset"]["path"], - self.metadata_dict["dataset"]["revision"], - self.metadata_dict["dataset"]["qrel_revision"], - self.metadata_dict["eval_splits"], - ) - self.data_loaded = True - class DuRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( @@ -134,9 +87,8 @@ class DuRetrieval(AbsTaskRetrieval): description="A Large-scale Chinese Benchmark for Passage Retrieval from Web Search Engine", reference="https://aclanthology.org/2022.emnlp-main.357.pdf", dataset={ - "path": "C-MTEB/DuRetrieval", - "revision": "a1a333e290fe30b10f3f56498e3a0d911a693ced", - "qrel_revision": "497b7bd1bbb25cb3757ff34d95a8be50a3de2279", + "path": "mteb/DuRetrieval", + "revision": "313c81b51311893c8fd09ca432f96b841ed0ebb3", }, type="Retrieval", category="s2p", @@ -164,18 +116,6 @@ class DuRetrieval(AbsTaskRetrieval): }, ) - def load_data(self, **kwargs): - if self.data_loaded: - return - - self.corpus, self.queries, self.relevant_docs = load_retrieval_data( - self.metadata_dict["dataset"]["path"], - self.metadata_dict["dataset"]["revision"], - self.metadata_dict["dataset"]["qrel_revision"], - self.metadata_dict["eval_splits"], - ) - self.data_loaded = True - class CovidRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( @@ -183,9 +123,8 @@ class CovidRetrieval(AbsTaskRetrieval): description="COVID-19 news articles", reference="https://arxiv.org/abs/2203.03367", dataset={ - "path": "C-MTEB/CovidRetrieval", - "revision": "1271c7809071a13532e05f25fb53511ffce77117", - "qrel_revision": "a9f41b7cdf24785531d12417ce0d1157ed4b39ca", + "path": "mteb/CovidRetrieval", + "revision": 
"9c6dc4b276bb47c3ff725bbc5ffcafd56dded38b", }, type="Retrieval", category="s2p", @@ -206,18 +145,6 @@ class CovidRetrieval(AbsTaskRetrieval): }, ) - def load_data(self, **kwargs): - if self.data_loaded: - return - - self.corpus, self.queries, self.relevant_docs = load_retrieval_data( - self.metadata_dict["dataset"]["path"], - self.metadata_dict["dataset"]["revision"], - self.metadata_dict["dataset"]["qrel_revision"], - self.metadata_dict["eval_splits"], - ) - self.data_loaded = True - class CmedqaRetrieval(AbsTaskRetrieval): metadata = TaskMetadata( @@ -225,9 +152,8 @@ class CmedqaRetrieval(AbsTaskRetrieval): description="Online medical consultation text. Used the CMedQAv2 as its underlying dataset.", reference="https://aclanthology.org/2022.emnlp-main.357.pdf", dataset={ - "path": "C-MTEB/CmedqaRetrieval", - "revision": "cd540c506dae1cf9e9a59c3e06f42030d54e7301", - "qrel_revision": "279d737f36c731c8ff6e2b055f31fe02216fa23d", + "path": "mteb/CmedqaRetrieval", + "revision": "c476f85bf03d6642ec66bf54b9a551c88108bbb4", }, type="Retrieval", category="s2p", @@ -248,18 +174,6 @@ class CmedqaRetrieval(AbsTaskRetrieval): }, ) - def load_data(self, **kwargs): - if self.data_loaded: - return - - self.corpus, self.queries, self.relevant_docs = load_retrieval_data( - self.metadata_dict["dataset"]["path"], - self.metadata_dict["dataset"]["revision"], - self.metadata_dict["dataset"]["qrel_revision"], - self.metadata_dict["eval_splits"], - ) - self.data_loaded = True - class EcomRetrieval(AbsTaskRetrieval): ignore_identical_ids = True @@ -269,9 +183,8 @@ class EcomRetrieval(AbsTaskRetrieval): description="EcomRetrieval", reference="https://arxiv.org/abs/2203.03367", dataset={ - "path": "C-MTEB/EcomRetrieval", - "revision": "687de13dc7294d6fd9be10c6945f9e8fec8166b9", - "qrel_revision": "39c90699b034ec22ac45b3abf5b0bbb5ffd421f9", + "path": "mteb/EcomRetrieval", + "revision": "fa705ce5418e91636b1eaeaf43f34c15aa3f5a8a", }, type="Retrieval", category="s2p", @@ -292,18 +205,6 @@ class 
EcomRetrieval(AbsTaskRetrieval): }, ) - def load_data(self, **kwargs): - if self.data_loaded: - return - - self.corpus, self.queries, self.relevant_docs = load_retrieval_data( - self.metadata_dict["dataset"]["path"], - self.metadata_dict["dataset"]["revision"], - self.metadata_dict["dataset"]["qrel_revision"], - self.metadata_dict["eval_splits"], - ) - self.data_loaded = True - class MedicalRetrieval(AbsTaskRetrieval): ignore_identical_ids = True @@ -313,9 +214,8 @@ class MedicalRetrieval(AbsTaskRetrieval): description="MedicalRetrieval", reference="https://arxiv.org/abs/2203.03367", dataset={ - "path": "C-MTEB/MedicalRetrieval", - "revision": "2039188fb5800a9803ba5048df7b76e6fb151fc6", - "qrel_revision": "37b8efec53c54c3d9c6af212f6710b62ccdf895c", + "path": "mteb/MedicalRetrieval", + "revision": "023ae3b2c6b96f583c4ff9b3f9239c93f7885c20", }, type="Retrieval", category="s2p", @@ -336,18 +236,6 @@ class MedicalRetrieval(AbsTaskRetrieval): }, ) - def load_data(self, **kwargs): - if self.data_loaded: - return - - self.corpus, self.queries, self.relevant_docs = load_retrieval_data( - self.metadata_dict["dataset"]["path"], - self.metadata_dict["dataset"]["revision"], - self.metadata_dict["dataset"]["qrel_revision"], - self.metadata_dict["eval_splits"], - ) - self.data_loaded = True - class VideoRetrieval(AbsTaskRetrieval): ignore_identical_ids = True @@ -357,9 +245,8 @@ class VideoRetrieval(AbsTaskRetrieval): description="VideoRetrieval", reference="https://arxiv.org/abs/2203.03367", dataset={ - "path": "C-MTEB/VideoRetrieval", - "revision": "58c2597a5943a2ba48f4668c3b90d796283c5639", - "qrel_revision": "faa71382b6a29cf1778d1f436b963e75cb5b927c", + "path": "mteb/VideoRetrieval", + "revision": "146a9d5e4fd7a9c182b6b92cccb6a3753994305c", }, type="Retrieval", category="s2p", @@ -379,15 +266,3 @@ class VideoRetrieval(AbsTaskRetrieval): "query": "Given a video search query, retrieve the titles of relevant videos" }, ) - - def load_data(self, **kwargs): - if self.data_loaded: 
- return - - self.corpus, self.queries, self.relevant_docs = load_retrieval_data( - self.metadata_dict["dataset"]["path"], - self.metadata_dict["dataset"]["revision"], - self.metadata_dict["dataset"]["qrel_revision"], - self.metadata_dict["eval_splits"], - ) - self.data_loaded = True diff --git a/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py b/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py index b5a5c67b86..0f37f78a80 100644 --- a/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py @@ -20,22 +20,12 @@ } -def categorize_float(float_value): - left_bound = int(float_value) - right_bound = left_bound + 1 - if float_value - left_bound < right_bound - float_value: - return left_bound - else: - return right_bound - - class IndicCrosslingualSTS(AbsTaskSTS, MultilingualTask): - fast_loading = True metadata = TaskMetadata( name="IndicCrosslingualSTS", dataset={ - "path": "mteb/indic_sts", - "revision": "0ca7b87dda68ef4ebb2f50a20a62b9dbebcac3e4", + "path": "mteb/IndicCrosslingualSTS", + "revision": "f0366eb5a20087355c0e131162bbed943ba54b51", }, description="This is a Semantic Textual Similarity testset between English and 12 high-resource Indic languages.", reference="https://huggingface.co/datasets/jaygala24/indic_sts", @@ -81,20 +71,3 @@ def metadata_dict(self) -> dict[str, str]: metadata_dict["min_score"] = 0 metadata_dict["max_score"] = 5 return metadata_dict - - def dataset_transform(self) -> None: - # Convert to standard format - for lang in self.hf_subsets: - self.dataset[lang] = self.dataset[lang].rename_columns( - {"english_sentence": "sentence1", "indic_sentence": "sentence2"} - ) - self.dataset[lang] = ( - self.dataset[lang] - .map(lambda x: {"label": round(x["score"])}) - .class_encode_column("label") - ) - self.dataset[lang]["test"] = self.dataset[lang]["test"].train_test_split( - test_size=256, - seed=self.seed, - stratify_by_column="label", - )["test"] diff --git a/tests/test_TaskMetadata.py 
b/tests/test_TaskMetadata.py index 19cfd0be75..873264bc80 100644 --- a/tests/test_TaskMetadata.py +++ b/tests/test_TaskMetadata.py @@ -1,8 +1,7 @@ from __future__ import annotations -import logging - import pytest +from pydantic import ValidationError from mteb import AbsTask from mteb.abstasks.TaskMetadata import TaskMetadata @@ -178,6 +177,7 @@ "TamilNewsClassification", "TenKGnadClusteringP2P.v2", "TenKGnadClusteringS2S.v2", + "IndicXnliPairClassification", ] @@ -259,8 +259,8 @@ def test_given_missing_revision_path_then_it_throws(): def test_given_none_revision_path_then_it_logs_warning(caplog): - with caplog.at_level(logging.WARNING): - my_task = TaskMetadata( + with pytest.raises(ValidationError): + TaskMetadata( name="MyTask", dataset={"path": "test/dataset", "revision": None}, description="testing", @@ -281,18 +281,6 @@ def test_given_none_revision_path_then_it_logs_warning(caplog): bibtex_citation="", ) - assert my_task.dataset["revision"] is None - - warning_logs = [ - record for record in caplog.records if record.levelname == "WARNING" - ] - assert len(warning_logs) == 1 - assert ( - warning_logs[0].message - == "Revision missing for the dataset test/dataset. " - + "It is encourage to specify a dataset revision for reproducability." - ) - def test_unfilled_metadata_is_not_filled(): assert ( @@ -511,10 +499,11 @@ def test_disallow_trust_remote_code_in_new_datasets(): "MLSUMClusteringS2S.v2", "SwednClusteringP2P", "SwednClusteringS2S", + "IndicXnliPairClassification", ] assert ( - 135 == len(exceptions) + 136 == len(exceptions) ), "The number of exceptions has changed. Please do not add new datasets to this list." 
exceptions = [] diff --git a/tests/test_load_results/test_mteb_results.py b/tests/test_load_results/test_mteb_results.py index 6c22b390f3..84071b735f 100644 --- a/tests/test_load_results/test_mteb_results.py +++ b/tests/test_load_results/test_mteb_results.py @@ -34,7 +34,6 @@ class DummyTask(AbsTask): annotations_creators="derived", dialect=[], bibtex_citation="", - descriptive_stats={}, modalities=["text"], sample_creation="created", ) From d0aa3a7e7bb0fbdb1e9a29a320e1bfb659a31228 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Tue, 10 Dec 2024 01:28:24 +0500 Subject: [PATCH 19/40] fix: remove `*` imports (#1569) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Count unique texts, data leaks in calculate metrics (#1438) * add more stat * add more stat * update statistics * fix: update task metadata to allow for null (#1448) * Update tasks table * 1.19.5 Automatically generated by python-semantic-release * Fix: Made data parsing in the leaderboard figure more robust (#1450) Bugfixes with data parsing in main figure * Fixed task loading (#1451) * Fixed task result loading from disk * Fixed task result loading from disk * fix: publish (#1452) * 1.19.6 Automatically generated by python-semantic-release * fix: Fix load external results with `None` mteb_version (#1453) * fix * lint * 1.19.7 Automatically generated by python-semantic-release * WIP: Polishing up leaderboard UI (#1461) * fix: Removed column wrapping on the table, so that it remains readable * Added disclaimer to figure * fix: Added links to task info table, switched out license with metric * fix: loading pre 1.11.0 (#1460) * small fix * fix: fix * 1.19.8 Automatically generated by python-semantic-release * fix: swap touche2020 to maintain compatibility (#1469) swap touche2020 for parity * 1.19.9 Automatically generated by python-semantic-release * docs: Add sum per language for task counts (#1468) * add sum per lang * add sort by sum option * make lint * 
fix: pinned datasets to <3.0.0 (#1470) * 1.19.10 Automatically generated by python-semantic-release * feat: add CUREv1 retrieval dataset (#1459) * feat: add CUREv1 dataset --------- Co-authored-by: nadshe Co-authored-by: olivierr42 Co-authored-by: Daniel Buades Marcos * feat: add missing domains to medical tasks * feat: modify benchmark tasks * chore: benchmark naming --------- Co-authored-by: nadshe Co-authored-by: olivierr42 * Update tasks table * 1.20.0 Automatically generated by python-semantic-release * fix: check if `model` attr of model exists (#1499) * check if model attr of model exists * lint * Fix retrieval evaluator * 1.20.1 Automatically generated by python-semantic-release * fix: Leaderboard demo data loading (#1507) * Made get_scores error tolerant * Added join_revisions, made get_scores failsafe * Fetching metadata fixed fr HF models * Added failsafe metadata fetching to leaderboard code * Added revision joining to leaderboard app * fix * Only show models that have metadata, when filter_models is called * Ran linting * 1.20.2 Automatically generated by python-semantic-release * fix: leaderboard only shows models that have ModelMeta (#1508) Filtering for models that have metadata * 1.20.3 Automatically generated by python-semantic-release * fix: align readme with current mteb (#1493) * align readme with current mteb * align with mieb branch * fix test * 1.20.4 Automatically generated by python-semantic-release * docs: Add lang family mapping and map to task table (#1486) * add lang family mapping and map to task table * make lint * add back some unclassified lang codes * Update tasks table * fix: Ensure that models match the names on embedding-benchmarks/results (#1519) * 1.20.5 Automatically generated by python-semantic-release * fix: Adding missing metadata on models and mathcing names up with the results repo (#1528) * Added Voyage 3 models * Added correct metadata to Cohere models and matched names with the results repo * 1.20.6 Automatically 
generated by python-semantic-release * feat: Evaluate missing splits (#1525) * fix: evaluate missing splits (#1268) * implement partial evaluation for missing splits * lint * requested changes done from scratch * test for missing split evaluation added * uncomment test * lint * avoid circular import * use TaskResult * skip tests for now --------- Co-authored-by: Isaac Chung * got test_all_splits_evaluated passing * tests passing * address review comments * make lint * handle None cases for kg_co2_emissions * use new results info --------- Co-authored-by: Thivyanth * 1.21.0 Automatically generated by python-semantic-release * fix: Correct typos superseeded -> superseded (#1532) fix typo -> superseded * 1.21.1 Automatically generated by python-semantic-release * fix: Task load data error for SICK-BR-STS and XStance (#1534) * fix task load data for two tasks * correct dataset keys * 1.21.2 Automatically generated by python-semantic-release * fix: Proprietary models now get correctly shown in leaderboard (#1530) * Fixed showing proprietary models in leaderboard * Added links to all OpenAI models * Fixed table formatting issues * Bumped Gradio version * 1.21.3 Automatically generated by python-semantic-release * docs: Add Model Meta parameters and metadata (#1536) * add multi_qa_MiniLM_L6_cos_v1 model meta * add all_mpnet_base_v2 * add parameters to model meta * make lint * add extra params to meta * fix: add more model meta (jina, e5) (#1537) * add e5 model meta * address review comments * 1.21.4 Automatically generated by python-semantic-release * Add cohere models (#1538) * fix: bug cohere names * format * fix: add nomic models (#1543) #1515 * fix: Added all-minilm-l12-v2 (#1542) #1515 * fix: Added arctic models (#1541) #1515 * fix: add sentence trimming to OpenAIWrapper (#1526) * fix: add sentence trimming to OpenAIWrapper * fix: import tiktoken library inside encode function * fix: check tokenizer library installed and update ModelMeta to pass tokenizer_name * fix: 
pass tokenizer_name, max_tokens to loader * fix: make tokenizer_name None for default * fix: delete changes for ModelMeta * fix: fix revision to 2 for OpenAI models * fix: add docstring for OpenAIWrapper * fix: lint * feat: add openai optional dependency set * fix: add sleep for too many requests * fix: add lint * fix: delete evaluate file * 1.21.5 Automatically generated by python-semantic-release * fix: Fixed metadata errors (#1547) * 1.21.6 Automatically generated by python-semantic-release * fix: remove curev1 from multlingual (#1552) Seems like it was added here: https://github.com/embeddings-benchmark/mteb/commit/1cc6c9e0fe62ca4e77708b641823fa1a121f048b * 1.21.7 Automatically generated by python-semantic-release * fix: Add Model2vec (#1546) * Added Model2Vec wrapper * Added Model2vec models * Added model2vec models to registry * Added model2vec as a dependency * Ran linting * Update mteb/models/model2vec_models.py Co-authored-by: Kenneth Enevoldsen * Update mteb/models/model2vec_models.py Co-authored-by: Kenneth Enevoldsen * Added adapted_from and superseeded_by to model2vec models. 
* Added missing import * Moved pyproject.toml to optional dependencies * Fixed typos * Added import error and changed model to model_name * Added Numpy to frameworks * Added Numpy to frameworks * Corrected false info on model2vec models * Replaced np.inf with maxint * Update mteb/models/model2vec_models.py Co-authored-by: Isaac Chung * Added option to have infinite max tokens, added it to Model2vec --------- Co-authored-by: Kenneth Enevoldsen Co-authored-by: Isaac Chung * Made result loading more permissive, changed eval splits for HotPotQA and DBPedia (#1554) * Removed train and dev from eval splits on HotpotQA * Removed dev from eval splits on DBPedia * Made task_results validation more permissive * Readded exception in get_score * Ran linting * 1.21.8 Automatically generated by python-semantic-release * docs: Correction of SICK-R metadata (#1558) * Correction of SICK-R metadata * Correction of SICK-R metadata --------- Co-authored-by: rposwiata * feat(google_models): fix issues and add support for `text-embedding-005` and `text-multilingual-embedding-002` (#1562) * fix: google_models batching and prompt * feat: add text-embedding-005 and text-multilingual-embedding-002 * chore: `make lint` errors * fix: address PR comments * 1.22.0 Automatically generated by python-semantic-release * fix(bm25s): search implementation (#1566) fix: bm25s implementation * 1.22.1 Automatically generated by python-semantic-release * docs: Fix dependency library name for bm25s (#1568) * fix: bm25s implementation * correct library name --------- Co-authored-by: Daniel Buades Marcos * fix: Add training dataset to model meta (#1561) * fix: Add training dataset to model meta Adresses #1556 * Added docs * format * feat: (cohere_models) cohere_task_type issue, batch requests and tqdm for visualization (#1564) * feat: batch requests to cohere models * fix: use correct task_type * feat: use tqdm with openai * fix: explicitely set `show_progress_bar` to False * fix(publichealth-qa): ignore 
rows with `None` values in `question` or `answer` (#1565) * 1.23.0 Automatically generated by python-semantic-release * fix wongnai * update inits * fix tests * lint * update imports * fix tests * lint --------- Co-authored-by: Kenneth Enevoldsen Co-authored-by: github-actions[bot] Co-authored-by: github-actions Co-authored-by: Márton Kardos Co-authored-by: Isaac Chung Co-authored-by: Napuh <55241721+Napuh@users.noreply.github.com> Co-authored-by: Daniel Buades Marcos Co-authored-by: nadshe Co-authored-by: olivierr42 Co-authored-by: Thivyanth Co-authored-by: Youngjoon Jang <82500463+yjoonjang@users.noreply.github.com> Co-authored-by: Rafał Poświata --- README.md | 31 +- docs/create_tasks_table.py | 22 +- mteb/__init__.py | 16 +- mteb/abstasks/AbsTask.py | 6 +- mteb/abstasks/TaskMetadata.py | 1 + mteb/abstasks/__init__.py | 44 +- mteb/benchmarks/__init__.py | 57 +- .../Classification/Ddisco.json | 44 + .../GeorgianSentimentClassification.json | 38 + .../WongnaiReviewsClassification.json | 56 + mteb/evaluation/MTEB.py | 157 +- mteb/evaluation/__init__.py | 42 +- .../evaluators/RetrievalEvaluator.py | 6 +- mteb/evaluation/evaluators/__init__.py | 42 +- mteb/evaluation/evaluators/model_classes.py | 2 +- mteb/language_family.json | 62611 ++++++++++++++++ mteb/languages.py | 7 +- mteb/leaderboard/app.py | 9 +- mteb/leaderboard/table.py | 60 +- mteb/load_results/benchmark_results.py | 173 +- mteb/load_results/task_results.py | 8 +- mteb/model_meta.py | 12 +- mteb/models/__init__.py | 6 +- mteb/models/arctic_models.py | 130 +- mteb/models/bm25.py | 14 +- mteb/models/cohere_models.py | 238 +- mteb/models/e5_models.py | 62 +- mteb/models/google_models.py | 137 +- mteb/models/model2vec_models.py | 209 + mteb/models/nomic_models.py | 66 + mteb/models/openai_models.py | 115 +- mteb/models/overview.py | 45 +- mteb/models/sentence_transformers_models.py | 153 +- mteb/models/voyage_models.py | 56 +- mteb/overview.py | 16 +- mteb/tasks/BitextMining/__init__.py | 71 +- 
mteb/tasks/BitextMining/dan/__init__.py | 5 + mteb/tasks/BitextMining/kat/__init__.py | 5 + .../BitextMining/multilingual/__init__.py | 41 + mteb/tasks/BitextMining/srn/__init__.py | 5 + mteb/tasks/BitextMining/vie/__init__.py | 5 + mteb/tasks/Classification/__init__.py | 681 +- mteb/tasks/Classification/ara/__init__.py | 21 + mteb/tasks/Classification/ben/__init__.py | 11 + mteb/tasks/Classification/bul/__init__.py | 7 + mteb/tasks/Classification/ces/__init__.py | 17 + mteb/tasks/Classification/dan/__init__.py | 15 + mteb/tasks/Classification/deu/__init__.py | 8 + mteb/tasks/Classification/ell/__init__.py | 5 + mteb/tasks/Classification/eng/__init__.py | 267 + mteb/tasks/Classification/est/__init__.py | 5 + mteb/tasks/Classification/fas/__init__.py | 5 + mteb/tasks/Classification/fil/__init__.py | 6 + mteb/tasks/Classification/fin/__init__.py | 5 + mteb/tasks/Classification/fra/__init__.py | 6 + mteb/tasks/Classification/guj/__init__.py | 5 + mteb/tasks/Classification/heb/__init__.py | 5 + mteb/tasks/Classification/hin/__init__.py | 6 + mteb/tasks/Classification/hrv/__init__.py | 5 + mteb/tasks/Classification/ind/__init__.py | 11 + mteb/tasks/Classification/ita/__init__.py | 8 + mteb/tasks/Classification/jav/__init__.py | 5 + mteb/tasks/Classification/jpn/__init__.py | 5 + mteb/tasks/Classification/kan/__init__.py | 5 + mteb/tasks/Classification/kat/__init__.py | 5 + mteb/tasks/Classification/kor/__init__.py | 8 + mteb/tasks/Classification/kur/__init__.py | 5 + mteb/tasks/Classification/mal/__init__.py | 5 + mteb/tasks/Classification/mar/__init__.py | 5 + mteb/tasks/Classification/mkd/__init__.py | 7 + .../Classification/multilingual/__init__.py | 63 + mteb/tasks/Classification/mya/__init__.py | 5 + mteb/tasks/Classification/nep/__init__.py | 5 + mteb/tasks/Classification/nld/__init__.py | 7 + mteb/tasks/Classification/nob/__init__.py | 6 + mteb/tasks/Classification/ory/__init__.py | 5 + mteb/tasks/Classification/pan/__init__.py | 5 + 
mteb/tasks/Classification/pol/__init__.py | 17 + mteb/tasks/Classification/por/__init__.py | 5 + mteb/tasks/Classification/ron/__init__.py | 7 + mteb/tasks/Classification/rus/__init__.py | 19 + mteb/tasks/Classification/san/__init__.py | 5 + mteb/tasks/Classification/sin/__init__.py | 6 + mteb/tasks/Classification/slk/__init__.py | 8 + mteb/tasks/Classification/slv/__init__.py | 5 + mteb/tasks/Classification/spa/__init__.py | 6 + mteb/tasks/Classification/ssw/__init__.py | 5 + mteb/tasks/Classification/svk/__init__.py | 7 + mteb/tasks/Classification/swa/__init__.py | 5 + mteb/tasks/Classification/swe/__init__.py | 11 + mteb/tasks/Classification/tam/__init__.py | 5 + mteb/tasks/Classification/tel/__init__.py | 5 + ...on .py => WongnaiReviewsClassification.py} | 8 +- mteb/tasks/Classification/tha/__init__.py | 6 + mteb/tasks/Classification/tsn/__init__.py | 5 + mteb/tasks/Classification/tur/__init__.py | 8 + mteb/tasks/Classification/ukr/__init__.py | 5 + mteb/tasks/Classification/urd/__init__.py | 5 + mteb/tasks/Classification/vie/__init__.py | 5 + mteb/tasks/Classification/zho/__init__.py | 21 + mteb/tasks/Classification/zul/__init__.py | 5 + mteb/tasks/Clustering/__init__.py | 220 +- mteb/tasks/Clustering/deu/__init__.py | 17 + mteb/tasks/Clustering/eng/__init__.py | 57 + mteb/tasks/Clustering/fra/__init__.py | 14 + mteb/tasks/Clustering/jpn/__init__.py | 6 + .../tasks/Clustering/multilingual/__init__.py | 22 + mteb/tasks/Clustering/nob/__init__.py | 21 + mteb/tasks/Clustering/pol/__init__.py | 19 + mteb/tasks/Clustering/rom/__init__.py | 5 + mteb/tasks/Clustering/rus/__init__.py | 11 + mteb/tasks/Clustering/spa/__init__.py | 5 + mteb/tasks/Clustering/swe/__init__.py | 6 + mteb/tasks/Clustering/zho/__init__.py | 23 + mteb/tasks/InstructionReranking/__init__.py | 18 +- .../InstructionReranking/eng/__init__.py | 11 + .../multilingual/__init__.py | 5 + mteb/tasks/InstructionRetrieval/__init__.py | 4 +- .../InstructionRetrieval/eng/__init__.py | 5 + 
.../multilingual/__init__.py | 3 + .../MultiLabelClassification/__init__.py | 20 +- .../MultiLabelClassification/kor/__init__.py | 5 + .../MultiLabelClassification/mlt/__init__.py | 5 + .../multilingual/__init__.py | 5 + .../MultiLabelClassification/por/__init__.py | 5 + .../MultiLabelClassification/rus/__init__.py | 6 + mteb/tasks/PairClassification/__init__.py | 78 +- mteb/tasks/PairClassification/ara/__init__.py | 5 + mteb/tasks/PairClassification/ces/__init__.py | 5 + mteb/tasks/PairClassification/deu/__init__.py | 4 + mteb/tasks/PairClassification/eng/__init__.py | 13 + mteb/tasks/PairClassification/fas/__init__.py | 5 + mteb/tasks/PairClassification/hye/__init__.py | 5 + mteb/tasks/PairClassification/ind/__init__.py | 5 + mteb/tasks/PairClassification/kor/__init__.py | 5 + .../multilingual/XStance.py | 17 + .../multilingual/__init__.py | 18 + mteb/tasks/PairClassification/pol/__init__.py | 5 + mteb/tasks/PairClassification/por/__init__.py | 6 + mteb/tasks/PairClassification/rus/__init__.py | 5 + mteb/tasks/PairClassification/zho/__init__.py | 5 + mteb/tasks/Reranking/__init__.py | 47 +- mteb/tasks/Reranking/eng/__init__.py | 17 + mteb/tasks/Reranking/fra/__init__.py | 6 + mteb/tasks/Reranking/jpn/__init__.py | 5 + mteb/tasks/Reranking/multilingual/__init__.py | 7 + mteb/tasks/Reranking/rus/__init__.py | 5 + mteb/tasks/Reranking/zho/__init__.py | 5 + mteb/tasks/Retrieval/__init__.py | 491 +- mteb/tasks/Retrieval/ara/__init__.py | 5 + mteb/tasks/Retrieval/code/__init__.py | 29 + mteb/tasks/Retrieval/dan/__init__.py | 12 + mteb/tasks/Retrieval/deu/__init__.py | 17 + mteb/tasks/Retrieval/ell/__init__.py | 5 + mteb/tasks/Retrieval/eng/DBPediaRetrieval.py | 2 +- mteb/tasks/Retrieval/eng/HotpotQARetrieval.py | 2 +- mteb/tasks/Retrieval/eng/__init__.py | 142 + mteb/tasks/Retrieval/est/__init__.py | 5 + mteb/tasks/Retrieval/fra/__init__.py | 8 + mteb/tasks/Retrieval/hun/__init__.py | 5 + mteb/tasks/Retrieval/jpn/__init__.py | 17 + mteb/tasks/Retrieval/kat/__init__.py 
| 5 + mteb/tasks/Retrieval/kor/__init__.py | 6 + .../multilingual/PublicHealthQARetrieval.py | 3 + mteb/tasks/Retrieval/multilingual/__init__.py | 54 + mteb/tasks/Retrieval/nob/__init__.py | 6 + mteb/tasks/Retrieval/pol/__init__.py | 32 + mteb/tasks/Retrieval/rus/__init__.py | 6 + mteb/tasks/Retrieval/slk/__init__.py | 6 + mteb/tasks/Retrieval/spa/__init__.py | 6 + mteb/tasks/Retrieval/swe/__init__.py | 6 + mteb/tasks/Retrieval/tur/__init__.py | 5 + mteb/tasks/Retrieval/vie/__init__.py | 5 + mteb/tasks/Retrieval/zho/__init__.py | 25 + mteb/tasks/STS/__init__.py | 99 +- mteb/tasks/STS/deu/__init__.py | 5 + mteb/tasks/STS/eng/SickrSTS.py | 48 +- mteb/tasks/STS/eng/__init__.py | 21 + mteb/tasks/STS/fao/__init__.py | 5 + mteb/tasks/STS/fin/__init__.py | 5 + mteb/tasks/STS/fra/__init__.py | 5 + mteb/tasks/STS/jpn/__init__.py | 6 + mteb/tasks/STS/kor/__init__.py | 6 + mteb/tasks/STS/multilingual/__init__.py | 16 + mteb/tasks/STS/pol/__init__.py | 5 + mteb/tasks/STS/por/__init__.py | 6 + mteb/tasks/STS/ron/__init__.py | 5 + mteb/tasks/STS/rus/__init__.py | 6 + mteb/tasks/STS/spa/__init__.py | 5 + mteb/tasks/STS/zho/__init__.py | 5 + mteb/tasks/SpeedTask/__init__.py | 6 +- mteb/tasks/Summarization/__init__.py | 15 +- mteb/tasks/Summarization/eng/__init__.py | 5 + mteb/tasks/Summarization/fra/__init__.py | 5 + mteb/tasks/__init__.py | 1306 +- pyproject.toml | 7 +- scripts/create_language_family_mapping.py | 47 + scripts/generate_imports.py | 106 + tests/test_TaskMetadata.py | 3 +- tests/test_benchmark/mock_models.py | 3 +- tests/test_benchmark/mock_tasks.py | 160 +- tests/test_benchmark/test_benchmark.py | 29 +- .../test_evaluation/test_split_evaluation.py | 91 + tests/test_load_results/test_mteb_results.py | 5 +- tests/test_overview.py | 8 +- tests/test_reproducible_workflow.py | 7 +- tests/test_tasks/test_all_abstasks.py | 6 +- 207 files changed, 69186 insertions(+), 819 deletions(-) create mode 100644 mteb/descriptive_stats/Classification/Ddisco.json create mode 100644 
mteb/descriptive_stats/Classification/GeorgianSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/WongnaiReviewsClassification.json create mode 100644 mteb/language_family.json create mode 100644 mteb/models/model2vec_models.py create mode 100644 mteb/tasks/Classification/est/__init__.py create mode 100644 mteb/tasks/Classification/jpn/__init__.py create mode 100644 mteb/tasks/Classification/ory/__init__.py create mode 100644 mteb/tasks/Classification/pan/__init__.py create mode 100644 mteb/tasks/Classification/slk/__init__.py rename mteb/tasks/Classification/tha/{WongnaiReviewsClassification .py => WongnaiReviewsClassification.py} (90%) create mode 100644 mteb/tasks/Clustering/rom/__init__.py create mode 100644 mteb/tasks/InstructionRetrieval/multilingual/__init__.py create mode 100644 mteb/tasks/PairClassification/ara/__init__.py create mode 100644 mteb/tasks/PairClassification/ces/__init__.py create mode 100644 mteb/tasks/PairClassification/fas/__init__.py create mode 100644 mteb/tasks/PairClassification/ind/__init__.py create mode 100644 mteb/tasks/PairClassification/kor/__init__.py create mode 100644 mteb/tasks/PairClassification/por/__init__.py create mode 100644 mteb/tasks/Reranking/jpn/__init__.py create mode 100644 mteb/tasks/Retrieval/hun/__init__.py create mode 100644 mteb/tasks/Retrieval/jpn/__init__.py create mode 100644 mteb/tasks/STS/fao/__init__.py create mode 100644 mteb/tasks/STS/jpn/__init__.py create mode 100644 mteb/tasks/STS/por/__init__.py create mode 100644 mteb/tasks/STS/ron/__init__.py create mode 100644 mteb/tasks/STS/rus/__init__.py create mode 100644 scripts/create_language_family_mapping.py create mode 100644 scripts/generate_imports.py create mode 100644 tests/test_evaluation/test_split_evaluation.py diff --git a/README.md b/README.md index ec402579ad..faf016bbc7 100644 --- a/README.md +++ b/README.md @@ -46,10 +46,8 @@ from sentence_transformers import SentenceTransformer # Define the 
sentence-transformers model name model_name = "average_word_embeddings_komninos" -# or directly from huggingface: -# model_name = "sentence-transformers/all-MiniLM-L6-v2" -model = SentenceTransformer(model_name) +model = mteb.get_model(model_name) # if the model is not implemented in MTEB it will be eq. to SentenceTransformer(model_name) tasks = mteb.get_tasks(tasks=["Banking77Classification"]) evaluation = mteb.MTEB(tasks=tasks) results = evaluation.run(model, output_folder=f"results/{model_name}") @@ -221,7 +219,10 @@ Note that the public leaderboard uses the test splits for all datasets except MS Models should implement the following interface, implementing an `encode` function taking as inputs a list of sentences, and returning a list of embeddings (embeddings can be `np.array`, `torch.tensor`, etc.). For inspiration, you can look at the [mteb/mtebscripts repo](https://github.com/embeddings-benchmark/mtebscripts) used for running diverse models via SLURM scripts for the paper. ```python +import mteb from mteb.encoder_interface import PromptType +import numpy as np + class CustomModel: def encode( @@ -245,7 +246,7 @@ class CustomModel: pass model = CustomModel() -tasks = mteb.get_task("Banking77Classification") +tasks = mteb.get_tasks(tasks=["Banking77Classification"]) evaluation = MTEB(tasks=tasks) evaluation.run(model) ``` @@ -379,6 +380,28 @@ results = mteb.load_results(models=models, tasks=tasks) df = results_to_dataframe(results) ``` + + + +
+ Annotate Contamination in the training data of a model + +### Annotate Contamination + +Have you found contamination in the training data of a model? Please let us know, either by opening an issue or ideally by submitting a PR +annotating the training datasets of the model: + +```py +model_w_contamination = ModelMeta( + name = "model-with-contamination" + ... + training_datasets: {"ArguAna": # name of dataset within MTEB + ["test"]} # the splits that have been trained on + ... +) +``` + +
diff --git a/docs/create_tasks_table.py b/docs/create_tasks_table.py index 13e9830276..4a1be0cd89 100644 --- a/docs/create_tasks_table.py +++ b/docs/create_tasks_table.py @@ -8,6 +8,7 @@ import mteb from mteb.abstasks.TaskMetadata import PROGRAMMING_LANGS, TASK_TYPE +from mteb.languages import ISO_TO_FAM_LEVEL0, ISO_TO_LANGUAGE def author_from_bibtex(bibtex: str | None) -> str: @@ -82,10 +83,21 @@ def create_task_lang_table(tasks: list[mteb.AbsTask], sort_by_sum=False) -> str: ## Wrangle for polars pl_table_dict = [] for lang, d in table_dict.items(): - d.update({"0-lang": lang}) # for sorting columns + d.update({"0-lang-code": lang}) # for sorting columns pl_table_dict.append(d) - df = pl.DataFrame(pl_table_dict).sort(by="0-lang") + df = pl.DataFrame(pl_table_dict).sort(by="0-lang-code") + df = df.with_columns( + pl.col("0-lang-code") + .replace_strict(ISO_TO_LANGUAGE, default="unknown") + .alias("1-lang-name") + ) + df = df.with_columns( + pl.col("0-lang-code") + .replace_strict(ISO_TO_FAM_LEVEL0, default="Unclassified") + .alias("2-lang-fam") + ) + df = df.with_columns(sum=pl.sum_horizontal(get_args(TASK_TYPE))) df = df.select(sorted(df.columns)) if sort_by_sum: @@ -96,7 +108,7 @@ def create_task_lang_table(tasks: list[mteb.AbsTask], sort_by_sum=False) -> str: task_names_md = " | ".join(sorted(get_args(TASK_TYPE))) horizontal_line_md = "---|---" * (len(sorted(get_args(TASK_TYPE))) + 1) table = f""" -| Language | {task_names_md} | Sum | +| ISO Code | Language | Family | {task_names_md} | Sum | |{horizontal_line_md}| """ @@ -119,14 +131,14 @@ def insert_tables( file_path: str, tables: list[str], tags: list[str] = ["TASKS TABLE"] ) -> None: """Insert tables within and or similar tags.""" - md = Path(file_path).read_text() + md = Path(file_path).read_text(encoding="utf-8") for table, tag in zip(tables, tags): start = f"" end = f"" md = md.replace(md[md.index(start) + len(start) : md.index(end)], table) - Path(file_path).write_text(md) + 
Path(file_path).write_text(md, encoding="utf-8") def main(): diff --git a/mteb/__init__.py b/mteb/__init__.py index 6de017b1f1..ff4e065efb 100644 --- a/mteb/__init__.py +++ b/mteb/__init__.py @@ -10,9 +10,16 @@ MTEB_RETRIEVAL_WITH_INSTRUCTIONS, CoIR, ) -from mteb.evaluation import * +from mteb.encoder_interface import Encoder +from mteb.evaluation import MTEB from mteb.load_results import BenchmarkResults, load_results -from mteb.models import get_model, get_model_meta, get_model_metas +from mteb.load_results.task_results import TaskResult +from mteb.models import ( + SentenceTransformerWrapper, + get_model, + get_model_meta, + get_model_metas, +) from mteb.overview import TASKS_REGISTRY, get_task, get_tasks from .benchmarks.benchmarks import Benchmark @@ -20,7 +27,6 @@ __version__ = version("mteb") # fetch version from install metadata - __all__ = [ "MTEB_ENG_CLASSIC", "MTEB_MAIN_RU", @@ -40,4 +46,8 @@ "get_benchmarks", "BenchmarkResults", "BENCHMARK_REGISTRY", + "MTEB", + "TaskResult", + "SentenceTransformerWrapper", + "Encoder", ] diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index e82878c803..c65f25d55d 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -72,11 +72,11 @@ def __init__(self, seed: int = 42, **kwargs: Any): torch.manual_seed(self.seed) torch.cuda.manual_seed_all(self.seed) - def check_if_dataset_is_superseeded(self): - """Check if the dataset is superseeded by a newer version""" + def check_if_dataset_is_superseded(self): + """Check if the dataset is superseded by a newer version""" if self.superseded_by: logger.warning( - f"Dataset '{self.metadata.name}' is superseeded by '{self.superseded_by}', you might consider using the newer version of the dataset." + f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}', you might consider using the newer version of the dataset." 
) def dataset_transform(self): diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 65e7ddbb86..e3038b6348 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -168,6 +168,7 @@ "cc0-1.0", "bsd-3-clause", "gpl-3.0", + "lgpl-3.0", "cdla-sharing-1.0", "mpl-2.0", ] diff --git a/mteb/abstasks/__init__.py b/mteb/abstasks/__init__.py index 086866b997..a95eeaf09d 100644 --- a/mteb/abstasks/__init__.py +++ b/mteb/abstasks/__init__.py @@ -1,15 +1,33 @@ from __future__ import annotations -from ..evaluation.LangMapping import * -from .AbsTask import * -from .AbsTaskBitextMining import * -from .AbsTaskClassification import * -from .AbsTaskClustering import * -from .AbsTaskMultilabelClassification import * -from .AbsTaskPairClassification import * -from .AbsTaskReranking import * -from .AbsTaskRetrieval import * -from .AbsTaskSpeedTask import * -from .AbsTaskSTS import * -from .AbsTaskSummarization import * -from .MultilingualTask import * +from .AbsTask import AbsTask +from .AbsTaskBitextMining import AbsTaskBitextMining +from .AbsTaskClassification import AbsTaskClassification +from .AbsTaskClustering import AbsTaskClustering +from .AbsTaskClusteringFast import AbsTaskClusteringFast +from .AbsTaskMultilabelClassification import AbsTaskMultilabelClassification +from .AbsTaskPairClassification import AbsTaskPairClassification +from .AbsTaskReranking import AbsTaskReranking +from .AbsTaskRetrieval import AbsTaskRetrieval +from .AbsTaskSpeedTask import AbsTaskSpeedTask +from .AbsTaskSTS import AbsTaskSTS +from .AbsTaskSummarization import AbsTaskSummarization +from .MultilingualTask import MultilingualTask +from .TaskMetadata import TaskMetadata + +__all__ = [ + "AbsTask", + "AbsTaskBitextMining", + "AbsTaskClassification", + "AbsTaskClustering", + "AbsTaskClusteringFast", + "AbsTaskMultilabelClassification", + "AbsTaskPairClassification", + "AbsTaskReranking", + "AbsTaskRetrieval", + "AbsTaskSpeedTask", + "AbsTaskSTS", + 
"AbsTaskSummarization", + "MultilingualTask", + "TaskMetadata", +] diff --git a/mteb/benchmarks/__init__.py b/mteb/benchmarks/__init__.py index 653b97c6f7..b44a52ed37 100644 --- a/mteb/benchmarks/__init__.py +++ b/mteb/benchmarks/__init__.py @@ -1,4 +1,57 @@ from __future__ import annotations -from mteb.benchmarks.benchmarks import * -from mteb.benchmarks.get_benchmark import * +from mteb.benchmarks.benchmarks import ( + BRIGHT, + LONG_EMBED, + MTEB_DEU, + MTEB_EN, + MTEB_ENG_CLASSIC, + MTEB_EU, + MTEB_FRA, + MTEB_INDIC, + MTEB_JPN, + MTEB_KOR, + MTEB_MAIN_RU, + MTEB_MINERS_BITEXT_MINING, + MTEB_POL, + MTEB_RETRIEVAL_LAW, + MTEB_RETRIEVAL_MEDICAL, + MTEB_RETRIEVAL_WITH_INSTRUCTIONS, + SEB, + Benchmark, + CoIR, + MTEB_code, + MTEB_multilingual, +) +from mteb.benchmarks.get_benchmark import ( + BENCHMARK_REGISTRY, + get_benchmark, + get_benchmarks, +) + +__all__ = [ + "Benchmark", + "MTEB_EN", + "MTEB_ENG_CLASSIC", + "MTEB_MAIN_RU", + "MTEB_RETRIEVAL_WITH_INSTRUCTIONS", + "MTEB_RETRIEVAL_LAW", + "MTEB_RETRIEVAL_MEDICAL", + "MTEB_MINERS_BITEXT_MINING", + "SEB", + "CoIR", + "MTEB_FRA", + "MTEB_DEU", + "MTEB_KOR", + "MTEB_POL", + "MTEB_code", + "MTEB_multilingual", + "MTEB_JPN", + "MTEB_INDIC", + "MTEB_EU", + "LONG_EMBED", + "BRIGHT", + "BENCHMARK_REGISTRY", + "get_benchmarks", + "get_benchmark", +] diff --git a/mteb/descriptive_stats/Classification/Ddisco.json b/mteb/descriptive_stats/Classification/Ddisco.json new file mode 100644 index 0000000000..c9b0bfd67e --- /dev/null +++ b/mteb/descriptive_stats/Classification/Ddisco.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 201, + "number_of_characters": 200062, + "number_texts_intersect_with_train": 1, + "min_text_length": 529, + "average_text_length": 995.3333333333334, + "max_text_length": 2050, + "unique_text": 201, + "unique_labels": 3, + "labels": { + "2": { + "count": 76 + }, + "3": { + "count": 115 + }, + "1": { + "count": 10 + } + } + }, + "train": { + "num_samples": 801, + "number_of_characters": 779241, + 
"number_texts_intersect_with_train": null, + "min_text_length": 492, + "average_text_length": 972.8352059925094, + "max_text_length": 2411, + "unique_text": 796, + "unique_labels": 3, + "labels": { + "1": { + "count": 30 + }, + "2": { + "count": 325 + }, + "3": { + "count": 446 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/GeorgianSentimentClassification.json b/mteb/descriptive_stats/Classification/GeorgianSentimentClassification.json new file mode 100644 index 0000000000..f6e00d147e --- /dev/null +++ b/mteb/descriptive_stats/Classification/GeorgianSentimentClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1200, + "number_of_characters": 141679, + "number_texts_intersect_with_train": 0, + "min_text_length": 25, + "average_text_length": 118.06583333333333, + "max_text_length": 566, + "unique_text": 1200, + "unique_labels": 2, + "labels": { + "1": { + "count": 600 + }, + "0": { + "count": 600 + } + } + }, + "train": { + "num_samples": 330, + "number_of_characters": 37706, + "number_texts_intersect_with_train": null, + "min_text_length": 19, + "average_text_length": 114.26060606060607, + "max_text_length": 315, + "unique_text": 330, + "unique_labels": 2, + "labels": { + "1": { + "count": 165 + }, + "0": { + "count": 165 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/WongnaiReviewsClassification.json b/mteb/descriptive_stats/Classification/WongnaiReviewsClassification.json new file mode 100644 index 0000000000..9896719ce5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/WongnaiReviewsClassification.json @@ -0,0 +1,56 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 1198297, + "number_texts_intersect_with_train": 0, + "min_text_length": 200, + "average_text_length": 585.10595703125, + "max_text_length": 14899, + "unique_text": 2048, + "unique_labels": 5, + "labels": { + "3": { + "count": 983 + }, + "1": { + "count": 68 + }, + "4": 
{ + "count": 351 + }, + "2": { + "count": 629 + }, + "0": { + "count": 17 + } + } + }, + "train": { + "num_samples": 40000, + "number_of_characters": 21614868, + "number_texts_intersect_with_train": null, + "min_text_length": 200, + "average_text_length": 540.3717, + "max_text_length": 20557, + "unique_text": 39993, + "unique_labels": 5, + "labels": { + "2": { + "count": 12171 + }, + "3": { + "count": 18770 + }, + "4": { + "count": 6799 + }, + "1": { + "count": 1845 + }, + "0": { + "count": 415 + } + } + } +} \ No newline at end of file diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index a04e55f96b..6ca449e1c5 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -5,7 +5,7 @@ import os import traceback from collections.abc import Iterable -from copy import copy +from copy import copy, deepcopy from datetime import datetime from itertools import chain from pathlib import Path @@ -14,18 +14,17 @@ import datasets from codecarbon import EmissionsTracker -from sentence_transformers import SentenceTransformer +from sentence_transformers import CrossEncoder, SentenceTransformer +from mteb.abstasks.AbsTask import ScoresDict from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta from mteb.models import model_meta_from_sentence_transformers -from ..abstasks import * -from ..abstasks import AbsTask, AbsTaskReranking +from ..abstasks.AbsTask import AbsTask +from ..abstasks.AbsTaskReranking import AbsTaskReranking from ..load_results.task_results import TaskResult from ..models.sentence_transformer_wrapper import SentenceTransformerWrapper -from ..models.wrapper import Wrapper -from ..tasks import * from . 
import LangMapping logger = logging.getLogger(__name__) @@ -86,6 +85,8 @@ def __init__( self._version = version self.err_logs_path = err_logs_path + self.last_evaluated_splits = {} + self.select_tasks(**kwargs) def deprecation_warning( @@ -315,6 +316,70 @@ def _run_eval( tock = time() return results, tick, tock + @staticmethod + def _get_missing_splits( + existing_results: TaskResult | None, task_eval_splits: list[str] + ) -> list[str]: + if existing_results is None: + return task_eval_splits + + missing_splits = [] + for split in task_eval_splits: + if split not in existing_results.scores: + missing_splits.append(split) + elif not existing_results.scores[ + split + ]: # Check if the split has any scores + missing_splits.append(split) + + return missing_splits + + @staticmethod + def _merge_results( + existing_results: TaskResult, new_results: TaskResult + ) -> TaskResult: + merged_scores = existing_results.scores.copy() + + for split, scores in new_results.scores.items(): + if split in merged_scores: + merged_scores[split] = MTEB._merge_split_scores( + merged_scores[split], scores + ) + else: + merged_scores[split] = scores + + existing_kg_co2_emissions = ( + existing_results.kg_co2_emissions + if existing_results.kg_co2_emissions + else 0 + ) + new_kg_co2_emissions = ( + new_results.kg_co2_emissions if new_results.kg_co2_emissions else 0 + ) + merged_kg_co2_emissions = None + if existing_kg_co2_emissions and new_kg_co2_emissions: + merged_kg_co2_emissions = existing_kg_co2_emissions + new_kg_co2_emissions + merged_results = TaskResult( + dataset_revision=new_results.dataset_revision, + task_name=new_results.task_name, + mteb_version=new_results.mteb_version, + scores=merged_scores, + evaluation_time=existing_results.evaluation_time + + new_results.evaluation_time, + kg_co2_emissions=merged_kg_co2_emissions, + ) + + return merged_results + + @staticmethod + def _merge_split_scores( + existing_scores: list[ScoresDict], new_scores: list[ScoresDict] + ) -> 
list[ScoresDict]: + merged = {score["hf_subset"]: score for score in existing_scores} + for score in new_scores: + merged[score["hf_subset"]] = score + return list(merged.values()) + def run( self, model: SentenceTransformer | Encoder, @@ -370,7 +435,7 @@ def run( meta = self.create_model_meta(model) output_path = self.create_output_folder(meta, output_folder) - if not isinstance(model, Wrapper): + if isinstance(model, (SentenceTransformer, CrossEncoder)): model = SentenceTransformerWrapper(model) if output_path: @@ -386,38 +451,62 @@ def run( original_tasks = ( self.tasks.copy() ) # save them in case we re-use the object (e.g. for reranking) + + # To evaluate missing splits, we keep track of the task name and the corresponding splits. + self.last_evaluated_splits = {} + while len(self.tasks) > 0: task = self.tasks[0] logger.info( f"\n\n********************** Evaluating {task.metadata.name} **********************" ) - # skip evaluation if results folder exists and overwrite_results is False if output_path: save_path = output_path / f"{task.metadata.name}{task.save_suffix}.json" - if save_path.exists() and not overwrite_results: - logger.info( - f"{task.metadata.name} results already exists. Loading results from disk. Set overwrite_results=True to overwrite." - ) - mteb_results = TaskResult.from_disk(save_path) - evaluation_results.append(mteb_results) - del self.tasks[0] # empty memory - continue - try: + existing_results = None + if save_path.exists(): + existing_results = TaskResult.from_disk(save_path) + + if not overwrite_results: + logger.info( + f"{task.metadata.name} results already exists. Loading results from disk. Set overwrite_results=True to overwrite." 
+ ) + evaluation_results.append(existing_results) + del self.tasks[0] # empty memory + continue + task_eval_splits = ( eval_splits if eval_splits is not None else task.eval_splits ) + missing_splits = self._get_missing_splits( + existing_results, task_eval_splits + ) + + if not missing_splits and existing_results: + evaluation_results.append(existing_results) + + # no splits are evaluated. + self.last_evaluated_splits[task.metadata.name] = [] + del self.tasks[0] + continue + + if missing_splits: + logger.info( + f"Running evaluation for missing splits: {missing_splits}" + ) - # load data - logger.info(f"Loading dataset for {task.metadata_dict['name']}") - task.check_if_dataset_is_superseeded() + try: + task.check_if_dataset_is_superseded() task.load_data(eval_splits=task_eval_splits, **kwargs) # run evaluation task_results = {} evaluation_time = 0 kg_co2_emissions: int | None = 0 if co2_tracker else None - for split in task_eval_splits: + + self.last_evaluated_splits[task.metadata.name] = [] + + for split in missing_splits: if co2_tracker: logger.warning( "Evaluating multiple MTEB runs simultaniously will produce incorrect CO₂ results" @@ -450,6 +539,8 @@ def run( **kwargs, ) + self.last_evaluated_splits[task.metadata.name].append(split) + logger.info( f"Evaluation for {task.metadata_dict['name']} on {split} took {tock - tick:.2f} seconds" ) @@ -459,21 +550,22 @@ def run( if verbosity >= 1: logger.info(f"Scores: {results}") - mteb_task_result = TaskResult.from_task_results( + new_results = TaskResult.from_task_results( task, task_results, evaluation_time=evaluation_time, kg_co2_emissions=kg_co2_emissions, ) - # save results + if existing_results: + merged_results = self._merge_results(existing_results, new_results) + else: + merged_results = new_results + if output_path: - with open(save_path, "w") as f_out: - json.dump( - mteb_task_result.to_dict(), f_out, indent=2, sort_keys=True - ) + merged_results.to_disk(save_path) - 
evaluation_results.append(mteb_task_result) + evaluation_results.append(merged_results) except Exception as e: logger.error( @@ -492,7 +584,6 @@ def run( # empty memory del self.tasks[0] - # restore original tasks self.tasks = original_tasks return evaluation_results @@ -543,3 +634,11 @@ def _save_model_metadata(model_meta: ModelMeta, output_folder: Path) -> None: with save_path.open("w") as f: json.dump(model_meta.to_dict(), f) + + def get_last_evaluated_splits(self): + """Returns a dictionary of tasks and their evaluated splits from the most recent run. + Tasks with empty lists indicate that results already existed and no splits were evaluated. + """ + return deepcopy( + {task: list(splits) for task, splits in self.last_evaluated_splits.items()} + ) diff --git a/mteb/evaluation/__init__.py b/mteb/evaluation/__init__.py index c0a1596c91..58db80480c 100644 --- a/mteb/evaluation/__init__.py +++ b/mteb/evaluation/__init__.py @@ -1,3 +1,43 @@ from __future__ import annotations -from .MTEB import * +from .evaluators import ( + BitextMiningEvaluator, + ClassificationEvaluator, + ClusteringEvaluator, + DenseRetrievalExactSearch, + DeprecatedSummarizationEvaluator, + DRESModel, + Evaluator, + PairClassificationEvaluator, + RetrievalEvaluator, + STSEvaluator, + SummarizationEvaluator, + corpus_to_str, + dot_distance, + kNNClassificationEvaluator, + kNNClassificationEvaluatorPytorch, + logRegClassificationEvaluator, +) +from .LangMapping import LANG_MAPPING +from .MTEB import MTEB + +__all__ = [ + "Evaluator", + "STSEvaluator", + "SummarizationEvaluator", + "DeprecatedSummarizationEvaluator", + "RetrievalEvaluator", + "DRESModel", + "DenseRetrievalExactSearch", + "ClusteringEvaluator", + "BitextMiningEvaluator", + "PairClassificationEvaluator", + "corpus_to_str", + "kNNClassificationEvaluator", + "kNNClassificationEvaluatorPytorch", + "logRegClassificationEvaluator", + "dot_distance", + "LANG_MAPPING", + "MTEB", + "ClassificationEvaluator", +] diff --git 
a/mteb/evaluation/evaluators/RetrievalEvaluator.py b/mteb/evaluation/evaluators/RetrievalEvaluator.py index 8dcac9ab00..3dca66b0fa 100644 --- a/mteb/evaluation/evaluators/RetrievalEvaluator.py +++ b/mteb/evaluation/evaluators/RetrievalEvaluator.py @@ -83,10 +83,10 @@ def __call__( corpus, queries, self.top_k, instructions=instructions, **kwargs ) elif ( - hasattr(self.retriever.model, "mteb_model_meta") - and self.retriever.model.mteb_model_meta.name == "bm25s" + hasattr(self.retriever.model.model, "mteb_model_meta") + and self.retriever.model.model.mteb_model_meta.name == "bm25s" ): - return self.retriever.model.search( + return self.retriever.model.model.search( corpus, queries, self.top_k, diff --git a/mteb/evaluation/evaluators/__init__.py b/mteb/evaluation/evaluators/__init__.py index fc293a3448..ac2a886067 100644 --- a/mteb/evaluation/evaluators/__init__.py +++ b/mteb/evaluation/evaluators/__init__.py @@ -1,9 +1,37 @@ from __future__ import annotations -from .BitextMiningEvaluator import * -from .ClassificationEvaluator import * -from .ClusteringEvaluator import * -from .PairClassificationEvaluator import * -from .RetrievalEvaluator import * -from .STSEvaluator import * -from .SummarizationEvaluator import * +from .BitextMiningEvaluator import BitextMiningEvaluator +from .ClassificationEvaluator import ( + dot_distance, + kNNClassificationEvaluator, + kNNClassificationEvaluatorPytorch, + logRegClassificationEvaluator, +) +from .ClusteringEvaluator import ClusteringEvaluator +from .Evaluator import Evaluator +from .model_classes import DenseRetrievalExactSearch, DRESModel, corpus_to_str +from .PairClassificationEvaluator import PairClassificationEvaluator +from .RetrievalEvaluator import RetrievalEvaluator +from .STSEvaluator import STSEvaluator +from .SummarizationEvaluator import ( + DeprecatedSummarizationEvaluator, + SummarizationEvaluator, +) + +__all__ = [ + "Evaluator", + "STSEvaluator", + "SummarizationEvaluator", + "DeprecatedSummarizationEvaluator", 
+ "RetrievalEvaluator", + "DRESModel", + "DenseRetrievalExactSearch", + "ClusteringEvaluator", + "BitextMiningEvaluator", + "PairClassificationEvaluator", + "corpus_to_str", + "kNNClassificationEvaluator", + "kNNClassificationEvaluatorPytorch", + "logRegClassificationEvaluator", + "dot_distance", +] diff --git a/mteb/evaluation/evaluators/model_classes.py b/mteb/evaluation/evaluators/model_classes.py index 60dea56385..146d529dc9 100644 --- a/mteb/evaluation/evaluators/model_classes.py +++ b/mteb/evaluation/evaluators/model_classes.py @@ -558,5 +558,5 @@ def encode( def is_cross_encoder_compatible(model) -> bool: - op = getattr(model.model, "predict", None) + op = getattr(model, "predict", None) return callable(op) diff --git a/mteb/language_family.json b/mteb/language_family.json new file mode 100644 index 0000000000..5770aa6712 --- /dev/null +++ b/mteb/language_family.json @@ -0,0 +1,62611 @@ +{ + "aaa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Afenmai-Bendel" + }, + "aab": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Alumic", + "level5": "Alumu-Akpondu" + }, + "aac": { + "level0": "Suki-Gogodala", + "level1": "Gogodalic", + "level2": "Ari-Waruna" + }, + "aad": { + "level0": "Sepik" + }, + "aae": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Albanian", + "level3": "Albanian-Tosk", + "level4": "Southern Tosk" + }, + "aaf": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "aag": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central 
Torricelli", + "level3": "Nuclear Palai", + "level4": "Yangum-Ambrak" + }, + "aah": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Arapesh", + "level3": "Mufian-Bukiyip-Abu", + "level4": "Bukiyip-Abu" + }, + "aai": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage" + }, + "aak": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Ankave-Tainae-Akoye" + }, + "aal": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Septentrional", + "level6": "Kotoko Septentrional 1" + }, + "aam": { + "level0": "Bookkeeping" + }, + "aan": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup V", + "level6": "Arawetic" + }, + "aao": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "aap": { + "level0": "Cariban", + "level1": "Pekodian", + "level2": "Xinguan" + }, + "aaq": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Northern Eastern Algonquian", + "level6": "Abenaki" + }, + "aar": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Saho-Afar" + }, + "aas": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South 
Cushitic" + }, + "aat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Albanian", + "level3": "Albanian-Tosk", + "level4": "Southern Tosk" + }, + "aau": { + "level0": "Sepik" + }, + "aaw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "West Arawe" + }, + "aax": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Dumut", + "level6": "Mandobo" + }, + "aay": { + "level0": "Bookkeeping" + }, + "aaz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "West Rote", + "level5": "Dengka-Meto", + "level6": "Meto" + }, + "aba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Agneby" + }, + "abb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Basaa (A.40)", + "level9": "Abo-Barombi" + }, + "abc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic" + }, + "abd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Alabat-Manide Agta" + }, + "abe": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": 
"Maritimes-Southern New England Algonquian", + "level5": "Northern Eastern Algonquian", + "level6": "Abenaki" + }, + "abf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic" + }, + "abg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria", + "level6": "Unclassified Kamano-Yagaria" + }, + "abh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Eastern Arabic", + "level7": "Central Asian Arabic" + }, + "abi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Agneby" + }, + "abj": { + "level0": "Great Andamanese", + "level1": "South Great Andamanese" + }, + "abk": { + "level0": "Abkhaz-Adyge", + "level1": "Abkhaz-Abaza" + }, + "abl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Lampungic" + }, + "abm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Northern Bakor", + "level10": "Abanyom-Nkem-Nkum" + }, + "abn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Abua-Odual" + }, + "abo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "North Tivoid" + }, + "abp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Abellen-Botolan" + }, + "abq": { + "level0": "Abkhaz-Adyge", + "level1": "Abkhaz-Abaza" + }, + "abr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Akanic" + }, + "abs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay", + "level7": "Ambonic Malay" + }, + "abt": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Ambulas-Hanga-Hundi" + }, + "abu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Western Tano" + }, + "abv": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic" + }, + "abw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Omosan" + }, + "abx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw" + }, + "aby": { + "level0": "Yareban", + "level1": "Doriri-Abia" + }, + "abz": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "Central Alor", + "level4": "Abuic" + }, + "aca": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Piapoco-Achagua" + }, + "acb": { + "level0": "Bookkeeping" + }, + "acc": { + "level0": "Bookkeeping" + }, + "acd": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "Mountain Oti North Guang", + "level10": "Gikyode-Ginyanga", + "level11": "Gikyode-Foodo" + }, + "ace": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic" + }, + "acf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French", + "level16": "Lesser Antillean French Creole" + }, + "ach": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo" + }, + "aci": { + "level0": "Great Andamanese", + "level1": "North Andamanese-Akakede", + "level2": "Northern Great Andamanese", + "level3": "Bo-Cari" + }, + "ack": { + "level0": "Great Andamanese", + "level1": "North Andamanese-Akakede", + "level2": "Northern Great Andamanese", + "level3": "Jeru-Kora" + }, + "acl": { + "level0": "Great Andamanese", + "level1": "South Great Andamanese" + }, + "acm": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Eastern Arabic" + }, + "acn": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Achangic" + }, + "acp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", 
+ "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Kamuku-Hungwarya", + "level7": "Kamuku", + "level8": "Rogo-Sagamuk-Sama-Sambuga", + "level9": "Sagamuk-Sama-Sambuga" + }, + "acq": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic" + }, + "acr": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean", + "level5": "Quiche-Achi" + }, + "acs": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Central Je" + }, + "acu": { + "level0": "Chicham", + "level1": "Shuaric" + }, + "acv": { + "level0": "Palaihnihan" + }, + "acw": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic" + }, + "acx": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic" + }, + "acy": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Levantine-Cypriot Arabic" + }, + "acz": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Acheron-Tocho" + }, + "ada": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ga-Dangme" + }, + "adb": { + "level0": "Bookkeeping" + }, + "add": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": 
"Mbam-Nkam", + "level8": "Nkambe" + }, + "ade": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Basila-Adele" + }, + "adf": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic", + "level8": "Dhofaric" + }, + "adg": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic", + "level4": "Upper Arrernte", + "level5": "Central-Eastern Arrernte" + }, + "adh": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo", + "level4": "Adhola-Alur-Luo", + "level5": "Adhola-Luo" + }, + "adi": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Eastern Tani" + }, + "adj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Agneby" + }, + "adl": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Pre-Western Tani", + "level4": "Western Tani", + "level5": "Subansiri" + }, + "adn": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "West Alor" + }, + "ado": { + "level0": "Ramu", + "level1": "Agoan" + }, + "adp": { + "level0": "Bookkeeping" + }, + "adq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe", + "level5": "Eweic" + }, + "adr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat", + "level5": "North Lembata-Adonara" + }, + "ads": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, 
+ "adt": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Northern Thura-Yura" + }, + "adu": { + "level0": "Bookkeeping" + }, + "adw": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Nuclear Kawahiva", + "level8": "Central Kawahiva", + "level9": "Amondava-Uru-Eu-Wau-Wau" + }, + "adx": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "North-Eastern Tibetic" + }, + "ady": { + "level0": "Abkhaz-Adyge", + "level1": "Circassian" + }, + "adz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Upper Markham" + }, + "aea": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Norman Pama", + "level3": "Kuthant-Gurdjar", + "level4": "Rib-Gurdjar" + }, + "aeb": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic", + "level7": "Malta-Tunisian Arabic" + }, + "aec": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic" + }, + "aed": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "aee": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Pashayi", + "level5": "Eastern Pashayi" + }, + "aek": { + 
"level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone", + "level11": "Bwatooic", + "level12": "Haeke-Bwatoo" + }, + "ael": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields" + }, + "aem": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Chutic", + "level3": "East Chutic" + }, + "aen": { + "level0": "Sign Language", + "level1": "Auxiliary Sign Systems" + }, + "aeq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic", + "level10": "Western Gujaratic" + }, + "aer": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic", + "level4": "Upper Arrernte", + "level5": "Central-Eastern Arrernte" + }, + "aeu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Ha-Ya", + "level8": "Akhaic", + "level9": "Akeuic" + }, + "aew": { + "level0": "Keram", + "level1": "East Keram" + }, + "aex": { + "level0": "Bookkeeping" + }, + "aey": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum" + }, + "aez": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", 
+ "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Orokaivic" + }, + "afb": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic" + }, + "afd": { + "level0": "Arafundi" + }, + "afe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic" + }, + "afg": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "afh": { + "level0": "Artificial Language" + }, + "afi": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan" + }, + "afk": { + "level0": "Arafundi" + }, + "afn": { + "level0": "Ijoid" + }, + "afo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau" + }, + "afp": { + "level0": "Arafundi" + }, + "afr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Global Dutch", + "level9": "Afrikaansic" + }, + "afs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Gullah-Nevis-Antigua", + "level15": "Gullah" + }, + "aft": { + "level0": "Nyimang" + }, + "afu": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "South Guang" + }, + "afz": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Eritai-Obokuitai-Biritai" + }, + "aga": { + "level0": "Unattested", + "level1": "Arawakan (Unattested)" + }, + "agb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Mbembe-Legbo", + "level8": "Legboic" + }, + "agc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya", + "level5": "Etulo-Idoma", + "level6": "Nuclear Idoma", + "level7": "Idoma-Agatu-Okpogu" + }, + "agd": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": "Gadsup-Agarabi" + }, + "age": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Angal Mendi" + }, + "agf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuta" + }, + "agg": { + "level0": "Senagi" + }, + "agh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Middle Bomokandian", + "level15": "Late Bomokandian" + }, + "agi": { + "level0": "Unattested", + "level1": "Dravidian (Unattested)" + }, + "agj": { + "level0": 
"Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Amharic-Argobba" + }, + "agk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Inagta Bikol" + }, + "agl": { + "level0": "East Strickland" + }, + "agm": { + "level0": "Angan" + }, + "agn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Kalamian" + }, + "ago": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Ankave-Tainae-Akoye", + "level3": "Tainae-Akoye" + }, + "agq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring", + "level10": "Aghemic", + "level11": "Aghem-Weh" + }, + "agr": { + "level0": "Chicham" + }, + "ags": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid" + }, + "agt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Gaddangic" + }, + "agu": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Mamean", + "level4": "Ixilan" + }, + "agv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon" + }, + "agw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Makira" + }, + "agx": { + "level0": "Nakh-Daghestanian", + "level1": 
"Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Eastern Samur", + "level5": "Tabasaran-Aghul-Lezgi", + "level6": "Aghul-Lezgi", + "level7": "Aghulic" + }, + "agy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran" + }, + "agz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Inagta Bikol" + }, + "aha": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Southern Bia" + }, + "ahb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula", + "level9": "Southeastern Malakula linkage", + "level10": "Port Sandwich-Axamb-Avok" + }, + "ahe": { + "level0": "Bookkeeping" + }, + "ahg": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "Agaw", + "level3": "Northern-Eastern-Western Agaw" + }, + "ahh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu", + "level6": "Mappi-Digul Awyu" + }, + "ahi": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Aizi" + }, + "ahk": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Ha-Ya", + "level8": "Akhaic" + }, + "ahl": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Kposo-Ahlo-Bowili" + }, + "ahm": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Aizi" + }, + "ahn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Ayere-Ahan" + }, + "aho": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic" + }, + "ahp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Unclassified Volta-Congo" + }, + "ahr": { + "level0": "Bookkeeping" + }, + "ahs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Koroic", + "level7": "Tinoric" + }, + "aht": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Southern Alaskan Athabaskan" + }, + "aia": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Makira" + }, + "aib": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan", + "level4": "Modern Turkestan", + "level5": "Uyghuric" + }, + "aic": { + "level0": "Border", + "level1": "Bewani", + "level2": "Pagi-Kilmeri" + }, + "aid": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Linngithigh-Alngith" + }, + "aie": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage" + }, + "aif": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "West Palai", + "level3": "Agi-Yeri" + }, + "aig": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Gullah-Nevis-Antigua" + }, + "aih": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS", + "level4": "Maonan-Mak-Sui", + "level5": "Mak-Ai-Cham" + }, + "aii": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic" + }, + "aij": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Trans-Zab", + "level12": "Western Trans-Zab" + }, + "aik": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + 
"level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Jilic-Eggonic", + "level5": "Eggon-Ake" + }, + "ail": { + "level0": "Bosavi", + "level1": "Bosavi Watershed" + }, + "aim": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Central Old Kuki" + }, + "ain": { + "level0": "Ainu", + "level1": "Hokkaido-Kuril Ainu" + }, + "aio": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Mogaung", + "level12": "Assam Tai A" + }, + "aip": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Kwer-Kopkaka-Burumakok", + "level6": "Kwer-Burumakok" + }, + "aiq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic" + }, + "air": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Samarokena-Airoran" + }, + "ait": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Arikemic" + }, + "aiw": { + "level0": "South Omotic", + "level1": "AHK", + "level2": "Aari-Gayil" + }, + "aix": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": 
"Arawe-Pasismanua", + "level10": "Pasismanua" + }, + "aiy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Gbanu-Manza-Ngbaka", + "level6": "Manza-Ngbaka", + "level7": "Manzaic", + "level8": "Ngbaka-Manza-Ali" + }, + "aja": { + "level0": "Kresh-Aja" + }, + "ajg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe" + }, + "aji": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian", + "level9": "Houailou" + }, + "ajs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "aju": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "ajw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Western West Chadic B.2" + }, + "ajz": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Karbic" + }, + "aka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Akanic" + }, + "akb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Central-Southern Batak", + "level5": "Southern Batak", + "level6": "Angkola-Mandailing" + }, + "akd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + 
"level3": "Delta Cross", + "level4": "Upper Cross" + }, + "ake": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Pemong-Panare", + "level3": "Pemongan", + "level4": "Kapong" + }, + "akf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Yatye-Akpa" + }, + "akg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Central-East Sumbanese", + "level7": "Central Sumbanese" + }, + "akh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Angal Mendi" + }, + "aki": { + "level0": "Ramu", + "level1": "Aian" + }, + "akj": { + "level0": "Great Andamanese", + "level1": "North Andamanese-Akakede", + "level2": "Northern Great Andamanese", + "level3": "Jeru-Kora" + }, + "akk": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "East Semitic" + }, + "akl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan" + }, + "akm": { + "level0": "Great Andamanese", + "level1": "North Andamanese-Akakede", + "level2": "Northern Great Andamanese", + "level3": "Bo-Cari" + }, + "akn": { + "level0": "Bookkeeping" + }, + "ako": { + "level0": "Cariban", + "level1": "Guianan", + "level2": "Taranoan", + "level3": "Tiriyoan" + }, + "akp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Lelemic", + "level5": "Lelemi-Akpafu" + }, + "akq": { + "level0": "Sepik", + "level1": "Yellow River" + }, + "akr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": 
"North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "South Santo", + "level9": "Araki-Tangoa" + }, + "aks": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma A" + }, + "akt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "East Arawe" + }, + "aku": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic", + "level5": "Akum-Beezen" + }, + "akv": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic", + "level4": "Akhvakhic" + }, + "akw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Mboshi (C.20)" + }, + "akx": { + "level0": "Great Andamanese", + "level1": "North Andamanese-Akakede" + }, + "aky": { + "level0": "Great Andamanese", + "level1": "Middle Great Andamanese", + "level2": "Okol-Opucikwar" + }, + "akz": { + "level0": "Muskogean", + "level1": "Alabaman-Koasati" + }, + "ala": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya", + "level5": "Etulo-Idoma", + "level6": "Nuclear Idoma" + }, + "alc": { + "level0": "Kawesqar", + "level1": "North Central Alacufan" + }, + "ald": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Avikam-Alladian" + }, + "ale": { + "level0": "Eskimo-Aleut", + "level1": "Aleutic" + }, + "alf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic", + "level7": "Bukpic" + }, + "alh": { + "level0": "Mangarrayi-Maran", + "level1": "Maran" + }, + "ali": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles" + }, + "alj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Mangyan" + }, + "alk": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric" + }, + "all": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "alm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "South-Central Santo" + }, + "aln": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Albanian" + }, + "alo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "West Piru Bay", + "level5": "Hoamoal", + "level6": "East Hoamoal" + }, + "alp": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram", + "level6": "Ulat Inai" + }, + "alq": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Severn-Algonquin" + }, + "alr": { + "level0": "Chukotko-Kamchatkan", + "level1": "Chukotian" + }, + "als": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Albanian", + "level3": "Albanian-Tosk" + }, + "alt": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Southeast Kipchak", + "level5": "East Kipchak" + }, + "alu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Southern Malaita" + }, + "alw": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Hadiyya-Kambaata", + "level6": "Kambaataic" + }, + "alx": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Bragat-Aruop-Amol" + }, + "aly": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic", + "level4": "Upper Arrernte" + }, + "alz": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo", + "level4": "Adhola-Alur-Luo" + }, + "ama": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup V", + "level6": 
"Arawetic" + }, + "amb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "North Tivoid" + }, + "amc": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano" + }, + "amd": { + "level0": "Bookkeeping" + }, + "ame": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha" + }, + "amf": { + "level0": "South Omotic", + "level1": "AHK", + "level2": "Hamer-Karo" + }, + "amg": { + "level0": "Iwaidjan Proper" + }, + "amh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Amharic-Argobba" + }, + "ami": { + "level0": "Austronesian", + "level1": "East Formosan", + "level2": "Central East Formosan" + }, + "amj": { + "level0": "Furan" + }, + "amk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Ansus-Ambai" + }, + "aml": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Khasian" + }, + "amm": { + "level0": "Left May", + "level1": "Western Left May" + }, + "amn": { + "level0": "Border", + "level1": "Warisic" + }, + "amo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos" + }, + "amp": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Eastern Sepik Hill" + }, + "amq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": 
"Seram Straits", + "level7": "Uliase", + "level8": "Hatuhaha", + "level9": "Saparuan", + "level10": "Elpaputi" + }, + "amr": { + "level0": "Harakmbut" + }, + "ams": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami", + "level4": "Nuclear Amami", + "level5": "Oshima" + }, + "amt": { + "level0": "Amto-Musan" + }, + "amu": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Amuzgoan" + }, + "amv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "West Central Maluku" + }, + "amw": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Western Aramaic" + }, + "amx": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic", + "level4": "Upper Arrernte" + }, + "amy": { + "level0": "Western Daly", + "level1": "Maranunggu-Ame-Manda", + "level2": "Ame-Manda" + }, + "amz": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Gudang-Northeast Paman", + "level4": "Northeast Paman", + "level5": "Uradhic" + }, + "anb": { + "level0": "Zaparoan", + "level1": "Iquito-Arabela", + "level2": "Arabela-Andoa" + }, + "anc": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3" + }, + "and": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Ansus-Ambai" + }, + "ane": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "anf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Kebu-Animere" + }, + "ang": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic" + }, + "anh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "Apalic", + "level6": "Greater West Sogeram", + "level7": "West Sogeram" + }, + "ani": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic" + }, + "anj": { + "level0": "Ramu", + "level1": "Aian" + }, + "ank": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic" + }, + "anl": { + "level0": "Sino-Tibetan", + "level1": "Mruic" + }, + "anm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Anal-Lamgang" + }, + "ann": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross" + }, + "anp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan" + 
}, + "anq": { + "level0": "Jarawa-Onge" + }, + "ans": { + "level0": "Chocoan", + "level1": "Unclassified Chocoan" + }, + "ant": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Unclassified Wati" + }, + "anu": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo" + }, + "anv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Mamfe", + "level6": "Kendem-Denya" + }, + "anw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic", + "level8": "Okop Usem" + }, + "anx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus" + }, + "any": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Northern Bia", + "level9": "Anyinic" + }, + "aoa": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Lower Guinea Portuguese", + "level15": "Bantu Layer Lower Guinea Portuguese", + "level16": "Saotomic" + }, + "aob": { + "level0": "Anim", + "level1": "Tirio" + 
}, + "aoc": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Pemong-Panare", + "level3": "Pemongan" + }, + "aod": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Ataitan" + }, + "aoe": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Angal Mendi" + }, + "aof": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Bragat-Aruop-Amol" + }, + "aog": { + "level0": "Lower Sepik" + }, + "aoh": { + "level0": "Unattested", + "level1": "Chocoan (Unattested)" + }, + "aoi": { + "level0": "Gunwinyguan", + "level1": "Eastern Gunwinyguan", + "level2": "Wubuy-Anindilyakwa" + }, + "aoj": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Arapesh", + "level3": "Mufian-Bukiyip-Abu" + }, + "aok": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian", + "level9": "Houailou" + }, + "aol": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat" + }, + "aom": { + "level0": "Koiarian", + "level1": "Baraic" + }, + "aon": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Arapesh" + }, + "aor": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "South Santo" + }, + "aos": { + "level0": "Border", + "level1": "Taikat-Awyi" + }, + "aot": { 
+ "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Kochic" + }, + "aou": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic", + "level6": "Northern Gelao", + "level7": "Ahouic" + }, + "aox": { + "level0": "Arawakan", + "level1": "Negro-Roraima", + "level2": "Pidjanan", + "level3": "Wapishanan", + "level4": "Wapishana-Atorai" + }, + "aoz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "West Rote", + "level5": "Dengka-Meto", + "level6": "Meto", + "level7": "Central Meto" + }, + "apb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira" + }, + "apc": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Levantine-Cypriot Arabic" + }, + "apd": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic", + "level8": "Sudanese-Chadian Arabic", + "level9": "East Sudanic Arabic" + }, + "ape": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Arapesh", + "level3": "Mufian-Bukiyip-Abu", + "level4": "Bukiyip-Abu" + }, + "apf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Northeastern Luzon", + "level4": "Nuclear Northeastern Luzon", + "level5": "Paranan-Pahanan" + }, + "apg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + 
"level4": "Barito-Mahakam" + }, + "aph": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar", + "level6": "Yakkha-Athpariyic", + "level7": "Athpariyic" + }, + "api": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva" + }, + "apj": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean", + "level4": "Southwestern Apachean", + "level5": "Eastern Southwestern Apachean" + }, + "apk": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean" + }, + "apl": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean", + "level4": "Southwestern Apachean", + "level5": "Eastern Southwestern Apachean" + }, + "apm": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean", + "level4": "Southwestern Apachean", + "level5": "Western Southwestern Apachean" + }, + "apn": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je" + }, + "apo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "West Arawe" + }, + "app": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + 
"level5": "Central Vanuatu", + "level6": "South Pentecost" + }, + "apq": { + "level0": "Great Andamanese", + "level1": "Middle Great Andamanese", + "level2": "Okol-Opucikwar" + }, + "apr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Korap linkage" + }, + "aps": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Sera-Sissano", + "level10": "Sissanoic" + }, + "apt": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Pre-Western Tani", + "level4": "Western Tani" + }, + "apu": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Purus" + }, + "apv": { + "level0": "Unattested", + "level1": "Nambiquaran (Unattested)" + }, + "apw": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean", + "level4": "Southwestern Apachean", + "level5": "Western Southwestern Apachean" + }, + "apx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Wetar-Atauro", + "level4": "Wetar", + "level5": "Perai-Tugun-Aputai", + "level6": "Perai-Aputai" + }, + "apy": { + "level0": "Cariban" + }, + "apz": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Wojokesic" + }, + "aqc": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic" + }, + "aqd": { + "level0": "Dogon", + "level1": "West Dogon", + "level2": "Penangic" + }, + "aqg": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid" + }, + "aqk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic" + }, + "aqm": { + "level0": "Kayagaric" + }, + "aqn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran" + }, + "aqr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian", + "level9": "Houailou" + }, + "aqt": { + "level0": "Lengua-Mascoy", + "level1": "Eastern Enlhet-Enenlhet" + }, + "aqz": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Tuparic", + "level3": "Nuclear Tuparic", + "level4": "Corumbiara" + }, + "arb": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic" + }, + "arc": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic" + }, + "ard": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Palku", + "level3": "Arabana-Wangganguru" + }, + "are": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic", + "level4": "Upper Arrernte" + }, + "arg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Unshifted Western Romance" + }, + "arh": { + 
"level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Northern Magdalenic", + "level4": "Arhuacic", + "level5": "Eastern-Southern Arhuacic" + }, + "ari": { + "level0": "Caddoan", + "level1": "Northern Caddoan", + "level2": "Pawnee-Kitsai", + "level3": "Pawnee-Arikara" + }, + "arj": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Kotiria-Piratapuyo", + "level5": "Piratapuyic", + "level6": "Arapaso-Miriti" + }, + "ark": { + "level0": "Nuclear-Macro-Je", + "level1": "Jabuti" + }, + "arl": { + "level0": "Zaparoan", + "level1": "Iquito-Arabela", + "level2": "Arabela-Andoa" + }, + "arn": { + "level0": "Araucanian" + }, + "aro": { + "level0": "Pano-Tacanan", + "level1": "Tacanan", + "level2": "Takanik-Chamik", + "level3": "Takanik", + "level4": "Araona-Toromono" + }, + "arp": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Arapahoic", + "level4": "Arapaho-Gros Ventre-Besawunena" + }, + "arq": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "arr": { + "level0": "Tupian", + "level1": "Purubora-Ramarama", + "level2": "Ramarama" + }, + "ars": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic" + }, + "aru": { + "level0": "Arawan" + }, + "arv": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Western Omo-Tana" + }, + "arw": { + "level0": "Arawakan", + 
"level1": "Caribbean Arawakan", + "level2": "Antillean Arawakan", + "level3": "Ineric" + }, + "arx": { + "level0": "Tupian", + "level1": "Monde", + "level2": "Gavianic", + "level3": "Nuclear Gavianic" + }, + "ary": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic", + "level7": "Moroccan-Andalusian Arabic" + }, + "arz": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic" + }, + "asa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Pare-Taveta", + "level10": "Pareic" + }, + "asb": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dakotan", + "level3": "Nakoda" + }, + "asc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat" + }, + "ase": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "asf": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "BANZL", + "level4": "Auslanic" + }, + "asg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Kambari-Cicipu", + "level6": "Kambaric", + "level7": "West Kambaric" + }, + "asi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Sabakor" + }, + "asj": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Nsari-Nooni-Ncane" + }, + "ask": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Nuristani" + }, + "asl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "West Piru Bay" + }, + "asm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Kamrupa", + "level10": "Eastern Kamrupa" + }, + "asn": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup V" + }, + "aso": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Gahuku" + }, + "asp": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "asq": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign", + "level4": "Nuclear Central European Sign" + }, + "asr": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Asuric" + }, + "ass": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid B" + }, + "ast": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": 
"Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Asturo-Leonese" + }, + "asu": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV", + "level6": "Tupi-Guarani Subgroup IV.A" + }, + "asv": { + "level0": "Central Sudanic", + "level1": "Mangbetu-Asua", + "level2": "Mangbetuic" + }, + "asw": { + "level0": "Sign Language", + "level1": "Auxiliary Sign Systems" + }, + "asx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup" + }, + "asy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat", + "level4": "Central-Yaosakor Asmat" + }, + "asz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera" + }, + "atb": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Northern Burmish", + "level5": "Maruic" + }, + "atc": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Madre de Dios Pano" + }, + "atd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "East and Central Manobo", + "level7": "Central Manobo" + }, + "ate": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": 
"Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "Apalic", + "level6": "Greater West Sogeram", + "level7": "West Sogeram" + }, + "atf": { + "level0": "Bookkeeping" + }, + "atg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic", + "level7": "Ikpeshic" + }, + "ati": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo" + }, + "atj": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi" + }, + "atk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian" + }, + "atl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Inagta Bikol" + }, + "atm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Unclassified Bisayan" + }, + "atn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic", + "level10": "Vafsic" + }, + "ato": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields", + "level8": "Menka-Atong" + }, + "atp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Atta" + }, + "atq": { + 
"level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Pitu Ulunna Salu", + "level6": "Matangnga-Aralle-Tabulahan" + }, + "atr": { + "level0": "Cariban", + "level1": "Yawaperi" + }, + "ats": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Arapahoic", + "level4": "Arapaho-Gros Ventre-Besawunena" + }, + "att": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Atta" + }, + "atu": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Nuer-Reel" + }, + "atv": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Northern Altai-Lower Chulym" + }, + "atw": { + "level0": "Palaihnihan" + }, + "aty": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu" + }, + "atz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon" + }, + "aua": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro", + "level6": "Utupua" + }, + "aub": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Downriver Riverine Phula", + "level8": "Phupha-Alugu" + }, + "aud": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": 
"Polynesian", + "level7": "Nuclear Polynesian" + }, + "aug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe", + "level5": "Eweic" + }, + "auh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi", + "level9": "Central Sabi", + "level10": "Bemba (M.40)" + }, + "aui": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage" + }, + "auj": { + "level0": "Afro-Asiatic", + "level1": "Berber" + }, + "auk": { + "level0": "Nuclear Torricelli", + "level1": "Nuclear Maimai", + "level2": "Heyo-Yahang" + }, + "aul": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula" + }, + "aum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid" + }, + "aun": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One", + "level3": "Central-Northern One" + }, + "auo": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.1", + "level5": "Ngizim-Southwestern Bade", + "level6": "Shira-Southwestern Bade", + "level7": "Shira" + }, + "aup": { + "level0": "Anim", + "level1": "Tirio", + "level2": "Nuclear Tirio" + }, + 
"auq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Anus-Podena" + }, + "aur": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Kombio-Yambes", + "level3": "Unclassified Kombio-Yambes" + }, + "aut": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal", + "level14": "Tahitian-Austral" + }, + "auu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Paniai Lakes", + "level2": "Auye-Dao" + }, + "auv": { + "level0": "Bookkeeping" + }, + "auw": { + "level0": "Border", + "level1": "Taikat-Awyi" + }, + "aux": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Guaja-Kaapor-Ava", + "level7": "Guaja-Aure-Aura" + }, + "auy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": "Auyana" + }, + "auz": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Eastern Arabic", + "level7": "Central Asian Arabic", + "level8": "Xorasan-Qashqa-Darya Arabic" + }, + "ava": { + 
"level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic" + }, + "avb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "East Arawe" + }, + "avd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic" + }, + "ave": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian" + }, + "avi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Avikam-Alladian" + }, + "avk": { + "level0": "Artificial Language" + }, + "avl": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic" + }, + "avm": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Gudang-Northeast Paman", + "level4": "Northeast Paman", + "level5": "Uradhic", + "level6": "Yadhaykenu-Angkamuthi" + }, + "avn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Avatime-Nyangbo" + }, + "avo": { + "level0": "Unattested", + "level1": "Arawakan (Unattested)" + }, + "avs": { + "level0": "Zaparoan", + "level1": "Zaparo-Abishira" + }, + "avt": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + 
"level3": "Wapeic", + "level4": "Au-Olo-Elkei" + }, + "avu": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi" + }, + "avv": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Guaja-Kaapor-Ava" + }, + "awa": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Eastern Hindi", + "level9": "Awadhic" + }, + "awb": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": "Awa-Oweina" + }, + "awc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Kambari-Cicipu" + }, + "awe": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani" + }, + "awg": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Albatross Bay", + "level4": "Anguthimri-Yangathimri-Yuputhimri", + "level5": "Anguthimri-Yangathimri" + }, + "awh": { + "level0": "Bayono-Awbono" + }, + "awi": { + "level0": "Kamula-Elevala", + "level1": "Elevala" + }, + "awk": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Kuri", + "level5": "Hunter-Hastings" + }, + "awm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Kabenau" + }, + "awn": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "Agaw" + }, + "awo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic", + "level8": "Tula-Ma-Yebu", + "level9": "Awak-Kamo" + }, + "awr": { + "level0": "Lakes Plain", + "level1": "Far West Lakes Plain" + }, + "aws": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu" + }, + "awt": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup V", + "level6": "Arawetic" + }, + "awu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu", + "level6": "Mappi-Digul Awyu" + }, + "awv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu" + }, + "aww": { + "level0": "Sepik", + "level1": "Yellow River" + }, + "awx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Wantoatic", + "level4": "Wantoat-Awara" + }, + "awy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu" + }, + "axb": { + "level0": "Guaicuruan", + "level1": "Guaicuru del Sur" + }, + "axk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Bwamba-Ngondi-Pande-Mbati-Aka" + }, + "axl": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + 
"level2": "Arandic", + "level3": "South Arandic" + }, + "axx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "aya": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ottilien", + "level3": "Bosngun-Awar" + }, + "ayb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "ayc": { + "level0": "Aymaran", + "level1": "Central-Southern Aymara" + }, + "ayd": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Northeastern Pama", + "level4": "Umbindhamuic" + }, + "aye": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Ayere-Ahan" + }, + "ayg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "Mountain Oti North Guang", + "level10": "Gikyode-Ginyanga" + }, + "ayh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic" + }, + "ayi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Mbembe-Legbo", + "level8": "Legboic", + "level9": "Lenyima-Leyigha" + }, + "ayk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Southern Northwestern Edoid", + "level7": "Okpe-Akuku-Idesa" + }, + "ayl": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "ayn": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "Judeo-Muslim Sanaani Arabic" + }, + "ayo": { + "level0": "Zamucoan", + "level1": "Zamuco-Ayoreo" + }, + "ayp": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Eastern Arabic", + "level7": "Qeltu" + }, + "ayq": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mayo-Pasi", + "level3": "Yimin-Bel" + }, + "ayr": { + "level0": "Aymaran", + "level1": "Central-Southern Aymara" + }, + "ays": { + "level0": "Unattested", + "level1": "Austronesian (Unattested)" + }, + "ayt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic" + }, + "ayu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic" + }, + "ayx": { + "level0": "Bookkeeping" + }, + "ayy": { + "level0": "Unattested", + "level1": "Austronesian (Unattested)" + }, + "ayz": { + "level0": "Maybratic" + }, + "aza": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Axioid" + }, + "azb": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "Central Oghuz" + }, + 
"azd": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl", + "level6": "Western Periphery Nahuatl", + "level7": "Durango Nahuatl" + }, + "azg": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Amuzgoan" + }, + "azj": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "Central Oghuz" + }, + "azm": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Amuzgoan" + }, + "azn": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl", + "level6": "Western Periphery Nahuatl", + "level7": "Durango Nahuatl" + }, + "azo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic" + }, + "azr": { + "level0": "Bookkeeping" + }, + "azt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Atta" + }, + "azz": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Sierra de Puebla Nahuatl" + }, + "baa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St 
George linkage", + "level8": "Northwest Solomonic", + "level9": "Choiseul", + "level10": "East Choiseul", + "level11": "Southeast Choiseul" + }, + "bab": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Nyun", + "level4": "Bainounk" + }, + "bae": { + "level0": "Arawakan", + "level1": "Medio Rio Negro", + "level2": "Bareic" + }, + "baf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam" + }, + "bag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "Sanaga (A.60)" + }, + "bah": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Gullah-Nevis-Antigua", + "level15": "Gullah", + "level16": "Bahamian Gullah" + }, + "baj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Karey-Barakai" + }, + "bak": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "North Kipchak", + "level6": "Bashkiric" + }, + "bam": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + 
"level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding" + }, + "ban": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bali-Sasak-Sumbawa" + }, + "bao": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan I", + "level4": "Bara-Tatuyo" + }, + "bap": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Southern Kiranti", + "level6": "Bantawic" + }, + "bar": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Bairisch" + }, + "bas": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Basaa (A.40)", + "level9": "Basaa-Bakoko", + "level10": "Basaa-Hijuk" + }, + "bau": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jarawaic" + }, + "bav": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "South Ring", + "level9": "Babungoic" + }, + "baw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow 
Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic", + "level10": "Mankonic" + }, + "bax": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "bay": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Batuley-Mariri" + }, + "baz": { + "level0": "Bookkeeping" + }, + "bba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur" + }, + "bbb": { + "level0": "Koiarian", + "level1": "Baraic", + "level2": "Barai-Namiae" + }, + "bbc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Central-Southern Batak", + "level5": "Southern Batak" + }, + "bbd": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum", + "level5": "Panim-Isebe-Bau" + }, + "bbe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Eastern Mundu-Baka", + "level7": "Mayogo-Bangba" + }, + "bbf": { + "level0": "Baibai-Fas" + }, + "bbg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": 
"Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Ngubi-Sangu-Sira-Punu", + "level23": "Sangu-Sira-Punu", + "level24": "Sangu-Sira", + "level25": "Sira-Barama" + }, + "bbh": { + "level0": "Austroasiatic", + "level1": "Mangic", + "level2": "Pakanic" + }, + "bbi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Basaa (A.40)", + "level9": "Abo-Barombi" + }, + "bbj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "East Bamileke" + }, + "bbk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring" + }, + "bbl": { + "level0": "Nakh-Daghestanian", + "level1": "Nakh" + }, + "bbm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Mongala", + "level11": "Motemboic", + "level12": "Bujaic", + "level13": "Budja (C.37)" + }, + "bbn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western 
Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Bali-Vitu" + }, + "bbo": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Bobo" + }, + "bbp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic" + }, + "bbq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun", + "level10": "Nun MCNB" + }, + "bbr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Kokon" + }, + "bbs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Agoi-Doko-Iyoniyong" + }, + "bbt": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2" + }, + "bbu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan" + }, + "bbv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Korap linkage" + }, + "bbw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide 
Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "bbx": { + "level0": "Bookkeeping" + }, + "bby": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields" + }, + "bbz": { + "level0": "Bookkeeping" + }, + "bca": { + "level0": "Sino-Tibetan", + "level1": "Macro-Bai", + "level2": "Baic", + "level3": "South-Central Bai" + }, + "bcb": { + "level0": "Bookkeeping" + }, + "bcc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Balochic", + "level8": "Southern-Western Balochi", + "level9": "Southern Balochi-Koroshi" + }, + "bcd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "North Babaric" + }, + "bce": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun", + "level10": "Nun MCNB" + }, + "bcf": { + "level0": "Kiwaian" + }, + "bcg": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Naluic" + }, + "bch": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Eastern Ngero" + }, + "bci": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", 
+ "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Northern Bia" + }, + "bcj": { + "level0": "Nyulnyulan", + "level1": "Western Nyulnyulan", + "level2": "Bardic" + }, + "bck": { + "level0": "Bunaban" + }, + "bcl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Coastal Bikol" + }, + "bcm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Banoni-Piva" + }, + "bcn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Bali-Kpasam" + }, + "bco": { + "level0": "Bosavi", + "level1": "Bosavi Watershed", + "level2": "Kaluli-Sunia" + }, + "bcp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Bali-Beeke" + }, + "bcq": { + "level0": "Ta-Ne-Omotic" + }, + "bcr": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central British Columbia Athabaskan", + "level4": "Carrieric" + }, + "bcs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper 
Cross", + "level6": "North-South Central Delta Cross", + "level7": "Ubaghara-Kohumono", + "level8": "Kohumonoic" + }, + "bct": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe", + "level3": "Leseic" + }, + "bcu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Eastern Bel" + }, + "bcv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Wurbo-Wannu", + "level7": "Wurbo" + }, + "bcw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Higic", + "level5": "Nkafa-Kirya-Bana" + }, + "bcx": { + "level0": "Bookkeeping" + }, + "bcy": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Bata-Bwatiye" + }, + "bcz": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Nyun", + "level4": "Bainounk" + }, + "bda": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "Bayot" + }, + "bdb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito" + }, + "bdc": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "Atrato", + "level3": "Panama-Baudo-Atrato" + }, + "bdd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic 
linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage", + "level9": "Bunama-Mwatebu" + }, + "bde": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.1", + "level5": "Ngizim-Southwestern Bade", + "level6": "Shira-Southwestern Bade" + }, + "bdf": { + "level0": "Koiarian", + "level1": "Koiaric", + "level2": "Biage-Mountain Koiali" + }, + "bdg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Northeast Sabahan" + }, + "bdh": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli" + }, + "bdi": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Burun" + }, + "bdj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Sere-Bviri", + "level8": "Bai-Viri" + }, + "bdk": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Southern Samur" + }, + "bdl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Borneo Coast Bajaw" + }, + "bdm": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma" + }, + "bdn": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Maroua" + }, + "bdo": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB 
Occidental", + "level4": "Saraic", + "level5": "Bagirmic", + "level6": "Morom-Jaya-Naba", + "level7": "Bayo-Morom" + }, + "bdp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Unclassified Northeast Savanna Bantu", + "level9": "Bende-Tongwe" + }, + "bdq": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Tampuon-Bahnar" + }, + "bdr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Borneo Coast Bajaw" + }, + "bds": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic", + "level3": "Greater West Rift South Cushitic", + "level4": "West Rift South Cushitic" + }, + "bdt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Bokoto-Gbeya" + }, + "bdu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Bafawic-Bakweric", + "level11": "Bafawic" + }, + "bdv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "bdw": { + "level0": "West Bomberai", + "level1": "Nuclear West Bomberai" + }, + "bdx": { + 
"level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Seko", + "level5": "Panasuanic" + }, + "bdy": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Greater Bandjalangic", + "level4": "Bandjalangic", + "level5": "Inland Bandjalang" + }, + "bdz": { + "level0": "Unattested", + "level1": "Indo-European (Unattested)" + }, + "bea": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Cordillera Athabaskan", + "level5": "Beaver-Sekani" + }, + "beb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)", + "level9": "Ewondo-Bebele" + }, + "bec": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid A" + }, + "bed": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuta" + }, + "bee": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Pithauragarh", + "level5": "Darma-Byangsi-Chaudangsi", + "level6": "Darma-Byangsi" + }, + "bef": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria" + }, + "beg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Lower 
Baram", + "level6": "Central Lower Baram A" + }, + "beh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Oti-Volta Oriental" + }, + "bei": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Benyadu-Bekati", + "level4": "Bakati'" + }, + "bej": { + "level0": "Afro-Asiatic", + "level1": "Cushitic" + }, + "bek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Bebeli-Mangseng" + }, + "bel": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "East Slavic" + }, + "bem": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi", + "level9": "Central Sabi", + "level10": "Bemba (M.40)" + }, + "ben": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga" + }, + "beo": { + "level0": "Bosavi", + "level1": "Etoro-Bedamini" + }, + "bep": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": 
"Rampi-Seko-Badaic", + "level4": "Badaic-Limola", + "level5": "Badaic" + }, + "beq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic" + }, + "bes": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Kim-Besme-Goundo" + }, + "bet": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Beteic", + "level3": "Western Bete" + }, + "beu": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Kaera-Straits", + "level5": "Blagaric" + }, + "bev": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Beteic", + "level3": "Western Bete" + }, + "bew": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Betawic" + }, + "bex": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli", + "level4": "Lori" + }, + "bey": { + "level0": "Nuclear Torricelli" + }, + "bez": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Bena-Hehe" + }, + "bfa": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Barian", + "level3": "Nuclear Barian" + }, + "bfb": { + "level0": 
"Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Pauri-Nahali" + }, + "bfc": { + "level0": "Sino-Tibetan", + "level1": "Macro-Bai", + "level2": "Baic" + }, + "bfd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic", + "level10": "Mankonic" + }, + "bfe": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor", + "level3": "Betaf-Vitou" + }, + "bff": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Mbodomo-Bofi" + }, + "bfg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic", + "level6": "Rejang-Makaham Kayan" + }, + "bfh": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda" + }, + "bfi": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "BANZL" + }, + "bfj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "bfk": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "bfl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": 
"Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic" + }, + "bfm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring", + "level10": "Mmen-Bum" + }, + "bfn": { + "level0": "Timor-Alor-Pantar" + }, + "bfo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "North-West Dagaric", + "level16": "Birifor" + }, + "bfp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic" + }, + "bfq": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada" + }, + "bfr": { + "level0": "Unclassifiable" + }, + "bfs": { + "level0": "Sino-Tibetan", + "level1": "Macro-Bai", + "level2": "Baic", + "level3": "South-Central Bai" + }, + "bft": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Western Archaic Tibetan", + "level5": "Shamskatic" + }, + "bfu": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": 
"Central-Eastern West Himalayish" + }, + "bfw": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "Gutob-Remo" + }, + "bfx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan" + }, + "bfy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Eastern Hindi", + "level9": "Awadhic" + }, + "bfz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Nuclear Himachali" + }, + "bga": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Northwestern Kainji", + "level6": "Dukaic", + "level7": "Main-Gwamhi" + }, + "bgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Western Saluan-Banggai", + "level7": "Saluanic" + }, + "bgc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi" + }, + "bgd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": 
"Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Pauri-Nahali" + }, + "bge": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "bgf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Gbaya Meridional" + }, + "bgg": { + "level0": "Sino-Tibetan", + "level1": "Kho-Bwa" + }, + "bgh": { + "level0": "Bookkeeping" + }, + "bgi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bilic" + }, + "bgj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "bgk": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Khao-Bit" + }, + "bgn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Balochic", + "level8": "Southern-Western Balochi" + }, + "bgo": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Northern Mel" + }, + "bgp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Balochic" + }, + "bgq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", 
+ "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Bagri-Jandavra" + }, + "bgr": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Lai Chin", + "level6": "Falamic" + }, + "bgs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "South Manobo" + }, + "bgt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Nuclear Guadalcanal-Nggelic", + "level7": "Nggelic" + }, + "bgu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mbongno-Mvano" + }, + "bgv": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Yaqayic" + }, + "bgw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Halbic" + }, + "bgx": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "West Oghuz" + }, + "bgy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "Setic" + }, + 
"bgz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Eastern Saluan-Banggai" + }, + "bha": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Bundeli-Bharia" + }, + "bhb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "bhc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera" + }, + "bhd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic", + "level9": "Bhadrawahi-Bhalesi-Curahi", + "level10": "Bhadarwahic" + }, + "bhe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani" + }, + "bhg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean" + }, + "bhh": { + 
"level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic", + "level10": "Tajikic" + }, + "bhi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Pauri-Nahali" + }, + "bhj": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Northwestern Kiranti", + "level5": "Bahing-Sunwar" + }, + "bhk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Inland Bikol" + }, + "bhl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok" + }, + "bhm": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Hobyot-Western MSA", + "level5": "Western MSA" + }, + "bhn": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "Bohtan" + }, + "bho": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental 
Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Bhojpuric" + }, + "bhp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata" + }, + "bhq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Tukangbesi-Bonerate", + "level8": "Tukang Besi" + }, + "bhr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic", + "level7": "South West-Central Malagasic", + "level8": "Nuclear South West-Central Malagasic", + "level9": "Inland-Western Malagasic", + "level10": "Bara-Tanosy" + }, + "bhs": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic", + "level5": "Buwal-Gavar" + }, + "bht": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic" + }, + "bhu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Halbic" + }, + "bhv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic" + }, + "bhw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Biakic", + "level6": "Biak-Roon" + }, + "bhx": { + "level0": "Bookkeeping" + }, + "bhy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic", + "level15": "Bilaic" + }, + "bhz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Badaic-Limola", + "level5": "Badaic" + }, + "bia": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Kartu-Nhanda", + "level3": "Kartu" + }, + "bib": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa" + }, + "bic": { + "level0": "Bookkeeping" + }, + "bid": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Dangla" + }, + "bie": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Kumil" + }, + "bif": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Jaad" + }, + "big": { + "level0": "Kunimaipan" + }, + "bii": { + "level0": "Bookkeeping" + }, + "bil": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Numan" + }, + "bim": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma B", + "level13": "Gourmantche-Moba", + "level14": "Moba-Bimoba" + }, + "bin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Central Plains Edoid" + }, + "bio": { + "level0": "Kwomtari-Nai" + }, + "bip": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic", + "level15": "Bilaic", + "level16": "Bila-Kaiku" + }, + "biq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus I" + }, + "bir": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Engan", + "level3": "Outer Enga" + }, + "bis": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Pacific Creole English", + 
"level12": "Early Melanesian Pidgin" + }, + "bit": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Bahinemic" + }, + "biu": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic", + "level6": "Hmaric" + }, + "biv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "North-West Dagaric", + "level16": "Birifor" + }, + "biw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Makaaic", + "level11": "Southern Makaaic" + }, + "bix": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Asuric" + }, + "biy": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric" + }, + "biz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + 
"level12": "Bobangic", + "level13": "Bobangic Riverain" + }, + "bja": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Mongala", + "level11": "Motemboic", + "level12": "Bujaic", + "level13": "Budja (C.37)" + }, + "bjb": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Northern Thura-Yura" + }, + "bjc": { + "level0": "Yareban", + "level1": "Yareba-Bariji-Nawaru" + }, + "bjd": { + "level0": "Bookkeeping" + }, + "bje": { + "level0": "Hmong-Mien", + "level1": "Mienic" + }, + "bjf": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Northwestern Jewish Neo-Aramaic" + }, + "bjg": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bijogo" + }, + "bjh": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Bahinemic" + }, + "bji": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic" + }, + "bjj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi" + }, + "bjk": { 
+ "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Madak linkage" + }, + "bjl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Willaumez linkage", + "level7": "Bola-Bulu" + }, + "bjm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Gorani", + "level9": "Shabak-Bajalani" + }, + "bjn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Banjar-Bukit Malay" + }, + "bjo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic", + "level9": "Mid-Southern Central Core Bandaic" + }, + "bjq": { + "level0": "Bookkeeping" + }, + "bjr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Tairora" + }, + "bjs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", 
+ "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Barbados-Eustatius", + "level15": "Barbados-Trinidad" + }, + "bjt": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Balanta" + }, + "bju": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid" + }, + "bjv": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Bediondo" + }, + "bjw": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Bakwe-Wane" + }, + "bjx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Northern Kalinga", + "level9": "Northwest Kalinga" + }, + "bjy": { + "level0": "Pama-Nyungan", + "level1": "Rockhampton-Gladstone" + }, + "bjz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Coastal Binanderean", + "level6": "Baruga-Doghoro" + }, + "bka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Bambuka-Gomu-Leelau" + }, + "bkb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", 
+ "level7": "Bontok-Kankanay", + "level8": "Bontok" + }, + "bkc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "Baka-Gundi", + "level8": "Baka complex" + }, + "bkd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "North Manobo", + "level5": "Kinamiguin-Bukidnon", + "level6": "Bukidnon" + }, + "bkf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Bali-Beeke" + }, + "bkg": { + "level0": "Bookkeeping" + }, + "bkh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Basaa (A.40)", + "level9": "Basaa-Bakoko" + }, + "bki": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Baki-Bierebo" + }, + "bkj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Bwamba-Ngondi-Pande-Mbati-Aka" + }, + "bkk": { + "level0": 
"Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic" + }, + "bkl": { + "level0": "Tor-Orya", + "level1": "Tor" + }, + "bkm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring", + "level10": "Komic" + }, + "bkn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Bukat-Ukit-Beketan-Lugat-Lisum" + }, + "bko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "East Bamileke" + }, + "bkp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Interieur", + "level12": "Lobalic" + }, + "bkq": { + "level0": "Cariban", + "level1": "Pekodian" + }, + "bkr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "South West Greater Barito" + }, + "bks": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + 
"level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan", + "level7": "Masbate-Sorsogon" + }, + "bkt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain" + }, + "bku": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "South Mangyan", + "level4": "Buhid-Taubuid" + }, + "bkv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic" + }, + "bkw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic" + }, + "bkx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "West Rote", + "level5": "Dengka-Meto", + "level6": "Meto", + "level7": "Central Meto" + }, + "bky": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic" + }, + "bkz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast 
Bungku-Tolaki" + }, + "bla": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot" + }, + "blc": { + "level0": "Salishan" + }, + "bld": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "ble": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Balanta" + }, + "blf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "blg": { + "level0": "Bookkeeping" + }, + "blh": { + "level0": "Kru", + "level1": "Greater Western Kru" + }, + "bli": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Mongoic", + "level11": "Bolia-Ntomba" + }, + "blj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan" + }, + "blk": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Peripheral Karen" + }, + "bll": { + "level0": "Siouan", + "level1": "Ohio Valley Siouan", + "level2": "Southeastern Siouan" + }, + "blm": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli" + }, + "bln": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Coastal Bikol" + }, + "blo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Basila-Adele" + }, + "blp": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Santa Isabel", + "level10": "Central Santa Isabel", + "level11": "Zazao-Blanga" + }, + "blq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "South-East Admiralty", + "level7": "Lou-Paluai" + }, + "blr": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Bulangic" + }, + "bls": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Southern Tomini" + }, + "blt": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Black Tai" + }, + "blu": { + "level0": "Bookkeeping" + }, + "blv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbundu (H.20)" + }, + "blw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran" + }, + "blx": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Mag-Ayta" + }, + "bly": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental" + }, + "blz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Eastern Saluan-Banggai" + }, + "bma": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan" + }, + "bmb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Bembe-Buyu" + }, + "bmc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage", + "level9": "Bam-Manam" + }, + "bmd": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Northern Mel" + }, + "bme": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "Baka-Gundi" + }, + "bmf": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Southern Mel", + "level3": "Bullom", + "level4": 
"Northern Bullom" + }, + "bmg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Bamweic" + }, + "bmh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Kokon" + }, + "bmi": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic" + }, + "bmj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Unclassified Bihari", + "level10": "Kuswaric" + }, + "bmk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage", + "level10": "Boanaki-Paiwa" + }, + "bml": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Bombomic" + }, + "bmm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": 
"Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic", + "level7": "Northern Malagasic", + "level8": "Tsimihety-Betsimisaraka" + }, + "bmn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Oumic", + "level9": "Magoric" + }, + "bmo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "bmp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup", + "level5": "Unclassified Nuclear Warup" + }, + "bmq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Bwamu" + }, + "bmr": { + "level0": "Boran" + }, + "bms": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanuric", + "level4": "East Kanuri" + }, + "bmt": { + "level0": "Hmong-Mien", + "level1": "Mienic", + "level2": "Mien-Mun" + }, + "bmu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Pindiu", + "level6": "Kosorong-Burum-Mindik" + }, + "bmv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": 
"Center-West Ring", + "level9": "Center Ring", + "level10": "Mmen-Bum" + }, + "bmw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)" + }, + "bmx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Garuh-Foran" + }, + "bmy": { + "level0": "Bookkeeping" + }, + "bmz": { + "level0": "Anim", + "level1": "Tirio", + "level2": "Nuclear Tirio" + }, + "bna": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Tukangbesi-Bonerate" + }, + "bnb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Lowland Murut" + }, + "bnd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser" + }, + "bne": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "bnf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "East Rivers Seram" + }, + "bng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Bengaic" + }, + "bnh": { + "level0": 
"Bookkeeping" + }, + "bni": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain", + "level14": "Bobangi-Bangala-Lingala" + }, + "bnj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "South Mangyan", + "level4": "Buhid-Taubuid", + "level5": "Batangan" + }, + "bnk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Baki-Bierebo" + }, + "bnl": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Unclassified East Cushitic" + }, + "bnm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Bengaic" + }, + "bnn": { + "level0": "Austronesian" + }, + "bno": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan" + }, + "bnp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Willaumez linkage", + "level7": "Bola-Bulu" + }, + "bnq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sangiric", + "level3": 
"Southern Sangiric" + }, + "bnr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "East Santo", + "level9": "Southeast Santo" + }, + "bns": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Bundeli-Bharia" + }, + "bnu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Makassaric", + "level5": "Nuclear Makassaric" + }, + "bnv": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor" + }, + "bnw": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Nuclear Central Sepik Hill" + }, + "bnx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Lubaic", + "level10": "Bangubangu-Kasai" + }, + "bny": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan" + }, + "bnz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic", + "level5": "Akum-Beezen", + "level6": "Beezen-Baazem" + }, + "boa": { + "level0": "Boran" + }, + "bob": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland 
East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Karre-Boni" + }, + "boc": { + "level0": "Bookkeeping" + }, + "bod": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan" + }, + "boe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Yemne-Kimbi", + "level6": "Ji" + }, + "bof": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding" + }, + "bog": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "boh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Kwa-Kasai North" + }, + "boi": { + "level0": "Chumashan", + "level1": "Southern Chumashan", + "level2": "Central Chumashan" + }, + "boj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Mindjim", + "level4": "Lower Minjim", + "level5": "Coastal Minjim" + }, + "bok": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", 
+ "level9": "Likouala-Sangha", + "level10": "Impfondoic" + }, + "bol": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele", + "level9": "Kirfi-Bele", + "level10": "Ngamo-Bele", + "level11": "Bolanci-Bele" + }, + "bom": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Beromic", + "level5": "Iten-Cara-Berom", + "level6": "Cara-Berom" + }, + "bon": { + "level0": "Eastern Trans-Fly" + }, + "boo": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Bozo", + "level5": "Nuclear Bozo", + "level6": "Ti-Bozo" + }, + "bop": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna", + "level4": "Kewieng-Bonkiman-Nokopo" + }, + "bor": { + "level0": "Bororoan", + "level1": "Bororo-Otuke" + }, + "bos": { + "level0": "Indo-European", + "level1": "Balto-Slavic", + "level2": "Slavic" + }, + "bot": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi" + }, + "bou": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu", + "level11": "Seuta", + "level12": "Bondei-Shambala" + }, + "bov": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Kposo-Ahlo-Bowili" + }, + "bow": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda" + }, + "box": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + 
"level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Bwamu", + "level8": "Nuclear Bwamu" + }, + "boy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Ngendan", + "level16": "Unclassified Ngendan" + }, + "boz": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Bozo", + "level5": "Nuclear Bozo", + "level6": "Ti-Bozo" + }, + "bpa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym", + "level7": "Orkon-West Ambrym", + "level8": "West Ambrym", + "level9": "Southwest Ambrym" + }, + "bpb": { + "level0": "Unattested", + "level1": "Barbacoan (Unattested)" + }, + "bpc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Nsari-Nooni-Ncane", + "level8": "Nooni-Ncane", + "level9": "Ncane-Cung", + "level10": "Cung-Mbuk" + }, + "bpd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic" + }, + "bpe": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Barupu Lagoon" 
+ }, + "bpg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi" + }, + "bph": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic", + "level4": "Botlikh-Godoberi" + }, + "bpi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Silopic" + }, + "bpj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Mbagani-Lwalwa" + }, + "bpk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "bpl": { + "level0": "Pidgin", + "level1": "Malay-based pidgin" + }, + "bpm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Unclassified Rai Coast" + }, + "bpn": { + "level0": "Hmong-Mien", + "level1": "Mienic", + "level2": "Zaominic" + }, + "bpo": { + "level0": "Bookkeeping" + }, + "bpp": { + "level0": "Kaure-Kosare" + }, + "bpq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay", + "level7": "Ambonic Malay" + }, + "bpr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bilic", + "level3": "Tboli-Blaan", + "level4": "Blaan" + 
}, + "bps": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bilic", + "level3": "Tboli-Blaan", + "level4": "Blaan" + }, + "bpt": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Flinders-Barrow" + }, + "bpu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Mindjim", + "level4": "Lower Minjim", + "level5": "Coastal Minjim" + }, + "bpv": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Marindic" + }, + "bpw": { + "level0": "Left May", + "level1": "Western Left May", + "level2": "Iteri-Bo" + }, + "bpx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Rathawi-Palya" + }, + "bpy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga" + }, + "bpz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "Nuclear Rote", + "level5": "Central East Rote", + "level6": "Southeast Rote" + }, + "bqa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "River Oti North Guang", + "level10": "Chumbuli" + }, + "bqb": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Kwerbaic" + }, + "bqc": { + "level0": "Mande", + "level1": "Eastern 
Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Busan", + "level5": "Boko-Busa" + }, + "bqd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Konja" + }, + "bqf": { + "level0": "Bookkeeping" + }, + "bqg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Tem-Chala", + "level10": "Bago-Delo-Cala" + }, + "bqh": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Eastern Tibetic" + }, + "bqi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Luric-Dezfulic", + "level8": "Luric", + "level9": "Bakhtiari-Southern Lori" + }, + "bqj": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "Gusilay-Bandial" + }, + "bqk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic" + }, + "bql": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + 
"level6": "Karian-Usan-Yaben" + }, + "bqm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Bafawic-Bakweric", + "level11": "Bakweric" + }, + "bqn": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign" + }, + "bqo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields" + }, + "bqp": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Busan", + "level5": "Boko-Busa" + }, + "bqq": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Eritai-Obokuitai-Biritai" + }, + "bqr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Sumambu-Tagal", + "level9": "Tidung-Bulusu" + }, + "bqs": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ottilien", + "level3": "Bosngun-Awar" + }, + "bqt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun", + "level10": "Nun MCNB" + }, + "bqu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": 
"Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Ngendan", + "level16": "Unclassified Ngendan" + }, + "bqv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Koroic", + "level7": "Tinoric" + }, + "bqw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid" + }, + "bqx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Kambari-Cicipu", + "level6": "Kambaric", + "level7": "East Kambaric" + }, + "bqy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "bqz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Manenguba", + "level11": "Central Manenguba" + }, + "bra": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi" + }, + "brb": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric" + }, + "brc": { + 
"level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Southwestern Dutch", + "level9": "Zeeuwic" + }, + "brd": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Newaric", + "level4": "Thangmi-Baram" + }, + "bre": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Brythonic", + "level7": "Southwestern Brythonic", + "level8": "Middle-Modern Southwestern Brythonic", + "level9": "Bretonic" + }, + "brf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic" + }, + "brg": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Bolivian Arawakan" + }, + "brh": { + "level0": "Dravidian", + "level1": "North Dravidian" + }, + "bri": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Bafawic-Bakweric", + "level11": "Bakweric" + }, + "brj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + 
"level6": "Epi-Efate", + "level7": "Epi", + "level8": "Bieria-Maii" + }, + "brk": { + "level0": "Nubian", + "level1": "Central Nubian" + }, + "brl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Northern Sotho" + }, + "brm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Zandic", + "level6": "Barambo-Pambia" + }, + "brn": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Western Isthmic Chibchan" + }, + "bro": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic" + }, + "brp": { + "level0": "Geelvink Bay", + "level1": "Barapasi-Sauri-Kofei" + }, + "brq": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan", + "level3": "Breri-Romkun" + }, + "brr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Southeast Guadalcanal" + }, + "brs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Common Kaili" + }, + "brt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "North Tivoid" + }, + "bru": { + "level0": "Austroasiatic", + "level1": 
"Katuic", + "level2": "West Katuic", + "level3": "Brou-So", + "level4": "Eastern Bru-Katang" + }, + "brv": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Brou-So", + "level4": "Western Bru-So" + }, + "brw": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "South-Western Dravidian" + }, + "brx": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Tiwa-Boro", + "level5": "Bodo-Mech-Kachari" + }, + "bry": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic", + "level3": "Burui-Gaikundi" + }, + "brz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Western Bel" + }, + "bsb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Bisaya-Lotud" + }, + "bsc": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Tenda", + "level3": "Bassari-Bedik-Bapen" + }, + "bsd": { + "level0": "Bookkeeping" + }, + "bse": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "South Ring" + }, + "bsf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Shiroro" + }, + "bsg": { + "level0": "Indo-European", + "level1": "Classical 
Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian" + }, + "bsh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Nuristani" + }, + "bsi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Manenguba" + }, + "bsj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic", + "level8": "Tula-Ma-Yebu", + "level9": "Nuclear Tulaic" + }, + "bsl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Basa", + "level7": "Kontagora-Gumna-Koromba", + "level8": "Gumna-Kontagora" + }, + "bsm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Central Yapen", + "level8": "Serui-Busami" + }, + "bsn": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Western Eastern Tucanoan", + "level3": "Barasano-Eduria-Macuna" + }, + "bso": { + "level0": "Bookkeeping" + }, + "bsp": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Northern Mel" + }, + "bsq": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Bassa-Klao", + "level5": "Bassaic" + 
}, + "bsr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Basa", + "level7": "Kontagora-Gumna-Koromba", + "level8": "Gumna-Kontagora" + }, + "bss": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Manenguba", + "level11": "Central Manenguba" + }, + "bst": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo" + }, + "bsu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "bsv": { + "level0": "Bookkeeping" + }, + "bsw": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Baiso-Jiiddu" + }, + "bsx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Yangkam-Tarok-Pe" + }, + "bsy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Bisaya-Lotud" + }, + "bsz": { + "level0": "Bookkeeping" + }, + "bta": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South 
Biu-Mandara", + "level4": "Bataic", + "level5": "Bata-Bwatiye" + }, + "btb": { + "level0": "Bookkeeping" + }, + "btc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa" + }, + "btd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Northern Batak" + }, + "bte": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "Ningic" + }, + "btf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Birgit-Mogum-Toram" + }, + "btg": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Beteic", + "level3": "Eastern Bete" + }, + "bth": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Bidayuh", + "level5": "Central-Western Bidayuh" + }, + "bti": { + "level0": "Geelvink Bay", + "level1": "Burate-Wate" + }, + "btj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Berau-Brunei Malay", + "level8": "Bruneic Malay", + "level9": "Brunei-Bacan Malay" + }, + "btl": { + "level0": "Bookkeeping" + }, + "btm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Central-Southern Batak", + "level5": "Southern 
Batak", + "level6": "Angkola-Mandailing" + }, + "btn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan", + "level6": "Kuyan", + "level7": "Datagnon-Santa Teresa-Semirara" + }, + "bto": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Inland Bikol" + }, + "btp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Kilivila-Misima", + "level8": "Kilivilic" + }, + "btq": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Menraq-Batek", + "level6": "Batekic" + }, + "btr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Maewo" + }, + "bts": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Central-Southern Batak" + }, + "btt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic", + "level7": "Bete-Obanliku" + }, + "btu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "North Tivoid" + }, + "btv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + 
"level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Indus Kohistanic", + "level9": "Outer Indus Kohistani", + "level10": "Bateri-Mankiyali" + }, + "btw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "South Bisayan", + "level6": "Butuan-Tausug" + }, + "btx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Northern Batak" + }, + "bty": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "East Rivers Seram" + }, + "btz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Northern Batak" + }, + "bub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Riverine Bua", + "level6": "Bua-Lua", + "level7": "Ba-Korom" + }, + "buc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic" + }, + "bud": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma A" + }, + "buf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Bushoong-Wongo-Lele" + }, + "bug": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Tamanic-Bugis", + "level5": "Bugis" + }, + "buh": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Paheng-Younuo" + }, + "bui": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Mokiba-Ngando" + }, + "buj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Basa", + "level7": "Kontagora-Gumna-Koromba" + }, + "buk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "North Huon Gulf linkage" + }, + "bul": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Eastern South Slavic", + "level6": "Macedo-Bulgarian" + }, + "bum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)" + }, + "bun": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Southern Mel", + 
"level3": "Bullom" + }, + "buo": { + "level0": "South Bougainville", + "level1": "Buinic", + "level2": "Buin" + }, + "bup": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan" + }, + "buq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Manep-Barem" + }, + "bus": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Busan", + "level5": "Boko-Busa" + }, + "but": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg" + }, + "buu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya", + "level14": "Bantu D33", + "level15": "Budu-Ndaka-Mbo" + }, + "buv": { + "level0": "Yuat", + "level1": "Miyak-Bun-Biwat", + "level2": "Bun-Mundukumo" + }, + "buw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Southern Okani" + }, + "bux": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Boghomic" + }, + "buy": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Southern Mel", + 
"level3": "Bullom", + "level4": "Northern Bullom" + }, + "buz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Bebe-Kemezung", + "level8": "Naki-Kemezung", + "level9": "Nakic" + }, + "bva": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "Barainic" + }, + "bvb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi" + }, + "bvc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita", + "level9": "North Malaitan" + }, + "bvd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita", + "level9": "North Malaitan" + }, + "bve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Berau-Brunei Malay" + }, + "bvf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3", + "level5": "Sokoroic", + "level6": "Miltuic" + }, + "bvg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow 
Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)" + }, + "bvh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Unclassified Boleic" + }, + "bvi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Sere-Bviri", + "level8": "Bai-Viri" + }, + "bvj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Ogonoid", + "level5": "West Ogonoid" + }, + "bvk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Bukat-Ukit-Beketan-Lugat-Lisum" + }, + "bvl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "West-Central South American Sign" + }, + "bvm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "South Ring" + }, + "bvn": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg" + }, + "bvo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": "Bolgo-Koke" + }, + "bvp": { + "level0": "Bookkeeping" + }, + "bvq": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Unclassified Membi-Mangbutu-Efe" + }, + "bvr": { + "level0": "Maningrida", + "level1": "Bureran" + }, + "bvs": { + "level0": 
"Bookkeeping" + }, + "bvt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut", + "level5": "Geser-Gorom-Bati" + }, + "bvu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Banjar-Bukit Malay" + }, + "bvw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Teraic", + "level5": "Eastern Tera" + }, + "bvx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha" + }, + "bvy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Warayan", + "level7": "Samar-Waray" + }, + "bvz": { + "level0": "Geelvink Bay" + }, + "bwa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone", + "level11": "Bwatooic", + "level12": "Haeke-Bwatoo" + }, + "bwb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Western Fijian" + }, + "bwc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi" + }, + "bwd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage", + "level9": "Bwaidoka-Iduna" + }, + "bwe": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Geba-Bwe" + }, + "bwf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage", + "level9": "Boselewa-Galeya" + }, + "bwg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Senaic" + }, + "bwh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid" + }, + "bwi": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Baniwa-Curripaco-Tariano", + "level5": "Baniwa-Curripaco" + }, + "bwj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Bwamu", + "level8": "Nuclear Bwamu" + }, + 
"bwk": { + "level0": "Mailuan", + "level1": "Bauwakic" + }, + "bwl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Mongala" + }, + "bwm": { + "level0": "Yuat", + "level1": "Miyak-Bun-Biwat", + "level2": "Bun-Mundukumo" + }, + "bwn": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Paheng-Younuo", + "level3": "Paheng" + }, + "bwo": { + "level0": "Ta-Ne-Omotic", + "level1": "Kefoid" + }, + "bwp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Dumut", + "level6": "Mandobo" + }, + "bwq": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Bobo" + }, + "bwr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Buraic" + }, + "bws": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Bombomic" + }, + "bwt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Bafawic-Bakweric", + "level11": "Bafawic" + }, + "bwu": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Buli-Koma" + }, + "bwv": { + "level0": "Bookkeeping" + }, + "bww": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Middle Bomokandian", + "level15": "Late Bomokandian", + "level16": "Pagabeteic" + }, + "bwx": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Bu-Nao-Bunu" + }, + "bwy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Bwamu", + "level8": "Nuclear Bwamu" + }, + "bwz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Lumbu-Bwisi" + }, + "bxa": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Makira" + }, + "bxb": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo" + }, + "bxc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Bengaic", + "level9": "Unclassified Bengaic" + }, + "bxd": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Northern Burmish", + "level5": "Maruic" + }, + "bxf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Label-Bilur" + }, + "bxg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain", + "level14": "Bobangi-Bangala-Lingala", + "level15": "Lingala-Bangala" + }, + "bxh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "bxi": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + 
"level2": "Central Karnic", + "level3": "Western Central Karnic", + "level4": "Pirlatapa-Dieric" + }, + "bxj": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Kanyara" + }, + "bxk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Northern Luyia" + }, + "bxl": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Jogo-Jeri", + "level6": "Jeri" + }, + "bxm": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Buriat" + }, + "bxn": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Kanyara", + "level4": "Thalanyji-Burduna" + }, + "bxo": { + "level0": "Pidgin", + "level1": "Hausa-based pidgin" + }, + "bxp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)", + "level9": "Ewondo-Bebele" + }, + "bxq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele", + "level9": "Kirfi-Bele", + "level10": "Ngamo-Bele", + "level11": "Bolanci-Bele" + }, + "bxr": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": 
"Khalkha-Buriat", + "level4": "Buriat" + }, + "bxs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields" + }, + "bxt": { + "level0": "Bookkeeping" + }, + "bxu": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Buriat" + }, + "bxv": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic", + "level6": "Morom-Jaya-Naba", + "level7": "Naba-Berakou" + }, + "bxw": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo", + "level5": "Duun-Seenku", + "level6": "Duun" + }, + "bxx": { + "level0": "Bookkeeping" + }, + "bxz": { + "level0": "Mailuan", + "level1": "Binaharic" + }, + "bya": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Northern Palawanic", + "level5": "Batak-Central Tagbanwa" + }, + "byb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid" + }, + "byc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Ubaghara-Kohumono" + }, + "byd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Benyadu-Bekati" + }, + "bye": { + "level0": "Sepik", + "level1": "Ram", + "level2": "Pouye-Karawa" + }, + "byf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + 
"level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic", + "level5": "Bete-Lufu" + }, + "byg": { + "level0": "Dajuic", + "level1": "Western Dajuic", + "level2": "Nyala Dajuic" + }, + "byh": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Chepangic" + }, + "byi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Bembe-Buyu" + }, + "byj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "byk": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Lakkia-Biao", + "level3": "Biaoic" + }, + "byl": { + "level0": "Bayono-Awbono" + }, + "bym": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric", + "level5": "Bidyaric" + }, + "byn": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "Agaw", + "level3": "Northern-Eastern-Western Agaw", + "level4": "Northeastern Agaw" + }, + "byo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Bi-Ka" + }, + "byp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic" + }, + "byq": { + "level0": "Austronesian", + "level1": "East Formosan", + "level2": "Northern East Formosan" + }, + "byr": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Baruya-Simbari" 
+ }, + "bys": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Northern Bikwin-Jen", + "level6": "Burak-Loo" + }, + "byt": { + "level0": "Saharan", + "level1": "Eastern Saharan" + }, + "byv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "East Bamileke" + }, + "byw": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar", + "level6": "Yakkha-Athpariyic", + "level7": "Athpariyic" + }, + "byx": { + "level0": "Baining" + }, + "byy": { + "level0": "Bookkeeping" + }, + "byz": { + "level0": "Ramu" + }, + "bza": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Mende-Bandi", + "level6": "Bandi-Zialo" + }, + "bzb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Western Saluan-Banggai" + }, + "bzc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic", + "level7": "Central-Eastern Malagasic" + }, + "bzd": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Western Isthmic Chibchan", + "level4": "Viceitic" + }, + "bze": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + 
"level4": "Bozo", + "level5": "Nuclear Bozo" + }, + "bzf": { + "level0": "Ndu" + }, + "bzg": { + "level0": "Austronesian", + "level1": "Western Plains Austronesian", + "level2": "Central Western Plains" + }, + "bzh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mapos-Mangga-Wagau" + }, + "bzi": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid", + "level7": "Bisu-Pyen-Laomian" + }, + "bzj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Western Caribbean Creole", + "level14": "Miskitoic Creole English", + "level15": "Belize-Miskito Creole English" + }, + "bzk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Western Caribbean Creole", + "level14": "Miskitoic Creole English", + "level15": "Belize-Miskito Creole English" + }, + "bzl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Tolitoli" + }, + "bzm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Mongala", + "level11": "Motemboic" + }, + "bzn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "West Piru Bay", + "level5": "Hoamoal", + "level6": "East Hoamoal" + }, + "bzp": { + "level0": "South Bird's Head Family", + "level1": "East South Bird's Head", + "level2": "Kemberanic" + }, + "bzq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "Central-Eastern South Halmahera" + }, + "bzr": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Eastern Maric" + }, + "bzs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "bzt": { + "level0": "Artificial Language" + }, + "bzv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Bebe-Kemezung" + }, + "bzw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Basa" + }, + "bzx": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Bozo" + }, + "bzy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + 
"level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic", + "level7": "Bete-Obanliku" + }, + "bzz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid A", + "level8": "Tiv-Evand" + }, + "caa": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Cholan", + "level5": "Chorti-Cholti" + }, + "cab": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Antillean Arawakan", + "level3": "Ineric", + "level4": "Island Carib-Garifuna" + }, + "cac": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Chujean" + }, + "cad": { + "level0": "Caddoan" + }, + "cae": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Cangin", + "level3": "Saafi-Noon-Lehar", + "level4": "Noon-Lehar" + }, + "caf": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central British Columbia Athabaskan", + "level4": "Carrieric", + "level5": "Dakelh" + }, + "cag": { + "level0": "Mataguayan", + "level1": "Mataguayo I" + }, + "cah": { + "level0": "Zaparoan", + "level1": "Iquito-Arabela", + "level2": "Cahuarano-Iquito" + }, + "cak": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean", + "level5": "Cakchiquel-Tzutujil", + "level6": "Kaqchikelic" + }, + "cal": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central 
Trukic", + "level11": "Satawalese-Carolinian", + "level12": "Macro-Carolinian" + }, + "cam": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Cem-Pac" + }, + "can": { + "level0": "Lower Sepik" + }, + "cao": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Bolivian Nawa" + }, + "cap": { + "level0": "Uru-Chipaya" + }, + "caq": { + "level0": "Austroasiatic", + "level1": "Nicobaric" + }, + "car": { + "level0": "Cariban", + "level1": "Guianan" + }, + "cat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance" + }, + "cav": { + "level0": "Pano-Tacanan", + "level1": "Tacanan" + }, + "caw": { + "level0": "Speech Register", + "level1": "Quechua-Puquina" + }, + "cax": { + "level0": "Chiquitano" + }, + "cay": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "cbb": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia" + }, + "cbc": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Pisamira-Yuruti", + "level5": "Pisamira-Carapana" + }, + "cbd": { + "level0": "Cariban", + "level1": "Guianan", + "level2": "Taranoan" + }, + "cbe": { + "level0": "Bookkeeping" + }, + "cbg": { + "level0": "Chibchan", + "level1": "Core 
Chibchan", + "level2": "Magdalenic", + "level3": "Northern Magdalenic" + }, + "cbh": { + "level0": "Bookkeeping" + }, + "cbi": { + "level0": "Barbacoan", + "level1": "Awa-Southern Barbacoan", + "level2": "Cayapa-Colorado" + }, + "cbj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede" + }, + "cbk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic", + "level13": "South Castilic", + "level14": "Ternate-Zamboanga-Cavite" + }, + "cbl": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Lai Chin", + "level6": "Falamic" + }, + "cbm": { + "level0": "Bookkeeping" + }, + "cbn": { + "level0": "Austroasiatic", + "level1": "Monic" + }, + "cbo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos", + "level10": "Chokobo-Lemoro-Sanga" + }, + "cbq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Kainji Lake", + "level5": "Upper Niger Kainji", + "level6": "Oleran" + }, + "cbr": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano" + }, + "cbs": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano 
Nawa", + "level4": "Headwaters Pano" + }, + "cbt": { + "level0": "Cahuapanan" + }, + "cbv": { + "level0": "Kakua-Nukak" + }, + "cbw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Warayan", + "level7": "Samar-Waray" + }, + "cby": { + "level0": "Unclassifiable" + }, + "cca": { + "level0": "Bookkeeping" + }, + "ccc": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Chamicuro-Morike" + }, + "ccd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Brazil-Portugal Portuguese" + }, + "cce": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Tsonga-Copi", + "level12": "Chopi (S.60)" + }, + "ccg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Dakoid", + "level6": "Taram-Dirim-Nnakenyare" + }, + "cch": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Piti-Atsam" + }, + "ccj": { + "level0": 
"Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Nyun", + "level4": "Buy" + }, + "ccl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Sabaki-Swahili", + "level11": "Swahili (G.40)", + "level12": "Mombasa-Lamu-Inland Swahili" + }, + "ccm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay" + }, + "cco": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Chinantec Group V" + }, + "ccp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Southeastern Bengali" + }, + "ccq": { + "level0": "Bookkeeping" + }, + "ccr": { + "level0": "Misumalpan", + "level1": "Sumalpan", + "level2": "Matagalpan" + }, + "ccx": { + "level0": "Bookkeeping" + }, + "ccy": { + "level0": "Bookkeeping" + }, + "cda": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Eastern Tibetic" + }, + "cde": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Teluguic" + }, + "cdf": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": 
"Central Old Kuki" + }, + "cdg": { + "level0": "Bookkeeping" + }, + "cdh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic" + }, + "cdi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "cdj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic", + "level9": "Bhadrawahi-Bhalesi-Curahi" + }, + "cdm": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Chepangic" + }, + "cdn": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Pithauragarh", + "level5": "Darma-Byangsi-Chaudangsi" + }, + "cdo": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Min", + "level3": "Coastal Min" + }, + "cdr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Kamuku-Hungwarya", + "level7": "Kamuku" + }, + "cds": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "cdy": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS", + "level4": "Maonan-Mak-Sui", + "level5": "Maonan-Chadong" + }, 
+ "cdz": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric" + }, + "cea": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Tsamosan", + "level3": "Coastal Tsamosan" + }, + "ceb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan" + }, + "ceg": { + "level0": "Zamucoan" + }, + "cek": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Khomic" + }, + "cen": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Izeric" + }, + "ces": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Czech-Slovak" + }, + "cey": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "cfa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic" + }, + "cfd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Beromic", + "level5": "Iten-Cara-Berom", + "level6": "Cara-Berom" + }, + "cfg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Wurbo-Wannu", + "level7": "Wurbo" + }, + "cfm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central 
Kuki-Chin", + "level5": "Lai Chin", + "level6": "Falamic" + }, + "cga": { + "level0": "Yuat" + }, + "cgc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "North Manobo" + }, + "cgg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara", + "level12": "Nkore-Kiga-Nyoro-Tooro", + "level13": "Nkore-Kiga" + }, + "cgk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic" + }, + "cha": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian" + }, + "chb": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic", + "level4": "Chibcha-Duit" + }, + "chc": { + "level0": "Siouan", + "level1": "Catawban" + }, + "chd": { + "level0": "Tequistlatecan" + }, + "che": { + "level0": "Nakh-Daghestanian", + "level1": "Nakh", + "level2": "Chechen-Ingush" + }, + "chf": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Cholan", + "level5": "Chol-Chontal" + }, + "chg": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan" + }, + "chh": { + "level0": "Chinookan", + "level1": "Lower Chinookan" + }, + "chj": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group I" + }, + "chk": { 
+ "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Eastern Trukic", + "level12": "Mortlockese-Trukese" + }, + "chl": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan", + "level3": "Cupan", + "level4": "Cahuilla-Cupeno" + }, + "chn": { + "level0": "Chinookan", + "level1": "Lower Chinookan" + }, + "cho": { + "level0": "Muskogean", + "level1": "Western Muskogean" + }, + "chp": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan" + }, + "chq": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Chinantec Group V" + }, + "chr": { + "level0": "Iroquoian" + }, + "chs": { + "level0": "Bookkeeping" + }, + "cht": { + "level0": "Hibito-Cholon" + }, + "chu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Eastern South Slavic" + }, + "chv": { + "level0": "Turkic", + "level1": "Bolgar" + }, + "chw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic", + "level9": "Chuwabo-Maindo" + }, + "chx": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Gurungic", + "level6": "Thakali-Chantyal" + }, + "chy": { + "level0": "Algic", + "level1": 
"Algonquian-Blackfoot", + "level2": "Algonquian" + }, + "chz": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Southeastern Chinantec", + "level6": "Chinantec Group III" + }, + "cia": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Butonic" + }, + "cib": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "cic": { + "level0": "Muskogean", + "level1": "Western Muskogean" + }, + "cie": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Dghwedeic", + "level7": "Gudufic" + }, + "cih": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic", + "level9": "Bhadrawahi-Bhalesi-Curahi", + "level10": "Bhadarwahic", + "level11": "Chinali-Lahul Lohar" + }, + "cik": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Kinnauric" + }, + "cim": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Bairisch", + "level10": "Global South Bavarian" + }, 
+ "cin": { + "level0": "Tupian", + "level1": "Monde", + "level2": "Gavianic", + "level3": "Nuclear Gavianic" + }, + "cip": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Manguean" + }, + "cir": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "cit": { + "level0": "Bookkeeping" + }, + "ciw": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Nuclear Ojibwe", + "level7": "Central-Eastern-Southwestern Ojibwa" + }, + "ciy": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku", + "level3": "Cumana" + }, + "cja": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Cham" + }, + "cje": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Chru-Northern Cham", + "level6": "Chruic" + }, + "cjh": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Tsamosan", + "level3": "Inland Tsamosan" + }, + "cji": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic" + }, + "cjk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + 
"level11": "Chokwe-Lwena" + }, + "cjm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Cham" + }, + "cjn": { + "level0": "Sepik", + "level1": "Iwam-Wogamus", + "level2": "Wogamusin-Chenapian" + }, + "cjo": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Asheninka" + }, + "cjp": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Western Isthmic Chibchan", + "level4": "Viceitic" + }, + "cjr": { + "level0": "Bookkeeping" + }, + "cjs": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Sayan-Yenisei Turkic", + "level4": "Yenisey Turkic" + }, + "cjv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Chuave-Nomane" + }, + "cjy": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Northern Chinese" + }, + "cka": { + "level0": "Bookkeeping" + }, + "ckb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Laki-Kurdish", + "level8": "Kurdish" + }, + "ckc": { + "level0": "Bookkeeping" + }, + "ckd": { + "level0": "Bookkeeping" + }, + "cke": { + "level0": "Bookkeeping" + }, + "ckf": { + "level0": "Bookkeeping" + }, + "ckh": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": "Luish" + }, + "cki": { + "level0": "Bookkeeping" + }, + "ckj": { + "level0": "Bookkeeping" + }, + "ckk": { + "level0": "Bookkeeping" + }, + 
"ckl": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Buraic" + }, + "ckn": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Choic", + "level6": "Daai-Nghmoye-Muun-Kaang" + }, + "cko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Northern Bia" + }, + "ckq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Mubic" + }, + "ckr": { + "level0": "Baining" + }, + "cks": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French" + }, + "ckt": { + "level0": "Chukotko-Kamchatkan", + "level1": "Chukotian", + "level2": "R-Koryakic" + }, + "cku": { + "level0": "Muskogean", + "level1": "Alabaman-Koasati" + }, + "ckv": { + "level0": "Austronesian", + "level1": "East Formosan", + "level2": "Northern East Formosan" + }, + "ckw": { + "level0": "Bookkeeping" + }, + "ckx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid B" + }, + "cky": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + 
"level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Kofyar-Mushere-Chip" + }, + "ckz": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean", + "level5": "Cakchiquel-Tzutujil", + "level6": "Kaqchikelic" + }, + "cla": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Ronic" + }, + "clc": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central British Columbia Athabaskan" + }, + "cld": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic" + }, + "cle": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Southeastern Chinantec", + "level6": "Chinantec Group IV" + }, + "clh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Indus Kohistanic", + "level9": "Outer Indus Kohistani" + }, + "cli": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Chakalic", + 
"level11": "Chakali-Tamprusi-Vagala", + "level12": "Chakali-Tamprusi" + }, + "clj": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "clk": { + "level0": "Sino-Tibetan", + "level1": "Digarish" + }, + "cll": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Tem-Chala", + "level10": "Bago-Delo-Cala", + "level11": "Delo-Cala" + }, + "clm": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "Straits Salish" + }, + "clo": { + "level0": "Tequistlatecan", + "level1": "Eastern Tequistlatecan" + }, + "clt": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic", + "level5": "Nuclear Maraic" + }, + "clu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan", + "level6": "Kuyan", + "level7": "Datagnon-Santa Teresa-Semirara" + }, + "clw": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Northern Altai-Lower Chulym" + }, + "cly": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino", + "level6": "Coastal Chatino", + "level7": "Eastern Chatino" + }, + "cma": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Koho-Maa" + }, + "cme": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Kirma-Tyurama" + }, + 
"cmi": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "San Juan", + "level3": "Upper San Juan" + }, + "cml": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Tamanic-Bugis", + "level5": "Bugis" + }, + "cmn": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Northern Chinese", + "level5": "Mandarinic" + }, + "cmo": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Mnong", + "level6": "Southern-Central Mnong" + }, + "cmr": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Khomic" + }, + "cms": { + "level0": "Indo-European", + "level1": "Unclassified Indo-European" + }, + "cmt": { + "level0": "Speech Register", + "level1": "Zulu-Sotho" + }, + "cna": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Western Archaic Tibetan", + "level5": "Kenhatic" + }, + "cnb": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Choic" + }, + "cnc": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid", + "level7": "Phunoi-Coong" + }, + "cng": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Qiang", + "level5": "Upstream-Nu Qiang" + }, + "cnh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + 
"level5": "Lai Chin" + }, + "cni": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha" + }, + "cnk": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Khomic" + }, + "cnl": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Southeastern Chinantec", + "level6": "Chinantec Group IV" + }, + "cnm": { + "level0": "Bookkeeping" + }, + "cno": { + "level0": "Bookkeeping" + }, + "cnp": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Yue-Pinghua", + "level5": "Pinghua" + }, + "cnq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Nsari-Nooni-Ncane", + "level8": "Nooni-Ncane", + "level9": "Ncane-Cung", + "level10": "Cung-Mbuk" + }, + "cns": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat", + "level4": "Central-Yaosakor Asmat" + }, + "cnt": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group II" + }, + "cnu": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic" + }, + "cnw": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic" + 
}, + "coa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Betawic" + }, + "cob": { + "level0": "Mayan", + "level1": "Huastecan Mayan" + }, + "coc": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "Delta-Californian Yuman" + }, + "cod": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup III", + "level7": "Omagua-Kokama" + }, + "coe": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Koreguaje-Tama" + }, + "cof": { + "level0": "Barbacoan", + "level1": "Awa-Southern Barbacoan", + "level2": "Cayapa-Colorado" + }, + "cog": { + "level0": "Austroasiatic", + "level1": "Pearic", + "level2": "Western Pearic" + }, + "coh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Mijikenda", + "level12": "Northern Mijikenda" + }, + "coj": { + "level0": "Cochimi-Yuman", + "level1": "Cochimic" + }, + "cok": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Corachol", + "level4": "Coran" + }, + "col": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Southern Interior Salish" + }, + "com": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Central Numic" + }, + "coo": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "North Georgia Central Salish" + }, + "cop": { + "level0": 
"Afro-Asiatic", + "level1": "Egyptian" + }, + "coq": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "Oregon Athabaskan", + "level5": "Rogue River" + }, + "cor": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Brythonic", + "level7": "Southwestern Brythonic", + "level8": "Middle-Modern Southwestern Brythonic" + }, + "cos": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Southern Romance", + "level8": "Sardo-Corsican", + "level9": "Corsic" + }, + "cot": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak" + }, + "cou": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Tenda" + }, + "cov": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Mulam-Kam", + "level4": "Kamic", + "level5": "Northern Kam" + }, + "cow": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Tsamosan", + "level3": "Inland Tsamosan" + }, + "cox": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Matsi-Nan" + }, + "coy": { + "level0": "Bookkeeping" + }, + "coz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan" + }, + "cpa": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": 
"Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group II" + }, + "cpb": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Ashe-Asha Norte" + }, + "cpc": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Ashe-Asha Norte" + }, + "cpg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek", + "level6": "Koineic Greek", + "level7": "Modern Koineic Greek", + "level8": "Pontic-Cappadocian Greek" + }, + "cpi": { + "level0": "Pidgin", + "level1": "English-based pidgin" + }, + "cpn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "South Guang", + "level8": "Hill South Guang", + "level9": "Gua-Cherepon" + }, + "cpo": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo", + "level5": "Duun-Seenku", + "level6": "Duun" + }, + "cps": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan", + "level7": "Capiznon-Ilonggo-Kawayan" + }, + "cpu": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": 
"Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Ashe-Asha Norte" + }, + "cpx": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Min", + "level3": "Coastal Min" + }, + "cpy": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Asheninka" + }, + "cqd": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong", + "level8": "Far Western Miao" + }, + "cqu": { + "level0": "Bookkeeping" + }, + "cra": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo" + }, + "crb": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Antillean Arawakan", + "level3": "Ineric", + "level4": "Island Carib-Garifuna" + }, + "crc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym" + }, + "crd": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Southern Interior Salish" + }, + "crf": { + "level0": "Chocoan", + "level1": "Unclassified Chocoan" + }, + "crg": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree", + "level5": "Plains Creeic" + }, + "crh": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak", + "level6": "Crimean Tatar-Urum", + "level7": "Crimeaic" + }, + "cri": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": 
"Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Lower Guinea Portuguese", + "level15": "Bantu Layer Lower Guinea Portuguese", + "level16": "Saotomic" + }, + "crj": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree" + }, + "crk": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree", + "level5": "Plains Creeic" + }, + "crl": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree" + }, + "crm": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree" + }, + "crn": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Corachol", + "level4": "Coran" + }, + "cro": { + "level0": "Siouan", + "level1": "Missouri River Siouan" + }, + "crq": { + "level0": "Mataguayan", + "level1": "Mataguayo II", + "level2": "Chorote" + }, + "crr": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian" + }, + "crs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": 
"Macro-French", + "level15": "Isle-de-France Creole" + }, + "crt": { + "level0": "Mataguayan", + "level1": "Mataguayo II", + "level2": "Chorote" + }, + "cru": { + "level0": "Bookkeeping" + }, + "crv": { + "level0": "Austroasiatic", + "level1": "Nicobaric", + "level2": "Nuclear Nicobaric", + "level3": "Chowra-Teressa" + }, + "crw": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau" + }, + "crx": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central British Columbia Athabaskan", + "level4": "Carrieric", + "level5": "Dakelh" + }, + "cry": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Hyamic" + }, + "crz": { + "level0": "Chumashan", + "level1": "Southern Chumashan" + }, + "csa": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group I", + "level7": "Tlacoatzintepec-Chiltepec" + }, + "csb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Lechitic" + }, + "csc": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Spanish Sign", + "level3": "Nuclear Spanish Sign" + }, + "csd": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Old Chiangmai-Bangkok Sign" + }, + "cse": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign", + "level4": "Nuclear Central European Sign" + }, + "csf": { + "level0": "Sign Language", + "level1": "L1 Sign 
Language" + }, + "csg": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "csh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Ashoic" + }, + "csi": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Western Miwokan" + }, + "csj": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "csk": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola" + }, + "csl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "CSLic" + }, + "csm": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Eastern Miwokan", + "level3": "Sierra Miwokan" + }, + "csn": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "West-Central South American Sign" + }, + "cso": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group I" + }, + "csp": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Yue-Pinghua", + "level5": "Pinghua" + }, + "csq": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Yugoslav Sign" + }, + "csr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "css": { + "level0": "Miwok-Costanoan", + "level1": "Costanoan", + "level2": "Southern Costanoan" + }, + "cst": { + "level0": "Miwok-Costanoan", + "level1": "Costanoan" + }, + "csv": { + "level0": "Sino-Tibetan", + "level1": 
"Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Ashoic" + }, + "csw": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree" + }, + "csx": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "csy": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic" + }, + "csz": { + "level0": "Coosan" + }, + "cta": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino", + "level6": "Coastal Chatino" + }, + "ctc": { + "level0": "Bookkeeping" + }, + "ctd": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Thadoic" + }, + "cte": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Southeastern Chinantec", + "level6": "Chinantec Group IV" + }, + "ctg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Southeastern Bengali" + }, + "cth": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "cti": { + "level0": "Bookkeeping" + }, + "ctl": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", 
+ "level5": "Northwestern Chinantec", + "level6": "Chinantec Group I", + "level7": "Tlacoatzintepec-Chiltepec" + }, + "ctn": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar" + }, + "cto": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "Atrato" + }, + "ctp": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino", + "level6": "Coastal Chatino", + "level7": "Eastern Chatino" + }, + "cts": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol" + }, + "ctt": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "ctu": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Cholan", + "level5": "Chol-Chontal" + }, + "cty": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid" + }, + "ctz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino", + "level6": "Coastal Chatino", + "level7": "Eastern Chatino" + }, + "cua": { + "level0": "Austroasiatic", + "level1": "Bahnaric" + }, + "cub": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Western Eastern Tucanoan", + "level3": "Cubeo-Desano" + }, + "cuc": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": 
"Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group I" + }, + "cuh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Eastern Kirinyaga" + }, + "cui": { + "level0": "Guahiboan", + "level1": "Nuclear Guahiboan", + "level2": "Central Guahibo" + }, + "cuj": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Purus", + "level4": "Yineic", + "level5": "Western Yineic" + }, + "cuk": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Eastern Isthmic Chibchan", + "level4": "Kuna" + }, + "cul": { + "level0": "Arawan", + "level1": "Madi-Madiha", + "level2": "Madiha" + }, + "cum": { + "level0": "Bookkeeping" + }, + "cun": { + "level0": "Bookkeeping" + }, + "cuo": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku", + "level3": "Cumana" + }, + "cup": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan", + "level3": "Cupan", + "level4": "Cahuilla-Cupeno" + }, + "cuq": { + "level0": "Tai-Kadai", + "level1": "Hlaic", + "level2": "Nuclear Hlaic" + }, + "cur": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar" + }, + "cut": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Cuicatec" + }, + "cuu": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": 
"Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Southern Shanic", + "level11": "Wuding-Yuanyang Tai" + }, + "cuv": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Matakam", + "level5": "Mefele-Cuvok" + }, + "cux": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Cuicatec" + }, + "cvg": { + "level0": "Sino-Tibetan", + "level1": "Kho-Bwa", + "level2": "Western Kho-Bwa", + "level3": "Chug-Lish" + }, + "cvn": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Southeastern Chinantec", + "level6": "Chinantec Group III" + }, + "cwa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "North Mara", + "level12": "Unclassified North Mara" + }, + "cwb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic", + "level9": "Chuwabo-Maindo" + }, + "cwd": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree" + }, + "cwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", 
+ "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu", + "level11": "Central East Ruvu" + }, + "cwg": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian" + }, + "cwt": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola", + "level7": "PF-Jola", + "level8": "Kwatay-Karon-Mlomp" + }, + "cxh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zeemic", + "level8": "Nuclear Zeemic" + }, + "cya": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino", + "level6": "Coastal Chatino", + "level7": "Eastern Chatino" + }, + "cym": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Brythonic", + "level7": "Old-Modern Welsh" + }, + "cyo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan", + "level6": "Kuyan" + }, + "czh": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Wu-Hui Chinese" + }, + "czn": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino" + }, + "czo": { + "level0": "Sino-Tibetan", + "level1": 
"Sinitic", + "level2": "Min", + "level3": "Inland Min" + }, + "czt": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic" + }, + "daa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Dangla" + }, + "dac": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mumeng", + "level10": "Dambi-Kumaru" + }, + "dad": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Western Bel" + }, + "dae": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Diic" + }, + "daf": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan", + "level4": "Guro-Dan", + "level5": "Dan-Toura" + }, + "dag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Southeast 
Western Oti-Volta", + "level13": "Mampruli-Dagbani" + }, + "dah": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup" + }, + "dai": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day" + }, + "daj": { + "level0": "Dajuic", + "level1": "Western Dajuic", + "level2": "Nyala Dajuic" + }, + "dak": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dakotan", + "level3": "Sioux" + }, + "dal": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic" + }, + "dam": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Northwestern Kainji", + "level6": "Clela-Damakawa" + }, + "dan": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "South Scandinavian" + }, + "dao": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Choic", + "level6": "Daai-Nghmoye-Muun-Kaang" + }, + "dap": { + "level0": "Bookkeeping" + }, + "daq": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Southeast Gondi", + "level5": "South Bastar Gondi-Koya" + }, + "dar": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Dargwic" + }, + "das": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn" + }, + "dat": { + "level0": "Bookkeeping" + }, + "dau": { + "level0": "Dajuic", + "level1": "Western Dajuic" + }, + "dav": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Taita-Sagalla" + }, + "daw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan" + }, + "dax": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu" + }, + "daz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Paniai Lakes", + "level2": "Auye-Dao" + }, + "dbb": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Gera-Geruma-Kubi-Deno", + "level9": "Kubi-Deno" + }, + "dbd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic", + "level8": "Tula-Ma-Yebu", + "level9": "Nuclear Tulaic" + }, + "dbe": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor" + }, + "dbf": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "Central Tariku" + }, + "dbg": { + "level0": "Dogon", + "level1": "North Plateau Dogon" + }, + "dbi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau", + "level5": "Nuclear Northern Benue-Congo Plateau" + }, + "dbj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Northeast Sabahan" + }, + "dbl": { + "level0": "Pama-Nyungan" + }, + "dbm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", 
+ "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jarawaic" + }, + "dbn": { + "level0": "Inanwatan" + }, + "dbo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jaku-Gubi" + }, + "dbp": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.1" + }, + "dbq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic", + "level5": "Daba-Mazagway-Kola" + }, + "dbr": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Dabarre-Tunni" + }, + "dbt": { + "level0": "Dogon", + "level1": "Nangan Dogon" + }, + "dbu": { + "level0": "Dogon", + "level1": "North Plateau Dogon", + "level2": "Yanda-Bondum-Tebul" + }, + "dbv": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "dbw": { + "level0": "Dogon", + "level1": "Nangan Dogon" + }, + "dcr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Southwestern Dutch", + "level9": "Zeeuwic" + }, + "ddd": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo", + "level5": "Lopit-Dongotono", + "level6": "Dongotonic" + }, + "dde": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + 
"level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Nuclear Northern Kikongo" + }, + "ddg": { + "level0": "Timor-Alor-Pantar", + "level1": "East Timor", + "level2": "Fataluku-Oirata" + }, + "ddi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage" + }, + "ddj": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Western Ngumpin" + }, + "ddn": { + "level0": "Songhay", + "level1": "Eastern Songhay", + "level2": "Zarma-Kaado-Dendi" + }, + "ddo": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Tsezic", + "level4": "West Tsezic" + }, + "ddr": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Eastern Victoria", + "level4": "Dhudhuroa-Pallanganmiddang" + }, + "dds": { + "level0": "Dogon", + "level1": "Escarpment Dogon" + }, + "ddw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "North Babaric" + }, + "dec": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Buram Hill Chain" + }, + "ded": { + "level0": "Nuclear Trans New Guinea", + "level1": 
"Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Pindiu" + }, + "dee": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Bassa-Klao", + "level5": "Bassaic" + }, + "def": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Luric-Dezfulic" + }, + "deg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Delta Edoid", + "level6": "Degema-Engenni" + }, + "deh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic" + }, + "dei": { + "level0": "Geelvink Bay" + }, + "dek": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "dep": { + "level0": "Pidgin", + "level1": "Delaware-based pidgin" + }, + "deq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic" + }, + "der": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo" + }, + "des": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Western Eastern Tucanoan", + "level3": "Cubeo-Desano", + "level4": "Yupua-Siriano-Desano", + "level5": "Siriano-Desano" + }, + "deu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest 
Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Upper Franconian", + "level10": "Global German" + }, + "dev": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna", + "level4": "Bwana-Moam-Tapen" + }, + "dez": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Nkutsu-Lokenye" + }, + "dga": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "Central-South Dagaric" + }, + "dgb": { + "level0": "Dogon", + "level1": "West Dogon", + "level2": "Penangic" + }, + "dgc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Northeastern Luzon", + "level4": "Nuclear Northeastern Luzon" + }, + "dgd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": 
"Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "Central-South Dagaric", + "level16": "South Dagaric" + }, + "dge": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup", + "level5": "Degenanic" + }, + "dgg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage", + "level10": "Are-Doga" + }, + "dgh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Dghwedeic" + }, + "dgi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "North-West Dagaric" + }, + "dgk": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Sido" + }, + "dgn": { + "level0": "Yangmanic" + }, + "dgo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + 
"level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Kangri-Dogri" + }, + "dgr": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Slaveyic" + }, + "dgs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Dogoso-Khe" + }, + "dgu": { + "level0": "Bookkeeping" + }, + "dgx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Coastal Binanderean", + "level6": "Baruga-Doghoro" + }, + "dgz": { + "level0": "Dagan", + "level1": "Central Dagan", + "level2": "Southwest Dagan" + }, + "dha": { + "level0": "Bookkeeping" + }, + "dhd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Eastern Rajasthani" + }, + "dhg": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu" + }, + "dhi": { + "level0": "Sino-Tibetan", + "level1": "Dhimal-Lhokpu-Toto" + }, + "dhl": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Kanyara", + "level4": "Thalanyji-Burduna" + }, + "dhm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": 
"Herero (R.30)" + }, + "dhn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Khandesic" + }, + "dho": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "dhr": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Mantharta", + "level4": "Wariyangga-Dhargari" + }, + "dhs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Kamba-Dhaisu" + }, + "dhu": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Yuin", + "level5": "Northern Costal Yuin" + }, + "dhv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Loyalty Islands" + }, + "dhw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Unclassified Bihari", + "level10": "Kuswaric" + }, + "dia": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Galu-Alu" + 
}, + "dib": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Dinka" + }, + "dic": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Neyo-Dida", + "level3": "Dida", + "level4": "Guebie-Lakota Dida" + }, + "did": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southwest Surmic", + "level3": "Didinga-Murle", + "level4": "Didinga-Longarim" + }, + "dif": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Western Central Karnic", + "level4": "Pirlatapa-Dieric", + "level5": "Dieric" + }, + "dig": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Mijikenda", + "level12": "Southern Mijikenda" + }, + "dih": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "Delta-Californian Yuman", + "level4": "Diegueno" + }, + "dii": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Bafia (A.50)", + "level8": "Nuclear Bafia (A.50)" + }, + "dij": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "North Babaric" + }, + "dik": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Dinka" + }, + "dil": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Western Kordofan Nubian" + }, + "dim": { + "level0": "South Omotic" + }, + "dio": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid", + "level6": "Dibo-Kupa", + "level7": "Abawa" + }, + "dip": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Dinka" + }, + "diq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Zaza" + }, + "dir": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Dakoid", + "level6": "Taram-Dirim-Nnakenyare", + "level7": "Dirim-Nnakenyare" + }, + "dis": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Dimasa-Kokborok" + }, + "diu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Kwangali-Diriku" + }, + "div": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Dhivehi-Sinhala" + }, + "diw": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Dinka" + }, + "dix": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula" + }, + "diy": { + "level0": "Nuclear 
Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat", + "level4": "Citak Asmat" + }, + "diz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic" + }, + "djb": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Western-Inland Yolngu" + }, + "djc": { + "level0": "Dajuic", + "level1": "Western Dajuic" + }, + "djd": { + "level0": "Mirndi", + "level1": "Yirram" + }, + "dje": { + "level0": "Songhay", + "level1": "Eastern Songhay", + "level2": "Zarma-Kaado-Dendi" + }, + "djf": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic", + "level2": "Yalandyic" + }, + "dji": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Western-Inland Yolngu" + }, + "djj": { + "level0": "Maningrida", + "level1": "Nakkara-Ndjebbana" + }, + "djk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Surinamese Creole English", + "level13": "Eastern Maroons", + "level14": "Ndyuka" + }, + "djm": { + "level0": "Dogon", + "level1": "Plains Dogon" + }, + "djn": { + "level0": "Gunwinyguan", + "level1": "Western Gunwinyguan" + }, + "djo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Southern Land Dayak" + }, + "djr": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern 
Yolngu", + "level4": "Dhuwal-Dhuwala", + "level5": "Eastern Dhuwal-Dhuwala" + }, + "dju": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Nuclear Central Sepik Hill", + "level4": "Kapriman-Watakataui" + }, + "djw": { + "level0": "Nyulnyulan", + "level1": "Western Nyulnyulan", + "level2": "Bardic" + }, + "dka": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Dakpa-Dzala" + }, + "dkg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Bijimic-Sur-Shall", + "level6": "Kwangic", + "level7": "Vaghat" + }, + "dkk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Pitu Ulunna Salu" + }, + "dkl": { + "level0": "Bookkeeping" + }, + "dkr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic" + }, + "dks": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Dinka" + }, + "dkx": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic", + "level5": "Daba-Mazagway-Kola" + }, + "dlg": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Sakha-Dolgan" + }, + "dlk": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "Tigre-Dahalik" + }, + "dlm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": 
"Italo-Dalmatian", + "level9": "Dalmatian Romance" + }, + "dln": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic" + }, + "dma": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Njebi (B.50)", + "level20": "Ndjavi B" + }, + "dmb": { + "level0": "Dogon", + "level1": "West Dogon", + "level2": "Penangic" + }, + "dmc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert" + }, + "dme": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Meri", + "level7": "Dugwor-Merey" + }, + "dmf": { + "level0": "Speech Register", + "level1": "Atlantic-Congo Speech Register" + }, + "dmg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic", + "level7": "Upper Kinabatangan-Lobu" + }, + "dmk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + 
"level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone" + }, + "dml": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan" + }, + "dmm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Dama-Galke" + }, + "dmo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Bebe-Kemezung", + "level8": "Naki-Kemezung" + }, + "dmr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku" + }, + "dms": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Southern Tomini" + }, + "dmu": { + "level0": "Pauwasi", + "level1": "Western Pauwasi" + }, + "dmv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Rungus-Mangkaak-Labuk", + "level8": "Dumpas-Sukang" + }, + "dmw": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Eastern Ngumpin" + }, + "dmx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Unclassified Shona (S. 
10)" + }, + "dmy": { + "level0": "Sentanic" + }, + "dna": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central Dani", + "level3": "Grand Valley Dani", + "level4": "Walakic" + }, + "dnd": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic", + "level3": "Simog-Daonda" + }, + "dne": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matengic", + "level10": "Ndendeule-Ngindo" + }, + "dng": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Northern Chinese", + "level5": "Mandarinic", + "level6": "Zhongyuan" + }, + "dni": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central Dani", + "level3": "Grand Valley Dani", + "level4": "Southeast Grand Valley Dani" + }, + "dnk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "West Rote", + "level5": "Dengka-Meto" + }, + "dnn": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo", + "level5": "Duun-Seenku", + "level6": "Duun" + }, + "dno": { + "level0": "Central Sudanic", + "level1": "Lenduic", + "level2": "Bale" + }, + "dnr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Peka", + "level4": "Urigina-Danaru" + }, + "dnt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central Dani", + "level3": "Grand Valley Dani" + }, + "dnu": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic" + }, + "dnw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central 
Dani" + }, + "dny": { + "level0": "Arawan", + "level1": "Madi-Madiha", + "level2": "Madiha" + }, + "doa": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Nuclear Simbu", + "level4": "Kuman-Dom-Gunaa" + }, + "dob": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage" + }, + "doc": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Mulam-Kam", + "level4": "Kamic", + "level5": "Northern Kam" + }, + "doe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu" + }, + "dof": { + "level0": "Mailuan" + }, + "doh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Dakoid", + "level6": "Tiba-Dong" + }, + "dok": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Northern Tomini" + }, + "dol": { + "level0": "Doso-Turumsa" + }, + "don": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage", + "level9": "Nuclear West Central Papuan linkage" + }, + "doo": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mbaic", + "level6": "Ndunga-Mba-Dongo" + }, + "dop": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Kabiyeic" + }, + "doq": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "dor": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Southern Malaita" + }, + "dos": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Kaansa-Dogose", + "level7": "Dogose-Khisa" + }, + "dot": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi" + }, + "dov": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe", + "level9": "Central Eastern Botatwe" + }, + "dow": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Northern Samba-Duru" + }, + "dox": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + 
"level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Konsoid", + "level8": "Gidole-Bussa" + }, + "doy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Gonja-Dompo" + }, + "doz": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo" + }, + "dpp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic" + }, + "drb": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Eastern Kordofan Nubian" + }, + "drc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance" + }, + "drd": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Pithauragarh", + "level5": "Darma-Byangsi-Chaudangsi", + "level6": "Darma-Byangsi", + "level7": "Zhangzhungic" + }, + "dre": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Dolpo-Tichurong" + }, + "drg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Rungus-Mangkaak-Labuk" + }, + "dri": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Northwestern Kainji", + "level6": "Clela-Damakawa" + }, + "drl": { + "level0": "Pama-Nyungan", + "level1": "Yarli-Baagandji" + }, + "drn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar" + }, + "dro": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Melanau" + }, + "drq": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang" + }, + "drr": { + "level0": "Bookkeeping" + }, + "drs": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Sidaama-Gedeo" + }, + "dru": { + "level0": "Austronesian" + }, + "dry": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Unclassified Bihari", + "level10": "Kuswaric" + }, + "dsb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Sorbian" + }, + "dse": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Dutch-Belgian Sign" + }, + "dsh": { + "level0": "Afro-Asiatic", + "level1": 
"Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Western Omo-Tana" + }, + "dsi": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Koulfaic" + }, + "dsk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zeemic" + }, + "dsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "West Scandinavian Sign", + "level4": "Danish Sign" + }, + "dsn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Biakic" + }, + "dso": { + "level0": "Bookkeeping" + }, + "dsq": { + "level0": "Songhay", + "level1": "Northwest Songhay", + "level2": "Northern Songhay" + }, + "dsz": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "dta": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic" + }, + "dtb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Rungus-Mangkaak-Labuk", + "level8": "Dumpas-Sukang" + }, + "dtd": { + "level0": "Wakashan", + "level1": "Southern Wakashan", + "level2": "Makah-Nitinat" + }, + "dti": { + "level0": "Dogon", + "level1": "North Plateau Dogon", + "level2": "Yanda-Bondum-Tebul", + "level3": "Yanda-Ana" + }, + "dtk": { + "level0": "Dogon", + "level1": "Plains Dogon", + "level2": "Western Plains Dogon" 
+ }, + "dtm": { + "level0": "Dogon", + "level1": "Plains Dogon", + "level2": "Western Plains Dogon" + }, + "dtn": { + "level0": "Gumuz" + }, + "dto": { + "level0": "Dogon", + "level1": "Escarpment Dogon" + }, + "dtp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Kadazan-Sugut-Minokok" + }, + "dtr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Bisaya-Lotud" + }, + "dts": { + "level0": "Dogon", + "level1": "Escarpment Dogon" + }, + "dtt": { + "level0": "Dogon", + "level1": "Plains Dogon" + }, + "dtu": { + "level0": "Dogon", + "level1": "North Plateau Dogon", + "level2": "Yanda-Bondum-Tebul" + }, + "dty": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Indo-Aryan Northern zone", + "level8": "Eastern Pahari" + }, + "dua": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Dualaic", + "level9": "Duala-Malimba" + }, + "dub": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic" + }, + "dud": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + 
"level3": "Kainji", + "level4": "Central Kainji", + "level5": "Northwestern Kainji", + "level6": "Dukaic" + }, + "due": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine" + }, + "duf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Extreme Southern New Caledonian" + }, + "dug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Mijikenda", + "level12": "Northern Mijikenda" + }, + "duh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "dui": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Greater Yaganon", + "level4": "Yaganon" + }, + "duj": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu", + "level4": "Dhuwal-Dhuwala", + "level5": "Western Dhuwal-Dhuwala" + }, + "duk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Nuru" + }, + "dul": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Alabat-Manide Agta" + }, + "dum": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + 
"level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch" + }, + "dun": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage" + }, + "duo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Northeastern Luzon" + }, + "dup": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic" + }, + "duq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage" + }, + "dur": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Diic" + }, + "dus": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Upper Dudhkosi" + }, + "duu": { + "level0": "Sino-Tibetan", + "level1": "Nungish", + "level2": "Gunong" + }, + "duv": { + "level0": "Lakes Plain", + "level1": "Tariku" + }, + "duw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito" + }, + "dux": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo", + "level5": "Duun-Seenku", + "level6": "Duun" + }, + "duy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Unclassified Northern Luzon" + }, + "duz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Unclassified Central 
Adamawa" + }, + "dva": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage" + }, + "dwa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2" + }, + "dwk": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "dwl": { + "level0": "Bookkeeping" + }, + "dwr": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo", + "level4": "Dawro-Gofa-Gamo" + }, + "dws": { + "level0": "Artificial Language" + }, + "dww": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Kakabai linkage" + }, + "dwz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Unclassified Bihari", + "level10": "Kuswaric" + }, + "dya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Lobiri-Jaane" + }, + "dyb": { + "level0": "Nyulnyulan", + "level1": "Western Nyulnyulan", + "level2": "Nyulnyulic" + }, + "dyd": { + "level0": "Nyulnyulan", + "level1": "Eastern Nyulnyulan", + "level2": "Yawuric" + }, + "dyg": { + "level0": "Unattested", 
+ "level1": "Austronesian (Unattested)" + }, + "dyi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "South Senufo", + "level5": "Tagbana-Jimini" + }, + "dyk": { + "level0": "Bookkeeping" + }, + "dym": { + "level0": "Dogon", + "level1": "North Plateau Dogon", + "level2": "Yanda-Bondum-Tebul", + "level3": "Yanda-Ana" + }, + "dyn": { + "level0": "Pama-Nyungan", + "level1": "Macleay-New England" + }, + "dyo": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola" + }, + "dyr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zakse-Saya" + }, + "dyu": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding" + }, + "dyy": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic", + "level2": "Yidinic" + }, + "dza": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos" + }, + "dzd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele", + "level9": "Kirfi-Bele", + "level10": "Giiwo-Daza" + }, + "dze": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + 
"level3": "Mantharta", + "level4": "Djiwarli-Thiin" + }, + "dzg": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Tebu" + }, + "dzl": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Dakpa-Dzala" + }, + "dzn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Bamweic" + }, + "dzo": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic", + "level8": "Nuclear Dzongkhic" + }, + "ebg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross", + "level7": "Oroic", + "level8": "Ebughu-Oro" + }, + "ebo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Ngungwel-Eboo" + }, + "ebr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Potou" + }, + "ebu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East 
Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu" + }, + "ecr": { + "level0": "Unclassifiable" + }, + "ecs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "West-Central South American Sign" + }, + "ecy": { + "level0": "Unclassifiable" + }, + "eee": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Unclassified Northern Tai" + }, + "efa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross" + }, + "efe": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe", + "level3": "Leseic" + }, + "efi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic", + "level8": "Okop Usem", + "level9": "Efik-Ibibio" + }, + "ega": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo" + }, + "egl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian", + "level12": "Emiliano-Romagnolo" + }, + "ego": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Jilic-Eggonic", + "level5": "Eggon-Ake" + }, + "egy": { + "level0": "Afro-Asiatic", + "level1": "Egyptian" + }, + "ehs": { + 
"level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "ehu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Osse", + "level7": "Ukue-Ehueun" + }, + "eip": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Eastern Mek" + }, + "eit": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Kombio-Yambes", + "level3": "Kombioic" + }, + "eiv": { + "level0": "North Bougainville", + "level1": "Rotokas-Askopan" + }, + "eja": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola", + "level7": "PF-Jola", + "level8": "Her-Ejamat" + }, + "eka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Northern Bakor", + "level10": "Nnam-Ekajuk" + }, + "eke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ekit-Etebi" + }, + "ekg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Paniai Lakes", + "level2": "Mee-Wodani" + }, + "eki": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic", + "level8": "Unclassified Efikic" + }, + "ekk": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "Central Finnic" + }, + "ekl": { + "level0": "Austroasiatic", + 
"level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Santalic" + }, + "ekm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa", + "level10": "Mbure-Yambassa", + "level11": "Yambassa (A.60)", + "level12": "Mmala-Elip-Gunu", + "level13": "Elip-Gunu" + }, + "eko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Koti-Nathembo" + }, + "ekp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid" + }, + "ekr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Yatye-Akpa" + }, + "eky": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Kayah-Yintale", + "level4": "Kayah" + }, + "ele": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg", + "level2": "Elepi-Kamasau-Marienberg" + }, + "elh": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Western Kordofan Nubian" + }, + "eli": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Nding-Tasomi" + }, + "elk": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Au-Olo-Elkei", + "level5": "Olo-Elkei" + }, + "ell": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek", + "level6": "Koineic Greek", + "level7": "Modern Koineic Greek", + 
"level8": "Nuclear Modern Greek" + }, + "elm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Ogonoid", + "level5": "West Ogonoid" + }, + "elo": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Western Omo-Tana" + }, + "elp": { + "level0": "Bookkeeping" + }, + "elu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Kurti-Kele-Ere", + "level9": "Kurti-Elu" + }, + "ema": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Central Plains Edoid", + "level7": "Emaic" + }, + "emb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Tamanic-Bugis", + "level5": "Tamanic" + }, + "eme": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Wayampi-Zoe-Emerillon", + "level7": "Zoe-Emerillon" + }, + "emg": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Upper Arun", + "level6": "Mewahang" + }, + "emi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "St. 
Matthias" + }, + "emk": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Manenkan" + }, + "emn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid B" + }, + "emo": { + "level0": "Bookkeeping" + }, + "emp": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "Atrato", + "level3": "Panama-Baudo-Atrato" + }, + "emq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Muya" + }, + "ems": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Yupik" + }, + "emu": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi", + "level5": "Southwest Gondi", + "level6": "Muria" + }, + "emw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Southwest Babar" + }, + "emx": { + "level0": "Speech Register", + "level1": "Basque-Romani" + }, + "emy": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Cholan" + }, + "emz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring", + "level10": "Komic" + }, + "ena": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + 
"level3": "South Adelbert", + "level4": "Sogeram", + "level5": "Apalic" + }, + "enb": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Northern Kalenjin" + }, + "enc": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Eastern Kra", + "level3": "Buyang", + "level4": "Northern Buyang" + }, + "end": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Eastern Central Flores", + "level8": "Ende-Lio" + }, + "enf": { + "level0": "Uralic", + "level1": "Samoyedic", + "level2": "Enets-Nenets", + "level3": "Enets" + }, + "eng": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English" + }, + "enh": { + "level0": "Uralic", + "level1": "Samoyedic", + "level2": "Enets-Nenets", + "level3": "Enets" + }, + "enl": { + "level0": "Lengua-Mascoy", + "level1": "Lengua" + }, + "enm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English" + }, + "enn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Delta Edoid", + "level6": "Degema-Engenni" + }, + "eno": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran" + }, + "enq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": 
"Engan" + }, + "enr": { + "level0": "Pauwasi", + "level1": "Eastern Pauwasi" + }, + "enu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Bi-Ka" + }, + "env": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic", + "level7": "Ikpeshic" + }, + "enw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross", + "level7": "Oroic", + "level8": "Enwang-Uda" + }, + "enx": { + "level0": "Lengua-Mascoy", + "level1": "Lengua" + }, + "eot": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Western Tano" + }, + "epi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Delta Edoid" + }, + "epo": { + "level0": "Artificial Language", + "level1": "Esperantic" + }, + "era": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Malasa-Eravallan" + }, + "erg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Erromanga" + }, + "erh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": 
"Edoid", + "level5": "Southwestern Edoid" + }, + "eri": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Nuru", + "level4": "Erimaic" + }, + "erk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Efate", + "level8": "South Efatic" + }, + "ero": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Gyalrongic", + "level5": "West Gyalrongic", + "level6": "Horpa" + }, + "err": { + "level0": "Giimbiyu", + "level1": "Urninganggic" + }, + "ers": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Ersuic" + }, + "ert": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Eritai-Obokuitai-Biritai" + }, + "erw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuta" + }, + "ese": { + "level0": "Pano-Tacanan", + "level1": "Tacanan", + "level2": "Takanik-Chamik" + }, + "esg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi", + "level5": "Southwest Gondi", + "level6": "Southern Gondi", + "level7": "Eastern Gondi" + }, + "esh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic", + "level10": "Ramand-Karaj" + }, + "esi": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Inuit", + "level3": "Alaskan Inupiaq" + }, + "esk": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": 
"Inuit", + "level3": "Alaskan Inupiaq" + }, + "esl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Arab Sign" + }, + "esm": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "esn": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "eso": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic" + }, + "ess": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Yupik" + }, + "est": { + "level0": "Uralic", + "level1": "Finnic" + }, + "esu": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Yupik" + }, + "esy": { + "level0": "Artificial Language" + }, + "etb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ekit-Etebi" + }, + "eth": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "etn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Efate", + "level8": "South Efatic" + }, + "eto": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)" + }, + "etr": { + "level0": "Bosavi", + "level1": "Etoro-Bedamini" + }, + "ets": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Afenmai-Bendel", + "level7": "Uneme-Yekhee" + }, + "etu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", 
+ "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham" + }, + "etx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Beromic", + "level5": "Iten-Cara-Berom" + }, + "etz": { + "level0": "Mairasic" + }, + "eud": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Opata-Eudeve" + }, + "eur": { + "level0": "Bookkeeping" + }, + "eve": { + "level0": "Tungusic", + "level1": "Northeastern Tungusic", + "level2": "Northern Tungusic", + "level3": "Ewenic" + }, + "evh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Southwestern Edoid" + }, + "evn": { + "level0": "Tungusic", + "level1": "Northeastern Tungusic", + "level2": "Northern Tungusic" + }, + "ewe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe", + "level5": "Eweic" + }, + "ewo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)", + "level9": "Ewondo-Bebele" + }, + "ext": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic" + }, + "eya": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak" + }, + "eyo": { + "level0": "Nilotic", + "level1": 
"Southern Nilotic", + "level2": "Kalenjin", + "level3": "Central Kalenjin", + "level4": "Plateau Central Kalenjin", + "level5": "Western Plateau Central Kalenjin" + }, + "eze": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Koring-Kukele", + "level8": "Kukele-Uzekwe" + }, + "fab": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Lower Guinea Portuguese", + "level15": "Bantu Layer Lower Guinea Portuguese" + }, + "fad": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Garuh-Foran" + }, + "faf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Makira" + }, + "fag": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Finungwan-Mamaa-Gusan" + }, + "fah": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Unclassified Benue-Congo" + }, + "fai": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok", + 
"level7": "Tifal-Telefol", + "level8": "Tifalic", + "level9": "Faiwol-Seltaman" + }, + "faj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "East Sogeram" + }, + "fak": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Yemne-Kimbi" + }, + "fal": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Unclassified Volta-Congo", + "level3": "Adamawa Fali" + }, + "fam": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Unclassified Bantoid" + }, + "fan": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)" + }, + "fao": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "West Scandinavian", + "level6": "Icelandic-Faroese" + }, + "fap": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Cangin", + "level3": "Palor-Ndut" + }, + "far": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita", + "level9": "North Malaitan" + }, + "fas": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + 
"level7": "Farsic-Caucasian Tat", + "level8": "Farsic" + }, + "fau": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "West Tariku", + "level3": "Fayu-Kirikiri" + }, + "fax": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance" + }, + "fay": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian" + }, + "faz": { + "level0": "Bookkeeping" + }, + "fcs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "fer": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Feroge-Mangaya" + }, + "ffi": { + "level0": "Bookkeeping" + }, + "ffm": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula" + }, + "fia": { + "level0": "Nubian", + "level1": "Nile Nubian", + "level2": "Nobiin Nubian" + }, + "fie": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Fyer-Tambas" + }, + "fif": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic", + "level5": "Modern Sayhadic" + }, + "fij": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + 
"level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Eastern Fijian", + "level7": "Nuclear Eastern Fijian", + "level8": "Viwa-Lomaiviti-East Viti Levu" + }, + "fil": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Tagalogic", + "level5": "Tagalog-Filipino" + }, + "fin": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Nuclear Finnish" + }, + "fip": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika", + "level10": "Fipaic" + }, + "fir": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Izeric" + }, + "fit": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Nuclear Finnish" + }, + "fiw": { + "level0": "East Kutubu" + }, + "fiz": { + "level0": "Bookkeeping" + }, + "fkk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Higic", + "level5": "Nkafa-Kirya-Bana", + "level6": "Nkafa-Kirya" + }, + "fkv": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Nuclear Finnish" + }, + "fla": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Southern Interior Salish", + "level3": "Okanaganic", + "level4": "Kalispel-Spokane" + }, + "flh": { + "level0": "Lakes Plain", + "level1": "East Lakes Plain" + }, + "fli": { + 
"level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Gude-Jimi-Zizilivakan", + "level7": "Fali-Gude" + }, + "fll": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Unclassified Volta-Congo", + "level3": "Adamawa Fali" + }, + "flm": { + "level0": "Bookkeeping" + }, + "fln": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Flinders-Barrow" + }, + "flr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu", + "level12": "Fuliiric", + "level13": "Fuliiru-Vira" + }, + "fly": { + "level0": "Speech Register", + "level1": "Indo-European Speech Register" + }, + "fmp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "East Bamileke" + }, + "fmu": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi", + "level5": "Southwest Gondi", + "level6": "Southern Gondi", + "level7": "Eastern Gondi" + }, + "fnb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym", + "level7": "Orkon-West Ambrym" + }, + "fng": { + "level0": "Pidgin", + "level1": "Zulu-based pidgin" + }, + "fni": { + 
"level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": "Goulaic", + "level7": "Zan-Kulaalic", + "level8": "Kulaalic" + }, + "fod": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "Mountain Oti North Guang", + "level10": "Gikyode-Ginyanga", + "level11": "Gikyode-Foodo" + }, + "foi": { + "level0": "East Kutubu" + }, + "fom": { + "level0": "Bookkeeping" + }, + "fon": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "for": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Fore-Gimi" + }, + "fos": { + "level0": "Austronesian", + "level1": "East Formosan" + }, + "fpe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "West African Creole English" + }, + "fqs": { + "level0": "Baibai-Fas" + }, + "fra": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": 
"Central Oil", + "level14": "Macro-French", + "level15": "Global French" + }, + "frc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil" + }, + "frd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Kei-Fordata" + }, + "fri": { + "level0": "Bookkeeping" + }, + "fro": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil" + }, + "frp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Francoprovencalic" + }, + "frq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup" + }, + "frr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": 
"Anglo-Frisian", + "level7": "Frisian" + }, + "frs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German", + "level8": "Low German", + "level9": "West Low German", + "level10": "North Low Saxon" + }, + "frt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo" + }, + "fry": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Frisian", + "level8": "Modern West Frisian", + "level9": "Westlauwers-Terschelling Frisian" + }, + "fse": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Swedish Sign", + "level3": "Finnish Sign" + }, + "fsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "fss": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Swedish Sign", + "level3": "Finnish Sign" + }, + "fub": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula", + "level4": "Eastern Fula", + "level5": "Adamawa-Bagirmi Fulfulde" + }, + "fuc": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula" + }, + "fud": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian" + }, + "fue": { + "level0": 
"Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula" + }, + "fuf": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula" + }, + "fuh": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula" + }, + "fui": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula", + "level4": "Eastern Fula", + "level5": "Adamawa-Bagirmi Fulfulde" + }, + "fuj": { + "level0": "Heibanic", + "level1": "Eastern Heibanic" + }, + "fum": { + "level0": "Bookkeeping" + }, + "fuq": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula", + "level4": "Eastern Fula" + }, + "fur": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian" + }, + "fut": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Vanuatu-Loyalty Outliers", + "level9": "Mele-Futuna" + }, + "fuu": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Barh Keita" + }, + "fuv": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula", + "level4": "Eastern Fula" + }, + "fvr": { + "level0": "Furan" + }, + "fwa": 
{ + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Nmi-Pij-Fwa-Pam-Pap", + "level10": "Nmi-Fij-Fwa", + "level11": "Hyenghene" + }, + "fwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Western Botatwe", + "level9": "Zambezi Hook" + }, + "gaa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ga-Dangme" + }, + "gab": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 2", + "level6": "Gabri-Kimre" + }, + "gad": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Gaddangic", + "level6": "Cagayan-Baliwon Gaddang" + }, + "gae": { + "level0": "Arawakan", + "level1": "Alto Orinoco", + "level2": "Parenic" + }, + "gaf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka" + }, + "gag": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "West Oghuz" + }, + "gah": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Gahuku" + }, + "gai": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ottilien" + }, + "gaj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": 
"Gadsup-Agarabi" + }, + "gak": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Sahuan", + "level3": "Nuclear Sahuan" + }, + "gal": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Wetar-Atauro" + }, + "gam": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Jimi", + "level3": "Kandawo-Narak" + }, + "gan": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic" + }, + "gao": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "East Sogeram" + }, + "gap": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Garuh-Foran" + }, + "gaq": { + "level0": "Austroasiatic", + "level1": "Mundaic" + }, + "gar": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage", + "level9": "Boselewa-Galeya" + }, + "gas": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Garasia Bhil" + }, + "gat": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Unclassified Kainantu-Goroka" + }, + "gau": { + "level0": "Dravidian", + "level1": "Central Dravidian", + "level2": "Parji-Ollari-Gadaba", + "level3": "Ollari-Gadaba" + }, + "gav": { + "level0": "Bookkeeping" + }, 
+ "gaw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Garuh-Foran" + }, + "gax": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Nuclear Oromo", + "level8": "Central-Eastern Oromo", + "level9": "Central Oromo" + }, + "gay": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran" + }, + "gaz": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Nuclear Oromo" + }, + "gbb": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic" + }, + "gbd": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Marrngu" + }, + "gbe": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Western Sepik Hill", + "level3": "Hewa-April River" + }, + "gbf": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic", + "level3": "Burui-Gaikundi" + }, + "gbg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka" + }, + "gbh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "gbi": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Galela-Loloda" + }, + "gbj": { + "level0": "Austroasiatic", + "level1": "Mundaic", + 
"level2": "Gutob-Remo" + }, + "gbk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic" + }, + "gbl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "gbm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Indo-Aryan Northern zone", + "level8": "Central Pahari" + }, + "gbn": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli", + "level4": "Gberi-Morokodo-Mittu" + }, + "gbo": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Liberian Grebo", + "level6": "North-Central Liberian Grebo" + }, + "gbp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Bokoto-Gbeya", + "level6": "Gbeya", + "level7": "Gbeya-Suma" + }, + "gbq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Bokoto-Gbeya", + "level6": "Gbeya" + }, + "gbr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Gbagyi-Gbari" + }, + "gbs": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "gbv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Gbanu-Manza-Ngbaka" + }, + "gbw": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "North Coast Pama-Nyungan", + "level3": "Waka-Kabic", + "level4": "Eastern Waka-Kabic" + }, + "gbx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Eastern Phla-Phera" + }, + "gby": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Gbagyi-Gbari" + }, + "gbz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic", + "level9": "Yazdi-Kermani-Nayini" + }, + "gcc": { + "level0": "Baining" + }, + "gcd": { + "level0": "Tangkic", + "level1": "Southern Tangkic" + }, + "gce": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "Oregon Athabaskan", + "level5": "Rogue River" + }, + "gcf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", 
+ "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French", + "level16": "Lesser Antillean French Creole" + }, + "gcl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Vincent-Grenadian Creole", + "level15": "Grenada-Tobago Creole" + }, + "gcn": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Coastal Binanderean", + "level6": "Gaena-Korafe" + }, + "gcr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French", + "level16": "Guyanic Creole French" + }, + "gct": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Alemannic", + "level10": "North Alemannic" + }, + "gda": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + 
"level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewaric" + }, + "gdb": { + "level0": "Dravidian", + "level1": "Central Dravidian", + "level2": "Parji-Ollari-Gadaba", + "level3": "Ollari-Gadaba" + }, + "gdc": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Northern Maric", + "level5": "Warungu-Gugu Badhun" + }, + "gdd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Western Bel" + }, + "gde": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Gude-Jimi-Zizilivakan", + "level7": "Fali-Gude" + }, + "gdf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Dghwedeic", + "level7": "Gudufic" + }, + "gdg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Gaddangic", + "level6": "Cagayan-Baliwon Gaddang" + }, + "gdh": { + "level0": "Jarrakan", + "level1": "Miriwunic" + }, + "gdi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "Baka-Gundi" + }, + "gdj": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Norman Pama", + 
"level3": "Kuthant-Gurdjar", + "level4": "Rib-Gurdjar" + }, + "gdk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.1" + }, + "gdl": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Konsoid", + "level8": "Gidole-Bussa" + }, + "gdn": { + "level0": "Dagan" + }, + "gdo": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic", + "level4": "Botlikh-Godoberi" + }, + "gdq": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Hobyot-Western MSA", + "level5": "Western MSA" + }, + "gdr": { + "level0": "Eastern Trans-Fly" + }, + "gds": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "gdu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic" + }, + "gdx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewati-Gojri" + }, + "gea": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Gera-Geruma-Kubi-Deno", + "level9": "Gera-Geruma" + }, + "geb": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ruboni" + }, + "gec": { + "level0": "Kru", + "level1": "Greater Western Kru", + 
"level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Liberian Grebo", + "level6": "North-Central Liberian Grebo", + "level7": "Barclayville-Gboloo-Central Liberian Grebo", + "level8": "Gboloo-Central Grebo" + }, + "ged": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo" + }, + "geg": { + "level0": "Bookkeeping" + }, + "geh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Bairisch", + "level10": "Global South Bavarian" + }, + "gei": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera" + }, + "gej": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe" + }, + "gek": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic" + }, + "gel": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Northwestern Kainji", + "level6": "Dukaic", + "level7": "Main-Gwamhi" + }, + "gen": { + "level0": "Bookkeeping" + }, + "geq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Zandic", + "level6": "Zande-Nzakara" + }, + "ges": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut", + "level5": 
"Geser-Gorom-Bati" + }, + "gev": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Southern Okani" + }, + "gew": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Gera-Geruma-Kubi-Deno", + "level9": "Gera-Geruma" + }, + "gex": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Karre-Boni" + }, + "gey": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Mitukuic" + }, + "gez": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic" + }, + "gfk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Patpatar-Minigir-Tolai" + }, + "gft": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "N-Group" + }, + "gga": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Santa Isabel", + "level10": "East Santa Isabel" + }, + "ggb": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Bassa-Klao", + "level5": "Bassaic" + }, + "ggd": { + "level0": "Pama-Nyungan", + "level1": "Paman" + }, + "gge": { + "level0": "Maningrida", + "level1": "Bureran" + }, + "ggg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Unclassified Western Hindi", + "level11": "Ghera-Gurgula" + }, + "ggh": { + "level0": "Bookkeeping" + }, + "ggl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Greater Yaganon", + "level4": "Yaganon", + "level5": "Ganglau-Saep" + }, + "ggm": { + "level0": "Bookkeeping" + }, + "ggr": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Alaya-Athima", + "level3": "Thaypanic" + }, + "ggt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Western Ngero", + "level9": "Tuam" + }, + "ggu": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Ben-Gban" + }, + "ggw": { + "level0": "Suki-Gogodala", + "level1": "Gogodalic" + }, + "gha": { + "level0": "Afro-Asiatic", + 
"level1": "Berber" + }, + "ghe": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Ghale", + "level5": "Nuclear Ghale" + }, + "ghh": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Ghale", + "level5": "Nuclear Ghale" + }, + "ghk": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Northern Karen" + }, + "ghl": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Eastern Kordofan Nubian" + }, + "ghn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "West New Georgia", + "level11": "Simboic", + "level12": "Ghanongga-Lungga" + }, + "gho": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Kabyle-Atlas Berber", + "level3": "Atlas Berber", + "level4": "Northwestern Moroccan Berber" + }, + "ghr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Unclassified Western Hindi", + "level11": "Ghera-Gurgula" + }, + "ghs": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean" + }, + "ght": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Ghale" + }, + "gia": { + "level0": "Jarrakan" + }, + "gib": { + "level0": "Pidgin", + "level1": 
"Hausa-based pidgin" + }, + "gic": { + "level0": "Unclassifiable" + }, + "gid": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara" + }, + "gie": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Neyo-Dida", + "level3": "Dida", + "level4": "Guebie-Lakota Dida" + }, + "gig": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani" + }, + "gih": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Greater Bandjalangic", + "level4": "Bandjalangic", + "level5": "Inland Bandjalang" + }, + "gii": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana" + }, + "gil": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian" + }, + "gim": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Fore-Gimi" + }, + "gin": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Tsezic", + "level4": "West Tsezic" + }, + "gio": { + "level0": "Bookkeeping" + }, + "gip": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + 
"level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "West Arawe" + }, + "giq": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic", + "level6": "Southwestern Gelao" + }, + "gir": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic", + "level6": "Northern Gelao", + "level7": "Ahouic" + }, + "gis": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Maroua", + "level5": "Giziga" + }, + "git": { + "level0": "Tsimshian", + "level1": "Nishga-Gitxsan" + }, + "giu": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic", + "level6": "Northern Gelao" + }, + "giw": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic", + "level6": "Southwestern Gelao" + }, + "gix": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka", + "level8": "Bwaka" + }, + "giy": { + "level0": "Unattested" + }, + "giz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Maroua", + "level5": "Giziga" + }, + "gjk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": 
"Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic", + "level10": "Western Gujaratic" + }, + "gjm": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Kulin-Bunganditj", + "level4": "Warrnambool-Bunganditj" + }, + "gjn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Gonja-Dompo" + }, + "gjr": { + "level0": "Mixed Language", + "level1": "Gurindji-Kriol" + }, + "gju": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewati-Gojri" + }, + "gka": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup", + "level5": "Unclassified Nuclear Warup" + }, + "gkd": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "East Sogeram", + "level6": "Aisian" + }, + "gke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Dama-Galke" + }, + "gkn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Ogonoid", + "level5": "East Ogonoid" + }, + "gko": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Norman Pama" + }, + "gkp": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + 
"level3": "Southwest Mande", + "level4": "Kpelle" + }, + "gku": { + "level0": "Tuu", + "level1": "!Ui", + "level2": "Ghaap-Kalahari" + }, + "gla": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Goidelic", + "level7": "Modern Goidelic", + "level8": "Eastern Goidelic" + }, + "glb": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3" + }, + "glc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": "Goulaic" + }, + "gld": { + "level0": "Tungusic", + "level1": "Central-Western Tungusic" + }, + "gle": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Goidelic", + "level7": "Modern Goidelic" + }, + "glg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance" + }, + "glh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Pashayi", + "level5": "Western Pashayi" + }, + "gli": { + "level0": "Bookkeeping" + }, + "glj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": 
"Goulaic", + "level7": "Zan-Kulaalic", + "level8": "Kulaalic" + }, + "glk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Caspian", + "level8": "Gilaki-Rudbari" + }, + "gll": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Badjiri-Eastern Karnic", + "level3": "Eastern Karnic" + }, + "glo": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele" + }, + "glr": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn" + }, + "glu": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic", + "level6": "Morom-Jaya-Naba", + "level7": "Bayo-Morom" + }, + "glv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Goidelic", + "level7": "Modern Goidelic", + "level8": "Eastern Goidelic" + }, + "glw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Wandala-Malgwa-Glavda" + }, + "gma": { + "level0": "Worrorran", + "level1": "Northern Worrorran", + "level2": "Forrest River" + }, + "gmb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + 
"level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita" + }, + "gmd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Northern Bikwin-Jen", + "level6": "Mak-Tal" + }, + "gmg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "Unclassified Sogeram" + }, + "gmh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German" + }, + "gml": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German" + }, + "gmm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Mbodomo-Bofi" + }, + "gmn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Northern Samba-Duru", + "level7": "Vere-Gimme", + "level8": "Koma Alantika" + }, + "gmo": { + "level0": "Bookkeeping" + }, + "gmu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum" + }, + "gmv": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo", + "level4": "Dawro-Gofa-Gamo" + }, + "gmx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": 
"Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Kinga-Magoma" + }, + "gmy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek" + }, + "gna": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Kaansa-Dogose" + }, + "gnb": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic", + "level6": "Gangte-Vaiphei" + }, + "gnc": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Unclassified Berber" + }, + "gnd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Meri" + }, + "gne": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Izeric" + }, + "gng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma B", + "level13": "Konkomba-Gangam" + }, + "gnh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + 
"level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos" + }, + "gni": { + "level0": "Bunaban" + }, + "gnj": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Ben-Gban", + "level5": "Bengic" + }, + "gnk": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "West-Kxoe", + "level4": "Naro-Ana", + "level5": "Ana" + }, + "gnl": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Eastern Maric" + }, + "gnm": { + "level0": "Dagan", + "level1": "Southeast Dagan" + }, + "gnn": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu", + "level4": "Dhuwal-Dhuwala", + "level5": "Western Dhuwal-Dhuwala" + }, + "gno": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi" + }, + "gnq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic" + }, + "gnr": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "North Coast Pama-Nyungan", + "level3": "Waka-Kabic", + "level4": "Eastern Waka-Kabic" + }, + "gnt": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda" + }, + "gnu": { + "level0": "Nuclear Torricelli", + "level1": "Unclassified Nuclear Torricelli" + }, + "gnw": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.B" + }, + "gnz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "Baka-Gundi", + "level8": "Baka complex" + }, + "goa": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan", + "level4": "Guro-Dan", + "level5": "Guro-Yaoure" + }, + "gob": { + "level0": "Guahiboan", + "level1": "Nuclear Guahiboan", + "level2": "Central Guahibo", + "level3": "Guahibo-Playero" + }, + "goc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mumeng" + }, + "god": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Beteic", + "level3": "Western Bete" + }, + "goe": { + "level0": "Sino-Tibetan" + }, + "gof": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo", + "level4": "Dawro-Gofa-Gamo" + }, + "gog": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu" + }, + "goh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German" + }, + "goi": { + "level0": "East Strickland", + "level1": "Kubo-Samo-Bibo" + }, + "goj": { + "level0": "Bookkeeping" + }, + "gok": { + "level0": "Bookkeeping" + }, + "gol": { + "level0": "Atlantic-Congo" + }, + "gom": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": 
"Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani" + }, + "goo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Eastern Fijian", + "level7": "Nuclear Eastern Fijian" + }, + "gop": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Southwest Cenderawasih Bay" + }, + "goq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay", + "level7": "Manadoic Malay" + }, + "gor": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "gos": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German", + "level8": "Low German", + "level9": "West Low German", + "level10": "North Low Saxon" + }, + "got": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "East Germanic" + }, + "gou": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic", + "level5": "Buwal-Gavar" + }, + "gov": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan", + "level4": "Guro-Dan", + 
"level5": "Dan-Toura", + "level6": "Toura-Goo" + }, + "gow": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic", + "level3": "Greater West Rift South Cushitic", + "level4": "West Rift South Cushitic", + "level5": "Northern West Rift South Cushitic", + "level6": "Iraqwoid" + }, + "gox": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic", + "level9": "Mid-Southern Central Core Bandaic" + }, + "goy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Kim-Besme-Goundo" + }, + "goz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic" + }, + "gpa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid", + "level6": "Dibo-Kupa", + "level7": "Abawa", + "level8": "Kami-Gupa" + }, + "gpe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "West African Creole English", + "level13": "Coastal Nigerian Krio" + }, + "gqa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Teraic", + "level5": "Eastern Tera" + }, + "gqi": { + 
"level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic" + }, + "gqr": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Bediondo" + }, + "gqu": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic" + }, + "gra": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Garasia Bhil" + }, + "grc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek" + }, + "grd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Guruntumic" + }, + "grg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": "Gira-Neko-Nekgini" + }, + "grh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "gri": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Nuclear Guadalcanal-Nggelic", + "level7": "North and West Guadalcanal" + }, + "grj": { + "level0": "Kru", + "level1": "Greater 
Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Liberian Grebo" + }, + "grm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic" + }, + "gro": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic" + }, + "grq": { + "level0": "Ramu", + "level1": "Agoan" + }, + "grr": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Northern Saharan Oasis Berber" + }, + "grs": { + "level0": "Nimboranic", + "level1": "Outer Nimboranic", + "level2": "Mlap-Gresi-Kemtuik", + "level3": "Gresi-Kemtuik" + }, + "grt": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo" + }, + "gru": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "N-Group" + }, + "grv": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Liberian Grebo", + "level6": "North-Central Liberian Grebo", + "level7": "Barclayville-Gboloo-Central Liberian Grebo", + "level8": "Gboloo-Central Grebo" + }, + "grw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + 
"level11": "Eastern Taupota" + }, + "gry": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Liberian Grebo", + "level6": "North-Central Liberian Grebo", + "level7": "Barclayville-Gboloo-Central Liberian Grebo" + }, + "grz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Patpatar-Minigir-Tolai", + "level9": "Minigir-Tolai" + }, + "gsc": { + "level0": "Bookkeeping" + }, + "gse": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "gsg": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "DGSic" + }, + "gsl": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "Gusilay-Bandial" + }, + "gsm": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "gsn": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Finungwan-Mamaa-Gusan" + }, + "gso": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Gbaya Meridional" + }, + "gsp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Greater Yaganon" + }, + "gss": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "gsw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": 
"Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Alemannic", + "level10": "South Alemannic" + }, + "gti": { + "level0": "Bookkeeping" + }, + "gtu": { + "level0": "Bookkeeping" + }, + "gua": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jaku-Gubi" + }, + "guc": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Guajiro-Paraujano" + }, + "gud": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Neyo-Dida", + "level3": "Dida" + }, + "gue": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Eastern Ngumpin", + "level5": "Ngumpit" + }, + "guf": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu", + "level4": "Dhuwal-Dhuwala", + "level5": "Eastern Dhuwal-Dhuwala" + }, + "gug": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.A", + "level8": "Paraguay-Brazil Guarani" + }, + "guh": { + "level0": "Guahiboan", + "level1": "Nuclear Guahiboan", + "level2": "Central Guahibo", + "level3": "Guahibo-Playero" + }, + "gui": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.B", + "level8": "Chiriguanic" + }, + "guj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": 
"Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic" + }, + "guk": { + "level0": "Gumuz", + "level1": "Nuclear Gumuz" + }, + "gul": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Gullah-Nevis-Antigua", + "level15": "Gullah" + }, + "gum": { + "level0": "Barbacoan", + "level1": "Coconucan" + }, + "gun": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.A" + }, + "guo": { + "level0": "Guahiboan" + }, + "gup": { + "level0": "Gunwinyguan" + }, + "guq": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I" + }, + "gur": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Mossi-Farefare", + "level14": "Farefareic" + }, + 
"gus": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "gut": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Votic Chibchan" + }, + "guu": { + "level0": "Yanomamic", + "level1": "Ninam-Yanomam-Yaroame", + "level2": "Yanomam-Yaroame", + "level3": "Yanomam-Yanimamo" + }, + "guv": { + "level0": "Bookkeeping" + }, + "guw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "gux": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma B", + "level13": "Gourmantche-Moba" + }, + "guz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "North Mara" + }, + "gva": { + "level0": "Lengua-Mascoy", + "level1": "Eastern Enlhet-Enenlhet" + }, + "gvc": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Kotiria-Piratapuyo" + }, + "gve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham", + "level9": "Busu" + }, + "gvf": { + "level0": "Nuclear Trans New 
Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Nuclear Simbu", + "level4": "Golinic" + }, + "gvj": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Guaja-Kaapor-Ava", + "level7": "Guaja-Aure-Aura" + }, + "gvl": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Chari" + }, + "gvm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Shiroro" + }, + "gvn": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic", + "level2": "Yalandyic" + }, + "gvo": { + "level0": "Tupian", + "level1": "Monde", + "level2": "Gavianic", + "level3": "Nuclear Gavianic" + }, + "gvp": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je", + "level5": "Eastern Timbira", + "level6": "Southeastern Timbira" + }, + "gvr": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Gurungic" + }, + "gvs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage" + }, + "gvy": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Northern Thura-Yura" + }, + "gwa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + 
"level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Potou" + }, + "gwb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Unclassified Nigerian Jarawan" + }, + "gwc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Dir-Swat Kohistani" + }, + "gwd": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Transversal Lowland East Cushitic", + "level6": "Dullay" + }, + "gwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu" + }, + "gwf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Indus Kohistanic", + "level9": "Outer Indus Kohistani" + }, + "gwg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Bambuka-Gomu-Leelau" + }, + "gwi": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Gwichin-Han" + }, + "gwj": { + "level0": "Khoe-Kwadi", + "level1": 
"Khoe", + "level2": "Non-Khoekhoe", + "level3": "West-Kxoe", + "level4": "Naro-Ana", + "level5": "Ana" + }, + "gwn": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.1" + }, + "gwr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "North Nyanza" + }, + "gwt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Gawarbatic" + }, + "gwu": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric" + }, + "gww": { + "level0": "Worrorran", + "level1": "Northern Worrorran", + "level2": "Forrest River" + }, + "gwx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "South Guang", + "level8": "Hill South Guang", + "level9": "Gua-Cherepon" + }, + "gxx": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn", + "level6": "Guere" + }, + "gya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental" + }, + "gyb": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Rempic" + }, + "gyd": { + "level0": "Tangkic", + "level1": "Southern Tangkic", + "level2": "Kayardild-Yangkaal" + }, + "gye": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "Lameic" + }, + "gyf": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric" + }, + "gyg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic" + }, + "gyi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Mvumboic", + "level11": "Kwasio-Gyele" + }, + "gyl": { + "level0": "South Omotic", + "level1": "AHK", + "level2": "Aari-Gayil" + }, + "gym": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Eastern Isthmic Chibchan", + "level4": "Guaymiic" + }, + "gyn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Barbados-Eustatius", + "level15": "Barbados-Trinidad" + }, + "gyr": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup II" + }, 
+ "gyy": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric", + "level5": "Margany-Gunya" + }, + "gyz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Gejic" + }, + "gza": { + "level0": "Blue Nile Mao", + "level1": "West Mao" + }, + "gzi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic" + }, + "gzn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "East Makian-Gane" + }, + "haa": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Gwichin-Han" + }, + "hab": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Vietnamese Sign" + }, + "hac": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Gorani" + }, + "had": { + "level0": "Hatam-Mansim" + }, + "hae": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Nuclear Oromo", + "level8": 
"Central-Eastern Oromo", + "level9": "South-East-North Oromo" + }, + "haf": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Vietnamese Sign" + }, + "hag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Southeast Western Oti-Volta", + "level13": "Kamara-Hanga" + }, + "hah": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Saposa-Tinputz", + "level13": "Tinputzic" + }, + "haj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Eastern Bengali" + }, + "hak": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic" + }, + "hal": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Kayong-Jeh-Halang", + "level4": "Jeh-Halang" + }, + "ham": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Western Sepik Hill", + "level3": "Hewa-April River" + }, + "han": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu", + "level12": "Rundic", + "level13": "Hangaza-Shubi" + }, + "hao": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Haliaic" + }, + "hap": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central Dani", + "level3": "Grand Valley Dani", + "level4": "Southeast Grand Valley Dani" + }, + "haq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu", + "level12": "Rundic" + }, + "har": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Harari-East Gurage" + }, + "has": { + "level0": "Wakashan", + "level1": "Northern Wakashan" + }, + "hat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + 
"level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French" + }, + "hau": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.1" + }, + "hav": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu", + "level12": "Hunde-Havu" + }, + "haw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal" + }, + "hax": { + "level0": "Haida" + }, + "hay": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "South Rutara" + }, + "haz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + 
"level8": "Farsic", + "level9": "Eastern Farsic" + }, + "hba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Tetelaic" + }, + "hbb": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Marghic", + "level7": "Kilba-South Margi" + }, + "hbn": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Central Heibanic", + "level3": "Ebang-Logol", + "level4": "Ebang-Laru" + }, + "hbo": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + "level6": "Hebrewic" + }, + "hbs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Western South Slavic" + }, + "hbu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Central Timoric A" + }, + "hca": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Hindustani" + }, + "hch": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Corachol" + }, + "hdn": { + "level0": "Haida" + }, + "hds": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + 
"level2": "LSFic", + "level3": "ASLic", + "level4": "Honduras-Panama Sign" + }, + "hdy": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Hadiyya-Kambaata", + "level6": "Hadiyyaic" + }, + "hea": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "East Hmongic", + "level5": "Northeastern Qiandongic Miao" + }, + "heb": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + "level6": "Hebrewic" + }, + "hed": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "South Masa", + "level4": "Peveic", + "level5": "Hede-Ngide" + }, + "heg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar" + }, + "heh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Bena-Hehe" + }, + "hei": { + "level0": "Wakashan", + "level1": "Northern Wakashan", + "level2": "Kwakiutlan" + }, + "hem": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Lubaic", + "level10": "Bangubangu-Kasai" + }, + "her": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + 
"level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Herero (R.30)" + }, + "hgm": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Khoekhoe", + "level3": "North Khoekhoe" + }, + "hgw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Eastern Taupota" + }, + "hhi": { + "level0": "Anim", + "level1": "Inland Gulf of Papua", + "level2": "West Inland Gulf of Papua", + "level3": "Hoyaic" + }, + "hhr": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola", + "level7": "PF-Jola", + "level8": "Her-Ejamat" + }, + "hhy": { + "level0": "Anim", + "level1": "Inland Gulf of Papua", + "level2": "West Inland Gulf of Papua", + "level3": "Hoyaic" + }, + "hia": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Lamang-Hdi" + }, + "hib": { + "level0": "Hibito-Cholon" + }, + "hid": { + "level0": "Siouan", + "level1": "Missouri River Siouan" + }, + "hif": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Hindustani" + }, + "hig": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Higic", + "level5": "Nkafa-Kirya-Bana", + "level6": 
"Nkafa-Kirya" + }, + "hih": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Tibor", + "level7": "Nuclear Tibor" + }, + "hii": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Nuclear Himachali" + }, + "hij": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Basaa (A.40)", + "level9": "Basaa-Bakoko", + "level10": "Basaa-Hijuk" + }, + "hik": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Ambonic", + "level8": "Central Ambon" + }, + "hil": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan", + "level7": "Capiznon-Ilonggo-Kawayan" + }, + "hin": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Hindustani" + }, + "hio": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "Ost-Kxoe", + "level4": "Tshwa Khoe" + }, + "hir": { + "level0": "Unattested", + "level1": "Arawan 
(Unattested)" + }, + "hit": { + "level0": "Indo-European", + "level1": "Anatolian" + }, + "hiw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Hiw-Lo-Toga" + }, + "hix": { + "level0": "Cariban", + "level1": "Parukotoan", + "level2": "Waiwaian" + }, + "hji": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic" + }, + "hka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga", + "level11": "Central Kilimanjaro" + }, + "hke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu", + "level12": "Hunde-Havu" + }, + "hkh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Kashmiric" + }, + "hkk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Orokaivic" + }, + "hkn": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Stieng" + }, + "hks": { + "level0": "Sign 
Language", + "level1": "L1 Sign Language", + "level2": "CSLic" + }, + "hla": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Haliaic" + }, + "hlb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Halbic" + }, + "hld": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Kayong-Jeh-Halang", + "level4": "Unclassified Kayong-Jeh-Halang" + }, + "hle": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Lipo-Lolopo", + "level7": "Unclassified Lipo-Lolopo" + }, + "hlt": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Choic" + }, + "hlu": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Luvian" + }, + "hma": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Mashan" + }, + "hmb": { + "level0": "Songhay", + "level1": "Eastern Songhay" + }, + "hmc": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + 
"level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Huishui" + }, + "hmd": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian" + }, + "hme": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Huishui" + }, + "hmf": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong", + "level8": "Far Western Miao", + "level9": "Unclassified First Vernacular Hmong" + }, + "hmg": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Guiyang" + }, + "hmh": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Huishui" + }, + "hmi": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Huishui" + }, + "hmj": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian" + }, + "hml": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian" + }, + "hmm": { + "level0": "Hmong-Mien", + "level1": "Hmongic", 
+ "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Mashan" + }, + "hmo": { + "level0": "Pidgin", + "level1": "Motu-based pidgin" + }, + "hmp": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Mashan" + }, + "hmq": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "East Hmongic", + "level5": "Northeastern Qiandongic Miao", + "level6": "Eastern Qiandongic Miao" + }, + "hmr": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic", + "level6": "Hmaric", + "level7": "Hmar-Saihriem" + }, + "hms": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "East Hmongic", + "level5": "South Qiandongic Miao" + }, + "hmt": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Kapau-Menya" + }, + "hmu": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "West Alor" + }, + "hmv": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong", + "level8": "Far Western Miao", + "level9": "Unclassified First Vernacular Hmong" + }, + "hmw": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Mashan" + }, + "hmy": { + "level0": "Hmong-Mien", 
+ "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Guiyang" + }, + "hmz": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong" + }, + "hna": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic" + }, + "hnd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Hindko-Siraiki", + "level10": "Hindko" + }, + "hne": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Eastern Hindi" + }, + "hng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "Southeastern Kikongo", + "level20": "Southern Kikongo" + }, + "hnh": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": 
"Non-Khoekhoe", + "level3": "West-Kxoe", + "level4": "Kxoe-Ani" + }, + "hni": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Ha-Ya" + }, + "hnj": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong", + "level8": "Far Western Miao" + }, + "hnn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "South Mangyan" + }, + "hno": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Hindko-Siraiki", + "level10": "Hindko" + }, + "hns": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Bhojpuric" + }, + "hnu": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Cuoi" + }, + "hoa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia", + "level11": "Rovianic", + "level12": "Hoava-Kusaghe" + }, + 
"hob": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Upper Markham", + "level9": "Mountain Upper Markham" + }, + "hoc": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Ho-Mundari" + }, + "hod": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Nzanyic" + }, + "hoe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Southeastern Benue-Congo Plateau", + "level5": "Horom-Fyem" + }, + "hoh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Hobyot-Western MSA" + }, + "hoi": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Koyukonic" + }, + "hoj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Eastern Rajasthani" + }, + "hol": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbala-Holu-Sondi (K.10)", + "level10": "Holu (K.10)" + }, + "hom": { + "level0": "Atlantic-Congo", 
+ "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Ngendan" + }, + "hoo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega" + }, + "hop": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan" + }, + "hor": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Chari" + }, + "hos": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Vietnamese Sign" + }, + "hot": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Misim-Yamap" + }, + "hov": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Muller-Schwaner", + "level6": "Hovongan-Kereho" + }, + "how": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic" + }, + "hoy": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": 
"Badaga-Kannada", + "level5": "Kannadoid" + }, + "hoz": { + "level0": "Blue Nile Mao", + "level1": "West Mao", + "level2": "Hozo-Seze" + }, + "hpo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Northern Burmish" + }, + "hps": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "hra": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic", + "level6": "Hmaric" + }, + "hre": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Hre-Sedang-Todrah-Monam", + "level4": "Hre-Sedang" + }, + "hrk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase", + "level8": "Hatuhaha" + }, + "hrm": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong" + }, + "hro": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic" + }, + "hrr": { + "level0": "Bookkeeping" + }, + "hrt": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "Bohtan" + }, + "hrv": { + "level0": "Indo-European", + "level1": "Balto-Slavic" + }, + "hrx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + 
"level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Rhenish Franconian" + }, + "hrz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Northern Tatic" + }, + "hsb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Sorbian" + }, + "hsf": { + "level0": "Bookkeeping" + }, + "hsh": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign", + "level4": "Nuclear Central European Sign" + }, + "hsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "hsn": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic" + }, + "hss": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Hobyot-Western MSA", + "level5": "Western MSA" + }, + "hti": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "Setic" + }, + "hto": { + "level0": "Huitotoan", + "level1": "Nuclear Witotoan", + "level2": "Minica-Murui" + }, + "htu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Ambonic", + "level8": "Northeast Ambon" + }, + "hub": { + "level0": "Chicham", + "level1": "Shuaric", + "level2": "Huambisa-Shuar" + }, + "huc": { + "level0": "Kxa" + }, + "hud": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Patakai-Manusela", + "level4": "Manusela-Huaulu" + }, + "hue": { + "level0": "Huavean", + "level1": "San Francisco-Santa Mar\u00eda Huave" + }, + "huf": { + "level0": "Kwalean", + "level1": "Humene-Kwale" + }, + "hug": { + "level0": "Harakmbut" + }, + "huh": { + "level0": "Araucanian" + }, + "hui": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli" + }, + "huj": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Guiyang" + }, + "huk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram" + }, + "hul": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Sinagoro-Keapara", + "level9": "Hula-Keapara" + }, + "hum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Hungan-Samba" + }, + "hun": { + "level0": "Uralic", + "level1": "Hungaric" + }, + "huo": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Angkuic", + "level5": "Southern Angkuic" + }, + "hup": { + "level0": "Athabaskan-Eyak-Tlingit", + 
"level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "California Athabaskan" + }, + "huq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Chru-Northern Cham", + "level6": "Northern Cham" + }, + "hur": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "South Georgia Central Salish" + }, + "hus": { + "level0": "Mayan", + "level1": "Huastecan Mayan" + }, + "hut": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic" + }, + "huu": { + "level0": "Huitotoan", + "level1": "Nuclear Witotoan", + "level2": "Minica-Murui" + }, + "huv": { + "level0": "Huavean", + "level1": "San Dionisio-San Mateo Huave" + }, + "huw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "West Central Maluku", + "level3": "Sula-Buru", + "level4": "Buruic" + }, + "hux": { + "level0": "Huitotoan", + "level1": "Nuclear Witotoan" + }, + "huy": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Trans-Zab" + }, + "huz": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Tsezic", + "level4": "East Tsezic" + }, + "hva": { + "level0": "Bookkeeping" + }, + "hvc": { + "level0": "Unclassifiable" + }, + "hve": { + "level0": "Huavean", + "level1": "San Dionisio-San Mateo Huave" + }, + "hvk": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone", + "level11": "Bwatooic" + }, + "hvn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Hawu-Dhao" + }, + "hvv": { + "level0": "Huavean", + "level1": "San Francisco-Santa Mar\u00eda Huave" + }, + "hwa": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Bakwe-Wane" + }, + "hwc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Pacific Creole English" + }, + "hwo": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Teraic", + "level5": "Eastern Tera" + }, + "hya": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Higic" + }, + "hye": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Armenic", + "level3": "Eastern-Western Armenian" + }, + "hyw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Armenic", + "level3": "Eastern-Western Armenian" + }, + "iai": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Loyalty Islands" + }, + "ian": { + "level0": "Ndu", 
+ "level1": "Nuclear Ndu", + "level2": "Sawosic", + "level3": "Iatmulic" + }, + "iap": { + "level0": "Bookkeeping" + }, + "iba": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Ibanic", + "level5": "Iban-Mualang-Seberuang", + "level6": "Iban-Seberuang", + "level7": "Northern Iban" + }, + "ibb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic", + "level8": "Okop Usem", + "level9": "Efik-Ibibio" + }, + "ibd": { + "level0": "Iwaidjan Proper", + "level1": "Central Iwaidjic" + }, + "ibe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid" + }, + "ibg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic" + }, + "ibh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Rade-Jarai" + }, + "ibi": { + "level0": "Bookkeeping" + }, + "ibl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran", + "level7": "Nuclear Southern Cordilleran" + }, + "ibm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Agoi-Doko-Iyoniyong" + }, + "ibn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibino-Iko" + }, + "ibo": { + 
"level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + "level4": "Nuclear Igboid", + "level5": "Central-Northern Igbo" + }, + "ibr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibuoroic", + "level8": "Ibuoro-ItuMbuso-Nkari", + "level9": "Ibuoro-ItuMbuso" + }, + "ibu": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Sahuan" + }, + "iby": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Eastern Ijo", + "level3": "Nikio", + "level4": "Kio Ijo" + }, + "ica": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Western Ede", + "level8": "Southwestern Ede" + }, + "ich": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Kpan-Icen" + }, + "icl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "West Scandinavian Sign", + "level4": "Danish Sign" + }, + "icr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Western Caribbean Creole", + "level14": "Miskitoic Creole English" + }, + "ida": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow 
Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia" + }, + "idb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Indo-Portuguesic", + "level15": "Northern Indo-Portuguesic" + }, + "idc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau", + "level5": "Nuclear Northern Benue-Congo Plateau", + "level6": "Kuturmi-Ajiya" + }, + "idd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Western Ede", + "level8": "Southwestern Ede" + }, + "ide": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic", + "level8": "Unclassified Efikic" + }, + "idi": { + "level0": "Pahoturi" + }, + "ido": { + "level0": "Artificial Language", + "level1": "Esperantic" + }, + "idr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Indri-Togoyo" + }, + "ids": { + "level0": "Bookkeeping" + }, + "idt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": 
"Lakalei-Idate" + }, + "idu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya", + "level5": "Etulo-Idoma", + "level6": "Nuclear Idoma", + "level7": "Idoma-Agatu-Okpogu" + }, + "ifa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Ifugaw" + }, + "ifb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Ifugaw", + "level8": "Batad-Mayoyao" + }, + "ife": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Western Ede", + "level8": "Southwestern Ede" + }, + "iff": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Erromanga" + }, + "ifk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Ifugaw" + }, + "ifm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": 
"Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie" + }, + "ifu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Ifugaw", + "level8": "Batad-Mayoyao" + }, + "ify": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran", + "level7": "Nuclear Southern Cordilleran", + "level8": "Kalanguya" + }, + "igb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid" + }, + "ige": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya" + }, + "igg": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan", + "level3": "Unclassified Tamolan" + }, + "igl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid" + }, + "igm": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Ataitan", + "level3": "Tangu-Igom" + }, + "ign": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Bolivian Arawakan", + "level3": "Mojeno-Paunaca", + "level4": "Moje\u00f1o" + }, + "igo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum", + "level5": "Panim-Isebe-Bau" + }, + "igs": { + "level0": "Artificial Language" + }, + "igw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic", + "level7": "Sasaru-Igwe" + }, + "ihb": { + "level0": "Pidgin", + "level1": "Iha-based 
pidgin" + }, + "ihi": { + "level0": "Bookkeeping" + }, + "ihp": { + "level0": "West Bomberai", + "level1": "Nuclear West Bomberai" + }, + "ihw": { + "level0": "Pama-Nyungan", + "level1": "Ganaic" + }, + "iii": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu" + }, + "iin": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Mantharta", + "level4": "Djiwarli-Thiin" + }, + "ijc": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Western Ijo" + }, + "ije": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Western Ijo", + "level3": "Inland Ijo", + "level4": "Biseni-Okordia" + }, + "ijj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede", + "level8": "Southeastern Ede" + }, + "ijn": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Eastern Ijo", + "level3": "Nikio", + "level4": "Kio Ijo" + }, + "ijs": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Eastern Ijo" + }, + "ike": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Inuit" + }, + "ikh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Central Plains Edoid", + "level7": "Emaic" + }, + "iki": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibino-Iko" + }, + "ikk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + 
"level4": "Nuclear Igboid", + "level5": "Central-Northern Igbo" + }, + "ikl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau", + "level5": "Nuclear Northern Benue-Congo Plateau" + }, + "iko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross" + }, + "ikp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic", + "level7": "Ikpeshic" + }, + "ikr": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Alaya-Athima", + "level3": "Central Alaya-Athima" + }, + "iks": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "ikt": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Inuit" + }, + "ikv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau" + }, + "ikw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + "level4": "Nuclear Igboid" + }, + "ikx": { + "level0": "Kuliak" + }, + "ikz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "South Mara", + "level12": "Southwest Mara" + }, + "ila": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat", + "level5": "North 
Lembata-Adonara" + }, + "ilb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe", + "level9": "Central Eastern Botatwe", + "level10": "Kafue" + }, + "ile": { + "level0": "Artificial Language" + }, + "ilg": { + "level0": "Iwaidjan Proper", + "level1": "Central Iwaidjic" + }, + "ili": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan", + "level4": "Modern Turkestan", + "level5": "Uyghuric" + }, + "ilk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran" + }, + "ill": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Danaw" + }, + "ilo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon" + }, + "ils": { + "level0": "Sign Language", + "level1": "Pidgin Sign Language" + }, + "ilu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Wetar-Atauro", + "level4": "Wetar" + }, + "ilv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross" + }, + "ima": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid" + }, + "ime": { + "level0": "Bookkeeping" + }, + "imi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": 
"Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Osum-Wadaginam-Pomoikan", + "level5": "Pomoikan", + "level6": "Anamuxric" + }, + "iml": { + "level0": "Coosan" + }, + "imn": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic" + }, + "imo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Hagen", + "level3": "Aua-Gawil" + }, + "imr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Southwest Babar" + }, + "imy": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Lyco-Carian", + "level6": "Milyan-Carian" + }, + "ina": { + "level0": "Artificial Language" + }, + "inb": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua", + "level4": "Colombia-Oriente Quechua" + }, + "ind": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Standard Malay-Indonesian" + }, + "ing": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Koyukonic" + }, + "inh": { + "level0": "Nakh-Daghestanian", + "level1": "Nakh", + "level2": "Chechen-Ingush" + }, + "inl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "Malaysian Sign", + "level5": "Indonesian Sign" + }, + "inm": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic" + }, + "inn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": 
"Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran" + }, + "ino": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria" + }, + "inp": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Purus" + }, + "ins": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Indo-Pakistani-Nepalese Sign", + "level3": "Indo-Pakistani Sign" + }, + "int": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic" + }, + "inz": { + "level0": "Chumashan", + "level1": "Southern Chumashan", + "level2": "Central Chumashan" + }, + "ior": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "TT-Group", + "level7": "Peripheral Western Gurage" + }, + "iou": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Wantoatic" + }, + "iow": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Winnebago-Chiwere" + }, + "ipi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Engan" + }, + "ipo": { + "level0": "Anim", + "level1": "Inland Gulf of Papua" + }, + "iqu": { + "level0": "Zaparoan", + "level1": "Iquito-Arabela", + "level2": "Cahuarano-Iquito" + }, + "ire": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Southwest Cenderawasih Bay", + "level6": "Yaur-Yerisiam" + }, + "irh": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut", + "level5": "Koiwai-Irarutu", + "level6": "Irarutic" + }, + "iri": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric" + }, + "irk": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic", + "level3": "Greater West Rift South Cushitic", + "level4": "West Rift South Cushitic", + "level5": "Northern West Rift South Cushitic", + "level6": "Iraqwoid" + }, + "irr": { + "level0": "Bookkeeping" + }, + "iru": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Irula-Muduga" + }, + "irx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Sabakor" + }, + "iry": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Mangyan" + }, + "isa": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Unclassified Goroka" + }, + "isc": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Poyanawa Subgroup" + }, + "isd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley" + }, + "ise": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Italian Sign" + }, + "isg": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "ish": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": 
"Central Plains Edoid" + }, + "isi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Northern Bakor", + "level10": "Abanyom-Nkem-Nkum" + }, + "isk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Sanglechi-Ishkashimi" + }, + "isl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "West Scandinavian", + "level6": "Icelandic-Faroese" + }, + "ism": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Sobeic" + }, + "isn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Nyaturu-Nilamba" + }, + "iso": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Southwestern Edoid" + }, + "isr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "DGSic" + }, + "ist": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Italo-Dalmatian", + "level9": "Dalmatian Romance" + }, + "isu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + 
"level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring", + "level10": "Aghemic" + }, + "ita": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Italo-Dalmatian", + "level9": "Italian Romance" + }, + "itb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Itneg" + }, + "itd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Sumambu-Tagal", + "level9": "Tidung-Bulusu", + "level10": "Tidung" + }, + "ite": { + "level0": "Chapacuran", + "level1": "Moreic-Waric", + "level2": "Moreic", + "level3": "Kujubim-More" + }, + "iti": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Itneg" + }, + "itk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian" + }, + "itl": { + "level0": "Chukotko-Kamchatkan", + "level1": "Kamchatkan" + }, + "itm": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibuoroic", + "level8": "Ibuoro-ItuMbuso-Nkari", + "level9": "Ibuoro-ItuMbuso" + }, + "itr": { + "level0": "Left May", + "level1": "Western Left May", + "level2": "Iteri-Bo" + }, + "its": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri" + }, + "itt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Bontok-Kankanay", + "level8": "Kankanay", + "level9": "Maeng-Northern Kankanay" + }, + "itu": { + "level0": "Bookkeeping" + }, + "itv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Gaddangic" + }, + "itw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibuoroic" + }, + "itx": { + "level0": "Tor-Orya", + "level1": "Tor" + }, + "ity": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Masadiit" + }, + "itz": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Yucatecan", + "level3": "Nuclear Yucatecan" + }, + "ium": { + "level0": "Hmong-Mien", + "level1": "Mienic", + "level2": "Mien-Mun" + }, + "ivb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", 
+ "level2": "Batanic" + }, + "ivv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Batanic", + "level3": "Yami-Itbayat" + }, + "iwk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran", + "level7": "Nuclear Southern Cordilleran" + }, + "iwm": { + "level0": "Sepik", + "level1": "Iwam-Wogamus", + "level2": "Iwamic" + }, + "iwo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok" + }, + "iws": { + "level0": "Sepik", + "level1": "Iwam-Wogamus", + "level2": "Iwamic" + }, + "ixc": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan" + }, + "ixl": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Mamean", + "level4": "Ixilan" + }, + "iya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Osse" + }, + "iyo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid" + }, + "iyx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West 
Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Laali-Yaa" + }, + "izh": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Ladogan" + }, + "izi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + "level4": "Nuclear Igboid", + "level5": "Central-Northern Igbo" + }, + "izm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "izr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Izeric" + }, + "jaa": { + "level0": "Arawan", + "level1": "Madi-Madiha" + }, + "jab": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Hyamic" + }, + "jac": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Kanjobalan", + "level5": "Kanjobal-Jacaltec" + }, + "jad": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding", + "level9": "Xasonka" + }, + "jae": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", 
+ "level7": "North Huon Gulf linkage" + }, + "jaf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Teraic", + "level5": "Western Tera" + }, + "jah": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian" + }, + "jai": { + "level0": "Bookkeeping" + }, + "jaj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Santa Isabel", + "level10": "Central Santa Isabel", + "level11": "Zazao-Blanga" + }, + "jak": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "jal": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute" + }, + "jam": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Western Caribbean Creole", + "level14": "Jamaicanic" + }, + "jao": { + "level0": "Pama-Nyungan", + "level1": "Ngarna" + }, + "jap": { + "level0": "Bookkeeping" + }, + "jaq": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Yaqayic" + }, + "jar": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", 
+ "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jarawaic" + }, + "jas": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic", + "level3": "Modern Javanese", + "level4": "Global Javanese" + }, + "jat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Hindko-Siraiki", + "level10": "Siraikic" + }, + "jau": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Southwest Cenderawasih Bay", + "level6": "Yaur-Yerisiam" + }, + "jav": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic", + "level3": "Modern Javanese", + "level4": "Global Javanese" + }, + "jax": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay" + }, + "jay": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu" + }, + "jaz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian" + }, + "jbe": { + "level0": "Bookkeeping" + }, + "jbi": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Badjiri-Eastern Karnic" + }, + "jbj": { + "level0": "South Bird's Head Family", + "level1": "East South Bird's Head", + "level2": "Kemberanic" + }, + "jbk": { + "level0": "Turama-Kikori", + "level1": "Turama-Omatian" + }, + "jbm": { + 
"level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Bijimic-Sur-Shall", + "level6": "Kwangic", + "level7": "Vaghat" + }, + "jbn": { + "level0": "Afro-Asiatic", + "level1": "Berber" + }, + "jbo": { + "level0": "Artificial Language" + }, + "jbr": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor" + }, + "jbt": { + "level0": "Nuclear-Macro-Je", + "level1": "Jabuti" + }, + "jbu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Jibu-Wase", + "level8": "Jibuic" + }, + "jcs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "jct": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak", + "level6": "Crimean Tatar-Urum", + "level7": "Crimeaic" + }, + "jda": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Lahauli-Spiti", + "level7": "Spiti-Jad" + }, + "jdg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Lasi-Jadgali" + }, + "jdt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Caucasian Tat" + }, + "jeb": { + "level0": "Cahuapanan" + }, 
+ "jee": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Chaurasiya" + }, + "jeh": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Kayong-Jeh-Halang", + "level4": "Jeh-Halang" + }, + "jei": { + "level0": "Yam", + "level1": "Morehead-Maro" + }, + "jek": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Jogo-Jeri", + "level6": "Jeri" + }, + "jel": { + "level0": "Bulaka River", + "level1": "Jelmek" + }, + "jen": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Jen", + "level7": "Doso-Dza" + }, + "jer": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos", + "level10": "Boze-Loro" + }, + "jet": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic" + }, + "jeu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Dangla", + "level7": "Unclassified Dangla" + }, + "jgb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Extreme North 
Vestigial Suffixes Bantu" + }, + "jgo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke" + }, + "jhi": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Menraq-Batek" + }, + "jhs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "jia": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Meridional" + }, + "jib": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Jibu-Wase", + "level8": "Jibuic" + }, + "jic": { + "level0": "Jicaquean" + }, + "jid": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic" + }, + "jie": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "Unclassified Biu-Mandara" + }, + "jig": { + "level0": "Mirndi" + }, + "jih": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Gyalrongic", + "level5": "West Gyalrongic", + "level6": "Horpa" + }, + "jii": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Baiso-Jiiddu" + }, + "jil": { + "level0": "Nuclear Trans New 
Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Mindjim", + "level4": "Upper Minjim" + }, + "jim": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Gude-Jimi-Zizilivakan" + }, + "jio": { + "level0": "Tai-Kadai", + "level1": "Hlaic" + }, + "jiq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Gyalrongic", + "level5": "West Gyalrongic" + }, + "jit": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Suguti" + }, + "jiu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Jino" + }, + "jiv": { + "level0": "Chicham", + "level1": "Shuaric", + "level2": "Huambisa-Shuar" + }, + "jiy": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Jino" + }, + "jje": { + "level0": "Koreanic" + }, + "jka": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Kaera-Straits" + }, + "jkm": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen" + }, + "jko": { + "level0": "East Strickland", + "level1": "Kubo-Samo-Bibo" + }, + "jkp": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Southern Karen" + }, + "jkr": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Koro-Holon" + }, + "jks": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "jku": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jaku-Gubi" + }, + "jle": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Buram Hill Chain" + }, + "jls": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "jma": { + "level0": "Dagan", + "level1": "Central Dagan" + }, + "jmb": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2" + }, + "jmc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga", + "level11": "West Kilimanjaro" + }, + "jmd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Yamdena-Onin" + }, + "jmi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Guruntumic" + }, + "jml": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Indo-Aryan Northern zone", + "level8": "Eastern Pahari" + }, + "jmn": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga", + "level4": "Yimchingric", + "level5": "Makuric" + }, + "jmr": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Southeast Western Oti-Volta", + "level13": "Kamara-Hanga" + }, + "jms": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Bebe-Kemezung", + "level8": "Naki-Kemezung", + "level9": "Nakic" + }, + "jmw": { + "level0": "Turama-Kikori", + "level1": "Turama-Omatian" + }, + "jmx": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Coicoyan-Metlatonoc" + }, + "jna": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Kinnauric", + "level5": "Thebor" + }, + "jnd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Bagri-Jandavra" + }, + "jng": { + "level0": "Yangmanic" + }, + "jni": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos" + }, + "jnj": { + "level0": "Ta-Ne-Omotic" + }, + 
"jnl": { + "level0": "Sino-Tibetan", + "level1": "Raji-Raute", + "level2": "Raute-Rawat" + }, + "jns": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali" + }, + "job": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu", + "level12": "Fuliiric", + "level13": "Fuliiru-Vira" + }, + "jod": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Maninka-Mori" + }, + "jog": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "jor": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup II", + "level7": "Warazu-Sirionoid", + "level8": "Sirionoid" + }, + "jos": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Arab Sign", + "level3": "Levantine-Iraqi Sign" + }, + "jow": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo" + }, + "jpn": { + "level0": "Japonic", + "level1": "Japanesic", + "level2": "Japan-Taiwan Japanese" + }, + "jpr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": 
"Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic" + }, + "jqr": { + "level0": "Aymaran", + "level1": "Tupe" + }, + "jra": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Rade-Jarai" + }, + "jrr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Wurbo-Wannu", + "level7": "Wurbo" + }, + "jrt": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Kofyar-Mushere-Chip", + "level7": "Kofyaric" + }, + "jru": { + "level0": "Cariban", + "level1": "Opon-Yukpan", + "level2": "Yukpan" + }, + "jsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "JSLic" + }, + "jua": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Nuclear Kawahiva" + }, + "jub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Wurbo-Wannu" + }, + "juc": { + "level0": "Tungusic", + "level1": "Manchu-Jurchen" + }, + "jud": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Maninka-Mori" + }, + "juh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": 
"Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Kororofa", + "level8": "Kona" + }, + "jui": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Unclassified Core Thura-Yura" + }, + "juk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Kororofa" + }, + "jul": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Sherpa-Jirel" + }, + "jum": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Burun", + "level3": "Southern Burun" + }, + "jun": { + "level0": "Austroasiatic", + "level1": "Mundaic" + }, + "juo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Kororofa", + "level8": "Kona" + }, + "jup": { + "level0": "Naduhup", + "level1": "Eastern Naduhup", + "level2": "Hup-Yuhup" + }, + "jur": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Jurunic" + }, + "jus": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "jut": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "South Scandinavian" + }, + "juu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Guruntumic", + "level7": "Tala-Sho-Zangwal" + }, + "juw": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Jibu-Wase" + }, + "juy": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "Sora-Juray-Gorum", + "level3": "Sora-Juray" + }, + "jvd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Global Dutch" + }, + "jvn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic", + "level3": "Modern Javanese", + "level4": "Global Javanese" + }, + "jwi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Southern Bia", + "level9": "Jwira-Nzima" + }, + "jye": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "Judeo-Muslim Sanaani Arabic" + }, + "jyy": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic", + "level6": "Morom-Jaya-Naba" + }, + "kaa": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Southeast Kipchak", + "level5": "South Kipchak" + }, + "kab": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Kabyle-Atlas Berber" + }, + "kac": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": "Jingpho" + }, + "kad": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau", + "level5": "Nuclear Northern Benue-Congo Plateau" + }, + "kae": { + "level0": "Austronesian", + "level1": "East Formosan", + "level2": "Northern East Formosan" + }, + "kaf": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Kazhouish" + }, + "kag": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Kajang", + "level6": "Kajaman-Lahanan" + }, + "kah": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Fer-Gula" + }, + "kai": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic" + }, + "kaj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Katabic" + }, + "kak": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran", + "level7": "Nuclear Southern Cordilleran", + "level8": "Kalanguya" + }, + "kal": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Inuit", + "level3": "Greenlandic Inuit" + }, + "kam": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + 
"level8": "Central Kenya Bantu", + "level9": "Kamba-Dhaisu" + }, + "kan": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid", + "level6": "Nuclear Kannaoid" + }, + "kao": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding", + "level9": "Xasonka" + }, + "kap": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Tsezic", + "level4": "East Tsezic" + }, + "kaq": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Chama subgroup", + "level5": "Shipibo-Konibo-Kapanawa" + }, + "kas": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Kashmiric" + }, + "kat": { + "level0": "Kartvelian", + "level1": "Georgian-Zan", + "level2": "Georgic" + }, + "kav": { + "level0": "Bookkeeping" + }, + "kaw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic" + }, + "kax": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Kao River", + "level4": "Paguic" + }, + "kay": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani" + }, + "kaz": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Southeast Kipchak", + "level5": "South Kipchak" + }, + "kba": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Mirning" + }, + 
"kbb": { + "level0": "Cariban", + "level1": "Parukotoan" + }, + "kbc": { + "level0": "Guaicuruan" + }, + "kbd": { + "level0": "Abkhaz-Adyge", + "level1": "Circassian" + }, + "kbe": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Northeastern Pama", + "level4": "Umpilaic" + }, + "kbf": { + "level0": "Bookkeeping" + }, + "kbg": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Unclassified Southern Tibetic" + }, + "kbi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Kaptiau-Tarpia" + }, + "kbj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Extreme North Vestigial Suffixes Bantu" + }, + "kbk": { + "level0": "Koiarian", + "level1": "Koiaric", + "level2": "Koita-Koiari" + }, + "kbl": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanembuic" + }, + "kbm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage" + }, + "kbn": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Central Mbum", + "level6": "Karangic", + "level7": "Kare-Pana" + }, + "kbo": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi", + "level3": "Kalikoic" + }, + "kbp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Kabiyeic" + }, + "kbq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria" + }, + "kbr": { + "level0": "Ta-Ne-Omotic", + "level1": "Kefoid", + "level2": "South Gonga" + }, + "kbs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Northern Okani" + }, + "kbt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage" + }, + "kbu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Unclassified Rajasthani" + }, + "kbv": { + "level0": "Senagi" + }, + "kbw": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Kairiruic linkage", + "level9": "Kaiep-Terebu" + }, + "kbx": { + "level0": "Keram", + "level1": "East Keram" + }, + "kby": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanuric" + }, + "kbz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Ronic", + "level6": "Mundat-Karfa" + }, + "kca": { + "level0": "Uralic", + "level1": "Khantyic", + "level2": "Northern Khanty" + }, + "kcb": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Wojokesic" + }, + "kcc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Lokoic", + "level8": "Lubila-Lokaa" + }, + "kcd": { + "level0": "Yam", + "level1": "Kanum", + "level2": "Ngkrn-Ngkantr", + "level3": "Ngkantr" + }, + "kce": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "kcf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo" + }, + "kcg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Katabic" + }, + "kch": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "kci": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + 
"level6": "Gyong-Kamantan" + }, + "kcj": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Nyun", + "level4": "Buy" + }, + "kck": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Kalanga-Nambya" + }, + "kcl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "North Huon Gulf linkage" + }, + "kcm": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Fer-Gula" + }, + "kcn": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic", + "level8": "Sudanese-Chadian Arabic", + "level9": "East Sudanic Arabic" + }, + "kco": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Dallman", + "level6": "Kinalakna-Kumukio" + }, + "kcp": { + "level0": "Kadugli-Krongo", + "level1": "Central-Western Kadugli-Krongo", + "level2": "Katcha-Kadugli-Miri-Kanga" + }, + "kcq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic", + "level8": "Tula-Ma-Yebu", + "level9": "Awak-Kamo" + }, + "kcr": { + "level0": "Katla-Tima", + "level1": "Katla-Julud" + }, + "kcs": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + 
"level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic", + "level7": "Talic", + "level8": "Piapung-Koenoem" + }, + "kct": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ottilien", + "level3": "Watam-Kaian" + }, + "kcu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu", + "level11": "Central East Ruvu" + }, + "kcv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Ruund-Salampasu", + "level11": "Lunda-Ruund-Kete", + "level12": "Ruund-Kete" + }, + "kcw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu" + }, + "kcx": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "East Ometo" + }, + "kcy": { + "level0": "Songhay", + "level1": "Northwest Songhay", + "level2": "Northern Songhay" + }, + "kcz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Sukuma-Nyamwezi (F.20)", + "level9": "Nyamwezic" + }, + "kda": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales 
Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Kuri", + "level5": "Hunter-Hastings" + }, + "kdc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu", + "level11": "Central East Ruvu", + "level12": "Kutu-Zaramo" + }, + "kdd": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic", + "level4": "Nuclear Pintupic", + "level5": "Wangkatja-Tjarra", + "level6": "Tjarra" + }, + "kde": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Ruvuma", + "level9": "Makonde-Makwe" + }, + "kdf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Mengenic", + "level9": "Mamusa-Mengen" + }, + "kdg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi", + "level9": "Central Sabi", + "level10": "Bisa-Lamba (M.50)" + }, + "kdh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Tem-Chala" + }, + "kdi": { + "level0": "Nilotic", + "level1": "Western 
Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo", + "level4": "Lango-Kumam" + }, + "kdj": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Teso-Turkana", + "level4": "Turkanic" + }, + "kdk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Extreme Southern New Caledonian" + }, + "kdl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Kambari-Cicipu", + "level6": "Kambaric", + "level7": "West Kambaric" + }, + "kdm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Gyong-Kamantan" + }, + "kdn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja" + }, + "kdp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic", + "level5": "Kanufi-Ninkyob-Angan" + }, + "kdq": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Kochic" + }, + "kdr": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak" + }, + "kds": { + "level0": "Bookkeeping" + }, + "kdt": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Kuy-Souei" + }, + "kdu": { 
+ "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Eastern Kordofan Nubian" + }, + "kdv": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": "Luish", + "level4": "Chakpa-Kadu-Ganan" + }, + "kdw": { + "level0": "Mombum-Koneraw" + }, + "kdx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo" + }, + "kdy": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor" + }, + "kdz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe", + "level9": "Mfumteic" + }, + "kea": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Upper Guinea Portuguese" + }, + "keb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ngomic", + "level8": "Nuclear Ngomic", + "level9": "Akeleic" + }, + "kec": { + "level0": "Kadugli-Krongo" + }, + "ked": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza" + }, + "kee": { + "level0": "Keresan" + }, + 
"kef": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe", + "level5": "Kpesi-Waci" + }, + "keg": { + "level0": "Temeinic" + }, + "keh": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic", + "level3": "Iatmulic" + }, + "kei": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Kei-Fordata" + }, + "kej": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "kek": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean" + }, + "kem": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Timor", + "level3": "Kemak-Tukudede" + }, + "ken": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Mamfe" + }, + "keo": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Barian" + }, + "kep": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Yerukula-Korava-Kaikadi" + }, + "keq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Halbic" + }, + "ker": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + 
"level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.3" + }, + "kes": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Kugboic" + }, + "ket": { + "level0": "Yeniseian", + "level1": "Northern Yeniseian" + }, + "keu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Kebu-Animere" + }, + "kev": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "kew": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Kewa" + }, + "kex": { + "level0": "Bookkeeping" + }, + "key": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "kez": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Koring-Kukele", + "level8": "Kukele-Uzekwe" + }, + "kfa": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu" + }, + "kfb": { + "level0": "Dravidian", + "level1": "Central Dravidian", + "level2": "Kolami-Naiki" + }, + "kfc": { + "level0": 
"Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Konda-Kui" + }, + "kfd": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "South-Western Dravidian", + "level4": "Koraga" + }, + "kfe": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota" + }, + "kff": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Southeast Gondi", + "level5": "South Bastar Gondi-Koya" + }, + "kfg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "South-Western Dravidian", + "level4": "Tuluic" + }, + "kfh": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "kfi": { + "level0": "Bookkeeping" + }, + "kfj": { + "level0": "Bookkeeping" + }, + "kfk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Kinnauric" + }, + "kfl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring" + }, + "kfm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic" + }, + "kfn": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring" + }, + "kfo": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Maninka-Mori", + "level10": "Koro-Koyaga" + }, + "kfp": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Kodaku-Korwa" + }, + "kfq": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda" + }, + "kfr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Sindhi-Kachchi" + }, + "kfs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic" + }, + "kft": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewaric" + }, + "kfu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern 
Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic" + }, + "kfv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Unclassified Gauda-Banga" + }, + "kfw": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Kolhrengic" + }, + "kfx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Nuclear Himachali" + }, + "kfy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Indo-Aryan Northern zone", + "level8": "Central Pahari" + }, + "kfz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur" + }, + "kga": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Maninka-Mori", + "level10": "Koro-Koyaga" + }, + "kgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Maya-Matbat", + "level6": 
"Raja Ampat Maya" + }, + "kge": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Lampungic", + "level3": "Pesisir" + }, + "kgf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Pindiu" + }, + "kgh": { + "level0": "Bookkeeping" + }, + "kgi": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "Malaysian Sign" + }, + "kgj": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Kham", + "level5": "Gamale-Parbate" + }, + "kgk": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.A", + "level8": "Paraguay-Brazil Guarani", + "level9": "Kaiowa" + }, + "kgl": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric" + }, + "kgm": { + "level0": "Bookkeeping" + }, + "kgn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Northern Tatic" + }, + "kgo": { + "level0": "Kadugli-Krongo", + "level1": "Central-Western Kadugli-Krongo", + "level2": "Krongo-Tumtum" + }, + "kgp": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Southern Je", + "level3": "Kaingang-Xokleng", + "level4": "Kaingangic" + }, + "kgq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro" + }, + "kgs": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + 
"level2": "North Coast Pama-Nyungan", + "level3": "Gumbaynggiric" + }, + "kgt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Vutic" + }, + "kgu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Omosan" + }, + "kgv": { + "level0": "West Bomberai" + }, + "kgw": { + "level0": "Maybratic" + }, + "kgx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Southern Kaili-Wolio", + "level5": "Island Kaili-Wolio", + "level6": "Wolio-Kamaru" + }, + "kgy": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Kyirong-Kagate", + "level9": "Gyalsumdo-Nubri-Kyirong" + }, + "kha": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Khasian", + "level3": "Khasi-Pnar-Lyngngam", + "level4": "Khasi-Pnar" + }, + "khb": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Northern Shanic", + "level12": "Sipsongpannic" + }, + "khc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Tukangbesi-Bonerate", + "level8": "Tukang Besi" + }, + "khd": { + "level0": "Yam", 
+ "level1": "Kanum", + "level2": "Ngkrn-Ngkantr", + "level3": "Ngkantr" + }, + "khe": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Becking-Dawi" + }, + "khf": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Khmu'" + }, + "khg": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Kham-Hor" + }, + "khj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau", + "level5": "Nuclear Northern Benue-Congo Plateau", + "level6": "Kuturmi-Ajiya" + }, + "khk": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Mongolian" + }, + "khl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Eastern Ngero", + "level9": "Kaliai-Kove" + }, + "khm": { + "level0": "Austroasiatic", + "level1": "Khmeric" + }, + "khn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Khandesic" + }, + "kho": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Saka-Wakhi", + "level5": "Saka" + }, + "khq": { + "level0": "Songhay", + "level1": "Northwest Songhay" + }, + "khr": { + "level0": "Austroasiatic", + "level1": "Mundaic" + }, + "khs": { + "level0": "Bosavi", + "level1": "Bosavi 
Watershed" + }, + "kht": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Assam Tai B" + }, + "khu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Nyaneka-Nkhumbi" + }, + "khv": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Tsezic", + "level4": "West Tsezic" + }, + "khw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan" + }, + "khx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Lega", + "level10": "Western Lega" + }, + "khy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "Kele-Lombo" + }, + "khz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + 
"level8": "Sinagoro-Keapara", + "level9": "Hula-Keapara" + }, + "kia": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Kim-Besme-Goundo" + }, + "kib": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Central Heibanic" + }, + "kic": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Fox" + }, + "kid": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Yemne-Kimbi" + }, + "kie": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Runga-Kibet" + }, + "kif": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Kham", + "level5": "Gamale-Parbate", + "level6": "Parbate Kham" + }, + "kig": { + "level0": "Kolopom", + "level1": "Kimaama-Riantana" + }, + "kih": { + "level0": "Border", + "level1": "Bewani", + "level2": "Pagi-Kilmeri" + }, + "kii": { + "level0": "Caddoan", + "level1": "Northern Caddoan", + "level2": "Pawnee-Kitsai" + }, + "kij": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Kilivila-Misima", + "level8": "Kilivilic", + "level9": "Kilivila-Muyuw" + }, + "kik": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Gikuyu-Temi" + }, + "kil": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + 
"level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Central West Chadic B.2", + "level7": "Warji-Gala-Kariya" + }, + "kim": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Sayan-Yenisei Turkic", + "level4": "Sayan" + }, + "kin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu" + }, + "kio": { + "level0": "Kiowa-Tanoan" + }, + "kip": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Kham" + }, + "kiq": { + "level0": "Kaure-Kosare" + }, + "kir": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Southeast Kipchak", + "level5": "East Kipchak" + }, + "kis": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage", + "level9": "Kis-Wogeo" + }, + "kit": { + "level0": "Pahoturi" + }, + "kiu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Zaza" + }, + "kiv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + 
"level8": "Sukuma-Nyamwezi (F.20)", + "level9": "Nyamwezic" + }, + "kiw": { + "level0": "Kiwaian" + }, + "kix": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southeastern Patkaian", + "level5": "Lainongic", + "level6": "Khiamniungic" + }, + "kiy": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "West Tariku", + "level3": "Fayu-Kirikiri" + }, + "kiz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Kisi-Pangwa" + }, + "kja": { + "level0": "Nimboranic", + "level1": "Outer Nimboranic", + "level2": "Mlap-Gresi-Kemtuik" + }, + "kjb": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Kanjobalan", + "level5": "Kanjobal-Jacaltec" + }, + "kjc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Makassaric", + "level5": "Konjo" + }, + "kjd": { + "level0": "Kiwaian" + }, + "kje": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Luangic-Kisaric", + "level5": "Kisaric" + }, + "kjf": { + "level0": "Bookkeeping" + }, + "kjg": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Khmu'" + }, + "kjh": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Sayan-Yenisei Turkic", + "level4": "Yenisey Turkic" + }, + "kji": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New 
Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "West Santa Isabel" + }, + "kjj": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian" + }, + "kjk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Makassaric", + "level5": "Konjo" + }, + "kjl": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Kham", + "level5": "Gamale-Parbate", + "level6": "Parbate Kham" + }, + "kjm": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Khao-Bit" + }, + "kjn": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Alaya-Athima", + "level3": "Southwestern Alaya-Athima" + }, + "kjo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Nuclear Himachali" + }, + "kjp": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Peripheral Karen", + "level3": "Pwo", + "level4": "Eastern-Western Pwo Karen" + }, + "kjq": { + "level0": "Keresan" + }, + "kjr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Eastern Yapen" + }, + "kjs": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Kewa", + "level6": "Southeast Kewa" + }, + "kjt": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Peripheral Karen", + "level3": "Pwo", + "level4": "Northern Pwo Karen" + }, + "kju": { + "level0": 
"Pomoan", + "level1": "Russian River and Eastern", + "level2": "Russian River", + "level3": "Southern Pomoan-Kashaya" + }, + "kjv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Western South Slavic" + }, + "kjx": { + "level0": "North Bougainville" + }, + "kjy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Kewa", + "level6": "Southeast Kewa" + }, + "kjz": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Phobjib-Chali-Bumthangic", + "level4": "Chali-Bumthangic", + "level5": "Bumthangic" + }, + "kka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid", + "level6": "Dibo-Kupa" + }, + "kkb": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Doutai-Kai-Waritai" + }, + "kkc": { + "level0": "East Strickland" + }, + "kkd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "kke": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Nuclear Mokole", + "level8": "Mixiforic" + }, + "kkf": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Tshanglic" + }, + "kkg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + 
"level7": "Kalinga", + "level8": "Northern Kalinga", + "level9": "Northwest Kalinga" + }, + "kkh": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Southern Shanic", + "level11": "Yuanic" + }, + "kki": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu" + }, + "kkj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)" + }, + "kkk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Santa Isabel", + "level10": "Central Santa Isabel" + }, + "kkl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Western Mek" + }, + "kkm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Kiong-Korop" + }, + "kkn": { + "level0": "Bookkeeping" + }, + "kko": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Western Kordofan Nubian" + }, + "kkp": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Southwest Pama", + "level3": "Coastal 
Southwest Paman", + "level4": "Dhawa-Kaber" + }, + "kkq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic", + "level15": "Bilaic", + "level16": "Bila-Kaiku" + }, + "kkr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Boghomic", + "level7": "Kir-Mangas" + }, + "kks": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele", + "level9": "Kirfi-Bele", + "level10": "Giiwo-Daza" + }, + "kkt": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Thulung-Tilung-Koyi" + }, + "kku": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "kkv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Maduresic" + }, + "kkw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie" + }, + "kkx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "North West Greater Barito" + }, + "kky": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic" + }, + "kkz": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Cordillera Athabaskan", + "level5": "Nahanni" + }, + "klb": { + "level0": "Cochimi-Yuman", + "level1": "Yuman" + }, + "klc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Sambaic" + }, + "kld": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Wiradhuric" + }, + "kle": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Khambu", + "level6": "Kulungic" + }, + "klf": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Maba" + }, + "klg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan", + "level5": "Western Mansakan" + }, + "klh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Uruwa", + "level4": "Unclassified Uruwa" + }, + "kli": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Torajic" + }, + "klj": { + "level0": "Turkic", + "level1": "Common Turkic" + }, + "klk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + 
"level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "kll": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan", + "level5": "Western Mansakan", + "level6": "Kagan-Kalagan" + }, + "klm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Kabenau" + }, + "klo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic" + }, + "klp": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Wojokesic", + "level3": "Kamasa-Susuami" + }, + "klq": { + "level0": "Turama-Kikori" + }, + "klr": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Upper Dudhkosi" + }, + "kls": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan" + }, + "klt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Uruwa" + }, + "klu": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Bassa-Klao", + "level5": "Klao-Tajuasohn" + }, + "klv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula", + "level9": "Southeastern Malakula linkage" + }, + "klw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Kulawi" + }, + "klx": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage" + }, + "kly": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Southern Kaili-Wolio", + "level5": "Island Kaili-Wolio", + "level6": "Kalao-Laiyolo" + }, + "klz": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "West Alor" + }, + "kma": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Buli-Koma" + }, + "kmb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbundu (H.20)" + }, + "kmc": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Mulam-Kam", + "level4": "Kamic" + }, + "kmd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Central and South Kalinga", + "level9": "South Kalinga", + "level10": "Southeastern Kalinga" + }, + "kme": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": 
"Sawabantu", + "level8": "Dualaic", + "level9": "Kole-Isubu" + }, + "kmf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso" + }, + "kmg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz", + "level5": "Huon Tip", + "level6": "Kate-Mape" + }, + "kmh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "Kalam-Kobon", + "level4": "Etp-Ti Kalam" + }, + "kmi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid", + "level6": "Dibo-Kupa", + "level7": "Abawa", + "level8": "Kami-Gupa" + }, + "kmj": { + "level0": "Dravidian", + "level1": "North Dravidian", + "level2": "Kurux-Malto", + "level3": "Malto" + }, + "kmk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Northern Kalinga" + }, + "kml": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Central and South Kalinga", + "level9": "South Kalinga", + "level10": "Southeastern Kalinga" + }, + "kmm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Kolhrengic" + }, + "kmn": { + "level0": "Sepik", + "level1": "Ram" + }, + "kmo": { + "level0": "Sepik", + "level1": "Nukuma" + }, + "kmp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": 
"Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Northern Samba-Duru", + "level7": "Vere-Gimme", + "level8": "Koma Alantika" + }, + "kmq": { + "level0": "Koman" + }, + "kmr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Laki-Kurdish", + "level8": "Kurdish" + }, + "kms": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg", + "level2": "Elepi-Kamasau-Marienberg" + }, + "kmt": { + "level0": "Nimboranic", + "level1": "Outer Nimboranic", + "level2": "Mlap-Gresi-Kemtuik", + "level3": "Gresi-Kemtuik" + }, + "kmu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria" + }, + "kmv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French", + "level16": "Guyanic Creole French" + }, + "kmw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic", + "level15": "Bilaic" + }, + "kmx": { + "level0": "Kiwaian" + }, + 
"kmy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Northern Samba-Duru", + "level7": "Vere-Gimme", + "level8": "Vere" + }, + "kmz": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "East Oghuz" + }, + "kna": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic" + }, + "knb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Central and South Kalinga" + }, + "knc": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanuric", + "level4": "East Kanuri" + }, + "knd": { + "level0": "Konda-Yahadian" + }, + "kne": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Bontok-Kankanay", + "level8": "Kankanay" + }, + "knf": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Manjaku-Mankanya-Pepel" + }, + "kng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC 
Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "Southeastern Kikongo", + "level20": "Southern Kikongo", + "level21": "Koongo-Kituba" + }, + "knh": { + "level0": "Bookkeeping" + }, + "kni": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic", + "level5": "Kanufi-Ninkyob-Angan" + }, + "knj": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Kanjobalan", + "level5": "Kanjobal-Jacaltec" + }, + "knk": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Nuclear Mokole" + }, + "knl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Ibanic" + }, + "knm": { + "level0": "Katukinan" + }, + "knn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi", + "level11": "Western Marathi" + }, + "kno": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Vai-Kono" + }, + "knp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": 
"Konja-Mambila-Vute", + "level8": "Konja" + }, + "knq": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Maniqic" + }, + "knr": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Eastern Sepik Hill" + }, + "kns": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Maniqic" + }, + "knt": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Marubo Subgroup" + }, + "knu": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Kpelle" + }, + "knw": { + "level0": "Kxa", + "level1": "Ju-Kung" + }, + "knx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Western Malayic Dayak" + }, + "kny": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Lubaic" + }, + "knz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi" + }, + "koa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage" + }, + "kob": { + "level0": "Bookkeeping" + }, + "koc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic", + "level10": "Unclassified Ngembaic" + }, + "kod": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Kodi-Gaura" + }, + "koe": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southwest Surmic", + "level3": "Baale-Olam" + }, + "kof": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Gera-Geruma-Kubi-Deno", + "level9": "Kubi-Deno" + }, + "kog": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Northern Magdalenic", + "level4": "Arhuacic" + }, + "koh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Mboshi (C.20)", + "level10": "Koyo-Mboshi" + }, + "koi": { + "level0": "Uralic", + "level1": "Permian", + "level2": "Komi" + }, + "koj": { + "level0": "Bookkeeping" + }, + "koo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Rwenzori" + }, + "kop": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Nuru" + }, + "koq": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Kota-Mahongwe" + }, + "kor": { + "level0": "Koreanic" + }, + "kos": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Kosraean-Nauruan" + }, + "kot": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Central" + }, + "kou": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": "Bolgo-Koke" + }, + "kov": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "Ningic" + }, + "kow": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang" + }, + "kox": { + "level0": "Bookkeeping" + }, + "koy": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Koyukonic" + }, + "koz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Kowan" + }, + "kpa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic", + 
"level8": "Tangale-Kwami-Kupto", + "level9": "Kwami-Kupto" + }, + "kpb": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "kpc": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Baniwa-Curripaco-Tariano", + "level5": "Baniwa-Curripaco" + }, + "kpd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Central Aru", + "level4": "Dobel-Koba" + }, + "kpf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Kabwum", + "level6": "Selepet-Komba" + }, + "kpg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Carolinean Outlier Polynesian" + }, + "kph": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "River Oti North Guang" + }, + "kpi": { + "level0": "Geelvink Bay", + "level1": "Barapasi-Sauri-Kofei", + "level2": "Sauri-Kofei" + }, + "kpj": { + "level0": "Nuclear-Macro-Je" + }, + "kpk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Kpan-Icen" + }, + "kpl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", 
+ "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka", + "level8": "Monzomboic", + "level9": "Kpala-Bakpa" + }, + "kpm": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Koho-Maa" + }, + "kpn": { + "level0": "Tupian" + }, + "kpo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Kposo-Ahlo-Bowili" + }, + "kpq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Western Mek" + }, + "kpr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Coastal Binanderean", + "level6": "Gaena-Korafe" + }, + "kps": { + "level0": "West Bird's Head", + "level1": "South West Bird's Head" + }, + "kpt": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic" + }, + "kpu": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar" + }, + "kpv": { + "level0": "Uralic", + "level1": "Permian", + "level2": "Komi" + }, + "kpw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "Kalam-Kobon" + }, + "kpx": { + "level0": "Koiarian", + "level1": "Koiaric", + "level2": "Biage-Mountain Koiali" + }, + "kpy": { + "level0": "Chukotko-Kamchatkan", + "level1": "Chukotian", + "level2": "R-Koryakic", + "level3": "J-Koryakic" + }, + "kpz": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Elgon-Mau Kalenjin" + }, + "kqa": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": 
"Sogeram", + "level5": "North Sogeram" + }, + "kqb": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz" + }, + "kqc": { + "level0": "Manubaran" + }, + "kqd": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Trans-Zab", + "level12": "Western Trans-Zab" + }, + "kqe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan", + "level5": "Western Mansakan", + "level6": "Kagan-Kalagan" + }, + "kqf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Kakabai linkage" + }, + "kqg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Dogoso-Khe" + }, + "kqh": { + "level0": "Bookkeeping" + }, + "kqi": { + "level0": "Koiarian", + "level1": "Koiaric", + "level2": "Koita-Koiari" + }, + "kqj": { + "level0": "South Bougainville", + "level1": "Nasioiic", + "level2": "Nasioi", + "level3": "South-Central Nasioi", + "level4": "South Nasioi" + }, + "kqk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "kql": { + "level0": "Yuat", + "level1": 
"Miyak-Bun-Biwat" + }, + "kqm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Kaansa-Dogose", + "level7": "Dogose-Khisa" + }, + "kqn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Kaonde-Shaba-Sanga" + }, + "kqo": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee" + }, + "kqp": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 2", + "level6": "Gabri-Kimre" + }, + "kqq": { + "level0": "Nuclear-Macro-Je", + "level1": "Maxakali-Borum" + }, + "kqr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic" + }, + "kqs": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Southern Mel", + "level3": "Kissi" + }, + "kqt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic" + }, + "kqu": { + "level0": "Tuu", + "level1": "!Ui", + "level2": "Eastern !Ui" + }, + "kqv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Eastern Murutic" + }, + "kqw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Kandas-Duke of York" + }, + "kqx": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Central" + }, + "kqy": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "East Ometo" + }, + "kqz": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Khoekhoe", + "level3": "South Khoekhoe" + }, + "kra": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Unclassified Bihari" + }, + "krb": { + "level0": "Miwok-Costanoan", + "level1": "Costanoan" + }, + "krc": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak", + "level6": "Kaukasus Kipchak" + }, + "krd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Kawaimina" + }, + "kre": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz" + }, + "krf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Koto-Olrat-Lakon" + }, + "krg": { + "level0": "Bookkeeping" + }, + "krh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": 
"Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "kri": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "West African Creole English" + }, + "krj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan", + "level6": "Kinarayan" + }, + "krk": { + "level0": "Chukotko-Kamchatkan", + "level1": "Chukotian", + "level2": "R-Koryakic", + "level3": "J-Koryakic" + }, + "krl": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Ladogan" + }, + "krn": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn" + }, + "krp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Kiong-Korop" + }, + "krs": { + "level0": "Kresh-Aja", + "level1": "Kreshic" + }, + "krt": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanuric", + "level4": "East Kanuri" + }, + "kru": { + "level0": "Dravidian", + "level1": "North Dravidian", + "level2": "Kurux-Malto" + }, + "krw": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn" + }, + "krx": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + 
"level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola", + "level7": "PF-Jola", + "level8": "Kwatay-Karon-Mlomp", + "level9": "Karon-Mlomp" + }, + "kry": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Southern Samur" + }, + "krz": { + "level0": "Yam", + "level1": "Kanum", + "level2": "Ngkrn-Ngkantr" + }, + "ksa": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "ksb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu", + "level11": "Seuta", + "level12": "Bondei-Shambala" + }, + "ksc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Central and South Kalinga", + "level9": "South Kalinga" + }, + "ksd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Patpatar-Minigir-Tolai", + "level9": "Minigir-Tolai" + }, + "kse": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage", + "level9": "Nuclear West Central Papuan 
linkage" + }, + "ksf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Bafia (A.50)", + "level8": "Nuclear Bafia (A.50)", + "level9": "Lefa-Bafia" + }, + "ksg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia", + "level11": "Rovianic", + "level12": "Hoava-Kusaghe" + }, + "ksh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Middle Franconian", + "level9": "Ripuarian" + }, + "ksi": { + "level0": "Sko" + }, + "ksj": { + "level0": "Kwalean", + "level1": "Humene-Kwale" + }, + "ksk": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dhegiha", + "level3": "Osage-Kansa" + }, + "ksl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mumeng", + "level10": "Dambi-Kumaru" + }, + "ksm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang" + }, + "ksn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + 
"level3": "Central Philippine", + "level4": "Tagalogic" + }, + "kso": { + "level0": "Bookkeeping" + }, + "ksp": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone", + "level7": "Gore" + }, + "ksq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic", + "level8": "Tangale-Kwami-Kupto", + "level9": "Kwami-Kupto" + }, + "ksr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Pindiu", + "level6": "Kosorong-Burum-Mindik" + }, + "kss": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Southern Mel", + "level3": "Kissi" + }, + "kst": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi" + }, + "ksu": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Assam Tai B" + }, + "ksv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Tetelaic" + }, + "ksw": { + 
"level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Southern Karen", + "level3": "Sgaw" + }, + "ksx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata" + }, + "ksy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga" + }, + "ksz": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Kodaku-Korwa" + }, + "kta": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric" + }, + "ktb": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Hadiyya-Kambaata", + "level6": "Kambaataic" + }, + "ktc": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Unclassified Boleic" + }, + "ktd": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Unclassified Wati" + }, + "kte": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Kyirong-Kagate", + "level9": "Gyalsumdo-Nubri-Kyirong" + }, + "ktf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater 
Lega", + "level8": "Unclassified Greater Lega" + }, + "ktg": { + "level0": "Pama-Nyungan", + "level1": "Kalkatungic" + }, + "kth": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Maba" + }, + "kti": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Lowland Ok", + "level6": "Division A Lowland Ok" + }, + "ktj": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Ivorian Grebo", + "level6": "Tepo-Plapo" + }, + "ktk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Western Admiralty Islands", + "level6": "Anchorite" + }, + "ktl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Balochic", + "level8": "Southern-Western Balochi", + "level9": "Southern Balochi-Koroshi" + }, + "ktm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Kurti-Kele-Ere", + "level9": "Kurti-Elu" + }, + "ktn": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Arikemic" + }, + "ktp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Bi-Ka" + }, + "ktq": { + "level0": "Unclassifiable" + }, + "kts": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": 
"Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Lowland Ok", + "level6": "Division A Lowland Ok" + }, + "ktt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Dumut", + "level6": "Ketum-Wambon" + }, + "ktu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "Southeastern Kikongo", + "level20": "Southern Kikongo", + "level21": "Koongo-Kituba" + }, + "ktv": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Katu", + "level3": "Nuclear Katu" + }, + "ktw": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "California Athabaskan" + }, + "ktx": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano" + }, + "kty": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Middle Bomokandian", + "level15": "Late Bomokandian" + }, + "ktz": { + "level0": "Kxa", + "level1": "Ju-Kung" + }, + "kua": { + "level0": "Atlantic-Congo", 
+ "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Ndonga (R.20)" + }, + "kub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid" + }, + "kuc": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor" + }, + "kud": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "kue": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Nuclear Simbu", + "level4": "Kuman-Dom-Gunaa" + }, + "kuf": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Katu", + "level3": "Nuclear Katu" + }, + "kug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid", + "level6": "Dibo-Kupa" + }, + "kuh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic" + }, + "kui": { + "level0": "Cariban", + "level1": "Kuikuroan", + "level2": "Nuclear Kuikuroan" + }, + "kuj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "North Mara", + "level12": "Kuriaic" + }, + 
"kuk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic" + }, + "kul": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Ronic" + }, + "kum": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak", + "level6": "Kaukasus Kipchak" + }, + "kuo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Dallman", + "level6": "Kinalakna-Kumukio" + }, + "kup": { + "level0": "Kunimaipan" + }, + "kuq": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Nuclear Kawahiva", + "level8": "Central Kawahiva" + }, + "kus": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Southeast Western Oti-Volta" + }, + "kuu": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tananaic" + }, + "kuv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Teor-Kur" + }, + "kuw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North 
Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic", + "level9": "Mid-Southern Central Core Bandaic" + }, + "kux": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic" + }, + "kuy": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Northeastern Pama", + "level4": "Umpilaic" + }, + "kva": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic", + "level4": "Bagvalal-Tindi" + }, + "kvb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Central Sumatran Malay" + }, + "kvc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Eastern Ngero", + "level9": "Kaliai-Kove" + }, + "kvd": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "Central Alor" + }, + "kve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Eastern Murutic" + }, + "kvf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 2" + }, + "kvg": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Boazi" + }, + "kvh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": 
"Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic" + }, + "kvi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.3" + }, + "kvj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Higic" + }, + "kvk": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "JSLic" + }, + "kvl": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Kayaw-Manu" + }, + "kvm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Mamfe", + "level6": "Kendem-Denya" + }, + "kvn": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Eastern Isthmic Chibchan", + "level4": "Kuna" + }, + "kvo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Central Aru", + "level4": "Dobel-Koba" + }, + "kvp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Ujir-Kola-Kompane", + "level4": "Kola-Kompane" + }, + "kvq": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Geba-Bwe" + }, + "kvr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Kerinci-Minangkabau" + }, + "kvs": { + "level0": "Bookkeeping" + }, + "kvt": { + "level0": "Bookkeeping" + }, + "kvu": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Northern Karen" + }, + "kvv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Ujir-Kola-Kompane", + "level4": "Kola-Kompane" + }, + "kvw": { + "level0": "Timor-Alor-Pantar", + 
"level1": "Alor-Pantar", + "level2": "East Alor", + "level3": "Sawila-Wersing" + }, + "kvx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani", + "level11": "Indus Rajasthani" + }, + "kvy": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Kayah-Yintale" + }, + "kvz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Becking-Dawi", + "level5": "Tsakwambo-Komyandaret" + }, + "kwa": { + "level0": "Naduhup", + "level1": "Eastern Naduhup" + }, + "kwb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian" + }, + "kwc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain", + "level14": "Likwala-Likuba" + }, + "kwd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita" + }, + "kwe": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Kwerbaic" + }, + "kwf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita" + }, + "kwg": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Barh Keita", + "level7": "Sara-Kaba" + }, + "kwh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut", + "level5": "Koiwai-Irarutu" + }, + "kwi": { + "level0": "Barbacoan", + "level1": "Awa-Southern Barbacoan" + }, + "kwj": { + "level0": "Sepik", + "level1": "Nukuma", + "level2": "Kwanga-Mende" + }, + "kwk": { + "level0": "Wakashan", + "level1": "Northern Wakashan", + "level2": "Kwakiutlan" + }, + "kwl": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Kofyar-Mushere-Chip", + "level7": "Kofyaric" + }, + "kwm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Ndonga (R.20)", + "level12": "Kwambi-Ndonga" + }, + "kwn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Kwangali-Diriku" + }, + "kwo": { + "level0": "Kwomtari-Nai" + }, + "kwp": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Unclassified 
Eastern Kru" + }, + "kwq": { + "level0": "Bookkeeping" + }, + "kwr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Kwer-Kopkaka-Burumakok", + "level6": "Kwer-Burumakok" + }, + "kws": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbala-Holu-Sondi (K.10)", + "level10": "Holu (K.10)", + "level11": "Pheende-Kwezo" + }, + "kwt": { + "level0": "Tor-Orya", + "level1": "Tor" + }, + "kwu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)" + }, + "kwv": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Barh Keita", + "level7": "Sara-Kaba" + }, + "kww": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Surinamese Creole English", + "level13": "Eastern Maroons", + "level14": "Ndyuka" + }, + "kwx": { + "level0": "Dravidian", + "level1": "Unclassified Dravidian" + }, + "kwy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + 
"level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo" + }, + "kwz": { + "level0": "Khoe-Kwadi" + }, + "kxa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Kairiruic linkage" + }, + "kxb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano" + }, + "kxc": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Konsoid" + }, + "kxd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Berau-Brunei Malay", + "level8": "Bruneic Malay", + "level9": "Brunei-Bacan Malay" + }, + "kxe": { + "level0": "Bookkeeping" + }, + "kxf": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Kayaw-Manu" + }, + "kxg": { + "level0": "Bookkeeping" + }, + "kxh": { + "level0": "South Omotic", + "level1": "AHK", + "level2": "Hamer-Karo" + }, + "kxi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + 
"level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Lowland Murut" + }, + "kxj": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Koulfaic" + }, + "kxk": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Northern Karen" + }, + "kxm": { + "level0": "Austroasiatic", + "level1": "Khmeric" + }, + "kxn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Melanau", + "level6": "Sibu-Kanowit-Tanjong" + }, + "kxp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic", + "level10": "Western Gujaratic" + }, + "kxq": { + "level0": "Yam", + "level1": "Kanum" + }, + "kxr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Koro-Lele-Nali-Titan" + }, + "kxs": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Southern Periphery Mongolic", + "level3": "Shirongol", + "level4": "Baoanic" + }, + "kxt": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic" + }, + "kxu": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Konda-Kui", + "level4": "Manda-Kui", + "level5": "Kui-Kuvi" + }, + "kxv": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + 
"level3": "Konda-Kui", + "level4": "Manda-Kui", + "level5": "Kui-Kuvi" + }, + "kxw": { + "level0": "East Strickland" + }, + "kxx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain", + "level14": "Likwala-Likuba" + }, + "kxy": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Kayong-Jeh-Halang" + }, + "kxz": { + "level0": "Kiwaian", + "level1": "Turama-Kerewo" + }, + "kya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Suguti" + }, + "kyb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Central and South Kalinga", + "level9": "South Kalinga" + }, + "kyc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Engan" + }, + "kyd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Karey-Barakai" + }, + "kye": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "River Oti North Guang" + }, + "kyf": { + "level0": 
"Kru", + "level1": "Eastern Kru", + "level2": "Beteic", + "level3": "Eastern Bete" + }, + "kyg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria" + }, + "kyi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Lower Baram", + "level6": "Central Lower Baram A" + }, + "kyj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran", + "level7": "Nuclear Southern Cordilleran" + }, + "kyk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan" + }, + "kyl": { + "level0": "Kalapuyan" + }, + "kym": { + "level0": "Bookkeeping" + }, + "kyn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Negrosanon" + }, + "kyo": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar" + }, + "kyp": { + "level0": "Bookkeeping" + }, + "kyq": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic" + }, + "kyr": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Mundurukuic" + }, + "kys": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic" + }, + "kyt": { + 
"level0": "Kayagaric", + "level1": "Kaygir-Tamagario" + }, + "kyu": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Kayah-Yintale", + "level4": "Kayah" + }, + "kyv": { + "level0": "Bookkeeping" + }, + "kyw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Sadanic" + }, + "kyx": { + "level0": "North Bougainville" + }, + "kyy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Unclassified Kainantu" + }, + "kyz": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI" + }, + "kza": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "Karaboro" + }, + "kzb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits" + }, + "kzc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Kulango-Lorom", + "level5": "Kulango" + }, + "kzd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Taliaboic" + }, + "kze": { + "level0": "Bookkeeping" + }, + "kzf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Common Kaili" + }, 
+ "kzg": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami" + }, + "kzh": { + "level0": "Nubian", + "level1": "Nile Nubian" + }, + "kzi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Dayic" + }, + "kzk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia" + }, + "kzl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku" + }, + "kzm": { + "level0": "South Bird's Head Family" + }, + "kzn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic", + "level9": "Lolo-Kokola" + }, + "kzo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Mbere (B.60)", + "level19": "Tsitsekeic", + "level20": "Lekaningic" + }, + "kzp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, 
+ "kzq": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic" + }, + "kzr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Central Mbum", + "level6": "Karangic" + }, + "kzs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Kadazan-Sugut-Minokok", + "level8": "Sugut-Minokok Kadazan" + }, + "kzu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Jayapura Bay", + "level8": "Eastern Jayapura Bay" + }, + "kzv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Becking-Dawi", + "level5": "Tsakwambo-Komyandaret" + }, + "kzw": { + "level0": "Unclassifiable" + }, + "kzx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase" + }, + "kzy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic", + "level15": "Bilaic" + }, + "kzz": { + "level0": "West Bird's Head", + "level1": "South West Bird's Head" + }, + "laa": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Nuclear Subanen" + }, + "lac": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Yucatecan", + "level3": "Nuclear Yucatecan", + "level4": "Yucatec-Lacandon" + }, + "lad": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic", + "level13": "South Castilic" + }, + "lae": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Lahaulic" + }, + "lag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Mbugwe-Langi" + }, + "lai": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Nyika-Lambya" + }, + "laj": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo", + "level4": "Lango-Kumam" + }, + "lam": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi", + "level9": "Central Sabi", + "level10": "Bisa-Lamba (M.50)" + }, + "lan": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": 
"Kainji", + "level4": "Kainji Lake", + "level5": "Upper Niger Kainji" + }, + "lao": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Lao-Thai" + }, + "lap": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone", + "level7": "Gore" + }, + "laq": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Eastern Kra" + }, + "lar": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "South Guang", + "level8": "Hill South Guang" + }, + "las": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Kabiyeic" + }, + "lat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin" + }, + "lau": { + "level0": "Bookkeeping" + }, + "lav": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Eastern Baltic" + }, + "law": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Northern Tomini" + }, + "lax": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Tiwa-Boro" + }, + "lay": { + 
"level0": "Bookkeeping" + }, + "laz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham", + "level9": "Busu" + }, + "lba": { + "level0": "Bookkeeping" + }, + "lbb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Label-Bilur" + }, + "lbc": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Lakkia-Biao" + }, + "lbe": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian" + }, + "lbf": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Lahaulic" + }, + "lbg": { + "level0": "Bookkeeping" + }, + "lbi": { + "level0": "Speech Register", + "level1": "Atlantic-Congo Speech Register" + }, + "lbj": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Western Archaic Tibetan", + "level5": "Kenhatic" + }, + "lbk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Bontok-Kankanay", + "level8": "Bontok" + }, + "lbm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": 
"Gauda-Banga" + }, + "lbn": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic" + }, + "lbo": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric", + "level4": "Loven-Suq" + }, + "lbq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham" + }, + "lbr": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Upper Arun", + "level6": "Lohorung-Yamphu" + }, + "lbs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Arab Sign" + }, + "lbt": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Lachic" + }, + "lbu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham" + }, + "lbv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Madak linkage" + }, + "lbw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western Bungku-Tolaki", + "level8": "West Coast Bungku-Tolaki" + }, + "lbx": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "North East Greater Barito" + }, + "lby": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Lamalamic", + "level3": "Coastal Lamalamic" + }, + "lbz": { + "level0": "Tangkic" + }, + "lcc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Maya-Matbat", + "level6": "Raja Ampat Maya" + }, + "lcd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Central Aru" + }, + "lce": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Bangka-Belitung Malay" + }, + "lcf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Kerinci-Minangkabau", + "level7": "Minangkabauic" + }, + "lch": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba" + }, + "lcl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "West Central Maluku", + "level3": "Sula-Buru", + "level4": "Buruic" + }, + "lcm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest 
Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "lcp": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Wa-Lawa", + "level6": "Lawa" + }, + "lcq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "West Piru Bay", + "level5": "Hoamoal", + "level6": "West Hoamoal" + }, + "lcs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram" + }, + "ldb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Koroic", + "level7": "Duyaic" + }, + "ldd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Polci-Luri" + }, + "ldg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Mbembe-Legbo", + "level8": "Legboic", + "level9": "Lenyima-Leyigha" + }, + "ldh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Dakoid", + "level6": "Taram-Dirim-Nnakenyare", + "level7": "Dirim-Nnakenyare" + }, + "ldi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": 
"Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Nuclear Northern Kikongo" + }, + "ldj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos", + "level10": "Chokobo-Lemoro-Sanga", + "level11": "Lemoro-Sanga" + }, + "ldk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Bambuka-Gomu-Leelau" + }, + "ldl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bena-Mboi" + }, + "ldm": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Northern Mel" + }, + "ldn": { + "level0": "Artificial Language" + }, + "ldo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Northern Bikwin-Jen", + "level6": "Burak-Loo" + }, + "ldp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic" + }, + "ldq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic", + "level5": "Bete-Lufu" + }, + "lea": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": 
"Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Lega", + "level10": "Western Lega" + }, + "leb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi", + "level9": "Central Sabi", + "level10": "Bisa-Lamba (M.50)" + }, + "led": { + "level0": "Central Sudanic", + "level1": "Lenduic", + "level2": "Bale" + }, + "lee": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi" + }, + "lef": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Lelemic", + "level5": "Lelemi-Akpafu" + }, + "leh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe", + "level9": "Central Eastern Botatwe", + "level10": "Kafue" + }, + "lei": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Kabenau" + }, + "lej": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya" + }, + "lek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": 
"Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus" + }, + "lel": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Bushoong-Wongo-Lele" + }, + "lem": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "West Mbam (A.40)", + "level11": "Mandi-Nyokon" + }, + "leo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "Sanaga (A.60)" + }, + "lep": { + "level0": "Sino-Tibetan", + "level1": "Himalayish" + }, + "leq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Engan" + }, + "ler": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "South-East Admiralty" + }, + "les": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe", + "level3": "Leseic" + }, + "let": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + 
"level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "East Arawe" + }, + "leu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "lev": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar" + }, + "lew": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Common Kaili" + }, + "lex": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Luangic-Kisaric", + "level5": "Luangic" + }, + "ley": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Badaic-Limola" + }, + "lez": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Eastern Samur", + "level5": "Tabasaran-Aghul-Lezgi", + "level6": "Aghul-Lezgi" + }, + "lfa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Bafia (A.50)", + "level8": "Nuclear Bafia (A.50)", + "level9": "Lefa-Bafia" + }, + "lfn": { + "level0": "Artificial Language" + }, + "lga": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic 
linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "West New Georgia", + "level11": "Simboic", + "level12": "Ghanongga-Lungga" + }, + "lgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "West Santa Isabel" + }, + "lgg": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi" + }, + "lgh": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji" + }, + "lgi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Dayic" + }, + "lgk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Central-Western Malakula" + }, + "lgl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita" + }, + "lgm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", 
+ "level9": "Lega" + }, + "lgn": { + "level0": "Koman", + "level1": "Central Koman", + "level2": "Dana-Opo" + }, + "lgq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo" + }, + "lgr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Nuclear Guadalcanal-Nggelic", + "level7": "North and West Guadalcanal" + }, + "lgs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lgt": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mehek-Pahi" + }, + "lgu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira" + }, + "lgz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ngombe-Genja" + }, + "lha": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Southern Kra" + }, + "lhh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Ambonic", + "level8": "Central Ambon" + }, + "lhi": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Lahoid" + }, + "lhl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + 
"level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic", + "level9": "Bhadrawahi-Bhalesi-Curahi", + "level10": "Bhadarwahic", + "level11": "Chinali-Lahul Lohar" + }, + "lhm": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic" + }, + "lhn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Kajang", + "level6": "Kajaman-Lahanan" + }, + "lhp": { + "level0": "Sino-Tibetan", + "level1": "Dhimal-Lhokpu-Toto" + }, + "lhs": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "Turoyo-Mlahso" + }, + "lht": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Hiw-Lo-Toga" + }, + "lhu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Lahoid" + }, + "lia": { + "level0": "Atlantic-Congo", + "level1": "Limba" + }, + "lib": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus II", + "level9": "Likum-Levei" + }, + "lic": { + "level0": 
"Tai-Kadai", + "level1": "Hlaic", + "level2": "Nuclear Hlaic" + }, + "lid": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus I" + }, + "lie": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Libinzic", + "level13": "Libinza Ngiri" + }, + "lif": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar" + }, + "lig": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Jogo-Jeri", + "level6": "Jogo" + }, + "lih": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tabar linkage" + }, + "lij": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian" + }, + "lik": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": 
"Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Middle Bomokandian" + }, + "lil": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Northern Interior Salish" + }, + "lim": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Middle Franconian", + "level9": "Ripuarian" + }, + "lin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain", + "level14": "Bobangi-Bangala-Lingala", + "level15": "Lingala-Bangala" + }, + "lio": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Sobeic", + "level9": "Sobei-Liki" + }, + "lip": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Lelemic", + "level5": "Likpe-Santrokofi" + }, + "liq": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Hadiyya-Kambaata", + "level6": "Hadiyyaic" + 
}, + "lir": { + "level0": "Pidgin", + "level1": "English-based pidgin" + }, + "lis": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu" + }, + "lit": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Eastern Baltic" + }, + "liu": { + "level0": "Dajuic", + "level1": "Eastern Dajuic" + }, + "liv": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic" + }, + "liw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Central Sumatran Malay", + "level6": "Music" + }, + "lix": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan", + "level9": "Munic", + "level10": "Western Munic" + }, + "liy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic" + }, + "liz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Libinzic", + "level13": "Libinza Ngiri" + }, + "lje": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + 
"level3": "Rampi-Seko-Badaic" + }, + "lji": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Southern Kaili-Wolio", + "level5": "Island Kaili-Wolio", + "level6": "Kalao-Laiyolo" + }, + "ljl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Eastern Central Flores", + "level8": "Ende-Lio" + }, + "ljp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Lampungic", + "level3": "Pesisir" + }, + "ljw": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric" + }, + "ljx": { + "level0": "Pama-Nyungan", + "level1": "Nyawaygic" + }, + "lka": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Lakalei-Idate" + }, + "lkb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kabarasi-Tachoni-Nyala East" + }, + "lkc": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Lahoid" + }, + "lkd": { + "level0": "Nambiquaran", + "level1": "Nambikwara Complex", + "level2": "Northern Nambiquaran", + "level3": "Roosevelt" + }, + "lke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", 
+ "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "North Nyanza", + "level11": "Soga-Kenyi" + }, + "lkh": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic" + }, + "lki": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Laki-Kurdish" + }, + "lkj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Ibanic", + "level5": "Iban-Mualang-Seberuang", + "level6": "Iban-Seberuang", + "level7": "Northern Iban" + }, + "lkl": { + "level0": "Nuclear Torricelli" + }, + "lkm": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Mirning" + }, + "lkn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Koto-Olrat-Lakon" + }, + "lko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Western Luyia", + "level14": "Marachi-Khayo" + }, + "lkr": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo" + }, + "lks": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kisa-Marama-Tsotso" + }, + "lkt": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dakotan", + "level3": "Sioux" + }, + "lku": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Pirriya-Kungkari" + }, + "lky": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo", + "level5": "Lotuko-Lokoya" + }, + "lla": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bena-Mboi", + "level5": "Bena" + }, + "llb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic", + "level9": "Lolo-Kokola" + }, + "llc": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Nuclear Mokole" + }, + "lld": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian" + }, + "lle": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty 
Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Koro-Lele-Nali-Titan" + }, + "llf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus I" + }, + "llg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "Nuclear Rote" + }, + "llh": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Unclassified Lisoid" + }, + "lli": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Laali-Yaa" + }, + "llk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Lower Baram", + "level6": "Central Lower Baram B" + }, + "lll": { + "level0": "Bogia" + }, + "llm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Butonic", + "level9": 
"East Buton" + }, + "lln": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 1" + }, + "llo": { + "level0": "Bookkeeping" + }, + "llp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Efate", + "level8": "North Efatic" + }, + "llq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "lls": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic", + "level3": "Nuclear RSLic" + }, + "llu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita", + "level9": "North Malaitan" + }, + "llx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Eastern Fijian" + }, + "lma": { + "level0": "Atlantic-Congo", + "level1": "Limba" + }, + "lmb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Central Santo" + }, + "lmc": { + "level0": "Limilngan-Wulna" + }, + "lmd": { + "level0": "Narrow Talodi", + "level1": "Lumun-Torona" + }, + "lme": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "South Masa", 
+ "level4": "Peveic" + }, + "lmf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Tengah", + "level5": "Southeast Lembata" + }, + "lmg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Bibling" + }, + "lmh": { + "level0": "Bookkeeping" + }, + "lmi": { + "level0": "Central Sudanic", + "level1": "Mangbetu-Asua" + }, + "lmj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Tengah" + }, + "lmk": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Anal-Lamgang" + }, + "lml": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu" + }, + "lmm": { + "level0": "Bookkeeping" + }, + "lmn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani" + }, + "lmo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern 
Shifted Romance", + "level11": "Gallo-Italian", + "level12": "Piemontese-Lombard" + }, + "lmp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe" + }, + "lmq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Timur" + }, + "lmr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat" + }, + "lms": { + "level0": "Bookkeeping" + }, + "lmt": { + "level0": "Bookkeeping" + }, + "lmu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Lamenu-Lewo" + }, + "lmv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Eastern Fijian", + "level7": "Nuclear Eastern Fijian", + "level8": "Viwa-Lomaiviti-East Viti Levu" + }, + "lmw": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Western Miwokan" + }, + "lmx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring" + }, + "lmy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Wewewa-Laboya" + }, + "lmz": { + 
"level0": "Unattested" + }, + "lna": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Ngbugu-Langbasi" + }, + "lnb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Ndonga (R.20)" + }, + "lnc": { + "level0": "Bookkeeping" + }, + "lnd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Dayic" + }, + "lnh": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "Senoic", + "level4": "Lanoh-Semnam-Temiar", + "level5": "Lanoh-Semnam", + "level6": "Lanohic" + }, + "lni": { + "level0": "South Bougainville", + "level1": "Nasioiic", + "level2": "Nasioi", + "level3": "South-Central Nasioi", + "level4": "South Nasioi" + }, + "lnj": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Linngithigh-Alngith" + }, + "lnl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Ngbugu-Langbasi" + }, + "lnm": { + "level0": "Keram", + "level1": "Ulmapo", + "level2": "Mwakai-Pondi" + }, + "lnn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "Shark Bayic" + }, + "lno": { + "level0": 
"Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo" + }, + "lns": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring" + }, + "lnu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda" + }, + "loa": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Galela-Loloda" + }, + "lob": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Lobiri-Jaane" + }, + "loc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan" + }, + "loe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Western Saluan-Banggai", + "level7": "Saluanic", + "level8": "Batui-Saluan" + }, + "lof": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Central Heibanic", + "level3": "Ebang-Logol" + }, + "log": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi" + }, + "loh": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southwest Surmic", + "level3": "Didinga-Murle", + "level4": "Didinga-Longarim" + }, + "loi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Kulango-Lorom", + "level5": "Teenic" + }, + "loj": { + 
"level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "South-East Admiralty", + "level7": "Lou-Paluai" + }, + "lok": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Mende-Bandi", + "level6": "Mende-Loko" + }, + "lol": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Mongoic", + "level11": "Lomongo" + }, + "lom": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Loma" + }, + "lon": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe", + "level9": "Lomweic" + }, + "loo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "Kele-Lombo" + }, + "lop": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Kainji Lake", + "level5": "Upper Niger Kainji", + "level6": "Oleran" + }, + "loq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Interieur", + "level12": "Lobalic" + }, + "lor": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Kulango-Lorom", + "level5": "Teenic" + }, + "los": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "Mokoreng-Loniu" + }, + "lot": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo", + "level5": "Lotuko-Lokoya" + }, + "lou": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French" + }, + "lov": { + "level0": "Bookkeeping" + }, + "low": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic", + "level7": "Upper Kinabatangan-Lobu" + }, + "lox": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram" + }, + "loy": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early 
Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Mustangic" + }, + "loz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Western Sotho-Tswana", + "level12": "Central Sotho-Tswana", + "level13": "Sesotho-Lozi" + }, + "lpa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Efate", + "level8": "North Efatic" + }, + "lpe": { + "level0": "Lepki-Murkim-Kembra" + }, + "lpn": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga", + "level4": "Yimchingric", + "level5": "Makuric" + }, + "lpo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Lipo-Lolopo", + "level7": "Lipo-Micha" + }, + "lpx": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo", + "level5": "Lopit-Dongotono" + }, + "lra": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Benyadu-Bekati", + "level4": "Bakati'", + "level5": "Rara-Sara Bakati'" + }, + "lrc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": 
"Luric-Dezfulic", + "level8": "Luric" + }, + "lre": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "lri": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Western Luyia", + "level14": "Marachi-Khayo" + }, + "lrk": { + "level0": "Bookkeeping" + }, + "lrl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian" + }, + "lrm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kisa-Marama-Tsotso" + }, + "lrn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Central Aru" + }, + "lro": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Central Heibanic", + "level3": "Ebang-Logol", + "level4": "Ebang-Laru" + }, + "lrr": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Upper Arun", + "level6": "Lohorung-Yamphu", + "level7": "Yamphuic" + }, + "lrt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear 
Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay" + }, + "lrv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Central-Western Malakula" + }, + "lrz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Lemerig-Veraa" + }, + "lsa": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Komisenian" + }, + "lsc": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lsd": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Northwestern Jewish Neo-Aramaic" + }, + "lse": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri" + }, + "lsg": { + "level0": "Bookkeeping" + }, + "lsh": { + "level0": "Sino-Tibetan", + "level1": "Kho-Bwa", + "level2": "Western Kho-Bwa", + "level3": "Chug-Lish" + }, + "lsi": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": 
"Lolo-Burmese", + "level3": "Burmish", + "level4": "Northern Burmish", + "level5": "Maruic", + "level6": "Leqic" + }, + "lsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lsm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Western Luyia", + "level14": "Saamiaic" + }, + "lsn": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lso": { + "level0": "Bookkeeping" + }, + "lsp": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "Honduras-Panama Sign" + }, + "lsr": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Bragat-Aruop-Amol" + }, + "lss": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Lasi-Jadgali" + }, + "lst": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "lsv": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lsw": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lsy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "ltc": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic" + }, + "lti": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Luangic-Kisaric", + "level5": "Luangic" + }, + "ltn": { + "level0": "Nambiquaran", + "level1": "Nambikwara Complex", + "level2": "Northern Nambiquaran", + "level3": "Roosevelt" + }, + "lto": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kisa-Marama-Tsotso" + }, + "lts": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kabarasi-Tachoni-Nyala East" + }, + "ltu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase", + "level8": "Hatuhaha", + "level9": "Saparuan", + "level10": "Saparua-Latu" + }, + "ltz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Middle Franconian" + }, + "lua": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow 
Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Lubaic", + "level10": "Bangubangu-Kasai" + }, + "lub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Kaonde-Shaba-Sanga" + }, + "luc": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi" + }, + "lud": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Ladogan", + "level6": "East Ladoga" + }, + "lue": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Chokwe-Lwena" + }, + "luf": { + "level0": "Mailuan" + }, + "lug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "North Nyanza" + }, + "lui": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan", + "level3": "Cupan" + }, + "luj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Mbagani-Lwalwa" + }, + "luk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early 
Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic", + "level8": "Nuclear Dzongkhic" + }, + "lul": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Southern Moru-Madi" + }, + "lum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba", + "level12": "Mbwela-Mbunda" + }, + "lun": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Ruund-Salampasu", + "level11": "Lunda-Ruund-Kete" + }, + "luo": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo", + "level4": "Adhola-Alur-Luo", + "level5": "Adhola-Luo" + }, + "lup": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Lumbu-Bwisi" + }, + "luq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede", + "level8": "Southeastern Ede", + "level9": "Nuclear Yoruba", + "level10": "Lucumi-Yoruba" + }, + "lus": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic" + }, + "lut": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "Lushootseed-Puget" + }, + "luu": { + "level0": "Bookkeeping" + }, + "luv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Unclassified Sindhic" + }, + "luw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila", + "level11": "Njerup" + }, + "luz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Luric-Dezfulic", + "level8": "Luric", + "level9": "Bakhtiari-Southern Lori" + }, + "lva": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku" + }, + "lvi": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric" + }, + "lvl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": 
"Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric" + }, + "lvu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Tengah", + "level5": "Southeast Lembata" + }, + "lwa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Mbagani-Lwalwa" + }, + "lwe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Timur" + }, + "lwg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia" + }, + "lwh": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Lachic" + }, + "lwl": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Wa-Lawa", + "level6": "Lawa" + }, + "lwm": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid", + "level7": "Bisu-Pyen-Laomian" + }, + "lwo": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo", + "level4": "Luwo-Thuri" + }, + "lws": { + "level0": "Sign Language", + "level1": "L1 
Sign Language" + }, + "lwt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat", + "level5": "Flores Lamaholot" + }, + "lwu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lawoish" + }, + "lww": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Lamenu-Lewo" + }, + "lxm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "lya": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic", + "level8": "Nuclear Dzongkhic" + }, + "lyg": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Khasian", + "level3": "Khasi-Pnar-Lyngngam", + "level4": "Lyngngamic" + }, + "lyn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Greater Luyana", + "level8": "Eastern Greater Luyana" + }, + "lzh": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic" + }, + "lzl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": 
"Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Central-Western Malakula" + }, + "lzn": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southeastern Patkaian", + "level5": "Lainongic" + }, + "lzz": { + "level0": "Kartvelian", + "level1": "Georgian-Zan", + "level2": "Zan" + }, + "maa": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Northwest Alta Mazatec" + }, + "mab": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec" + }, + "mad": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Maduresic" + }, + "mae": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Southeastern Benue-Congo Plateau" + }, + "maf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Matakam" + }, + "mag": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan" + }, + "mah": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian" + }, + "mai": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": 
"Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan" + }, + "maj": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Valley Mazatec" + }, + "mak": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Makassaric", + "level5": "Nuclear Makassaric" + }, + "mal": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "mam": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Mamean", + "level4": "Mamean" + }, + "maq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan" + }, + "mar": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi" + }, + "mas": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Ongamo-Maa", + "level5": "Nuclear Maa" + }, + "mat": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Matlatzincan" + }, + "mau": { + "level0": 
"Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Central Mazatec" + }, + "mav": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani" + }, + "maw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Southeast Western Oti-Volta", + "level13": "Mampruli-Dagbani" + }, + "max": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay", + "level7": "Manadoic Malay" + }, + "maz": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Mazahua" + }, + "mba": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "North Manobo", + "level5": "Kinamiguin-Bukidnon", + "level6": "Bukidnon" + }, + "mbb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "West Manobo", + "level7": "WBM-Livunganen-Ilianen" + }, + "mbc": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Pemong-Panare", + "level3": "Pemongan" + }, + "mbd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": 
"Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "East and Central Manobo", + "level7": "East Manobo" + }, + "mbf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay" + }, + "mbg": { + "level0": "Bookkeeping" + }, + "mbh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Bebeli-Mangseng" + }, + "mbi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "West Manobo", + "level7": "WBM-Livunganen-Ilianen" + }, + "mbj": { + "level0": "Naduhup" + }, + "mbk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Sera-Sissano", + "level10": "Sissanoic" + }, + "mbl": { + "level0": "Nuclear-Macro-Je", + "level1": "Maxakali-Borum", + "level2": "Maxakalian", + "level3": "Nuclear Maxakalian" + }, + "mbn": { + "level0": "Guahiboan", + "level1": "Nuclear Guahiboan" + }, + "mbo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": 
"Manenguba" + }, + "mbp": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Northern Magdalenic", + "level4": "Arhuacic", + "level5": "Eastern-Southern Arhuacic", + "level6": "Eastern Arhuacic" + }, + "mbq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage" + }, + "mbr": { + "level0": "Kakua-Nukak" + }, + "mbs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "South Manobo", + "level6": "Sarangani-Tasaday-Cotabato" + }, + "mbt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "East and Central Manobo", + "level7": "Central Manobo" + }, + "mbu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Numan" + }, + "mbv": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Naluic" + }, + "mbw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Jimi" + }, + "mbx": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Nuclear Central Sepik Hill" + }, + "mby": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan 
Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Unclassified Sindhic" + }, + "mbz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec" + }, + "mca": { + "level0": "Mataguayan", + "level1": "Mataguayo I" + }, + "mcb": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Matsi-Nan" + }, + "mcc": { + "level0": "Anim", + "level1": "Tirio", + "level2": "Nuclear Tirio" + }, + "mcd": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano", + "level5": "Yaminawa Complex" + }, + "mce": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic" + }, + "mcf": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mayoruna Branch", + "level3": "Mayo Group", + "level4": "Matses subgroup" + }, + "mcg": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku", + "level3": "Mapoyo-Yawarana" + }, + "mch": { + "level0": "Cariban", + "level1": "Guianan", + "level2": "Maquiritari-Wayumara" + }, + "mci": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Sankwep" + }, + "mcj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + 
"level10": "Mbongno-Mvano", + "level11": "Mvano-Ndunda" + }, + "mck": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba", + "level12": "Mbwela-Mbunda" + }, + "mcl": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Napo Tucanoan", + "level3": "Siona-Secoya", + "level4": "Sionan" + }, + "mcm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Luso-Asian Creole" + }, + "mcn": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "North Masa", + "level4": "Masa-Gizey-Ham" + }, + "mco": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe", + "level4": "Lowland-Midland Mixe", + "level5": "Lowland Mixe" + }, + "mcp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Makaaic", + "level11": "North-Central Makaaic" + }, + "mcq": { + "level0": "Koiarian", + "level1": "Baraic" + }, + "mcr": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Kapau-Menya" + }, + "mcs": { 
+ "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Tupuri-Mundang-Mambai" + }, + "mcu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila", + "level11": "Eastern Mambila" + }, + "mcv": { + "level0": "Anim", + "level1": "Inland Gulf of Papua", + "level2": "West Inland Gulf of Papua" + }, + "mcw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3" + }, + "mcx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic", + "level10": "Mpiemo-Ukhwejo" + }, + "mcy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Watut" + }, + "mcz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Silopic" + }, + "mda": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic", + "level5": "Rukubic", + "level6": "Mada-Ninzam" + }, + "mdb": { + "level0": "Kiwaian", + "level1": "Turama-Kerewo" + }, + "mdc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": 
"Rai Coast", + "level3": "Mindjim", + "level4": "Lower Minjim", + "level5": "Inland Minjim" + }, + "mdd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic" + }, + "mde": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Maba" + }, + "mdf": { + "level0": "Uralic", + "level1": "Mordvin" + }, + "mdg": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Masalit" + }, + "mdh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Danaw" + }, + "mdi": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe" + }, + "mdj": { + "level0": "Central Sudanic", + "level1": "Mangbetu-Asua", + "level2": "Mangbetuic" + }, + "mdk": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe" + }, + "mdl": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "mdm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Eastern Mundu-Baka", + "level7": "Mayogo-Bangba" + }, + "mdn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Bwamba-Ngondi-Pande-Mbati-Aka" + }, + "mdo": { + "level0": "Bookkeeping" + }, + "mdp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + 
"level9": "Mbala-Holu-Sondi (K.10)", + "level10": "Mbala-Sondi" + }, + "mdq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke" + }, + "mdr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi" + }, + "mds": { + "level0": "Manubaran" + }, + "mdt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Mbere (B.60)", + "level19": "Tsitsekeic", + "level20": "Lekaningic" + }, + "mdu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Mboshi (C.20)" + }, + "mdv": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic", + "level9": "Atatlahuca-Monteverde" + }, + "mdw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", 
+ "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Mboshi (C.20)", + "level10": "Koyo-Mboshi" + }, + "mdx": { + "level0": "Dizoid" + }, + "mdy": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo" + }, + "mdz": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV", + "level6": "Tupi-Guarani Subgroup IV.A" + }, + "mea": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields", + "level8": "Menka-Atong" + }, + "meb": { + "level0": "Turama-Kikori", + "level1": "Turama-Omatian" + }, + "mec": { + "level0": "Mangarrayi-Maran", + "level1": "Maran" + }, + "med": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Hagen", + "level3": "Melpa-Tembagla" + }, + "mee": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Mengenic", + "level9": "Mamusa-Mengen" + }, + "mef": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Khasian", + "level3": "Khasi-Pnar-Lyngngam", + "level4": "Lyngngamic" + }, + "meh": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec" + }, + "mei": { + "level0": "Nubian" + }, + "mej": { + "level0": "East Bird's 
Head", + "level1": "Meax" + }, + "mek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage", + "level9": "Nuclear West Central Papuan linkage" + }, + "mel": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Melanau" + }, + "mem": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Marrngu" + }, + "men": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Mende-Bandi", + "level6": "Mende-Loko" + }, + "meo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric", + "level6": "Northeastern Peninsular Malay" + }, + "mep": { + "level0": "Jarrakan", + "level1": "Miriwunic" + }, + "meq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Meri", + "level7": "Dugwor-Merey" + }, + "mer": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Eastern Kirinyaga", + "level10": "Northern Kirinyaga", + "level11": "Nithi-Meru" + }, + "mes": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": 
"Mubic" + }, + "met": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya" + }, + "meu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage" + }, + "mev": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan" + }, + "mew": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Unclassified Boleic" + }, + "mey": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "mez": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian" + }, + "mfa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric", + "level6": "Northeastern Peninsular Malay" + }, + "mfb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Bangka-Belitung Malay" + }, + "mfc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mbaic", + 
"level6": "Ndunga-Mba-Dongo", + "level7": "Ndunga-Mba" + }, + "mfd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic", + "level10": "Mankonic" + }, + "mfe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Isle-de-France Creole" + }, + "mff": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Bebe-Kemezung", + "level8": "Naki-Kemezung", + "level9": "Nakic" + }, + "mfg": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Nuclear Mokole", + "level8": "Mixiforic" + }, + "mfh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Podoko" + }, + "mfi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Wandala-Malgwa-Glavda", + "level7": "Wandala-Malgwa" + }, + "mfj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + 
"level3": "South Biu-Mandara", + "level4": "Matakam", + "level5": "Mefele-Cuvok" + }, + "mfk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Mofu" + }, + "mfl": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Buraic" + }, + "mfm": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Marghic", + "level7": "Kilba-South Margi" + }, + "mfn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Mbembe-Legbo" + }, + "mfo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe" + }, + "mfp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay" + }, + "mfq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma B", + "level13": "Gourmantche-Moba", + "level14": "Moba-Bimoba" + }, + "mfr": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Marithielic" + }, + "mfs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + 
"level2": "LSFic" + }, + "mft": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "Mokoreng-Loniu" + }, + "mfu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba", + "level12": "Mbwela-Mbunda" + }, + "mfv": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Manjaku-Mankanya-Pepel", + "level6": "Cur-Bok-Cotier" + }, + "mfw": { + "level0": "Kwalean" + }, + "mfx": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo" + }, + "mfy": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Cahitan" + }, + "mfz": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Burun", + "level3": "Southern Burun" + }, + "mgb": { + "level0": "Tamaic" + }, + "mgc": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli", + "level4": "Gberi-Morokodo-Mittu" + }, + "mgd": { + "level0": "Central Sudanic", + "level1": "Moru-Madi" + }, + "mge": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Bediondo" + }, + "mgf": { + "level0": "Bulaka River" + }, + "mgg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": 
"Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic" + }, + "mgh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "mgi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Jilic-Eggonic", + "level5": "Jilic" + }, + "mgj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Unclassified Central Delta" + }, + "mgl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage" + }, + "mgm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Timor" + }, + "mgn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic" + }, + "mgo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Widikum-Tadkon" + }, + "mgp": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Magar" + }, + "mgq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + 
"level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mbeya" + }, + "mgr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika", + "level10": "Fipaic", + "level11": "Maluwawaru" + }, + "mgs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Manda-Ngoni" + }, + "mgt": { + "level0": "Keram", + "level1": "Ulmapo", + "level2": "Mwakai-Pondi" + }, + "mgu": { + "level0": "Mailuan" + }, + "mgv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matengic" + }, + "mgw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matumbic" + }, + "mgx": { + "level0": "Bookkeeping" + }, + "mgy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic" + }, + "mgz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Mbugwe-Langi" + }, + "mha": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Konda-Kui", + "level4": "Manda-Kui", + "level5": "Manda-Pengo" + }, + "mhb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Kota-Mahongwe" + }, + "mhc": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Kanjobalan" + }, + "mhd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Pare-Taveta", + "level10": "Pareic" + }, + "mhe": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "South Aslian" + }, + "mhf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Finungwan-Mamaa-Gusan" + }, + "mhg": { + "level0": "Marrku-Wurrugu" + }, + "mhh": { + "level0": "Bookkeeping" + }, + "mhi": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Southern Moru-Madi" + }, + "mhj": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic" + }, + "mhk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "mhl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater 
Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Kumil" + }, + "mhm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "mhn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Bairisch", + "level10": "Global South Bavarian" + }, + "mho": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Greater Luyana", + "level8": "Western Greater Luyana", + "level9": "Mashi-Mbukushi" + }, + "mhp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "mhq": { + "level0": "Siouan" + }, + "mhr": { + "level0": "Uralic", + "level1": "Mari" + }, + "mhs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "West Central Maluku", + "level3": "Sula-Buru", + "level4": "Buruic" + }, + "mht": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Cassiquiare" + }, + "mhu": { + "level0": "Sino-Tibetan", + "level1": "Digarish" + }, + "mhv": { + "level0": "Bookkeeping" + }, + "mhw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western 
Bantu", + "level7": "Greater Luyana", + "level8": "Western Greater Luyana", + "level9": "Mashi-Mbukushi" + }, + "mhx": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Northern Burmish", + "level5": "Maruic" + }, + "mhy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Maanyan-Paku" + }, + "mhz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea" + }, + "mia": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian" + }, + "mib": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic", + "level9": "Atatlahuca-Monteverde" + }, + "mic": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Northern Eastern Algonquian", + "level6": "Micmacic" + }, + "mid": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Macro-Mandaic" + }, + "mie": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec" + }, + "mif": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + 
"level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Mofu" + }, + "mig": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic" + }, + "mih": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "East Coast Mixtec" + }, + "mii": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northern Baja Mixtec" + }, + "mij": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Yemne-Kimbi" + }, + "mik": { + "level0": "Muskogean" + }, + "mil": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Tlazoyal-Penoles" + }, + "mim": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Nuclear Guerrero Mixtec" + }, + "min": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Kerinci-Minangkabau", + "level7": "Minangkabauic" + }, + "mio": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + 
"level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "West Coast Mixtec" + }, + "mip": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northeastern Alta Mixtec" + }, + "miq": { + "level0": "Misumalpan" + }, + "mir": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe", + "level4": "Lowland-Midland Mixe", + "level5": "Lowland Mixe" + }, + "mit": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northern Baja Mixtec" + }, + "miu": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Tezoatlanic" + }, + "miv": { + "level0": "Bookkeeping" + }, + "miw": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Ankave-Tainae-Akoye", + "level3": "Tainae-Akoye" + }, + "mix": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Mixtepec-Yucunicoco Mixtec" + }, + "miy": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Nuclear Guerrero Mixtec", + "level8": "Southwestern Guerrero Mixtec" + }, + "miz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northern Alta Mixtec" + }, + "mja": 
{ + "level0": "Bookkeeping" + }, + "mjc": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "West Coast Mixtec" + }, + "mjd": { + "level0": "Maiduan" + }, + "mje": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Musguic" + }, + "mjg": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Southern Periphery Mongolic", + "level3": "Shirongol", + "level4": "Monguoric" + }, + "mjh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Nyanjaic" + }, + "mji": { + "level0": "Hmong-Mien", + "level1": "Mienic", + "level2": "Mien-Mun" + }, + "mjj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Tibor", + "level7": "Nuclear Tibor" + }, + "mjk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Western Bel" + }, + "mjl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali" + }, + "mjm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + 
"level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage" + }, + "mjn": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna", + "level4": "Bwana-Moam-Tapen" + }, + "mjo": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "mjp": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid" + }, + "mjq": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "mjr": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "mjs": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Kofyar-Mushere-Chip" + }, + "mjt": { + "level0": "Dravidian", + "level1": "North Dravidian", + "level2": "Kurux-Malto", + "level3": "Malto" + }, + "mjv": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": 
"Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Muthuvan-Mannan" + }, + "mjw": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Karbic" + }, + "mjx": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Santalic" + }, + "mjy": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Delawaran", + "level5": "Mahican-Woronoco-Pojassick" + }, + "mjz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Unclassified Tharu" + }, + "mka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Unclassified Volta-Congo" + }, + "mkb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga" + }, + "mkc": { + "level0": "Nuclear Torricelli", + "level1": "Nuclear Maimai" + }, + "mkd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Eastern South Slavic", + "level6": "Macedo-Bulgarian" + }, + "mke": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": 
"Midlands Indo-Aryan", + "level7": "Bhil" + }, + "mkf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Central West Chadic B.2" + }, + "mkg": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS", + "level4": "Maonan-Mak-Sui", + "level5": "Mak-Ai-Cham" + }, + "mki": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani", + "level11": "Indus Rajasthani" + }, + "mkj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Ponapeic" + }, + "mkk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Makaaic", + "level11": "North-Central Makaaic" + }, + "mkl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede" + }, + "mkm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Moken-Moklen" + }, + "mkn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + 
"level6": "Eastern Indonesia Trade Malay" + }, + "mko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Jen", + "level7": "Doso-Dza" + }, + "mkp": { + "level0": "Yareban", + "level1": "Doriri-Abia" + }, + "mkq": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Eastern Miwokan" + }, + "mkr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Manep-Barem" + }, + "mks": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Central-Western Baja Mixtec" + }, + "mkt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone" + }, + "mku": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Manenkan", + "level10": "Konya-Manya" + }, + "mkv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "East Santo", + "level9": "Mafea-Tutuba" + }, + "mkw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "Southeastern Kikongo", + "level20": "Southern Kikongo", + "level21": "Koongo-Kituba" + }, + "mkx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "North Manobo", + "level5": "Kinamiguin-Bukidnon" + }, + "mky": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "East Makian-Gane" + }, + "mkz": { + "level0": "Timor-Alor-Pantar", + "level1": "East Timor" + }, + "mla": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "South Santo" + }, + "mlb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa", + "level10": "Mbure-Yambassa" + }, + "mlc": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai" + }, + "mld": { + "level0": "Bookkeeping" + }, 
+ "mle": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Manambu-Yalaku" + }, + "mlf": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram", + "level3": "Tinic", + "level4": "Tin" + }, + "mlh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz", + "level5": "Huon Tip", + "level6": "Kate-Mape" + }, + "mli": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Masenrempulu" + }, + "mlj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3", + "level5": "Sokoroic", + "level6": "Miltuic" + }, + "mlk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Sabaki-Swahili" + }, + "mll": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Northern Malakula" + }, + "mlm": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Mulam-Kam" + }, + "mln": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Nuclear Guadalcanal-Nggelic", + "level7": "North and West Guadalcanal" + }, + "mlo": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + 
"level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola", + "level7": "PF-Jola", + "level8": "Kwatay-Karon-Mlomp", + "level9": "Karon-Mlomp" + }, + "mlp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles" + }, + "mlq": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding", + "level9": "Xasonka" + }, + "mlr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "Hurza" + }, + "mls": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Masalit" + }, + "mlt": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic", + "level7": "Malta-Tunisian Arabic" + }, + "mlu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita", + "level9": "North Malaitan" + }, + "mlv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "mlw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Tokombere" + }, + "mlx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": 
"Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula", + "level10": "Southwest Coastal Malekula" + }, + "mly": { + "level0": "Bookkeeping" + }, + "mma": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan" + }, + "mmb": { + "level0": "Somahai" + }, + "mmc": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Mazahua" + }, + "mmd": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS", + "level4": "Maonan-Mak-Sui", + "level5": "Maonan-Chadong" + }, + "mme": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Northwestern Malakula" + }, + "mmf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Ronic", + "level6": "Mundat-Karfa" + }, + "mmg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym" + }, + "mmh": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xinguan Arawak", + "level4": "Waura-Mehinaku-Kustenau", + "level5": "Waura-Mehinaku" + }, + "mmi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + 
"level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Tibor", + "level7": "Nuclear Tibor" + }, + "mmj": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric" + }, + "mmk": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Teluguic" + }, + "mml": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Angkuic", + "level5": "Southern Angkuic" + }, + "mmm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Bieria-Maii" + }, + "mmn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine" + }, + "mmo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mapos-Mangga-Wagau" + }, + "mmp": { + "level0": "Amto-Musan" + }, + "mmq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "East Sogeram", + "level6": "Aisian" + }, + "mmr": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "North Hmongic" + }, + "mms": { + "level0": "Bookkeeping" + }, + "mmt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": 
"North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Western Ngero" + }, + "mmu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa", + "level10": "Mbure-Yambassa", + "level11": "Yambassa (A.60)", + "level12": "Mmala-Elip-Gunu" + }, + "mmv": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Kotiria-Piratapuyo", + "level5": "Piratapuyic", + "level6": "Arapaso-Miriti" + }, + "mmw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Vanuatu-Loyalty Outliers" + }, + "mmx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Madak linkage" + }, + "mmy": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Dangla" + }, + "mmz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Libinzic", + "level13": "Libinza 
Fleuve" + }, + "mna": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage" + }, + "mnb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan", + "level9": "Munic", + "level10": "Western Munic" + }, + "mnc": { + "level0": "Tungusic", + "level1": "Manchu-Jurchen", + "level2": "Manchu-Xibe" + }, + "mnd": { + "level0": "Tupian", + "level1": "Monde", + "level2": "Gavianic" + }, + "mne": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic", + "level6": "Morom-Jaya-Naba", + "level7": "Naba-Berakou" + }, + "mnf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Mundani-Njen" + }, + "mng": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Mnong" + }, + "mnh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic", + "level9": "Mid-Southern Central Core Bandaic" + }, + "mni": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga" + }, + "mnj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + 
"level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Eastern Iranian", + "level5": "Yidgha-Munji" + }, + "mnk": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding" + }, + "mnl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Central Santo" + }, + "mnm": { + "level0": "Dagan", + "level1": "Central Dagan", + "level2": "Southwest Dagan" + }, + "mnn": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Mnong", + "level6": "Southern-Central Mnong" + }, + "mnp": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Min", + "level3": "Inland Min" + }, + "mnq": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Menraq-Batek" + }, + "mnr": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Western Numic" + }, + "mns": { + "level0": "Uralic", + "level1": "Mansic", + "level2": "North-Central Mansi" + }, + "mnt": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Mayabic", + "level3": "Nuclear Mayabic" + }, + "mnu": { + "level0": "Mairasic" + }, + "mnv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian" + }, + "mnw": { + "level0": "Austroasiatic", + "level1": 
"Monic" + }, + "mnx": { + "level0": "East Bird's Head" + }, + "mny": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu" + }, + "mnz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Paniai Lakes" + }, + "moa": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Wan-Mwan" + }, + "mob": { + "level0": "Bookkeeping" + }, + "moc": { + "level0": "Guaicuruan", + "level1": "Guaicuru del Sur", + "level2": "Qom" + }, + "mod": { + "level0": "Pidgin", + "level1": "Choctaw-based pidgin" + }, + "moe": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi" + }, + "mof": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Southern New England Algonquian", + "level6": "Western Southern New England Algonquian" + }, + "mog": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Mongondowic" + }, + "moh": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian", + "level2": "Mohawk-Oneida" + }, + "moi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bena-Mboi" + }, + "moj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka", + "level8": "Monzomboic" + }, + "mom": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": 
"Tlapanec-Manguean", + "level3": "Manguean" + }, + "moo": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Hre-Sedang-Todrah-Monam" + }, + "mop": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Yucatecan" + }, + "mor": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Western Heibanic" + }, + "mos": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Mossi-Farefare", + "level14": "Mossic" + }, + "mot": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic" + }, + "mou": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Birgit-Mogum-Toram" + }, + "mov": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "River Yuman" + }, + "mow": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain" + }, + "mox": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip 
linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage" + }, + "moy": { + "level0": "Ta-Ne-Omotic", + "level1": "Kefoid", + "level2": "South Gonga" + }, + "moz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B" + }, + "mpa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matengic" + }, + "mpb": { + "level0": "Northern Daly" + }, + "mpc": { + "level0": "Mangarrayi-Maran" + }, + "mpd": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Purus", + "level4": "Yineic" + }, + "mpe": { + "level0": "Surmic" + }, + "mpf": { + "level0": "Bookkeeping" + }, + "mpg": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "North Masa", + "level4": "Marba-Musey" + }, + "mph": { + "level0": "Iwaidjan Proper" + }, + "mpi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Septentrional", + "level6": "Kotoko Septentrional 2" + }, + "mpj": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Martuwangkic", + "level4": "Warnman-Wangka" + }, + "mpk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Musguic", + "level5": "Musgu-Mbara" + }, + "mpl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Watut" + }, + "mpm": { 
+ "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic" + }, + "mpn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Eastern Bel" + }, + "mpo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Pasismanua" + }, + "mpp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz", + "level5": "Huon Tip", + "level6": "Sopac" + }, + "mpq": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mayoruna Branch", + "level3": "Mayo Group", + "level4": "Matis subgroup" + }, + "mpr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia", + "level11": "Marovo-Vangunu" + }, + "mps": { + "level0": "Teberan" + }, + "mpt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + 
"level4": "Ok", + "level5": "Mountain Ok", + "level6": "Mianic" + }, + "mpu": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Tuparic" + }, + "mpv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Boana" + }, + "mpw": { + "level0": "Arawakan", + "level1": "Negro-Roraima", + "level2": "Pidjanan" + }, + "mpx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Kilivila-Misima" + }, + "mpy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic" + }, + "mpz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Bi-Ka" + }, + "mqa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "Central-Eastern South Halmahera" + }, + "mqb": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "Hurza" + }, + "mqd": { + "level0": "Bookkeeping" + }, + "mqe": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Unclassified Hanseman" + }, + "mqf": { + "level0": "Somahai" + }, + "mqg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay" + }, + "mqh": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Tlazoyal-Penoles" + }, + "mqi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Batuley-Mariri" + }, + "mqj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Torajic" + }, + "mqk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "East and Central Manobo", + "level7": "East Manobo" + }, + "mql": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Oti-Volta Oriental", + "level10": "Waama-Tayari-Ditammari", + "level11": "Tayari-Ditammari", + "level12": "Ditammaric" + }, + "mqm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Distal", + "level13": "Marquesan" + }, + "mqn": 
{ + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki" + }, + "mqo": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Kao River" + }, + "mqp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "West Piru Bay", + "level5": "Hoamoal", + "level6": "West Hoamoal" + }, + "mqq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Kadazan-Sugut-Minokok", + "level8": "Sugut-Minokok Kadazan" + }, + "mqr": { + "level0": "Tor-Orya", + "level1": "Tor" + }, + "mqs": { + "level0": "North Halmahera" + }, + "mqt": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Angkuic", + "level5": "Southern Angkuic" + }, + "mqu": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Barian" + }, + "mqv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Wamas-Samosa-Murupi-Mosimo" + }, + "mqw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Wamas-Samosa-Murupi-Mosimo" + }, + "mqx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi" + }, + "mqy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", 
+ "level4": "Flores Barat", + "level5": "Manggaraiic", + "level6": "Manggarai Khusus" + }, + "mqz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Korap linkage" + }, + "mra": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram", + "level3": "Tinic" + }, + "mrb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Maewo" + }, + "mrc": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "River Yuman" + }, + "mrd": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Magar" + }, + "mre": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "OKSLic" + }, + "mrg": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Eastern Tani" + }, + "mrh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic", + "level5": "Nuclear Maraic" + }, + "mri": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian 
Proximal", + "level14": "Maoric" + }, + "mrj": { + "level0": "Uralic", + "level1": "Mari" + }, + "mrk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone" + }, + "mrl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Eastern Trukic", + "level12": "Mortlockese-Trukese" + }, + "mrm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "mrn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Santa Isabel", + "level10": "East Santa Isabel" + }, + "mro": { + "level0": "Sino-Tibetan", + "level1": "Mruic" + }, + "mrp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "South-Central Santo" + }, + "mrq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Distal", + "level13": "Marquesan" + }, + "mrr": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Southeast Gondi" + }, + "mrs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Northwestern Malakula" + }, + "mrt": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Marghic" + }, + "mru": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Tupuri-Mundang-Mambai", + "level7": "Mundangic" + }, + "mrv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Distal", + 
"level13": "Far East Polynesian" + }, + "mrw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Danaw" + }, + "mrx": { + "level0": "Tor-Orya", + "level1": "Tor" + }, + "mry": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan", + "level5": "Eastern Mansakan" + }, + "mrz": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Marindic" + }, + "msb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan", + "level7": "Masbate-Sorsogon" + }, + "msc": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Manenkan" + }, + "msd": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Meemul-Tziij" + }, + "mse": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "North Masa", + "level4": "Marba-Musey" + }, + "msf": { + "level0": "Nimboranic", + "level1": "Outer Nimboranic" + }, + "msg": { + "level0": "West Bird's Head", + "level1": "South West Bird's Head" + }, + "msh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic", + "level7": "South West-Central Malagasic", + "level8": "Nuclear South West-Central Malagasic", + "level9": "Inland-Western Malagasic", + "level10": "Western Malagasic" + }, + "msi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Berau-Brunei Malay", + "level8": "Bruneic Malay" + }, + "msj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mbaic" + }, + "msk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan", + "level5": "Eastern Mansakan" + }, + "msm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "East and Central Manobo", + "level7": "East Manobo" + }, + "msn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "mso": { + "level0": "Mombum-Koneraw" + }, + "msp": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Jurunic", + "level3": "Unclassified Jurunic" + }, + "msq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Extreme Northern New Caledonian", + "level9": "Kum-Nel-Yua-Cac" + }, + "msr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic", + "level3": "Nuclear RSLic" + }, + "mss": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": 
"Masela-South Babar" + }, + "mst": { + "level0": "Bookkeeping" + }, + "msu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham", + "level9": "Busu", + "level10": "Musom-Sirak" + }, + "msv": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Septentrional", + "level6": "Kotoko Septentrional 1" + }, + "msw": { + "level0": "Atlantic-Congo" + }, + "msx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Osum-Wadaginam-Pomoikan", + "level5": "Pomoikan", + "level6": "Anamuxric" + }, + "msy": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ruboni", + "level3": "Mikarewan" + }, + "msz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz", + "level5": "Huon Tip", + "level6": "Sopac" + }, + "mta": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "South Manobo", + "level6": "Sarangani-Tasaday-Cotabato" + }, + "mtb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Northern Bia", + "level9": "Anyinic" + }, + "mtc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Kokon" + }, + "mtd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", 
+ "level3": "Malayic", + "level4": "Ibanic", + "level5": "Iban-Mualang-Seberuang" + }, + "mte": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Mono-Uruavan" + }, + "mtf": { + "level0": "Lower Sepik", + "level1": "Nor" + }, + "mtg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Eastern Mek" + }, + "mth": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Central Yapen" + }, + "mti": { + "level0": "Dagan", + "level1": "Central Dagan" + }, + "mtj": { + "level0": "East Bird's Head", + "level1": "Meax" + }, + "mtk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe" + }, + "mtl": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic" + }, + "mtm": { + "level0": "Uralic", + "level1": "Samoyedic" + }, + "mtn": { + "level0": "Misumalpan", + "level1": "Sumalpan", + "level2": "Matagalpan" + }, + "mto": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe" + }, + "mtp": { + "level0": "Mataguayan", + "level1": "Mataguayo II", + "level2": "Wichi" + }, + "mtq": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Viet-Muong", + "level3": "Muongic" + }, + "mtr": { + "level0": "Indo-European", + 
"level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewaric" + }, + "mts": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano", + "level5": "Yaminawa Complex" + }, + "mtt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "mtu": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "East Coast Mixtec" + }, + "mtv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup", + "level5": "Molet-Asaroo" + }, + "mtw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Negrosanon" + }, + "mtx": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec" + }, + "mty": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic" + }, + "mtz": { + "level0": "Bookkeeping" + }, + "mua": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": 
"Tupuri-Mundang-Mambai", + "level7": "Mundangic" + }, + "mub": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Mubic" + }, + "muc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Yemne-Kimbi" + }, + "mud": { + "level0": "Eskimo-Aleut", + "level1": "Aleutic" + }, + "mue": { + "level0": "Mixed Language", + "level1": "Spanish-Quechua" + }, + "mug": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Musguic", + "level5": "Musgu-Mbara" + }, + "muh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Eastern Mundu-Baka" + }, + "mui": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Central Sumatran Malay", + "level6": "Music" + }, + "muj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit" + }, + "muk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Mustangic" + }, + "mum": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota 
linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Eastern Taupota" + }, + "muo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Sambaic", + "level8": "Samba-Leko-Perema-Mumbake", + "level9": "Perema-Mumbake" + }, + "mup": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "muq": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "North Hmongic" + }, + "mur": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southwest Surmic", + "level3": "Didinga-Murle" + }, + "mus": { + "level0": "Muskogean" + }, + "mut": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi", + "level5": "Southwest Gondi", + "level6": "Muria" + }, + "muu": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Transversal Lowland East Cushitic" + }, + "muv": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Muthuvan-Mannan" + }, + "muw": { + "level0": "Bookkeeping" + }, + "mux": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Hagen", + "level3": "Melpa-Tembagla" + }, + "muy": { + "level0": "Afro-Asiatic", + "level1": 
"Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Tokombere", + "level7": "Madaic" + }, + "muz": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southeast Surmic", + "level3": "Pastoral Surmic", + "level4": "Tirma-Chai-Mursi" + }, + "mva": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage", + "level9": "Bam-Manam", + "level10": "Manam-Sepa" + }, + "mvb": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "California Athabaskan" + }, + "mvc": { + "level0": "Bookkeeping" + }, + "mvd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Central-East Sumbanese" + }, + "mve": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani", + "level11": "Indus Rajasthani" + }, + "mvf": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Mongolian" + }, + "mvg": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Tlaxiacic", + "level8": "Yucuane-Teita" + }, + "mvh": { 
+ "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.1", + "level5": "Sumrayic", + "level6": "Ndam-Tumak" + }, + "mvi": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Southern Ryukyu" + }, + "mvj": { + "level0": "Bookkeeping" + }, + "mvk": { + "level0": "Yuat" + }, + "mvl": { + "level0": "Pama-Nyungan", + "level1": "Paman" + }, + "mvn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage" + }, + "mvo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia", + "level11": "Marovo-Vangunu" + }, + "mvp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Masenrempulu" + }, + "mvq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Kumil" + }, + "mvr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Ansus-Ambai" + }, + "mvt": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Northern Malakula", + "level8": "North Coast Malakula", + "level9": "Botovro-Vovo-Vao" + }, + "mvu": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Maba" + }, + "mvv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Sumambu-Tagal" + }, + "mvw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Ruvuma", + "level9": "Unclassified Ruvuma" + }, + "mvx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Biakic" + }, + "mvy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Indus Kohistanic" + }, + "mvz": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "TT-Group" + }, + "mwa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North 
Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage", + "level9": "Bunama-Mwatebu" + }, + "mwb": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg", + "level2": "Mandi-Muniwara" + }, + "mwc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage", + "level10": "Are-Doga" + }, + "mwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Ruvuma", + "level9": "Yaoic" + }, + "mwf": { + "level0": "Southern Daly" + }, + "mwg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "West Arawe" + }, + "mwh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Bibling" + }, + "mwi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": 
"Peripheral Western Malakula", + "level9": "Ninde-Nati" + }, + "mwj": { + "level0": "Bookkeeping" + }, + "mwk": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding", + "level9": "Kita-Kagoro" + }, + "mwl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Asturo-Leonese" + }, + "mwm": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Chari" + }, + "mwn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika", + "level10": "Fipaic", + "level11": "Maluwawaru" + }, + "mwo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Maewo" + }, + "mwp": { + "level0": "Pama-Nyungan" + }, + "mwq": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Choic", + "level6": "Daai-Nghmoye-Muun-Kaang" + }, + "mws": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + 
"level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Eastern Kirinyaga", + "level10": "Northern Kirinyaga", + "level11": "Nithi-Meru" + }, + "mwt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Moken-Moklen" + }, + "mwu": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli", + "level4": "Gberi-Morokodo-Mittu" + }, + "mwv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran" + }, + "mww": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong", + "level8": "Far Western Miao" + }, + "mwx": { + "level0": "Bookkeeping" + }, + "mwy": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Okiek-Akie" + }, + "mwz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Basoo" + }, + "mxa": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Central-Western Baja Mixtec" + }, + "mxb": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Tezoatlanic" + }, + "mxc": { + "level0": "Atlantic-Congo", 
+ "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Core Shona", + "level10": "Plateau Shona" + }, + "mxd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Modang-Segai" + }, + "mxe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Vanuatu-Loyalty Outliers", + "level9": "Mele-Futuna" + }, + "mxf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Septentrional", + "level6": "Kotoko Septentrional 2" + }, + "mxg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbala-Holu-Sondi (K.10)", + "level10": "Holu (K.10)", + "level11": "Pheende-Kwezo" + }, + "mxh": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe", + "level3": "Leseic" + }, + "mxi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Unshifted Western Romance" + }, + "mxj": { + "level0": "Sino-Tibetan", + "level1": "Kman-Meyor" + }, + "mxk": { + "level0": "Bogia" + }, + "mxl": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "mxm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Willaumez linkage", + "level7": "Nakanai-Meramera" + }, + "mxn": { + "level0": "West Bird's Head", + "level1": "Seget-Moi" + }, + "mxo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Greater Luyana", + "level8": "Eastern Greater Luyana" + }, + "mxp": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe" + }, + "mxq": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe", + "level4": "Lowland-Midland Mixe", + "level5": "Midland Mixe" + }, + "mxr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik" + }, + "mxs": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec" + }, + "mxt": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "East Coast Mixtec" + }, + "mxu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + 
"level5": "Mofuic", + "level6": "Tokombere", + "level7": "Madaic" + }, + "mxv": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Coicoyan-Metlatonoc" + }, + "mxw": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu", + "level3": "Namo-Len" + }, + "mxx": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Maninka-Mori" + }, + "mxy": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec" + }, + "mxz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Masela-South Babar" + }, + "mya": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic", + "level6": "Nuclear Mranmaic" + }, + "myb": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Sido" + }, + "myd": { + "level0": "Bookkeeping" + }, + "mye": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30" + }, + "myf": { + "level0": "Blue Nile Mao" + }, + "myg": 
{ + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields" + }, + "myh": { + "level0": "Wakashan", + "level1": "Southern Wakashan", + "level2": "Makah-Nitinat" + }, + "myi": { + "level0": "Bookkeeping" + }, + "myj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Feroge-Mangaya" + }, + "myk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "North Senufo" + }, + "myl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Kulawi" + }, + "mym": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southeast Surmic", + "level3": "Pastoral Surmic" + }, + "myo": { + "level0": "Ta-Ne-Omotic", + "level1": "Kefoid" + }, + "myq": { + "level0": "Bookkeeping" + }, + "mys": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "TT-Group", + "level7": "Peripheral Western Gurage" + }, + "myt": { + "level0": "Bookkeeping" + }, + "myu": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Mundurukuic" + }, + "myv": { + "level0": "Uralic", + "level1": "Mordvin" + }, + "myw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Kilivila-Misima", + "level8": "Kilivilic", + "level9": "Kilivila-Muyuw" + }, 
+ "myx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Northern Luyia" + }, + "myy": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Western Eastern Tucanoan", + "level3": "Barasano-Eduria-Macuna" + }, + "myz": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Macro-Mandaic" + }, + "mza": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "West Coast Mixtec" + }, + "mzb": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Northern Saharan Oasis Berber" + }, + "mzc": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "West Scandinavian Sign", + "level4": "Norwegian Sign" + }, + "mzd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Dualaic", + "level9": "Duala-Malimba" + }, + "mze": { + "level0": "Mailuan" + }, + "mzf": { + "level0": "Bookkeeping" + }, + "mzg": { + "level0": "Sign Language", + "level1": "Auxiliary Sign Systems" + }, + "mzh": { + "level0": "Mataguayan", + "level1": "Mataguayo II", + "level2": "Wichi" + }, + 
"mzi": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Valley Mazatec", + "level6": "Ayautlic", + "level7": "Northern Baja Mazatec" + }, + "mzj": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Manenkan", + "level10": "Konya-Manya" + }, + "mzk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila" + }, + "mzl": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe", + "level4": "Lowland-Midland Mixe", + "level5": "Lowland Mixe" + }, + "mzm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Mumuyic" + }, + "mzn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Caspian", + "level8": "Mazanderani-Shahmirzadi" + }, + "mzo": { + "level0": "Cariban", + "level1": "Kuikuroan", + "level2": "Nuclear Kuikuroan" + }, + "mzq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western Bungku-Tolaki", + 
"level8": "Interior Bungku-Tolaki" + }, + "mzr": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Marubo Subgroup" + }, + "mzs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Luso-Asian Creole" + }, + "mzt": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Menraq-Batek", + "level6": "Batekic" + }, + "mzu": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan" + }, + "mzv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Gbanu-Manza-Ngbaka", + "level6": "Manza-Ngbaka", + "level7": "Manzaic" + }, + "mzw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Chakalic" + }, + "mzx": { + "level0": "Bookkeeping" + }, + "mzy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "mzz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + 
"level8": "Bwaidoga linkage", + "level9": "Iamalelic" + }, + "naa": { + "level0": "Namla-Tofanma" + }, + "nab": { + "level0": "Nambiquaran", + "level1": "Nambikwara Complex" + }, + "nac": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Jimi", + "level3": "Kandawo-Narak" + }, + "nad": { + "level0": "Bookkeeping" + }, + "nae": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram", + "level6": "Ulat Inai" + }, + "naf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Sankwep" + }, + "nag": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Kamrupa", + "level10": "Eastern Kamrupa" + }, + "naj": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Naluic" + }, + "nak": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Willaumez linkage", + "level7": "Nakanai-Meramera" + }, + "nal": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "nam": { + "level0": "Southern Daly" + }, + "nan": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Min", + "level3": "Coastal Min" + }, + "nao": { + "level0": 
"Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Sherpa-Jirel", + "level9": "Sherpaic" + }, + "nap": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Italo-Dalmatian", + "level9": "Italian Romance" + }, + "naq": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Khoekhoe", + "level3": "North Khoekhoe" + }, + "nar": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos" + }, + "nas": { + "level0": "South Bougainville", + "level1": "Nasioiic", + "level2": "Nasioi", + "level3": "South-Central Nasioi", + "level4": "Central Nasioi" + }, + "nat": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Kamuku-Hungwarya" + }, + "nau": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Kosraean-Nauruan" + }, + "nav": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean", + "level4": "Southwestern Apachean", + "level5": "Western Southwestern Apachean" + }, + "naw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North 
Guang", + "level8": "Oti North Guang", + "level9": "Mountain Oti North Guang" + }, + "nax": { + "level0": "Left May", + "level1": "Western Left May", + "level2": "Nimo-Nakwi" + }, + "nay": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Lower Murray", + "level4": "Yaraldi-Keramin-Yitha" + }, + "naz": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl", + "level6": "North Guerrero Nahuatl" + }, + "nba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba" + }, + "nbb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid" + }, + "nbc": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southwestern Patkaian", + "level5": "Chang-Phom-Konyak" + }, + "nbd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Ngendan" + }, + "nbe": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": 
"Southwestern Patkaian", + "level5": "Chang-Phom-Konyak", + "level6": "Konyak-Phom" + }, + "nbf": { + "level0": "Bookkeeping" + }, + "nbg": { + "level0": "Unattested", + "level1": "Dravidian (Unattested)" + }, + "nbh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele", + "level9": "Kirfi-Bele", + "level10": "Ngamo-Bele" + }, + "nbi": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Angami-Mao", + "level5": "Naga Maoic" + }, + "nbj": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Eastern Ngumpin", + "level5": "Ngumpit" + }, + "nbk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Unclassified Hanseman" + }, + "nbm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka", + "level8": "Bwaka" + }, + "nbn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut", + "level5": "Koiwai-Irarutu", + "level6": "Irarutic" + }, + "nbo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Lokoic", + "level8": "Lubila-Lokaa" + }, + "nbp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Northern Bakor", + "level10": "Nnam-Ekajuk" + }, + "nbq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani" + }, + "nbr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic" + }, + "nbs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "South African Sign" + }, + "nbt": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Pre-Western Tani", + "level4": "Western Tani", + "level5": "Subansiri", + "level6": "Bangni-Tagin" + }, + "nbu": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic" + }, + "nbv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Widikum-Tadkon" + }, + "nbw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic" + }, + "nbx": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Badjiri-Eastern Karnic", + "level3": "Eastern Karnic" + }, + "nby": { + "level0": "Border", + "level1": "Bewani" + }, + "nca": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": "Ufim-Rawa-Nahu" + }, + "ncb": { + "level0": "Austroasiatic", + "level1": "Nicobaric", + "level2": "Nuclear Nicobaric", + "level3": "Central Nicobar" + }, + "ncc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + 
"level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus" + }, + "ncd": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Khambu", + "level6": "Kulungic" + }, + "ncf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tabar linkage" + }, + "ncg": { + "level0": "Tsimshian", + "level1": "Nishga-Gitxsan" + }, + "nch": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Huasteca Nahuatl" + }, + "nci": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl" + }, + "ncj": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl" + }, + "nck": { + "level0": "Maningrida", + "level1": "Nakkara-Ndjebbana" + }, + "ncl": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl", + "level6": "Western Periphery Nahuatl" + }, + "ncm": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu" + }, + "ncn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "South-East Admiralty" + }, + "nco": { + "level0": "South Bougainville", + "level1": 
"Nasioiic" + }, + "ncq": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Brou-So", + "level4": "Eastern Bru-Katang", + "level5": "Katang" + }, + "ncr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Nsari-Nooni-Ncane", + "level8": "Nooni-Ncane", + "level9": "Ncane-Cung" + }, + "ncs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "nct": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Kolhrengic", + "level5": "Tarao-Chothe" + }, + "ncu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "River Oti North Guang", + "level10": "Chumbuli" + }, + "ncx": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl" + }, + "nda": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Samayic", + "level9": "Ndasa-Wumbvu" + }, + "ndb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "South Ring", + "level9": "Babungoic" + }, + "ndc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East 
Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Core Shona" + }, + "ndd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Nde-Efutop" + }, + "nde": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Nguni (S.40)", + "level12": "Nuclear Nguni", + "level13": "Southern Ndebele-Lowland", + "level14": "Swatic" + }, + "ndg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matumbic" + }, + "ndh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Nyakyusa-Ndali" + }, + "ndi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Sambaic", + "level8": "Samba-Leko-Perema-Mumbake" + }, + "ndj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Kilombero" + }, + "ndk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + 
"level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya", + "level14": "Bantu D33", + "level15": "Budu-Ndaka-Mbo", + "level16": "Ndaka-Mbo" + }, + "ndl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Bamweic" + }, + "ndm": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.1", + "level5": "Sumrayic", + "level6": "Ndam-Tumak" + }, + "ndn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Bwamba-Ngondi-Pande-Mbati-Aka" + }, + "ndo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Ndonga (R.20)", + "level12": "Kwambi-Ndonga" + }, + "ndp": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe" + }, + "ndq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": 
"Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Unclassified Kunene" + }, + "ndr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid" + }, + "nds": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German", + "level8": "Low German", + "level9": "Greater East Low German" + }, + "ndt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mbaic", + "level6": "Ndunga-Mba-Dongo", + "level7": "Ndunga-Mba" + }, + "ndu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Diic" + }, + "ndv": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Cangin", + "level3": "Palor-Ndut" + }, + "ndw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Libinzic", + "level13": "Libinza Fleuve" + }, + "ndx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Ngalik-Nduga" + }, + "ndy": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Nduga-Luto" + }, + 
"ndz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Sere-Bviri", + "level8": "Ndogo-Sere" + }, + "neb": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan", + "level4": "Guro-Dan", + "level5": "Dan-Toura", + "level6": "Toura-Goo" + }, + "nec": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Pantar" + }, + "ned": { + "level0": "Bookkeeping" + }, + "nee": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Extreme Northern New Caledonian", + "level9": "Kum-Nel-Yua-Cac", + "level10": "Kum-Nel-Yua" + }, + "nef": { + "level0": "Pidgin", + "level1": "Assamese-based pidgin" + }, + "neg": { + "level0": "Tungusic", + "level1": "Northeastern Tungusic", + "level2": "Northern Tungusic", + "level3": "Negidalic" + }, + "neh": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Phobjib-Chali-Bumthangic" + }, + "nej": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": "Gira-Neko-Nekgini" + }, + "nek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "nem": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Nmi-Pij-Fwa-Pam-Pap", + "level10": "Nmi-Fij-Fwa", + "level11": "Hyenghene" + }, + "nen": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Loyalty Islands" + }, + "neo": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "East Hmongic", + "level5": "South Qiandongic Miao" + }, + "neq": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe", + "level4": "Lowland-Midland Mixe", + "level5": "Midland Mixe" + }, + "ner": { + "level0": "Konda-Yahadian" + }, + "nes": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Lahauli-Spiti" + }, + "net": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Engan", + "level3": "Outer Enga" + }, + "neu": { + "level0": "Artificial Language" + }, + "nev": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric" + }, + "new": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Newaric", + "level4": "Newar" + }, + "nex": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu" + }, + "ney": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Neyo-Dida" + }, + "nez": { + "level0": "Sahaptian" + }, + "nfa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": 
"Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Hawu-Dhao" + }, + "nfd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau" + }, + "nfg": { + "level0": "Bookkeeping" + }, + "nfk": { + "level0": "Bookkeeping" + }, + "nfl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Reefs-Santa Cruz" + }, + "nfr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "South Senufo" + }, + "nfu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe", + "level9": "Mfumteic", + "level10": "Central-Southern Mfumte" + }, + "nga": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Gbanu-Manza-Ngbaka", + "level6": "Manza-Ngbaka" + }, + "ngb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic" + }, + "ngc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ngombe-Genja" + }, + "ngd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Mokiba-Ngando" + }, + "nge": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic", + "level10": "Mankonic" + }, + "ngg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Gbanu-Manza-Ngbaka", + "level6": "Manza-Ngbaka", + "level7": "Manzaic", + "level8": "Ngbaka-Manza-Ali" + }, + "ngh": { + "level0": "Tuu", + "level1": "!Ui", + "level2": "Ghaap-Kalahari" + }, + "ngi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.1", + "level5": "Ngizim-Southwestern Bade" + }, + "ngj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Ngie-Oshie" + }, + "ngk": { + "level0": "Gunwinyguan" + }, + "ngl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe", + "level9": "Lomweic" + }, + "ngm": { + "level0": "Speech Register", + "level1": "Indo-European Speech Register" + }, + "ngn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow 
Grassfields", + "level7": "Momo" + }, + "ngp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu", + "level11": "Seuta", + "level12": "Zigua-Nguu" + }, + "ngq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "South Mara", + "level12": "Western Serengeti" + }, + "ngr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Reefs-Santa Cruz" + }, + "ngs": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Dghwedeic" + }, + "ngt": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Ta'oihic" + }, + "ngu": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl" + }, + "ngv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan" + }, + "ngw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic" + }, + "ngx": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": 
"Bura-Marghi", + "level6": "Buraic" + }, + "ngy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Bafia (A.50)" + }, + "ngz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Ngungwel-Eboo" + }, + "nha": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Kartu-Nhanda" + }, + "nhb": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Ben-Gban", + "level5": "Bengic" + }, + "nhc": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Isthmus-Pipil Nahuatl", + "level6": "Isthmus Nahuatl" + }, + "nhd": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.A", + "level8": "Paraguay-Brazil Guarani" + }, + "nhe": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Huasteca Nahuatl" + }, + "nhf": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda" + }, + "nhg": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": 
"Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl" + }, + "nhh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Halbic" + }, + "nhi": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl", + "level6": "Tlaxcala-Southeastern Puebla Nahuatl" + }, + "nhj": { + "level0": "Bookkeeping" + }, + "nhk": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Isthmus-Pipil Nahuatl", + "level6": "Isthmus Nahuatl" + }, + "nhm": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl" + }, + "nhn": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl", + "level6": "Tlaxcala-Southeastern Puebla Nahuatl", + "level7": "Tlaxcala-Puebla-Pastoral Nahuatl" + }, + "nho": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "Central Northern Outlier Polynesian", + "level12": "Takuuic" + }, + "nhp": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", 
+ "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Isthmus-Pipil Nahuatl", + "level6": "Isthmus Nahuatl" + }, + "nhq": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Sierra de Puebla Nahuatl" + }, + "nhr": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "West-Kxoe", + "level4": "Naro-Ana" + }, + "nhs": { + "level0": "Bookkeeping" + }, + "nht": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl" + }, + "nhu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Nsari-Nooni-Ncane", + "level8": "Nooni-Ncane" + }, + "nhv": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl" + }, + "nhw": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Huasteca Nahuatl" + }, + "nhx": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Isthmus-Pipil Nahuatl", + "level6": "Isthmus Nahuatl" + }, + "nhy": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl", + "level6": "Tlaxcala-Southeastern Puebla Nahuatl", + "level7": "Southeast Puebla-Northern Oaxaca Nahuatl" + }, + "nhz": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": 
"Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl", + "level6": "Tlaxcala-Southeastern Puebla Nahuatl", + "level7": "Tlaxcala-Puebla-Pastoral Nahuatl" + }, + "nia": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Central Barrier Islands" + }, + "nib": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Boana" + }, + "nid": { + "level0": "Gunwinyguan", + "level1": "Eastern Gunwinyguan" + }, + "nie": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Riverine Bua", + "level6": "Bua-Lua" + }, + "nif": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Boana", + "level5": "Nek-Nuk" + }, + "nig": { + "level0": "Gunwinyguan", + "level1": "Jala" + }, + "nih": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Nyika-Lambya", + "level10": "Nyika", + "level11": "Central and Southern Nyika", + "level12": "Mbozi-Malawi Nyika" + }, + "nii": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Wahgic" + }, + "nij": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "South West Greater Barito" + }, + "nik": { + "level0": "Austroasiatic", + "level1": "Nicobaric", + "level2": "Nuclear Nicobaric", + "level3": "Central Nicobar" + }, + "nil": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + 
"level4": "Teun-Nila-Serua", + "level5": "Nila-Serua" + }, + "nim": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Nyaturu-Nilamba" + }, + "nin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic", + "level5": "Rukubic", + "level6": "Mada-Ninzam" + }, + "nio": { + "level0": "Uralic", + "level1": "Samoyedic" + }, + "niq": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Central Kalenjin", + "level4": "Plateau Central Kalenjin", + "level5": "Western Plateau Central Kalenjin" + }, + "nir": { + "level0": "Nimboranic" + }, + "nis": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Sauk-Nimi" + }, + "nit": { + "level0": "Dravidian", + "level1": "Central Dravidian", + "level2": "Kolami-Naiki" + }, + "niu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Tongic" + }, + "niv": { + "level0": "Nivkh" + }, + "niw": { + "level0": "Left May", + "level1": "Western Left May", + "level2": "Nimo-Nakwi" + }, + "nix": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara" + }, + "niy": { + "level0": "Central Sudanic", + "level1": "Lenduic" + }, + "niz": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central 
Torricelli", + "level3": "Wapeic", + "level4": "Ningil-Yil" + }, + "nja": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Nzanyic" + }, + "njb": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "North Patkaian", + "level4": "Noctean" + }, + "njd": { + "level0": "Bookkeeping" + }, + "njh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga" + }, + "nji": { + "level0": "Mirndi", + "level1": "Ngurlun" + }, + "njj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Mundani-Njen" + }, + "njl": { + "level0": "Dajuic", + "level1": "Western Dajuic" + }, + "njm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Angami-Mao", + "level5": "Angami-Chokri" + }, + "njn": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic", + "level3": "Nuclear Zemeic" + }, + "njo": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga" + }, + "njr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila", + "level11": "Njerup" + }, + "njs": { + "level0": "Geelvink Bay" + }, + "njt": { + "level0": "Pidgin", + "level1": "Trio-based pidgin" + }, + "nju": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Mirning" + }, + "njx": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kamba-Kunyi" + }, + "njy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic", + "level10": "Njemic" + }, + "njz": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Pre-Western Tani", + "level4": "Western Tani", + "level5": "Subansiri" + }, + "nka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban" + }, + "nkb": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Tangkhul-Maring", + "level3": "Maringic" + }, + "nkc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba" + }, + "nkd": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Kolhrengic" + }, + "nke": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western 
Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "West New Georgia" + }, + "nkf": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic" + }, + "nkg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": "Gira-Neko-Nekgini" + }, + "nkh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Angami-Mao" + }, + "nki": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic" + }, + "nkj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Tangko-Nakai" + }, + "nkk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Cape Cumberland" + }, + "nkm": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu" + }, + "nkn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba" + }, + "nko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Nkonya-Nkami" + }, + "nkp": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "East Uvean-Niuafo'ou" + }, + "nkq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Nkonya-Nkami" + }, + "nkr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Carolinean Outlier Polynesian" + }, + "nks": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat" + }, + "nkt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Nyika-Lambya", + "level10": "Nyika" + }, + "nku": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Kulango-Lorom", + "level5": "Kulango" + }, + "nkv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Nyika-Lambya", + "level10": "Nyika", + "level11": "Central and Southern Nyika" + }, + "nkw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Nkutsu-Lokenye", + "level12": "Songomenic" + }, + "nkx": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Eastern Ijo", + "level3": "Nikio" + }, + "nky": { + "level0": "Bookkeeping" + }, + "nkz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibuoroic", + "level8": "Ibuoro-ItuMbuso-Nkari" + }, + "nla": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke" + }, + "nlc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Western Mek" + }, + "nld": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Global Dutch" + }, + "nle": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kabarasi-Tachoni-Nyala East" + }, + "nlg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + 
"level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Nuclear Guadalcanal-Nggelic", + "level7": "Nggelic" + }, + "nli": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Gawarbatic", + "level5": "Shumashtic" + }, + "nlj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya", + "level14": "Bantu D33", + "level15": "Vanuma-Nyali" + }, + "nlk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Ngalik-Nduga", + "level3": "Yalic" + }, + "nlm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Indus Kohistanic", + "level9": "Outer Indus Kohistani", + "level10": "Bateri-Mankiyali" + }, + "nlo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu" + }, + "nlq": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southwestern Patkaian" + }, + "nlr": { + "level0": "Bookkeeping" + }, + "nlu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti 
North Guang", + "level9": "River Oti North Guang", + "level10": "Nchumbulu-Dwang" + }, + "nlv": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl" + }, + "nlw": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Norman Pama" + }, + "nlx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Pauri-Nahali" + }, + "nly": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Northern Ngayarda" + }, + "nlz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Reefs-Santa Cruz", + "level6": "Natugu-Nalogo" + }, + "nma": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic" + }, + "nmb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Northwestern Malakula" + }, + "nmc": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Sido" + }, + "nmd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": 
"Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Mbere (B.60)", + "level19": "Tsitsekeic", + "level20": "Lekaningic" + }, + "nme": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic", + "level3": "Nuclear Zemeic", + "level4": "Mzieme-Zeme" + }, + "nmf": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Tangkhul-Maring", + "level3": "Tangkhulic", + "level4": "Nuclear Tangkhulic" + }, + "nmg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Mvumboic", + "level11": "Kwasio-Gyele" + }, + "nmh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Moyon-Monsang Naga" + }, + "nmi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Unclassified Boleic" + }, + "nmj": { + "level0": "Bookkeeping" + }, + "nmk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu" + }, + "nml": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields" + }, + "nmm": { + "level0": "Sino-Tibetan", + 
"level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Gurungic", + "level6": "Manangba-Nar-Phu" + }, + "nmn": { + "level0": "Tuu", + "level1": "Hua", + "level2": "Taa" + }, + "nmo": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Moyon-Monsang Naga" + }, + "nmp": { + "level0": "Nyulnyulan", + "level1": "Western Nyulnyulan" + }, + "nmq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Kalanga-Nambya" + }, + "nmr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Unclassified Samba-Duru" + }, + "nms": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula" + }, + "nmt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Satawalese-Carolinian", + "level12": "Macro-Carolinian" + }, + "nmu": { + "level0": "Maiduan" + }, + "nmv": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Western Central Karnic", + "level4": "Pirlatapa-Dieric", + 
"level5": "Dieric" + }, + "nmw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Nimoa-Sudest" + }, + "nmx": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu", + "level3": "Nama-Dre" + }, + "nmy": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Naic" + }, + "nmz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Yom-Nawdm" + }, + "nna": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Marrngu" + }, + "nnb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Rwenzori" + }, + "nnc": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 1" + }, + "nnd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Ambae" + }, + "nne": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": 
"Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Ndonga (R.20)", + "level12": "Unclassified Ndonga (R.20)" + }, + "nnf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": "Unclassified Gusap-Mot" + }, + "nng": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Tangkhul-Maring", + "level3": "Maringic" + }, + "nnh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke", + "level11": "Bamboutos" + }, + "nni": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Patakai-Manusela", + "level4": "Patakai" + }, + "nnj": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Teso-Turkana", + "level4": "Turkanic" + }, + "nnk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna" + }, + "nnl": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Pochuri-Northern Rengma" + }, + "nnm": { + "level0": "Sepik", + "level1": "Yellow River" + }, + "nnn": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "South Masa", + "level4": "Peveic", + "level5": "Hede-Ngide" + }, + "nnp": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southwestern Patkaian", + "level5": "Wanchoic" + }, + "nnq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": 
"Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matengic", + "level10": "Ndendeule-Ngindo" + }, + "nnr": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Southern Thura-Yura" + }, + "nnt": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Nanticoke-Conoy" + }, + "nnu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "River Oti North Guang", + "level10": "Nchumbulu-Dwang" + }, + "nnv": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Unclassified Core Thura-Yura" + }, + "nnw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi", + "level8": "Nuna-Kasem", + "level9": "Nuni" + }, + "nnx": { + "level0": "Bookkeeping" + }, + "nny": { + "level0": "Tangkic", + "level1": "Southern Tangkic", + "level2": "Kayardild-Yangkaal" + }, + "nnz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "East Bamileke" + }, + "noa": { + "level0": "Chocoan" + }, + "noc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Boana", + "level5": "Nek-Nuk" + }, 
+ "nod": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Southern Shanic", + "level11": "Yuanic" + }, + "noe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "nof": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Chuave-Nomane" + }, + "nog": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Southeast Kipchak", + "level5": "South Kipchak" + }, + "noh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Dallman" + }, + "noi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Vasave-Noiri" + }, + "noj": { + "level0": "Huitotoan", + "level1": "Nonuya-Ocaina" + }, + "nok": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "South Georgia Central Salish" + }, + "nom": { + "level0": "Bookkeeping" + }, + "non": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "West Scandinavian" + }, + "noo": { + "level0": "Bookkeeping" + }, + "nop": { + "level0": "Nuclear Trans New Guinea", + "level1": 
"Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Boana" + }, + "noq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Nsong-Mpiin-Ngong" + }, + "nor": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "West Scandinavian" + }, + "nos": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nisu-Nyisu", + "level8": "Nisu", + "level9": "Nuclear Nisu", + "level10": "Northern Nisu" + }, + "not": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran" + }, + "nou": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Coastal Binanderean" + }, + "nov": { + "level0": "Artificial Language" + }, + "now": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "South Rutara" + }, + "noy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Riverine Bua", + "level6": 
"Unclassified Riverine Bua" + }, + "noz": { + "level0": "Dizoid" + }, + "npa": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Gurungic", + "level6": "Manangba-Nar-Phu" + }, + "npb": { + "level0": "Bookkeeping" + }, + "npg": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southeastern Patkaian", + "level5": "Lainongic", + "level6": "Khiamniungic" + }, + "nph": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southwestern Patkaian", + "level5": "Chang-Phom-Konyak", + "level6": "Konyak-Phom" + }, + "npi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Indo-Aryan Northern zone", + "level8": "Eastern Pahari" + }, + "npl": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl", + "level6": "Tlaxcala-Southeastern Puebla Nahuatl", + "level7": "Southeast Puebla-Northern Oaxaca Nahuatl" + }, + "npn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus II" + }, + "npo": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Pochuri-Northern Rengma" + }, + "nps": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Western Mek" + }, + "npu": { + "level0": "Bookkeeping" + }, + "npy": { + 
"level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Badaic-Limola", + "level5": "Badaic" + }, + "nqg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede", + "level8": "Southeastern Ede" + }, + "nqk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Western Ede" + }, + "nql": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia" + }, + "nqm": { + "level0": "Kolopom" + }, + "nqn": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu" + }, + "nqo": { + "level0": "Artificial Language" + }, + "nqq": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southwestern Patkaian", + "level5": "Wanchoic" + }, + "nqt": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Kofyar-Mushere-Chip", + "level7": "Kofyaric" + }, + "nqy": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "nra": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ngomic", + "level8": "Nuclear Ngomic", + "level9": "Akeleic" + }, + "nrc": { + "level0": "Indo-European", + "level1": 
"Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Continental Transalpine Celtic", + "level6": "Unclassified Continental Transalpine Celtic" + }, + "nre": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Rengma-Simi" + }, + "nrg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "South-Central Santo" + }, + "nri": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Angami-Mao", + "level5": "Angami-Chokri" + }, + "nrk": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Northern Ngayarda" + }, + "nrl": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Ngarluma-Kariyarra" + }, + "nrm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Lower Baram", + "level6": "Central Lower Baram B" + }, + "nrp": { + "level0": "Unclassifiable" + }, + "nrr": { + "level0": "Bookkeeping" + }, + "nrt": { + "level0": "Kalapuyan" + }, + "nru": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Naic", + "level4": "Naish" + }, + "nrx": { + "level0": "Unattested", + "level1": "Umbugarla (Unattested)" + }, + "nrz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": 
"Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage", + "level9": "Nuclear West Central Papuan linkage" + }, + "nsa": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga" + }, + "nsb": { + "level0": "Tuu", + "level1": "Hua" + }, + "nsc": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "nsd": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nisu-Nyisu", + "level8": "Nisu", + "level9": "Nuclear Nisu" + }, + "nse": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi" + }, + "nsf": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nisu-Nyisu", + "level8": "Nisu" + }, + "nsg": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Ongamo-Maa" + }, + "nsh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Ngie-Oshie" + }, + "nsi": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "nsk": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi" + }, + "nsl": { + "level0": "Sign Language", + "level1": "L1 Sign 
Language", + "level2": "LSFic", + "level3": "West Scandinavian Sign", + "level4": "Norwegian Sign" + }, + "nsm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Rengma-Simi" + }, + "nsn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic" + }, + "nso": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Northern Sotho", + "level12": "Sepedic" + }, + "nsp": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Indo-Pakistani-Nepalese Sign" + }, + "nsq": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Eastern Miwokan", + "level3": "Sierra Miwokan" + }, + "nsr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic" + }, + "nss": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Koro-Lele-Nali-Titan" + }, + "nst": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "North Patkaian", + "level4": "Tangsa" + }, + "nsu": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + 
"level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Sierra de Puebla Nahuatl" + }, + "nsv": { + "level0": "Bookkeeping" + }, + "nsw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Central Santo" + }, + "nsx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbundu (H.20)" + }, + "nsy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran" + }, + "nsz": { + "level0": "Maiduan" + }, + "ntd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Sumambu-Tagal", + "level9": "Tidung-Bulusu", + "level10": "Tidung" + }, + "nte": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Koti-Nathembo" + }, + "nti": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Samu" + }, + "ntj": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic", + "level4": "Nuclear Pintupic", + "level5": "Wangkatja-Tjarra", + "level6": "Tjarra" + }, + "ntk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": 
"Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "South Mara", + "level12": "Western Serengeti", + "level13": "Southeast Mara" + }, + "ntm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Oti-Volta Oriental", + "level10": "Waama-Tayari-Ditammari", + "level11": "Tayari-Ditammari" + }, + "nto": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Mongoic", + "level11": "Bolia-Ntomba" + }, + "ntp": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Tepehuan" + }, + "ntr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Tem-Chala", + "level10": "Bago-Delo-Cala", + "level11": "Delo-Cala" + }, + "nts": { + "level0": "Bookkeeping" + }, + "ntu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Reefs-Santa Cruz", + "level6": "Natugu-Nalogo" + }, + "ntw": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian", + "level2": "Tuscarora-Nottoway" + }, + "ntx": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "nty": { + "level0": "Sino-Tibetan", + "level1": 
"Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Mondzish", + "level4": "Nuclear Mondzish", + "level5": "Munji-Mantsi" + }, + "ntz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic", + "level9": "Kashanic" + }, + "nua": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Extreme Northern New Caledonian", + "level9": "Kum-Nel-Yua-Cac", + "level10": "Kum-Nel-Yua" + }, + "nuc": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Poyanawa Subgroup" + }, + "nud": { + "level0": "Ndu" + }, + "nue": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic", + "level9": "Mid-Southern Central Core Bandaic" + }, + "nuf": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Nusoish" + }, + "nug": { + "level0": "Mirndi", + "level1": "Yirram" + }, + "nuh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mbongno-Mvano", + "level11": "Mvano-Ndunda" + }, + "nui": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Bengaic", + "level9": "Yasa-Kombe" + }, + "nuj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Western Luyia", + "level14": "Saamiaic" + }, + "nuk": { + "level0": "Wakashan", + "level1": "Southern Wakashan" + }, + "nul": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase", + "level8": "Hatuhaha", + "level9": "Saparuan", + "level10": "Elpaputi" + }, + "num": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "East Uvean-Niuafo'ou" + }, + "nun": { + "level0": "Sino-Tibetan", + "level1": "Nungish", + "level2": "Gunong" + }, + "nuo": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Viet-Muong", + "level3": "Muongic" + }, + "nup": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid" + }, + "nuq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": 
"Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "Central Northern Outlier Polynesian", + "level12": "Takuuic" + }, + "nur": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "Central Northern Outlier Polynesian", + "level12": "Takuuic" + }, + "nus": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Nuer-Reel" + }, + "nut": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Debao-Jingxi-Nung" + }, + "nuu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic" + }, + "nuv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi", + "level8": "Nuna-Kasem", + "level9": "Nuni" + }, + "nuw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Yapesic" + }, + "nux": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mehek-Pahi" + }, + "nuy": { + "level0": "Gunwinyguan", + "level1": "Eastern Gunwinyguan", + "level2": "Wubuy-Anindilyakwa" + }, + 
"nuz": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl", + "level6": "North Guerrero Nahuatl" + }, + "nvh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula" + }, + "nvm": { + "level0": "Koiarian", + "level1": "Baraic", + "level2": "Barai-Namiae" + }, + "nvo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "West Mbam (A.40)", + "level11": "Mandi-Nyokon" + }, + "nwa": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Arapahoic" + }, + "nwb": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee" + }, + "nwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke", + "level11": "Bamboutos" + }, + "nwi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Tanna", + "level7": "Southern Tanna" + }, + "nwm": { + "level0": "Central Sudanic", + "level1": 
"Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli", + "level4": "Lori" + }, + "nwo": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Unclassified Core Thura-Yura" + }, + "nwr": { + "level0": "Yareban", + "level1": "Yareba-Bariji-Nawaru" + }, + "nww": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic" + }, + "nxa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Kawaimina" + }, + "nxd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Mongoic", + "level11": "Lomongo" + }, + "nxe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Eastern Central Flores", + "level8": "Nage-Keo" + }, + "nxg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Ngada" + }, + "nxi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Unclassified Rufijic" + }, + "nxj": { + "level0": 
"Bookkeeping" + }, + "nxk": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "nxl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Patakai-Manusela", + "level4": "Patakai" + }, + "nxm": { + "level0": "Unclassifiable" + }, + "nxn": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Mayabic", + "level3": "Nuclear Mayabic" + }, + "nxo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ngomic", + "level8": "Nuclear Ngomic", + "level9": "Sake-Ndambomo" + }, + "nxq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Naic", + "level4": "Naish" + }, + "nxr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Lowland Ok" + }, + "nxu": { + "level0": "Bookkeeping" + }, + "nxx": { + "level0": "Sentanic", + "level1": "Nuclear Sentanic", + "level2": "Sentani-Nafri" + }, + "nya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Nyanjaic" + }, + "nyb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Avatime-Nyangbo", + "level5": "Nyangbo-Tafi" + }, + "nyc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": 
"Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Extreme North Vestigial Suffixes Bantu" + }, + "nyd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia" + }, + "nye": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Greater Luyana", + "level8": "Western Greater Luyana", + "level9": "Simaaic" + }, + "nyf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Mijikenda", + "level12": "Northern Mijikenda" + }, + "nyg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu", + "level12": "Fuliiric" + }, + "nyh": { + "level0": "Nyulnyulan", + "level1": "Eastern Nyulnyulan", + "level2": "Nyikinic" + }, + "nyi": { + "level0": "Nyimang" + }, + "nyj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega" + }, + "nyk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Nyaneka-Nkhumbi" + }, + "nyl": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Kuy-Souei" + }, + "nym": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Sukuma-Nyamwezi (F.20)", + "level9": "Nyamwezic" + }, + "nyn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara", + "level12": "Nkore-Kiga-Nyoro-Tooro", + "level13": "Nkore-Kiga" + }, + "nyo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara", + "level12": "Nkore-Kiga-Nyoro-Tooro", + "level13": "Nyoro-Tooro" + }, + "nyp": { + "level0": "Kuliak", + "level1": "Ngangea-So" + }, + "nyq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": 
"Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic", + "level9": "Yazdi-Kermani-Nayini" + }, + "nyr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Nyika-Lambya", + "level10": "Nyika", + "level11": "Central and Southern Nyika", + "level12": "Mbozi-Malawi Nyika" + }, + "nys": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan" + }, + "nyt": { + "level0": "Pama-Nyungan", + "level1": "Nyawaygic" + }, + "nyu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Senaic" + }, + "nyv": { + "level0": "Nyulnyulan", + "level1": "Western Nyulnyulan", + "level2": "Nyulnyulic" + }, + "nyx": { + "level0": "Pama-Nyungan", + "level1": "Macleay-New England" + }, + "nyy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Nyakyusa-Ndali" + }, + "nza": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo" + }, + "nzb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + 
"level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Njebi (B.50)", + "level20": "Ndjavi A" + }, + "nzd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic" + }, + "nzi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Southern Bia", + "level9": "Jwira-Nzima" + }, + "nzk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Zandic", + "level6": "Zande-Nzakara" + }, + "nzm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic", + "level3": "Nuclear Zemeic", + "level4": "Mzieme-Zeme" + }, + "nzr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Polci-Luri", + "level8": "Polcic", + "level9": "Zulic" + }, + "nzs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "BANZL" + }, + "nzy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Central Mbum", + "level6": "Karangic" + }, + "nzz": { + "level0": "Dogon", + "level1": "Nangan Dogon" + }, + "oaa": { + "level0": "Tungusic", + "level1": "Central-Western Tungusic", + "level2": "Ulchaic" + }, + "oac": { + "level0": "Tungusic", 
+ "level1": "Northeastern Tungusic", + "level2": "Central-Eastern Tungusic", + "level3": "Oroch-Udihe" + }, + "oar": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic" + }, + "obi": { + "level0": "Chumashan" + }, + "obl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Unclassified Cameroun-Ubangian" + }, + "obo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "West Manobo" + }, + "obr": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic", + "level6": "Nuclear Mranmaic" + }, + "obu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Abua-Odual" + }, + "oca": { + "level0": "Huitotoan", + "level1": "Nonuya-Ocaina" + }, + "occ": { + "level0": "Bookkeeping" + }, + "och": { + "level0": "Sino-Tibetan", + "level1": "Sinitic" + }, + "oci": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "Occitanic" + }, + "oco": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Brythonic", + "level7": "Southwestern Brythonic" + }, + "ocu": { + "level0": 
"Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Matlatzincan" + }, + "oda": { + "level0": "Bookkeeping" + }, + "odk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani", + "level11": "Indus Rajasthani" + }, + "odt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch" + }, + "odu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Abua-Odual" + }, + "ofo": { + "level0": "Siouan", + "level1": "Ohio Valley Siouan", + "level2": "Southeastern Siouan" + }, + "ofs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Frisian" + }, + "ofu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Nde-Efutop" + }, + "ogb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Kugboic" + }, + "ogc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + "level4": "Nuclear Igboid" + }, + "oge": { + "level0": "Kartvelian", + "level1": "Georgian-Zan", 
+ "level2": "Georgic" + }, + "ogg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Unclassified Central Delta" + }, + "ogo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Ogonoid", + "level5": "East Ogonoid", + "level6": "Tai-Kana" + }, + "ogu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta" + }, + "ohu": { + "level0": "Uralic", + "level1": "Hungaric" + }, + "oia": { + "level0": "Timor-Alor-Pantar", + "level1": "East Timor", + "level2": "Fataluku-Oirata" + }, + "oie": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo", + "level5": "Lopit-Dongotono", + "level6": "Dongotonic" + }, + "oin": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One", + "level3": "Central-Northern One" + }, + "ojb": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Nuclear Ojibwe", + "level7": "Northwestern-Saulteaux Ojibwa" + }, + "ojc": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Nuclear Ojibwe", + "level7": "Central-Eastern-Southwestern Ojibwa" + }, + "ojg": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Nuclear Ojibwe", + "level7": "Central-Eastern-Southwestern Ojibwa" + }, + "ojp": { + "level0": "Japonic", + "level1": "Japanesic" + }, + "ojs": { + 
"level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Severn-Algonquin" + }, + "ojv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "Central Northern Outlier Polynesian" + }, + "ojw": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Nuclear Ojibwe", + "level7": "Northwestern-Saulteaux Ojibwa" + }, + "oka": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Southern Interior Salish", + "level3": "Okanaganic" + }, + "okb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross" + }, + "okd": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Western Ijo", + "level3": "Inland Ijo", + "level4": "Biseni-Okordia" + }, + "oke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Southwestern Edoid" + }, + "okg": { + "level0": "Bookkeeping" + }, + "okh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": 
"Tatic", + "level9": "Central Tatic", + "level10": "Khalkhalic" + }, + "oki": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Okiek-Akie" + }, + "okj": { + "level0": "Great Andamanese", + "level1": "Middle Great Andamanese" + }, + "okk": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One" + }, + "okl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "OKSLic" + }, + "okn": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami", + "level4": "Nuclear Amami", + "level5": "Okinoerabu-Tokunoshima" + }, + "okr": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Eastern Ijo", + "level3": "Nikio", + "level4": "Kio Ijo" + }, + "oks": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo" + }, + "oku": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring", + "level10": "Komic" + }, + "okv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Orokaivic" + }, + "okx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Southern Northwestern Edoid", + "level7": "Okpe-Akuku-Idesa" + }, + "ola": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic" + }, + "old": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga", + "level11": "Central Kilimanjaro" + }, + "ole": { + "level0": "Sino-Tibetan" + }, + "olk": { + "level0": "Bookkeeping" + }, + "olm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Southern Northwestern Edoid" + }, + "olo": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Ladogan", + "level6": "East Ladoga" + }, + "olu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia" + }, + "oma": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dhegiha" + }, + "omb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Ambae" + }, + "ome": { + "level0": "Bookkeeping" + }, + "omg": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup III", + "level7": "Omagua-Kokama" + }, + "omi": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi", + "level3": "Kalikoic" + }, + "omk": { + "level0": "Yukaghir" + }, + "oml": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", 
+ "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic" + }, + "omn": { + "level0": "Unclassifiable" + }, + "omo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Osum-Wadaginam-Pomoikan" + }, + "omr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi" + }, + "omt": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Tatoga-Omotik" + }, + "omw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Tairora" + }, + "omx": { + "level0": "Austroasiatic", + "level1": "Monic" + }, + "ona": { + "level0": "Chonan", + "level1": "Insular Chonan" + }, + "onb": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Beic", + "level4": "Lingao" + }, + "one": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian", + "level2": "Mohawk-Oneida" + }, + "ong": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Au-Olo-Elkei", + "level5": "Olo-Elkei" + }, + "oni": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Yamdena-Onin", + "level5": "Oninic" + }, + "onj": { + "level0": "Dagan" + }, + "onk": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One", + "level3": "Central-Northern One" + }, + "onn": { + "level0": "Bosavi", + 
"level1": "Bosavi Watershed" + }, + "ono": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "onp": { + "level0": "Sino-Tibetan", + "level1": "Kho-Bwa", + "level2": "Western Kho-Bwa", + "level3": "Sartang-Sherdukpen" + }, + "onr": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One", + "level3": "Central-Northern One" + }, + "ons": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Kalasa" + }, + "ont": { + "level0": "Bookkeeping" + }, + "onu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula", + "level9": "Unua-Pangkumu" + }, + "onw": { + "level0": "Nubian", + "level1": "Nile Nubian", + "level2": "Nobiin Nubian" + }, + "onx": { + "level0": "Pidgin", + "level1": "Onin-based pidgin" + }, + "ood": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Piman" + }, + "oog": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Ta'oihic", + "level3": "Ong-Ta'oih" + }, + "oon": { + "level0": "Jarawa-Onge" + }, + "oor": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Global Dutch", + "level9": "Afrikaansic" + }, + "oos": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Sogdic-Ossetic", + "level6": "Ossetic" + }, + "opa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + 
"level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Southern Northwestern Edoid" + }, + "ope": { + "level0": "Bookkeeping" + }, + "opk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Kwer-Kopkaka-Burumakok" + }, + "opm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin" + }, + "opo": { + "level0": "Eleman", + "level1": "Western Eleman" + }, + "opt": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Opata-Eudeve" + }, + "opy": { + "level0": "Nuclear-Macro-Je" + }, + "ora": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Southern Malaita" + }, + "orc": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Nuclear Oromo", + "level8": "Central-Eastern Oromo", + "level9": "Central Oromo" + }, + "ore": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Napo Tucanoan" + }, + "org": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Koring-Kukele" + }, + "orh": { + "level0": "Tungusic", + "level1": "Northeastern Tungusic", + "level2": "Northern Tungusic" + }, + "ork": { + "level0": "Bookkeeping" + }, + "orn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + 
"level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "oro": { + "level0": "Eleman", + "level1": "Western Eleman" + }, + "orr": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Western Ijo", + "level3": "Inland Ijo" + }, + "ors": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "ort": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "oru": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Ormuri-Parachi" + }, + "orv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "East Slavic" + }, + "orw": { + "level0": "Chapacuran", + "level1": "Moreic-Waric", + "level2": "Waric", + "level3": "Wanham-Wari-Oro Win", + "level4": "Wari-Oro Win" + }, + "orx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross", + "level7": "Oroic", + "level8": "Ebughu-Oro" + }, + "ory": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "orz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + 
"level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Jayapura Bay" + }, + "osa": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dhegiha", + "level3": "Osage-Kansa" + }, + "osc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Sabellic" + }, + "osi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic", + "level3": "Modern Javanese" + }, + "oso": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic" + }, + "osp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic" + }, + "oss": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Sogdic-Ossetic", + "level6": "Ossetic", + "level7": "Modern Ossetic" + }, + "ost": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields" + }, + "osu": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One" + }, + "osx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch" + }, + "otd": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "North West Greater Barito" + }, + "ote": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Northwestern Otomi" + }, + "otk": { + "level0": "Bookkeeping" + }, + "otl": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Southern Otomi" + }, + "otm": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Eastern Otomi" + }, + "otn": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Eastern Otomi" + }, + "otq": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Northwestern Otomi" + }, + "otr": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Central Heibanic", + "level3": "Ebang-Logol" + }, + "ots": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Southwestern Otomi" + }, + "ott": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Southwestern Otomi" + }, + "otu": { + "level0": "Bororoan", + "level1": "Bororo-Otuke" + }, + "otw": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": 
"Ojibwa-Potawatomi", + "level5": "Ojibwa" + }, + "otx": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Eastern Otomi" + }, + "oty": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid" + }, + "otz": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Southern Otomi" + }, + "oua": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Northern Saharan Oasis Berber", + "level5": "Ouargli-Oued Righ" + }, + "oub": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn" + }, + "oue": { + "level0": "South Bougainville", + "level1": "Nasioiic", + "level2": "Nasioi", + "level3": "South-Central Nasioi", + "level4": "Central Nasioi" + }, + "oui": { + "level0": "Turkic", + "level1": "Common Turkic" + }, + "oum": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Oumic" + }, + "oun": { + "level0": "Bookkeeping" + }, + "owi": { + "level0": "Left May" + }, + "owl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Brythonic", + "level7": "Old-Modern 
Welsh" + }, + "oyb": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric" + }, + "oyd": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo" + }, + "oym": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Wayampi-Zoe-Emerillon" + }, + "oyy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "ozm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic", + "level10": "Njemic" + }, + "pab": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xaray", + "level4": "Parecis-Nawe" + }, + "pac": { + "level0": "Austroasiatic", + "level1": "Katuic" + }, + "pad": { + "level0": "Arawan" + }, + "pae": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Middle Bomokandian", + "level15": "Late Bomokandian", + "level16": "Pagabeteic" + }, + "paf": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": 
"Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Unclassified Kawahiva" + }, + "pag": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran" + }, + "pah": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Nuclear Kawahiva", + "level8": "Central Kawahiva" + }, + "pai": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Yangkam-Tarok-Pe", + "level6": "Tarok-Pe" + }, + "paj": { + "level0": "Bookkeeping" + }, + "pak": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV", + "level6": "Tupi-Guarani Subgroup IV.A" + }, + "pal": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian" + }, + "pam": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon" + }, + "pan": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Eastern Panjabic" + }, + "pao": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Western Numic" + }, + "pap": { + "level0": 
"Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Upper Guinea Portuguese" + }, + "paq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewati-Gojri" + }, + "par": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Central Numic" + }, + "pas": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Doutai-Kai-Waritai" + }, + "pau": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian" + }, + "pav": { + "level0": "Chapacuran", + "level1": "Moreic-Waric", + "level2": "Waric", + "level3": "Wanham-Wari-Oro Win", + "level4": "Wari-Oro Win" + }, + "paw": { + "level0": "Caddoan", + "level1": "Northern Caddoan", + "level2": "Pawnee-Kitsai", + "level3": "Pawnee-Arikara" + }, + "pax": { + "level0": "Unattested" + }, + "pay": { + "level0": "Chibchan" + }, + "pbc": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Pemong-Panare", + "level3": "Pemongan", + "level4": "Kapong" + }, + "pbe": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan", + "level7": "Southwestern Popolocan", + "level8": "Tepexi-Zapotitlan" + 
}, + "pbf": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan", + "level7": "Southwestern Popolocan" + }, + "pbg": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Guajiro-Paraujano" + }, + "pbh": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Pemong-Panare" + }, + "pbi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Podoko" + }, + "pbl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Northern Bikwin-Jen", + "level6": "Mak-Tal" + }, + "pbm": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Northwest Alta Mazatec" + }, + "pbn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Bali-Kpasam" + }, + "pbo": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Manjaku-Mankanya-Pepel", + "level6": "Cur-Bok-Cotier" + }, + "pbp": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Jaad" + }, + "pbr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands 
Bantu", + "level9": "Kisi-Pangwa" + }, + "pbs": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Pamean" + }, + "pbt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Pashto", + "level5": "Nuclear Pashto" + }, + "pbu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Pashto", + "level5": "Nuclear Pashto" + }, + "pbv": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Khasian", + "level3": "Khasi-Pnar-Lyngngam", + "level4": "Khasi-Pnar" + }, + "pbz": { + "level0": "Bookkeeping" + }, + "pca": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan", + "level7": "Southwestern Popolocan", + "level8": "Tepexi-Zapotitlan" + }, + "pcb": { + "level0": "Austroasiatic", + "level1": "Pearic" + }, + "pcc": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai" + }, + "pcd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil" + }, + "pce": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "West Palaungic", + "level4": "Palaung" + }, + "pcf": { + "level0": "Dravidian", + "level1": "South 
Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Tamil-Paliyan" + }, + "pcg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid", + "level10": "Ravulic" + }, + "pch": { + "level0": "Unattested", + "level1": "Dravidian (Unattested)" + }, + "pci": { + "level0": "Dravidian", + "level1": "Central Dravidian", + "level2": "Parji-Ollari-Gadaba" + }, + "pcj": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "Sora-Juray-Gorum" + }, + "pck": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Thadoic" + }, + "pcl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "pcm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "West African Creole English", + "level13": "Coastal Nigerian Krio", + "level14": "Nigeria-Cameroon Creole English" + }, + "pcn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": 
"Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Piti-Atsam" + }, + "pcp": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Bolivian Nawa" + }, + "pcr": { + "level0": "Bookkeeping" + }, + "pcw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic", + "level7": "Talic", + "level8": "Piapung-Koenoem" + }, + "pda": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Osum-Wadaginam-Pomoikan", + "level5": "Pomoikan" + }, + "pdc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Rhenish Franconian", + "level9": "Palatinate" + }, + "pdi": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "White Tai" + }, + "pdn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Anus-Podena" + }, + "pdo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western 
Bungku-Tolaki", + "level8": "Interior Bungku-Tolaki" + }, + "pdt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German", + "level8": "Low German", + "level9": "Greater East Low German" + }, + "pdu": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Northern Karen" + }, + "pea": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Betawic" + }, + "peb": { + "level0": "Pomoan", + "level1": "Russian River and Eastern" + }, + "pec": { + "level0": "Bookkeeping" + }, + "ped": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kaukombaran" + }, + "pee": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Southern Tomini" + }, + "pef": { + "level0": "Pomoan" + }, + "peg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Konda-Kui", + "level4": "Manda-Kui", + "level5": "Manda-Pengo" + }, + "peh": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Southern Periphery Mongolic", + "level3": "Shirongol", + "level4": "Baoanic" + }, + "pei": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean" + }, + "pej": { + "level0": "Pomoan", + "level1": "Russian River and Eastern", + "level2": "Russian River", + "level3": "Northern-Central Pomoan" + }, + "pek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + 
"level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "South-East Admiralty" + }, + "pel": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Kerinci-Minangkabau", + "level7": "Minangkabauic" + }, + "pem": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbala-Holu-Sondi (K.10)", + "level10": "Holu (K.10)", + "level11": "Pheende-Kwezo" + }, + "peo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian" + }, + "pep": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda", + "level3": "Eastern Tonda" + }, + "peq": { + "level0": "Pomoan", + "level1": "Russian River and Eastern", + "level2": "Russian River", + "level3": "Southern Pomoan-Kashaya" + }, + "pes": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic" + }, + "pev": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku", + "level3": "Mapoyo-Yawarana" + }, + "pex": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + 
"level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka" + }, + "pey": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Global Dutch" + }, + "pez": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Lowland Kenyah", + "level6": "Western Lowland Kenyah-Penan", + "level7": "Penan" + }, + "pfa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Satawalese-Carolinian", + "level12": "Macro-Carolinian", + "level13": "Murilo-Fanapanges" + }, + "pfe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru" + }, + "pfl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Rhenish Franconian", + "level9": "Palatinate" + }, + "pga": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic", + "level8": "Sudanese-Chadian Arabic", + "level9": "East Sudanic Arabic" + }, + "pgd": { + "level0": "Indo-European", + 
"level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic" + }, + "pgg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic" + }, + "pgi": { + "level0": "Border", + "level1": "Bewani", + "level2": "Pagi-Kilmeri" + }, + "pgk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula", + "level9": "Unua-Pangkumu" + }, + "pgs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Mumuyic" + }, + "pgu": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Kao River", + "level4": "Paguic" + }, + "pgy": { + "level0": "Bookkeeping" + }, + "pgz": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "BANZL", + "level4": "Auslanic" + }, + "pha": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Paheng-Younuo", + "level3": "Paheng" + }, + "phd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi", + "level11": "Western Marathi" + 
}, + "phg": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Katu" + }, + "phh": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Hlepho-Phukha" + }, + "phj": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Newaric", + "level4": "Newar" + }, + "phk": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Mogaung", + "level12": "Assam Tai A" + }, + "phl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic", + "level8": "Western Shinaic", + "level9": "Dangari" + }, + "phm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Senaic" + }, + "phn": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + "level6": "Ugarito-Phoenician", + "level7": "Phoenician-Punic" + }, + "pho": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid", + "level7": "Phunoi-Coong" + }, + "phq": { + "level0": 
"Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic" + }, + "phr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Paharic" + }, + "pht": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Siamese" + }, + "phu": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Siamese" + }, + "phv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic" + }, + "phw": { + "level0": "Bookkeeping" + }, + "pia": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Piman" + }, + "pib": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Purus", + "level4": "Yineic", + "level5": "Western Yineic" + }, + "pic": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu 
A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Northern Okani", + "level10": "Himba-Pinji" + }, + "pid": { + "level0": "Saliban", + "level1": "Maco-Piaroa" + }, + "pie": { + "level0": "Kiowa-Tanoan", + "level1": "Tiwa-Piro" + }, + "pif": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Ponapeic" + }, + "pig": { + "level0": "Unattested", + "level1": "Pano-Tacanan (Unattested)" + }, + "pih": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English" + }, + "pij": { + "level0": "Unclassifiable" + }, + "pil": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Yom-Nawdm" + }, + "pim": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian" + }, + "pin": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Western Sepik Hill", + "level3": "Hewa-April River" + }, + "pio": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Piapoco-Achagua" + }, + "pip": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic 
A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic", + "level8": "Peroic" + }, + "pir": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Kotiria-Piratapuyo", + "level5": "Piratapuyic" + }, + "pis": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Pacific Creole English", + "level12": "Early Melanesian Pidgin" + }, + "pit": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Palku", + "level3": "Pitta-Pitta" + }, + "piu": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic", + "level4": "Nuclear Pintupic" + }, + "piv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian" + }, + "piw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika" + }, + "pix": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage" + }, + "piy": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West 
Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic", + "level8": "Peroic" + }, + "piz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Nmi-Pij-Fwa-Pam-Pap", + "level10": "Nmi-Fij-Fwa" + }, + "pjt": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic", + "level4": "Nuclear Pintupic", + "level5": "Wangkatja-Tjarra", + "level6": "Tjarra" + }, + "pkb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian" + }, + "pkc": { + "level0": "Unclassifiable" + }, + "pkg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus" + }, + "pkh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin" + }, + "pkn": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Mungkanic" + }, + "pko": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Northern Kalenjin" + }, + "pkp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + 
"level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Ellicean", + "level9": "Pukapukic" + }, + "pkr": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Irula-Muduga", + "level8": "Muduga-Palu" + }, + "pks": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Indo-Pakistani-Nepalese Sign", + "level3": "Indo-Pakistani Sign" + }, + "pkt": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Chutic", + "level3": "East Chutic" + }, + "pku": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Maanyan-Paku" + }, + "pla": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kaukombaran" + }, + "plb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "East Santo", + "level9": "Southeast Santo" + }, + "plc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Southern Palawanic", + "level5": "Molbog-Palawan", + "level6": "Nuclear Palawan" + }, + "pld": { + "level0": "Unclassifiable" + }, + "ple": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe" + }, + "plg": { + "level0": 
"Guaicuruan", + "level1": "Guaicuru del Sur", + "level2": "Qom", + "level3": "Pilaga-Toba" + }, + "plh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua" + }, + "pli": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari" + }, + "plk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic", + "level8": "Kohistanic Shina" + }, + "pll": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "West Palaungic", + "level4": "Palaung" + }, + "pln": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic", + "level13": "South Castilic" + }, + "plo": { + "level0": "Mixe-Zoque", + "level1": "Mixe" + }, + "plp": { + "level0": "Bookkeeping" + }, + "plq": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic" + }, + "plr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "South Senufo" + }, + "pls": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", 
+ "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan" + }, + "plt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic", + "level7": "Central-Eastern Malagasic" + }, + "plu": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran" + }, + "plv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Southern Palawanic", + "level5": "Molbog-Palawan", + "level6": "Nuclear Palawan", + "level7": "Brooke-Canipaan Palawan" + }, + "plw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Southern Palawanic", + "level5": "Molbog-Palawan", + "level6": "Nuclear Palawan", + "level7": "Brooke-Canipaan Palawan" + }, + "ply": { + "level0": "Austroasiatic", + "level1": "Mangic", + "level2": "Pakanic" + }, + "plz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic" + }, + "pma": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu" + }, + "pmb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Zandic", + "level6": "Barambo-Pambia" + }, + "pmc": { + "level0": "Unattested" + }, + "pmd": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Eastern Victoria", + 
"level4": "Dhudhuroa-Pallanganmiddang" + }, + "pme": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Nmi-Pij-Fwa-Pam-Pap" + }, + "pmf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Pamona-Tombelala" + }, + "pmh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone" + }, + "pmi": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Pumi" + }, + "pmj": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Pumi" + }, + "pml": { + "level0": "Pidgin", + "level1": "Romance-based pidgin" + }, + "pmm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)" + }, + "pmn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Tupuri-Mundang-Mambai", + "level7": "Mundangic" + }, + "pmo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Central Yapen" + }, + 
"pmq": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Pamean" + }, + "pmr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "Apalic", + "level6": "Greater West Sogeram" + }, + "pms": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian", + "level12": "Piemontese-Lombard" + }, + "pmt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal" + }, + "pmw": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Eastern Miwokan" + }, + "pmx": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Angami-Mao", + "level5": "Naga Maoic", + "level6": "Poumaic" + }, + "pmy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay" + }, + "pmz": { + "level0": "Otomanguean", + "level1": "Western 
Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Pamean" + }, + "pna": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Punan Tubu-Bah" + }, + "pnb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic" + }, + "pnc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Pitu Ulunna Salu" + }, + "pnd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbundu (H.20)" + }, + "pne": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Lowland Kenyah", + "level6": "Western Lowland Kenyah-Penan", + "level7": "Penan", + "level8": "Western Penan-Sebop" + }, + "png": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Shiroro" + }, + "pnh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East 
Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal" + }, + "pni": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Muller-Schwaner" + }, + "pnk": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Bolivian Arawakan", + "level3": "Mojeno-Paunaca" + }, + "pnl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Samu" + }, + "pnm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Aput-Busang-Merah-Kohi" + }, + "pnn": { + "level0": "Piawi" + }, + "pno": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Chama subgroup" + }, + "pnp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan", + "level9": "Munic", + "level10": "Western Munic" + }, + "pnq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi" + }, + "pnr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum", + "level5": "Panim-Isebe-Bau" + }, + "pns": { + "level0": "Austronesian", 
+ "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Mongondowic" + }, + "pnt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek", + "level6": "Koineic Greek", + "level7": "Modern Koineic Greek", + "level8": "Pontic-Cappadocian Greek" + }, + "pnu": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Jiongnai-Ho Ne" + }, + "pnv": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Kanyara" + }, + "pnw": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Panytyima-Yinhawangka" + }, + "pnx": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram", + "level3": "Pramic" + }, + "pny": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic" + }, + "pnz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Central Mbum", + "level6": "Karangic", + "level7": "Kare-Pana" + }, + "poa": { + "level0": "Bookkeeping" + }, + "pob": { + "level0": "Bookkeeping" + }, + "poc": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Poqom" + }, + "pod": { + "level0": "Bookkeeping" + }, + "poe": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": 
"Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan", + "level7": "Southwestern Popolocan" + }, + "pof": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke" + }, + "poh": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Poqom" + }, + "poi": { + "level0": "Mixe-Zoque", + "level1": "Zoque", + "level2": "Gulf Zoque" + }, + "poj": { + "level0": "Bookkeeping" + }, + "pol": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Lechitic", + "level6": "Polish-Silesian" + }, + "pom": { + "level0": "Pomoan" + }, + "pon": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Ponapeic" + }, + "poo": { + "level0": "Pomoan", + "level1": "Russian River and Eastern", + "level2": "Russian River", + "level3": "Northern-Central Pomoan" + }, + "pop": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Nmi-Pij-Fwa-Pam-Pap" + }, + "poq": { + "level0": "Mixe-Zoque", + "level1": "Zoque", + "level2": "Gulf Zoque", + "level3": "Texistepec-Ayapa Zoque" + }, + "por": { + "level0": "Indo-European", + "level1": 
"Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Brazil-Portugal Portuguese" + }, + "pos": { + "level0": "Mixe-Zoque", + "level1": "Mixe" + }, + "pot": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi" + }, + "pou": { + "level0": "Bookkeeping" + }, + "pov": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Upper Guinea Portuguese" + }, + "pow": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan", + "level7": "Southwestern Popolocan", + "level8": "Tepexi-Zapotitlan" + }, + "pox": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Lechitic" + }, + "poy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Kilombero" + }, + "ppa": { + "level0": 
"Bookkeeping" + }, + "ppi": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "Pai" + }, + "ppk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Uma-Sarudu" + }, + "ppl": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Isthmus-Pipil Nahuatl" + }, + "ppm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Central Yapen" + }, + "ppn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic" + }, + "ppo": { + "level0": "Teberan" + }, + "ppq": { + "level0": "Walioic", + "level1": "Pai-Sinen-Walio" + }, + "ppr": { + "level0": "Bookkeeping" + }, + "pps": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan" + }, + "ppt": { + "level0": "Kamula-Elevala", + "level1": "Elevala" + }, + "ppu": { + "level0": "Austronesian", + "level1": "Western Plains Austronesian", + "level2": "Central Western Plains" + }, + "ppv": { + "level0": "Unattested", + "level1": "Pano-Tacanan (Unattested)" + }, + "pqa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + 
"level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Western West Chadic B.2" + }, + "pqm": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Northern Eastern Algonquian", + "level6": "Micmacic" + }, + "prb": { + "level0": "Bookkeeping" + }, + "prc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Ormuri-Parachi" + }, + "prd": { + "level0": "Bookkeeping" + }, + "pre": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Lower Guinea Portuguese" + }, + "prf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Northeastern Luzon", + "level4": "Nuclear Northeastern Luzon", + "level5": "Paranan-Pahanan" + }, + "prg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic" + }, + "prh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan" + }, + "pri": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", 
+ "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Cem-Pac" + }, + "prk": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Wa-Lawa", + "level6": "Nuclear Waic" + }, + "prl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "West-Central South American Sign", + "level5": "Peruvian-Inmaculada Sign" + }, + "prn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Nuristani" + }, + "pro": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "Occitanic" + }, + "prp": { + "level0": "Bookkeeping" + }, + "prq": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Ashe-Asha Norte" + }, + "prr": { + "level0": "Puri-Coroado" + }, + "prs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic" + }, + "prt": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram", + "level3": "Tinic", + "level4": "Tin" + }, + "pru": { + "level0": "South Bird's Head Family" + }, + "prv": { + "level0": "Bookkeeping" + }, + "prw": { + "level0": "Nuclear Trans New Guinea", + 
"level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Yarawata-Parawen-Ukuriguma" + }, + "prx": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Western Archaic Tibetan", + "level5": "Shamskatic" + }, + "pry": { + "level0": "Bookkeeping" + }, + "prz": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Providencia-Cayman Sign" + }, + "psa": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu" + }, + "psc": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "psd": { + "level0": "Sign Language", + "level1": "Auxiliary Sign Systems" + }, + "pse": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "South Sumatra Malay" + }, + "psg": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "psh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Pashayi", + "level5": "Western Pashayi" + }, + "psi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Pashayi", + "level5": "Eastern Pashayi" + }, + "psl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "psm": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup II", + "level7": "Warazu-Sirionoid" + }, + "psn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater 
South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Seko", + "level5": "Panasuanic" + }, + "pso": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign" + }, + "psp": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "psq": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mayo-Pasi", + "level3": "Yimin-Bel" + }, + "psr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Swedish Sign" + }, + "pss": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Pasismanua" + }, + "pst": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Pashto", + "level5": "Nuclear Pashto" + }, + "psu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic" + }, + "psw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula", + "level9": "Southeastern Malakula linkage", + "level10": "Port Sandwich-Axamb-Avok" + }, + "psy": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Nanticoke-Conoy" + }, + "pta": { + 
"level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.A", + "level8": "Paraguay-Brazil Guarani", + "level9": "Kaiowa" + }, + "pth": { + "level0": "Nuclear-Macro-Je", + "level1": "Maxakali-Borum", + "level2": "Maxakalian", + "level3": "Nuclear Maxakalian" + }, + "pti": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic", + "level4": "Nuclear Pintupic", + "level5": "Wangkatja-Tjarra" + }, + "ptn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "Central-Eastern South Halmahera" + }, + "pto": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Wayampi-Zoe-Emerillon", + "level7": "Zoe-Emerillon" + }, + "ptp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mumeng", + "level10": "Zenag-Patep" + }, + "ptq": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Yerukula-Korava-Kaikadi" + }, + "ptr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + 
"level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Cape Cumberland" + }, + "ptt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Masenrempulu" + }, + "ptu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Pitu Ulunna Salu", + "level6": "Matangnga-Aralle-Tabulahan" + }, + "ptv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym", + "level7": "Orkon-West Ambrym", + "level8": "West Ambrym", + "level9": "Southwest Ambrym" + }, + "ptw": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "North Georgia Central Salish" + }, + "pty": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid", + "level10": "Kalanadic" + }, + "pua": { + "level0": "Tarascan" + }, + "pub": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Central Old Kuki" + }, + "puc": { + "level0": "Bookkeeping" + }, + "pud": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Aput-Busang-Merah-Kohi" + }, + "puf": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Aput-Busang-Merah-Kohi" + }, + "pug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi" + }, + "puj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Punan Tubu-Bah" + }, + "puk": { + "level0": "Bookkeeping" + }, + "pum": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Southern Kiranti" + }, + "puo": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram" + }, + "pup": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Kabenau" + }, + "pur": { + "level0": "Tupian", + "level1": "Purubora-Ramarama" + }, + "put": { + "level0": "Bookkeeping" + }, + "puu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Ngubi-Sangu-Sira-Punu", + "level23": "Sangu-Sira-Punu", + "level24": 
"Punu-Vungu" + }, + "puw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Eastern Trukic", + "level12": "Puluwatese-Pollapese" + }, + "pux": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Serra Hills" + }, + "puy": { + "level0": "Chumashan", + "level1": "Southern Chumashan", + "level2": "Central Chumashan" + }, + "puz": { + "level0": "Bookkeeping" + }, + "pwb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos", + "level10": "Boze-Loro" + }, + "pwg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage", + "level10": "Boanaki-Paiwa" + }, + "pwi": { + "level0": "Wintuan" + }, + "pwm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Southern Palawanic", + "level5": "Molbog-Palawan" + }, + "pwn": { + "level0": "Austronesian" + }, + "pwo": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Peripheral Karen", + "level3": "Pwo", + "level4": "Eastern-Western Pwo Karen" + }, + "pwr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + 
"level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Eastern Hindi" + }, + "pww": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Peripheral Karen", + "level3": "Pwo", + "level4": "Northern Pwo Karen" + }, + "pxm": { + "level0": "Bookkeeping" + }, + "pye": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Ivorian Grebo" + }, + "pym": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Southeastern Benue-Congo Plateau", + "level5": "Horom-Fyem" + }, + "pyn": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Poyanawa Subgroup" + }, + "pys": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "pyu": { + "level0": "Austronesian" + }, + "pyx": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": "Luish", + "level4": "Unclassified Luish" + }, + "pyy": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid", + "level7": "Bisu-Pyen-Laomian" + }, + "pze": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Polci-Luri", + "level8": "Polcic" + }, + "pzh": { + "level0": "Austronesian", + "level1": "Northwest Formosan" + }, + "pzn": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga", + "level4": "Yimchingric" + }, + "qbb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": 
"Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic" + }, + "qcs": { + "level0": "Mixe-Zoque", + "level1": "Mixe" + }, + "qer": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "North Scandinavian", + "level6": "East-Central Swedic" + }, + "qgu": { + "level0": "Pama-Nyungan", + "level1": "Nyawaygic" + }, + "qhr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Sabellic" + }, + "qkn": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid", + "level6": "Nuclear Kannaoid" + }, + "qlm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Western Caribbean Creole", + "level14": "Jamaicanic" + }, + "qmx": { + "level0": "Bookkeeping" + }, + "qok": { + "level0": "Austroasiatic", + "level1": "Khmeric" + }, + "qpp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone" + }, + "qua": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dhegiha" + }, + "qub": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "AP-AM-AH" + }, + "quc": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + 
"level4": "Core Quichean", + "level5": "Quiche-Achi" + }, + "qud": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua A" + }, + "quf": { + "level0": "Quechuan", + "level1": "Cajamarca-Lambayeque Quechua" + }, + "qug": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua A" + }, + "quh": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Bolivian-Argentinian Quechua", + "level3": "South Bolivian-Argentinian Quechua" + }, + "qui": { + "level0": "Chimakuan" + }, + "quj": { + "level0": "Bookkeeping" + }, + "quk": { + "level0": "Quechuan", + "level1": "San Martin-Amazonas Quechua" + }, + "qul": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Bolivian-Argentinian Quechua" + }, + "qum": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean" + }, + "qun": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Tsamosan", + "level3": "Coastal Tsamosan" + }, + "qup": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua", + "level4": "Colombia-Oriente Quechua", + "level5": "Oriente Quechua", + "level6": "Pastaza Quechua" + }, + "quq": { + "level0": "Unclassifiable" + }, + "qur": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Yaru Quechua" + }, + "qus": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Bolivian-Argentinian Quechua", + "level3": "South Bolivian-Argentinian Quechua" + }, + "qut": { + "level0": "Bookkeeping" + }, + "quu": { + "level0": "Bookkeeping" + }, + "quv": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean" + }, + "quw": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador 
Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua", + "level4": "Colombia-Oriente Quechua", + "level5": "Oriente Quechua" + }, + "qux": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Yauyosic" + }, + "quy": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Ayacuchan Quechua" + }, + "quz": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Cuscan Quechua" + }, + "qva": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Yaru Quechua" + }, + "qvc": { + "level0": "Quechuan", + "level1": "Cajamarca-Lambayeque Quechua" + }, + "qve": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Cuscan Quechua" + }, + "qvh": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay" + }, + "qvi": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua" + }, + "qvj": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B" + }, + "qvl": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "AP-AM-AH" + }, + "qvm": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "AP-AM-AH", + "level4": "Panao-Union" + }, + "qvn": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Yaru Quechua" + }, + "qvo": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua", + "level4": "Colombia-Oriente Quechua", + "level5": "Oriente Quechua" + }, + "qvp": { + "level0": "Quechuan", + "level1": "Quechua I" + }, + "qvs": { + "level0": "Quechuan", + "level1": "San Martin-Amazonas Quechua" + }, + "qvw": { + "level0": "Quechuan", + "level1": 
"Quechua I", + "level2": "Central Quechua I", + "level3": "Jauja-Huanca" + }, + "qvy": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic" + }, + "qvz": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua", + "level4": "Colombia-Oriente Quechua", + "level5": "Oriente Quechua", + "level6": "Pastaza Quechua" + }, + "qwa": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay", + "level4": "Corongo-Sihuas" + }, + "qwc": { + "level0": "Quechuan" + }, + "qwh": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay" + }, + "qws": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay", + "level4": "Corongo-Sihuas" + }, + "qwt": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan" + }, + "qxa": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "AP-AM-AH" + }, + "qxc": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Yauyosic" + }, + "qxh": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "AP-AM-AH", + "level4": "Panao-Union" + }, + "qxi": { + "level0": "Bookkeeping" + }, + "qxl": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua A" + }, + "qxn": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay", + "level4": "Conchucos" + }, + "qxo": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay", + "level4": "Conchucos" + }, + "qxp": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Cuscan Quechua" + }, + "qxq": { + 
"level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz" + }, + "qxr": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B" + }, + "qxs": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Qiang" + }, + "qxu": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Ayacuchan Quechua" + }, + "qxw": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Jauja-Huanca" + }, + "qya": { + "level0": "Artificial Language" + }, + "qyp": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Southern New England Algonquian", + "level6": "Western Southern New England Algonquian" + }, + "raa": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Southern Kiranti" + }, + "rab": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Southern Kiranti" + }, + "rac": { + "level0": "Lakes Plain", + "level1": "Far West Lakes Plain", + "level2": "Rasawa-Saponi" + }, + "rad": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Rade-Jarai" + }, + "rae": { + "level0": "Bookkeeping" + }, + "raf": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Upper Arun", + "level6": "Mewahang" + }, + "rag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow 
Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia" + }, + "rah": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Kochic" + }, + "rai": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Kandas-Duke of York" + }, + "rak": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus II" + }, + "ral": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Thadoic" + }, + "ram": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je", + "level5": "Eastern Timbira", + "level6": "Southeastern Timbira" + }, + "ran": { + "level0": "Kolopom", + "level1": "Kimaama-Riantana" + }, + "rao": { + "level0": "Ramu" + }, + "rap": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Distal", + "level13": "Far East Polynesian" + }, + "rar": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal" + }, + "ras": { + "level0": "Rashad" + }, + "rat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic", + "level10": "Ramand-Karaj" + }, + "rau": { + "level0": "Sino-Tibetan", + "level1": "Raji-Raute", + "level2": "Raute-Rawat" + }, + "rav": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Khambu" + }, + "raw": { + "level0": "Sino-Tibetan", + "level1": "Nungish" + }, + "rax": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Mumuyic" + }, + "ray": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + 
"level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal" + }, + "raz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western Bungku-Tolaki", + "level8": "West Coast Bungku-Tolaki" + }, + "rbb": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "West Palaungic" + }, + "rcf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French" + }, + "rdb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Caspian", + "level8": "Gilaki-Rudbari" + }, + "rea": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Mindjim", + "level4": "Upper Minjim" + }, + "reb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic" + }, + "ree": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic", + "level6": "Rejang-Makaham Kayan" + }, + "reg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": 
"Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Suguti" + }, + "rei": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "rej": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian" + }, + "rel": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana" + }, + "rem": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano" + }, + "ren": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Hre-Sedang-Todrah-Monam", + "level4": "Hre-Sedang" + }, + "rer": { + "level0": "Unattested" + }, + "res": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Kainji Lake" + }, + "ret": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Kaera-Straits", + "level5": "Blagaric" + }, + "rey": { + "level0": "Pano-Tacanan", + "level1": "Tacanan", + "level2": "Takanik-Chamik", + "level3": "Takanik" + }, + "rga": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + 
"level8": "South Santo" + }, + "rge": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek", + "level6": "Koineic Greek", + "level7": "Modern Koineic Greek", + "level8": "Nuclear Modern Greek" + }, + "rgk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Pithauragarh" + }, + "rgn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian", + "level12": "Emiliano-Romagnolo" + }, + "rgr": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Caqueta" + }, + "rgs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Chru-Northern Cham", + "level6": "Chruic" + }, + "rgu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "Nuclear Rote", + "level5": "Central East Rote", + "level6": "Southeast Rote" + }, + "rhg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga" + }, + "rhp": { + "level0": "Nuclear Torricelli", + "level1": "Nuclear Maimai", + "level2": "Heyo-Yahang" + }, + "ria": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + 
"level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Dimasa-Kokborok", + "level5": "Tipperic" + }, + "rib": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "rie": { + "level0": "Bookkeeping" + }, + "rif": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic" + }, + "ril": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "West Palaungic", + "level4": "Riang" + }, + "rim": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Nyaturu-Nilamba" + }, + "rin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic" + }, + "rir": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Southern Land Dayak" + }, + "rit": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu" + }, + "riu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic", + "level6": "Manggarai Khusus" + }, + "rjb": { + "level0": "Bookkeeping" + }, + "rjg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic", + "level6": "Waerana-Razong" + }, + "rji": { + "level0": "Sino-Tibetan", + "level1": "Raji-Raute" + }, + "rjs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": 
"Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Kamrupa", + "level10": "Kamta", + "level11": "Western Kamta" + }, + "rka": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Mnong", + "level6": "Southern-Central Mnong" + }, + "rkb": { + "level0": "Nuclear-Macro-Je" + }, + "rkh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal" + }, + "rki": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic", + "level6": "Nuclear Mranmaic", + "level7": "Arakanese-Marma" + }, + "rkm": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding" + }, + "rkt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Kamrupa", + "level10": "Kamta" + }, + "rkw": { + "level0": "Bookkeeping" + }, + "rma": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Votic Chibchan" + }, + "rmb": 
{ + "level0": "Gunwinyguan", + "level1": "Jala" + }, + "rmc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani" + }, + "rmd": { + "level0": "Speech Register", + "level1": "Indo-European Speech Register" + }, + "rme": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani", + "level10": "Anglo-Northwestern Romani", + "level11": "British Romani" + }, + "rmf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani", + "level10": "Anglo-Northwestern Romani", + "level11": "Northwestern Romani" + }, + "rmg": { + "level0": "Speech Register", + "level1": "Indo-European Speech Register", + "level2": "Scandinavian Romani" + }, + "rmh": { + "level0": "Lepki-Murkim-Kembra" + }, + "rmi": { + "level0": "Speech Register", + "level1": "Indo-European Speech Register" + }, + "rmk": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan", + "level3": "Breri-Romkun" + }, + "rml": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + 
"level9": "Romani" + }, + "rmm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Luangic-Kisaric", + "level5": "Kisaric" + }, + "rmn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani" + }, + "rmo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani", + "level10": "Anglo-Northwestern Romani", + "level11": "Northwestern Romani" + }, + "rmp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Rempic" + }, + "rmq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani" + }, + "rmr": { + "level0": "Bookkeeping" + }, + "rms": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign" + }, + "rmt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone" + }, + "rmu": { + "level0": "Speech 
Register", + "level1": "Indo-European Speech Register", + "level2": "Scandinavian Romani" + }, + "rmv": { + "level0": "Artificial Language" + }, + "rmw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani", + "level10": "Anglo-Northwestern Romani", + "level11": "British Romani" + }, + "rmx": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Lamamic" + }, + "rmy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani" + }, + "rmz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic", + "level6": "Nuclear Mranmaic", + "level7": "Arakanese-Marma" + }, + "rna": { + "level0": "Unattested", + "level1": "Chocoan (Unattested)" + }, + "rnb": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "rnd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Ruund-Salampasu", + "level11": "Lunda-Ruund-Kete", + "level12": "Ruund-Kete" + }, + "rng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East 
Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Tsonga-Copi", + "level12": "Tswa-Ronga (S.50)", + "level13": "Tsongan" + }, + "rnl": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin" + }, + "rnn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Biakic", + "level6": "Biak-Roon" + }, + "rnp": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Central-Eastern West Himalayish" + }, + "rnw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika", + "level10": "Fipaic", + "level11": "Maluwawaru" + }, + "rob": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Torajic" + }, + "roc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Chru-Northern Cham", + "level6": "Chruic" + }, + "rod": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Kamuku-Hungwarya", + "level7": "Kamuku", + "level8": "Rogo-Sagamuk-Sama-Sambuga" + }, + "roe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + 
"level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya" + }, + "rof": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga" + }, + "rog": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Chru-Northern Cham", + "level6": "Northern Cham" + }, + "roh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian" + }, + "rol": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan" + }, + "ron": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Eastern Romance", + "level8": "Northern Romanian", + "level9": "Eastern Romanian" + }, + "roo": { + "level0": "North Bougainville", + "level1": "Rotokas-Askopan" + }, + "rop": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + 
"level10": "Macro-English", + "level11": "Pacific Creole English" + }, + "ror": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Ngada" + }, + "rou": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Runga-Kibet" + }, + "row": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "West Rote" + }, + "rpt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Unclassified Hanseman" + }, + "rri": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Choiseul", + "level10": "East Choiseul" + }, + "rro": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage", + "level9": "Nuclear West Central Papuan linkage" + }, + "rrt": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama" + }, + "rsi": { + "level0": "Artificial Language" + }, + "rsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic", + "level3": "Nuclear RSLic", + "level4": "Central RSLic" + }, + "rsm": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "rsn": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": 
"Unclassified L1 Sign Language" + }, + "rsw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru", + "level9": "Voric" + }, + "rtc": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "rth": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sangiric", + "level3": "Southern Sangiric" + }, + "rtm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage" + }, + "rtw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Rathawi-Palya" + }, + "rub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu" + }, + "ruc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara" + }, + "rue": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "East Slavic", + "level5": "Ukrainian-Rusyn" + }, + "ruf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": 
"Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu" + }, + "rug": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia", + "level11": "Rovianic" + }, + "ruh": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Kochic" + }, + "rui": { + "level0": "Bookkeeping" + }, + "ruk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic", + "level5": "Rukubic" + }, + "run": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu", + "level12": "Rundic" + }, + "ruo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Eastern Romance", + "level8": "Northern Romanian" + }, + "rup": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Eastern Romance" + }, + "ruq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": 
"Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Eastern Romance", + "level8": "Northern Romanian", + "level9": "Eastern Romanian" + }, + "rus": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "East Slavic" + }, + "rut": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Western Samur" + }, + "ruu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic", + "level7": "Upper Kinabatangan-Lobu" + }, + "ruy": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "ruz": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "rwa": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Serra Hills", + "level3": "Rawo-Main Serra" + }, + "rwk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga", + "level11": "West Kilimanjaro" + }, + "rwm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic" + }, + "rwo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": 
"Ufim-Rawa-Nahu" + }, + "rwr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewati-Gojri" + }, + "rws": { + "level0": "Bookkeeping" + }, + "rxd": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Western Ngumpin" + }, + "rxw": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Mithaka-Karuwali" + }, + "ryn": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami", + "level4": "Nuclear Amami", + "level5": "Oshima" + }, + "rys": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Southern Ryukyu", + "level3": "Macro-Yaeyama" + }, + "ryu": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Okinawa" + }, + "rzh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic", + "level5": "Modern Sayhadic" + }, + "saa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3", + "level5": "Sokoroic", + "level6": "Saba-Sokoro-Tamki" + }, + "sab": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Eastern Isthmic Chibchan", + "level4": "Guaymiic" + }, + "sac": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Fox" + }, + "sae": { + "level0": "Nambiquaran" + }, + "saf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + 
"level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare" + }, + "sag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic", + "level8": "Sangoic" + }, + "sah": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Sakha-Dolgan" + }, + "saj": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Sahuan", + "level3": "Nuclear Sahuan", + "level4": "Sahu-Waioli" + }, + "sak": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ngomic", + "level8": "Nuclear Ngomic", + "level9": "Sake-Ndambomo" + }, + "san": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan" + }, + "sap": { + "level0": "Bookkeeping" + }, + "saq": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Ongamo-Maa", + "level5": "Nuclear Maa" + }, + "sar": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xaray" + }, + "sas": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bali-Sasak-Sumbawa", + "level3": "Sasak-Sumbawa" + }, + "sat": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Santalic" + }, + "sau": { + "level0": "Austronesian", 
+ "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram" + }, + "sav": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Cangin", + "level3": "Saafi-Noon-Lehar" + }, + "saw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Unclassified Awyu-Dumut" + }, + "sax": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "South Pentecost" + }, + "say": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zakse-Saya" + }, + "saz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic" + }, + "sba": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone" + }, + "sbb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "West New Georgia", + "level11": "Simboic" + }, + "sbc": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Kurti-Kele-Ere" + }, + "sbd": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Mande Samo" + }, + "sbe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic", + "level8": "Suau chain" + }, + "sbg": { + "level0": "West Bird's Head", + "level1": "Seget-Moi" + }, + "sbh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus I" + }, + "sbi": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei" + }, + "sbj": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Masalit" + }, + "sbk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mbeya" + }, + "sbl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Abellen-Botolan" + }, + "sbm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": 
"Ruvu", + "level10": "West Ruvu", + "level11": "Vidunda-Sagala" + }, + "sbn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Unclassified Sindhic" + }, + "sbo": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "Senoic", + "level4": "Lanoh-Semnam-Temiar", + "level5": "Lanoh-Semnam", + "level6": "Lanohic" + }, + "sbp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Wanji-Sangu" + }, + "sbq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "North Sogeram" + }, + "sbr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Eastern Murutic", + "level8": "Selungai-Sembakung Murut" + }, + "sbs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Western Botatwe", + "level9": "Machili" + }, + "sbu": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Lahauli-Spiti" + }, + "sbw": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Northern Okani", + "level10": "Himba-Pinji" + }, + "sbx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Ibanic", + "level5": "Iban-Mualang-Seberuang", + "level6": "Iban-Seberuang" + }, + "sby": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe" + }, + "sbz": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Barh Keita", + "level7": "Sara-Kaba" + }, + "sca": { + "level0": "Bookkeeping" + }, + "scb": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Chutic", + "level3": "East Chutic" + }, + "sce": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Southern Periphery Mongolic", + "level3": "Shirongol", + "level4": "Baoanic" + }, + "scg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Southern Land Dayak" + }, + "sch": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic", + "level6": "Hmaric" + }, + "sci": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay" + }, + "sck": { + "level0": "Indo-European", + "level1": "Classical 
Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Sadanic", + "level11": "Sadri-Panchpargania", + "level12": "India-Nepal-Bangladesh Sadri" + }, + "scl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic" + }, + "scn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Italo-Dalmatian", + "level9": "Italian Romance" + }, + "sco": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic" + }, + "scp": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Kyirong-Kagate", + "level9": "Yolmo-Kagate" + }, + "scq": { + "level0": "Austroasiatic", + "level1": "Pearic", + "level2": "Western Pearic", + "level3": "Southern Chong" + }, + "scs": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Slaveyic", + "level5": "Slave" + }, + "sct": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Brou-So", + "level4": "Eastern 
Bru-Katang", + "level5": "Katang" + }, + "scu": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Kinnauric", + "level5": "Thebor" + }, + "scv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos" + }, + "scw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Ronic" + }, + "scx": { + "level0": "Unclassifiable" + }, + "sda": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Torajic" + }, + "sdb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Gorani", + "level9": "Shabak-Bajalani" + }, + "sdc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian" + }, + "sde": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru", + "level9": "Voric" + }, + "sdg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", 
+ "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic", + "level8": "Western Shinaic" + }, + "sdh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Laki-Kurdish", + "level8": "Kurdish" + }, + "sdi": { + "level0": "Bookkeeping" + }, + "sdj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Nuclear Northern Kikongo" + }, + "sdk": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic", + "level3": "Iatmulic" + }, + "sdl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Arab Sign" + }, + "sdm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Southern Land Dayak" + }, + "sdn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Southern Romance", + "level8": "Sardo-Corsican", + "level9": "Corsic" + }, + "sdo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Bidayuh" + }, + "sdp": { + "level0": 
"Sino-Tibetan", + "level1": "Kho-Bwa", + "level2": "Western Kho-Bwa", + "level3": "Sartang-Sherdukpen" + }, + "sdr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Sadanic", + "level11": "Sadri-Panchpargania", + "level12": "India-Nepal-Bangladesh Sadri" + }, + "sds": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Zuara-Sened" + }, + "sdt": { + "level0": "Bookkeeping" + }, + "sdu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Uma-Sarudu" + }, + "sdx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Melanau", + "level6": "Sibu-Kanowit-Tanjong" + }, + "sea": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "Senoic" + }, + "sec": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "North Georgia Central Salish" + }, + "sed": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Hre-Sedang-Todrah-Monam", + "level4": "Hre-Sedang" + }, + "see": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "sef": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo" + }, + "seg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + 
"level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Mijikenda", + "level12": "Southern Mijikenda" + }, + "seh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Senaic" + }, + "sej": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz", + "level5": "Huon Tip" + }, + "sek": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Cordillera Athabaskan", + "level5": "Beaver-Sekani" + }, + "sel": { + "level0": "Uralic", + "level1": "Samoyedic", + "level2": "Kamas-Selkup" + }, + "sen": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "North Senufo" + }, + "sep": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "North Senufo", + "level5": "Supyiric" + }, + "seq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "North Senufo" + }, + "ser": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan", + "level3": "Serran" + }, + "ses": { + "level0": "Songhay", + "level1": "Eastern Songhay" + }, + "set": { + "level0": "Sentanic", + "level1": "Nuclear Sentanic", + "level2": "Sentani-Nafri" + }, + "seu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + 
"level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Central Yapen", + "level8": "Serui-Busami" + }, + "sev": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "South Senufo" + }, + "sew": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage" + }, + "sey": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Napo Tucanoan", + "level3": "Siona-Secoya" + }, + "sez": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic" + }, + "sfb": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Dutch-Belgian Sign", + "level4": "Belgian Sign" + }, + "sfe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Nuclear Subanen", + "level5": "East Nuclear Subanen" + }, + "sfm": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian" + }, + "sfs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "South African Sign" + }, + "sfw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Northern Bia" + }, + "sga": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", 
+ "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Goidelic" + }, + "sgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Mag-Ayta" + }, + "sgc": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Central Kalenjin" + }, + "sgd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "South Bisayan", + "level6": "Surigao" + }, + "sge": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Modang-Segai" + }, + "sgg": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "DGSic" + }, + "sgh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Eastern Iranian", + "level5": "Shughni-Yazgulami", + "level6": "Shughnic" + }, + "sgi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute" + }, + "sgk": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid" + }, + "sgm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "North Nyanza" + }, + "sgo": { + "level0": "Bookkeeping" + }, + "sgp": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": 
"Jingpho" + }, + "sgr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Komisenian" + }, + "sgt": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic" + }, + "sgu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "Setic" + }, + "sgw": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "TT-Group" + }, + "sgx": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "sgy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Sanglechi-Ishkashimi" + }, + "sgz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage" + }, + "sha": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Beromic" + }, + "shb": { + "level0": "Yanomamic", + "level1": "Ninam-Yanomam-Yaroame" + }, + "shc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbala-Holu-Sondi (K.10)", 
+ "level10": "Mbala-Sondi" + }, + "shd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic" + }, + "she": { + "level0": "Dizoid" + }, + "shg": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "Ost-Kxoe" + }, + "shh": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Central Numic" + }, + "shi": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Kabyle-Atlas Berber", + "level3": "Atlas Berber" + }, + "shj": { + "level0": "Dajuic", + "level1": "Eastern Dajuic" + }, + "shk": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo" + }, + "shl": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic", + "level5": "Nuclear Maraic" + }, + "shm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic", + "level10": "Khalkhalic" + }, + "shn": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Southern Shanic" + }, + "sho": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Busan", + "level5": "Kyenga-Shanga" + }, + "shp": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": 
"Mainline Pano", + "level3": "Pano Nawa", + "level4": "Chama subgroup", + "level5": "Shipibo-Konibo-Kapanawa" + }, + "shq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe", + "level9": "Central Eastern Botatwe", + "level10": "Kafue" + }, + "shr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu" + }, + "shs": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Northern Interior Salish", + "level3": "Thompsonic" + }, + "sht": { + "level0": "Shastan", + "level1": "Nuclear Shastan" + }, + "shu": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic", + "level8": "Sudanese-Chadian Arabic" + }, + "shv": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Eastern MSA" + }, + "shw": { + "level0": "Heibanic", + "level1": "West-Central Heibanic" + }, + "shx": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Jiongnai-Ho Ne", + "level4": "Ho Neic" + }, + "shy": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic" + }, + "sia": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Mainland Eastern Saami" + }, + "sib": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Lowland Kenyah", + "level6": "Western Lowland Kenyah-Penan", + "level7": "Penan", + "level8": "Western Penan-Sebop" + }, + "sic": { + "level0": "Bookkeeping" + }, + "sid": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Sidaama-Gedeo" + }, + "sie": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Greater Luyana", + "level8": "Western Greater Luyana", + "level9": "Simaaic" + }, + "sig": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Sisaala" + }, + "sih": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "sij": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf" + }, + "sik": { + "level0": "Bookkeeping" + }, + "sil": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": 
"Sisaala-Chakali", + "level10": "Sisaala" + }, + "sim": { + "level0": "Sepik", + "level1": "Nukuma", + "level2": "Kwanga-Mende" + }, + "sin": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Dhivehi-Sinhala", + "level6": "Sinhalaic" + }, + "sip": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic" + }, + "siq": { + "level0": "Bosavi", + "level1": "Bosavi Watershed", + "level2": "Kaluli-Sunia" + }, + "sir": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Western West Chadic B.2" + }, + "siu": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Galu-Alu" + }, + "siv": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Nuclear Central Sepik Hill" + }, + "siw": { + "level0": "South Bougainville", + "level1": "Buinic" + }, + "six": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Peka" + }, + "siy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic" + }, + "siz": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Libyan-Egyptian Oases Berber" + }, + "sja": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "San Juan" + }, + "sjb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Punan Tubu-Bah" + }, + "sjd": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Peninsular Eastern Saami" + }, + "sje": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Western Saami", + "level3": "Central Western Saami", + "level4": "Lule-Pite Saami" + }, + "sjg": { + "level0": "Tamaic", + "level1": "Tama-Sungor-Miisiirii", + "level2": "Tama-Sungor" + }, + "sjk": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Mainland Eastern Saami" + }, + "sjl": { + "level0": "Sino-Tibetan", + "level1": "Miji" + }, + "sjm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Borneo Coast Bajaw" + }, + "sjn": { + "level0": "Artificial Language" + }, + "sjo": { + "level0": "Tungusic", + "level1": "Manchu-Jurchen", + "level2": "Manchu-Xibe" + }, + "sjp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Kamrupa", + "level10": "Kamta", + "level11": "Western Kamta" + }, + "sjr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage" + }, + "sjs": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Kabyle-Atlas Berber", + "level3": "Atlas Berber", + "level4": "Northwestern Moroccan Berber" + }, + 
"sjt": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Peninsular Eastern Saami" + }, + "sju": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Western Saami", + "level3": "Southwestern Saami" + }, + "sjw": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian" + }, + "skb": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek" + }, + "skc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Sauk-Nimi" + }, + "skd": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Eastern Miwokan", + "level3": "Sierra Miwokan" + }, + "ske": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "South Pentecost", + "level7": "Seke-Sowa" + }, + "skf": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Tuparic", + "level3": "Nuclear Tuparic", + "level4": "Corumbiara" + }, + "skg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic", + "level7": "South West-Central Malagasic", + "level8": "Nuclear South West-Central Malagasic", + "level9": "Inland-Western Malagasic", + "level10": "Western Malagasic" + }, + "skh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Central Barrier Islands" + }, + "ski": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata" + }, + "skj": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + 
"level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic" + }, + "skl": { + "level0": "Bookkeeping" + }, + "skm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Uruwa", + "level4": "Sakam-Som" + }, + "skn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Western Subanen" + }, + "sko": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Seko" + }, + "skp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Kajang" + }, + "skq": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Soninkean" + }, + "skr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Hindko-Siraiki", + "level10": "Siraikic" + }, + "sks": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kaukombaran" + }, + "skt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": 
"Kwa-Kasai North", + "level15": "Sakata-Tiinic" + }, + "sku": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "South Santo" + }, + "skv": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Skouic" + }, + "skw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Southwestern Dutch", + "level9": "Zeeuwic" + }, + "skx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Seko" + }, + "sky": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian" + }, + "skz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Yamdena-Onin", + "level5": "Oninic" + }, + "slb": { + "level0": "Bookkeeping" + }, + "slc": { + "level0": "Saliban" + }, + "sld": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Sisaala", + "level11": "Northwestern Sisaala" + }, + "sle": { + "level0": "Dravidian", + 
"level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid" + }, + "slf": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Italian Sign" + }, + "slg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Eastern Murutic", + "level8": "Selungai-Sembakung Murut" + }, + "slh": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "Lushootseed-Puget" + }, + "sli": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "East Middle German", + "level8": "Schlesisch-Wilmesau" + }, + "slj": { + "level0": "Bookkeeping" + }, + "slk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Czech-Slovak" + }, + "sll": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Nuclear Simbu", + "level4": "Golinic" + }, + "slm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo" + }, + "slp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat", + "level5": "Flores Lamaholot" + }, + "slq": { + "level0": "Bookkeeping" + }, + "slr": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz" + }, + "sls": { + "level0": "Bookkeeping" + }, + "slt": { + "level0": "Sino-Tibetan", 
+ "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Sila-Wanya-Cosao" + }, + "slu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "South Tanimbar" + }, + "slv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Western South Slavic" + }, + "slw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Kalasa" + }, + "slx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Ruund-Salampasu" + }, + "sly": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Makassaric" + }, + "slz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Maya-Matbat", + "level6": "Raja Ampat Maya" + }, + "sma": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Western Saami", + "level3": "Southwestern Saami" + }, + "smb": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Baruya-Simbari" + }, + "smc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Uruwa", + "level4": "Sakam-Som" + }, + "sme": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Western Saami", + "level3": "Central Western Saami" + }, + "smf": { + "level0": "Border", + 
"level1": "Warisic", + "level2": "Nuclear Warisic", + "level3": "Simog-Daonda" + }, + "smg": { + "level0": "Baining" + }, + "smh": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu" + }, + "smj": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Western Saami", + "level3": "Central Western Saami", + "level4": "Lule-Pite Saami" + }, + "smk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Tina-Bolinao" + }, + "sml": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Inner Sulu Sama" + }, + "smm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Unclassified Tharu" + }, + "smn": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Mainland Eastern Saami" + }, + "smo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Ellicean", + "level9": "Pukapukic", + "level10": "Samoan-Tokelauan" + }, + "smp": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + 
"level6": "Hebrewic" + }, + "smq": { + "level0": "East Strickland", + "level1": "Kubo-Samo-Bibo" + }, + "smr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran" + }, + "sms": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Mainland Eastern Saami" + }, + "smt": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic" + }, + "smu": { + "level0": "Austroasiatic", + "level1": "Pearic", + "level2": "Western Pearic" + }, + "smv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi" + }, + "smw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bali-Sasak-Sumbawa", + "level3": "Sasak-Sumbawa" + }, + "smx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Hungan-Samba" + }, + "smy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian" + }, + "smz": { + "level0": "South Bougainville", + "level1": "Nasioiic", + "level2": "Nasioi", + "level3": "Simekuic" + }, + "sna": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Core Shona", + "level10": "Plateau Shona", + "level11": "Central Shona" + }, + "snc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Sinagoro-Keapara" + }, + "snd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Sindhi-Kachchi" + }, + "sne": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Bidayuh", + "level5": "Central-Western Bidayuh" + }, + "snf": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Cangin", + "level3": "Saafi-Noon-Lehar", + "level4": "Noon-Lehar" + }, + "sng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Kaonde-Shaba-Sanga" + }, + "snh": { + "level0": "Unattested", + "level1": "Pano-Tacanan (Unattested)" + }, + "sni": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Chama subgroup" + }, + "snj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": 
"Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic", + "level8": "Sangoic" + }, + "snk": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Soninkean" + }, + "snl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sangiric", + "level3": "Northern Sangiric", + "level4": "Sangil-Sangir" + }, + "snm": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Southern Moru-Madi" + }, + "snn": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Napo Tucanoan", + "level3": "Siona-Secoya", + "level4": "Sionan" + }, + "snp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria" + }, + "snq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Ngubi-Sangu-Sira-Punu", + "level23": "Sangu-Sira-Punu", + "level24": "Sangu-Sira" + }, + "snr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum" + }, + "sns": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": 
"Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula", + "level10": "Southwest Coastal Malekula" + }, + "snu": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic" + }, + "snv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Dayic" + }, + "snw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Lelemic", + "level5": "Likpe-Santrokofi" + }, + "snx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Mindjim", + "level4": "Lower Minjim", + "level5": "Inland Minjim" + }, + "sny": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Western Sepik Hill" + }, + "snz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Evapia", + "level4": "Nuclear Evapia" + }, + "soa": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Black Tai" + }, + "sob": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Sobeic", + "level9": "Sobei-Liki" + }, + "soc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire 
River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Basoo" + }, + "sod": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Songola-Binja" + }, + "soe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Nkutsu-Lokenye", + "level12": "Songomenic" + }, + "sog": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Sogdic-Ossetic", + "level6": "Sogdic", + "level7": "Sogdian-Yagnobi" + }, + "soh": { + "level0": "Eastern Jebel", + "level1": "Aka-Kelo-Molo" + }, + "soi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Eastern Tharu", + "level11": "Dangaura-Khuna-Sonaha" + }, + "soj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic", + "level9": "Kashanic" + }, + "sok": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + 
"level3": "East Chadic B", + "level4": "East Chadic B.3", + "level5": "Sokoroic", + "level6": "Saba-Sokoro-Tamki" + }, + "sol": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic" + }, + "som": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana" + }, + "soo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Nsong-Mpiin-Ngong" + }, + "sop": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde" + }, + "soq": { + "level0": "Dagan", + "level1": "Southeast Dagan" + }, + "sor": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.1", + "level5": "Sumrayic", + "level6": "Sarwa-Sumray" + }, + "sos": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo", + "level5": "Duun-Seenku" + }, + "sot": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Western Sotho-Tswana", + "level12": "Central Sotho-Tswana", + "level13": "Sesotho-Lozi" + }, + "sou": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Lao-Thai" + }, + "sov": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Western Trukic", + "level10": "Sonsorol-Tobi" + }, + "sow": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic", + "level3": "Waina-Punda" + }, + "sox": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Makaaic", + "level11": "Southern Makaaic" + }, + "soy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Unclassified North Volta-Congo" + }, + "soz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Gikuyu-Temi" + }, + "spa": { 
+ "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic" + }, + "spb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Eastern Littoral Piru Bay" + }, + "spd": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Greater Yaganon", + "level4": "Yaganon", + "level5": "Ganglau-Saep" + }, + "spe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage", + "level9": "Bam-Manam", + "level10": "Manam-Sepa" + }, + "spg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Aput-Busang-Merah-Kohi" + }, + "spi": { + "level0": "Lakes Plain", + "level1": "Far West Lakes Plain", + "level2": "Rasawa-Saponi" + }, + "spk": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic" + }, + "spl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Kabwum", + "level6": "Selepet-Komba" + }, + "spm": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ruboni", + "level3": "Mikarewan" + }, + "spn": { + "level0": "Lengua-Mascoy", + "level1": "Eastern Enlhet-Enenlhet" + }, + 
"spo": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Southern Interior Salish", + "level3": "Okanaganic", + "level4": "Kalispel-Spokane" + }, + "spp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "North Senufo", + "level5": "Supyiric" + }, + "spq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic", + "level13": "South Castilic" + }, + "spr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase", + "level8": "Hatuhaha", + "level9": "Saparuan", + "level10": "Saparua-Latu" + }, + "sps": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Saposa-Tinputz" + }, + "spt": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Lahauli-Spiti", + "level7": "Spiti-Jad" + }, + "spu": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric" + }, + "spv": { + "level0": 
"Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "spy": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Elgon-Mau Kalenjin" + }, + "sqa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Kamuku-Hungwarya", + "level7": "Kamuku", + "level8": "Rogo-Sagamuk-Sama-Sambuga", + "level9": "Sagamuk-Sama-Sambuga" + }, + "sqh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "Lameic" + }, + "sqk": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "sqm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Bokoto-Gbeya", + "level6": "Gbeya", + "level7": "Gbeya-Suma" + }, + "sqn": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "sqo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Komisenian" + }, + "sqq": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric", + "level4": "Loven-Suq" + }, + "sqs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic" + }, + "sqt": { + "level0": "Afro-Asiatic", + "level1": 
"Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Eastern MSA" + }, + "squ": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "South Georgia Central Salish" + }, + "sqx": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "sra": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Silopic" + }, + "srb": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "Sora-Juray-Gorum", + "level3": "Sora-Juray" + }, + "src": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Southern Romance", + "level8": "Sardo-Corsican", + "level9": "Sardinian" + }, + "sre": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Benyadu-Bekati", + "level4": "Bakati'", + "level5": "Rara-Sara Bakati'" + }, + "srf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham", + "level9": "Busu", + "level10": "Musom-Sirak" + }, + "srg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan", + "level6": "Kinarayan" + }, + "srh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Eastern Iranian", + "level5": "Shughni-Yazgulami", + "level6": "Shughnic" + }, + "sri": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Western 
Eastern Tucanoan", + "level3": "Cubeo-Desano", + "level4": "Yupua-Siriano-Desano", + "level5": "Siriano-Desano" + }, + "srk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic" + }, + "srl": { + "level0": "Greater Kwerba" + }, + "srm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Surinamese Creole English" + }, + "srn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Surinamese Creole English", + "level13": "Eastern Maroons" + }, + "sro": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Southern Romance", + "level8": "Sardo-Corsican", + "level9": "Sardinian" + }, + "srp": { + "level0": "Indo-European", + "level1": "Balto-Slavic" + }, + "srq": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup II", + "level7": "Warazu-Sirionoid", + "level8": "Sirionoid" + }, + "srr": { + "level0": "Atlantic-Congo", 
+ "level1": "North-Central Atlantic", + "level2": "Fula-Sereer" + }, + "srs": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan" + }, + "srt": { + "level0": "Geelvink Bay", + "level1": "Barapasi-Sauri-Kofei", + "level2": "Sauri-Kofei" + }, + "sru": { + "level0": "Tupian", + "level1": "Monde" + }, + "srv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Warayan" + }, + "srw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Teun-Nila-Serua", + "level5": "Nila-Serua" + }, + "srx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Nuclear Himachali" + }, + "sry": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Sera-Sissano" + }, + "srz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Caspian", + "level8": "Mazanderani-Shahmirzadi" + }, + "ssb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Inner Sulu Sama" + }, + "ssc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + 
"level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "North Mara", + "level12": "Kuriaic" + }, + "ssd": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Kabenau" + }, + "sse": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Inner Sulu Sama" + }, + "ssf": { + "level0": "Austronesian", + "level1": "Western Plains Austronesian" + }, + "ssg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Western Admiralty Islands" + }, + "ssh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic", + "level8": "Dhofaric" + }, + "ssi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Eastern Panjabic" + }, + "ssj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Evapia", + "level4": "Nuclear Evapia", + "level5": "Kesawai-Wia" + }, + "ssk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Central-Eastern West Himalayish" + }, + "ssl": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Sisaala", + "level11": "Northwestern Sisaala" + }, + "ssm": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "Senoic", + "level4": "Lanoh-Semnam-Temiar", + "level5": "Lanoh-Semnam" + }, + "ssn": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Nuclear Oromo", + "level8": "Central-Eastern Oromo", + "level9": "South-East-North Oromo" + }, + "sso": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Sera-Sissano", + "level10": "Sissanoic" + }, + "ssp": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Spanish Sign", + "level3": "Nuclear Spanish Sign" + }, + "ssr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "sss": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Brou-So", + "level4": "Western Bru-So" + }, + "sst": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Nuclear Simbu", + "level4": "Golinic" + }, + "ssu": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Wojokesic", + "level3": "Kamasa-Susuami" + }, + "ssv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": 
"Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "Shark Bayic" + }, + "ssw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Nguni (S.40)", + "level12": "Nuclear Nguni", + "level13": "Southern Ndebele-Lowland", + "level14": "Swatic" + }, + "ssx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa" + }, + "ssy": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Saho-Afar" + }, + "ssz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Pasismanua" + }, + "sta": { + "level0": "Pidgin", + "level1": "Swahili-based pidgin", + "level2": "Upcountry Swahili" + }, + "stb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Nuclear Subanen", + "level5": "East Nuclear Subanen" + }, + "stc": { + "level0": "Bookkeeping" + }, + "std": { + "level0": "Unattested" + }, + "ste": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "Setic" + }, + "stf": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei" + }, + "stg": { + "level0": 
"Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Triengic" + }, + "sth": { + "level0": "Speech Register", + "level1": "Irish-English" + }, + "sti": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Stieng" + }, + "stj": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Mande Samo" + }, + "stk": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda" + }, + "stm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok", + "level7": "Tifal-Telefol", + "level8": "Tifalic", + "level9": "Faiwol-Seltaman" + }, + "stn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Makira" + }, + "sto": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dakotan", + "level3": "Nakoda" + }, + "stp": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Tepehuan", + "level4": "Southern Tepehuan" + }, + "stq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Frisian" + }, + "str": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "Straits Salish" + }, + "sts": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Gawarbatic", + "level5": "Shumashtic" + }, + "stt": 
{ + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Stieng" + }, + "stu": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Bulangic" + }, + "stv": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Harari-East Gurage", + "level6": "Silte-Wolane" + }, + "stw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Satawalese-Carolinian" + }, + "sty": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "North Kipchak" + }, + "sub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Yaka-Suku" + }, + "suc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Western Subanen" + }, + "sue": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "North Binanderean" + }, + "suf": { + "level0": "Bookkeeping" + }, + "sug": { + "level0": 
"Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Mianic" + }, + "suh": { + "level0": "Bookkeeping" + }, + "sui": { + "level0": "Suki-Gogodala" + }, + "suj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu", + "level12": "Rundic", + "level13": "Hangaza-Shubi" + }, + "suk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Sukuma-Nyamwezi (F.20)", + "level9": "Nyamwezic" + }, + "sun": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian" + }, + "suo": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Barupu Lagoon" + }, + "suq": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southeast Surmic", + "level3": "Pastoral Surmic", + "level4": "Tirma-Chai-Mursi" + }, + "sur": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3" + }, + "sus": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Susu-Yalunka" + }, + "sut": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Subtiaba-Tlapanec" + }, + "suu": { + "level0": "Bookkeeping" + }, + "suv": { + "level0": "Sino-Tibetan", + "level1": "Kho-Bwa", + "level2": "Puroikic" + }, + "suw": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Sukuma-Nyamwezi (F.20)" + }, + "suy": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je" + }, + "suz": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Northwestern Kiranti", + "level5": "Bahing-Sunwar" + }, + "sva": { + "level0": "Kartvelian" + }, + "svb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau" + }, + "svc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Vincent-Grenadian Creole" + }, + "sve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Masela-South Babar" + }, + "svk": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign", + "level4": "Nuclear Central European Sign" + }, + "svm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Western South Slavic" + }, + "svr": { + "level0": 
"Bookkeeping" + }, + "swa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid" + }, + "swb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Comorian Bantu", + "level12": "Shindzwani-Shimaore" + }, + "swc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Sabaki-Swahili", + "level11": "Swahili (G.40)", + "level12": "Mombasa-Lamu-Inland Swahili" + }, + "swe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "North Scandinavian", + "level6": "East-Central Swedic", + "level7": "East Swedic" + }, + "swf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Sere-Bviri", + "level8": "Ndogo-Sere", + "level9": "Tagbu-Sere" + }, + "swg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Alemannic", + "level10": "North Alemannic" + }, + "swh": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Sabaki-Swahili", + "level11": "Swahili (G.40)", + "level12": "Mombasa-Lamu-Inland Swahili" + }, + "swi": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS", + "level4": "Maonan-Mak-Sui" + }, + "swj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Ngubi-Sangu-Sira-Punu", + "level23": "Sangu-Sira-Punu", + "level24": "Sangu-Sira", + "level25": "Sira-Barama" + }, + "swk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Senaic" + }, + "swl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Swedish Sign" + }, + "swm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Wamas-Samosa-Murupi-Mosimo" + }, + "swn": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": 
"Libyan-Egyptian Oases Berber" + }, + "swo": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano", + "level5": "Yaminawa Complex" + }, + "swp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic", + "level8": "Suau chain" + }, + "swq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Sharwa-Tsuvan" + }, + "swr": { + "level0": "Yawa-Saweru" + }, + "sws": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "South Tanimbar" + }, + "swt": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "East Alor", + "level3": "Sawila-Wersing" + }, + "swu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "swv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani" + }, + "sww": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "South Pentecost", + "level7": "Seke-Sowa" + }, + "swx": { + "level0": "Arawan" + }, + "swy": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + 
"level4": "East Chadic A.1", + "level5": "Sumrayic", + "level6": "Sarwa-Sumray" + }, + "sxb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "North Mara" + }, + "sxc": { + "level0": "Unclassifiable" + }, + "sxe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Samayic" + }, + "sxg": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Naic" + }, + "sxk": { + "level0": "Kalapuyan" + }, + "sxm": { + "level0": "Bookkeeping" + }, + "sxn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sangiric", + "level3": "Northern Sangiric", + "level4": "Sangil-Sangir" + }, + "sxr": { + "level0": "Austronesian", + "level1": "Tsouic", + "level2": "Kanakanavu-Saaroa" + }, + "sxs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic", + "level7": "Sasaru-Igwe" + }, + "sxu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "East Middle German" + }, + "sxw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "sya": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + 
"level3": "Greater Barito linkage", + "level4": "North West Greater Barito" + }, + "syb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Nuclear Subanen", + "level5": "East Nuclear Subanen" + }, + "syc": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic" + }, + "syi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30" + }, + "syk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara" + }, + "syl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Eastern Bengali" + }, + "sym": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Mande Samo" + }, + "syo": { + "level0": "Austroasiatic", + "level1": "Pearic", + "level2": "Western Pearic", + "level3": "Southern Chong" + }, + "sys": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi" + }, + "syw": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Kyirong-Kagate", + "level9": "Yolmo-Kagate" + }, + "syx": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Samayic" + }, + "syy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "sza": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "South Aslian", + "level3": "Semelai-Semaq" + }, + "szb": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok" + }, + "szc": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "South Aslian", + "level3": "Semelai-Semaq" + }, + "szd": { + "level0": "Bookkeeping" + }, + "sze": { + "level0": "Blue Nile Mao", + "level1": "West Mao", + "level2": "Hozo-Seze" + }, + "szg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Mongoic" + }, + "szl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Lechitic", + "level6": "Polish-Silesian" + }, + "szn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "West Central Maluku", + "level3": "Sula-Buru" + }, + "szp": { + "level0": "Inanwatan" + }, + "szs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "BANZL", + "level4": "Auslanic" + }, + "szv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Dualaic", + "level9": "Kole-Isubu" 
+ }, + "szw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "Central-Eastern South Halmahera" + }, + "szy": { + "level0": "Austronesian", + "level1": "East Formosan", + "level2": "Central East Formosan" + }, + "taa": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tananaic" + }, + "tab": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Eastern Samur", + "level5": "Tabasaran-Aghul-Lezgi" + }, + "tac": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio", + "level3": "Tarahumaran" + }, + "tad": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "West Tariku" + }, + "tae": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Baniwa-Curripaco-Tariano" + }, + "taf": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV" + }, + "tag": { + "level0": "Rashad" + }, + "tah": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": 
"Southern East Polynesian Proximal", + "level14": "Tahitian-Austral" + }, + "taj": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Nuclear Tamang" + }, + "tak": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Guruntumic", + "level7": "Tala-Sho-Zangwal", + "level8": "Tala-Zamwar" + }, + "tal": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic", + "level7": "Talic" + }, + "tam": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Tamil-Paliyan" + }, + "tan": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic", + "level8": "Tangale-Kwami-Kupto" + }, + "tao": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Batanic", + "level3": "Yami-Itbayat" + }, + "tap": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi" + }, + "taq": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Tuareg", + "level3": "Southern Tuareg" + }, + "tar": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio", + "level3": 
"Tarahumaran" + }, + "tas": { + "level0": "Pidgin", + "level1": "French-based pidgin" + }, + "tat": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "North Kipchak", + "level6": "Bashkiric" + }, + "tau": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tananaic", + "level6": "Upper Tananaic" + }, + "tav": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan I", + "level4": "Bara-Tatuyo" + }, + "taw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "Kalam-Kobon", + "level4": "Etp-Ti Kalam" + }, + "tax": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3", + "level5": "Sokoroic", + "level6": "Saba-Sokoro-Tamki" + }, + "tay": { + "level0": "Austronesian", + "level1": "Atayalic" + }, + "taz": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Acheron-Tocho" + }, + "tbb": { + "level0": "Bookkeeping" + }, + "tbc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Western Bel" + }, + "tbe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro", + "level6": "Utupua" + }, + "tbf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tabar linkage" + }, + "tbg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Tairora" + }, + "tbh": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Yuin", + "level5": "Northern Costal Yuin" + }, + "tbi": { + "level0": "Eastern Jebel" + }, + "tbj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "tbk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Kalamian" + }, + "tbl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bilic", + "level3": "Tboli-Blaan" + }, + "tbm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Sere-Bviri", + "level8": "Ndogo-Sere", + "level9": "Tagbu-Sere" + }, + "tbn": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic", + "level4": "Tunebo" + }, + "tbo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage" + }, + "tbp": { + 
"level0": "Lakes Plain", + "level1": "East Lakes Plain" + }, + "tbr": { + "level0": "Kadugli-Krongo", + "level1": "Central-Western Kadugli-Krongo", + "level2": "Krongo-Tumtum" + }, + "tbs": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Ataitan" + }, + "tbt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu" + }, + "tbu": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan" + }, + "tbw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Northern Palawanic" + }, + "tbx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage" + }, + "tby": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran" + }, + "tbz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Oti-Volta Oriental", + "level10": "Waama-Tayari-Ditammari", + "level11": "Tayari-Ditammari", + "level12": "Ditammaric" + }, + "tca": { + "level0": "Ticuna-Yuri" + }, + "tcb": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tananaic", + "level6": "Upper 
Tananaic" + }, + "tcc": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Tatoga-Omotik", + "level3": "Gemein Datooga", + "level4": "North-Central Datooga" + }, + "tcd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Avatime-Nyangbo", + "level5": "Nyangbo-Tafi" + }, + "tce": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tutchone" + }, + "tcf": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Subtiaba-Tlapanec", + "level4": "Mephaa", + "level5": "North-Central Mephaa" + }, + "tcg": { + "level0": "Kayagaric", + "level1": "Kaygir-Tamagario" + }, + "tch": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Gullah-Nevis-Antigua", + "level15": "Gullah", + "level16": "Bahamian Gullah" + }, + "tci": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda", + "level3": "Eastern Tonda" + }, + "tck": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior 
Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Mbere (B.60)", + "level19": "Tsitsekeic" + }, + "tcl": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": "Luish", + "level4": "Unclassified Luish" + }, + "tcn": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Dolpo-Tichurong" + }, + "tco": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic" + }, + "tcp": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin" + }, + "tcq": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Doutai-Kai-Waritai" + }, + "tcs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Pacific Creole English", + "level12": "Early Melanesian Pidgin" + }, + "tct": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS" + }, + "tcu": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio", + "level3": "Tarahumaran" + }, + "tcw": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan", + "level3": "Northern Totonacan", + "level4": "Necaxan" + }, + "tcx": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + 
"level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda" + }, + "tcy": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "South-Western Dravidian", + "level4": "Tuluic" + }, + "tcz": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Thadoic" + }, + "tda": { + "level0": "Songhay", + "level1": "Northwest Songhay", + "level2": "Northern Songhay" + }, + "tdb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Sadanic", + "level11": "Sadri-Panchpargania" + }, + "tdc": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "San Juan", + "level3": "Upper San Juan" + }, + "tdd": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Northern Shanic" + }, + "tde": { + "level0": "Dogon", + "level1": "West Dogon" + }, + "tdf": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Triengic" + }, + "tdg": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Nuclear Tamang" + }, + "tdh": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Thulung-Tilung-Koyi" + }, + "tdi": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western Bungku-Tolaki", + "level8": "Interior Bungku-Tolaki" + }, + "tdj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Southern Tomini" + }, + "tdk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Fyer-Tambas" + }, + "tdl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Bijimic-Sur-Shall" + }, + "tdn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Minahasan", + "level3": "North Minahasan", + "level4": "Northeast Minahasan" + }, + "tdo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Waka-Yendang-Teme" + }, + "tdq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Unclassified Benue-Congo" + }, + "tdr": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Hre-Sedang-Todrah-Monam" + }, + "tds": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Doutai-Kai-Waritai" + }, + "tdt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Central Timoric A", + "level5": "Tetunic" + }, + "tdv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Alumic" + }, + "tdx": { 
+ "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic" + }, + "tdy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Mangyan" + }, + "tea": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "Senoic", + "level4": "Lanoh-Semnam-Temiar" + }, + "teb": { + "level0": "Bookkeeping" + }, + "tec": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Central Kalenjin", + "level4": "Plateau Central Kalenjin", + "level5": "Western Plateau Central Kalenjin" + }, + "ted": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Ivorian Grebo", + "level6": "Tepo-Plapo" + }, + "tee": { + "level0": "Totonacan", + "level1": "Tepehua" + }, + "tef": { + "level0": "Austroasiatic", + "level1": "Nicobaric", + "level2": "Nuclear Nicobaric", + "level3": "Chowra-Teressa" + }, + "teg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Mbere (B.60)" + }, + "teh": { + "level0": "Chonan", + "level1": "Continental Chonan" + }, + "tei": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Kombio-Yambes", + "level3": "Kombioic" + }, + "tek": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie" + }, + "tel": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Teluguic" + }, + "tem": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Northern Mel" + }, + "ten": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Koreguaje-Tama" + }, + "teo": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Teso-Turkana" + }, + "tep": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Tepehuan", + "level4": "Southern Tepehuan" + }, + "teq": { + "level0": "Temeinic" + }, + "ter": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Bolivian Arawakan" + }, + "tes": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic", + "level3": "Modern Javanese" + }, + "tet": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Central Timoric A", + "level5": "Tetunic" + }, + "teu": { + "level0": "Kuliak", + "level1": "Ngangea-So" + }, + "tev": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Teor-Kur" + }, + "tew": { + "level0": "Kiowa-Tanoan", + "level1": "Tewa" + }, + "tex": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southwest Surmic", + "level3": "Didinga-Murle" + }, + "tey": { + "level0": "Kadugli-Krongo", + "level1": "Central-Western Kadugli-Krongo" + }, + "tez": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Western 
Berber" + }, + "tfi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Eastern Phla-Phera" + }, + "tfn": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Southern Alaskan Athabaskan" + }, + "tfo": { + "level0": "Geelvink Bay" + }, + "tfr": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Western Isthmic Chibchan" + }, + "tft": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Ternatean" + }, + "tga": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Taita-Sagalla" + }, + "tgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic" + }, + "tgc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "tgd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2" + }, + "tge": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic" + }, + "tgf": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Phobjib-Chali-Bumthangic", + "level4": "Chali-Bumthangic" + }, + "tgg": { 
+ "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage" + }, + "tgh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Vincent-Grenadian Creole", + "level15": "Grenada-Tobago Creole" + }, + "tgi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Banoni-Piva" + }, + "tgj": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Pre-Western Tani", + "level4": "Western Tani", + "level5": "Subansiri", + "level6": "Bangni-Tagin" + }, + "tgk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic", + "level10": "Tajikic" + }, + "tgl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Tagalogic", + "level5": "Tagalog-Filipino" + }, + "tgn": 
{ + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "South Bisayan", + "level6": "Surigao" + }, + "tgo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Nimoa-Sudest" + }, + "tgp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "South Santo", + "level9": "Araki-Tangoa" + }, + "tgq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Dayic" + }, + "tgr": { + "level0": "Bookkeeping" + }, + "tgs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "tgt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Northern Palawanic", + "level5": "Batak-Central Tagbanwa" + }, + "tgu": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Ataitan", + "level3": "Tangu-Igom" + }, + "tgv": { + "level0": "Bookkeeping" + }, + "tgw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "South Senufo", + "level5": "Tagbana-Jimini" + }, + "tgx": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + 
"level4": "Cordillera Athabaskan", + "level5": "Nahanni" + }, + "tgy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Indri-Togoyo" + }, + "tgz": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Alaya-Athima", + "level3": "Central Alaya-Athima" + }, + "tha": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Lao-Thai" + }, + "thc": { + "level0": "Bookkeeping" + }, + "thd": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Southwest Pama", + "level3": "Upper Southwest Paman" + }, + "the": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Eastern Tharu" + }, + "thf": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Newaric", + "level4": "Thangmi-Baram" + }, + "thh": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio", + "level3": "Tarahumaran", + "level4": "Unclassified Tarahumaran" + }, + "thk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Eastern Kirinyaga", + "level10": "Northern Kirinyaga" + }, + "thl": { + "level0": "Indo-European", + "level1": 
"Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Eastern Tharu", + "level11": "Dangaura-Khuna-Sonaha" + }, + "thm": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Chutic" + }, + "thn": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "thp": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Northern Interior Salish", + "level3": "Thompsonic" + }, + "thq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Eastern Tharu" + }, + "thr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic" + }, + "ths": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Gurungic", + "level6": "Thakali-Chantyal" + }, + "tht": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Cordillera Athabaskan", + "level5": "Nahanni" + }, + "thu": { + "level0": "Nilotic", + 
"level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo", + "level4": "Luwo-Thuri" + }, + "thv": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Tuareg" + }, + "thw": { + "level0": "Bookkeeping" + }, + "thx": { + "level0": "Bookkeeping" + }, + "thy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Jen" + }, + "thz": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Tuareg", + "level3": "Southern Tuareg" + }, + "tia": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Northern Saharan Oasis Berber" + }, + "tic": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Western Heibanic" + }, + "tie": { + "level0": "Bookkeeping" + }, + "tif": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok", + "level7": "Tifal-Telefol", + "level8": "Tifalic" + }, + "tig": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "Tigre-Dahalik" + }, + "tih": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Lowland Murut" + }, + "tii": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + 
"level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Kwa-Kasai North", + "level15": "Sakata-Tiinic", + "level16": "Tiinic" + }, + "tij": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Thulung-Tilung-Koyi" + }, + "tik": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid" + }, + "til": { + "level0": "Salishan", + "level1": "Coast Salish" + }, + "tim": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Kabwum" + }, + "tin": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic", + "level4": "Bagvalal-Tindi" + }, + "tio": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Saposa-Tinputz", + "level13": "Tinputzic" + }, + "tip": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Kwerbaic" + }, + "tiq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Tiefoic" + }, + "tir": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic" + }, + "tis": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": 
"Kalinga", + "level8": "Masadiit" + }, + "tiu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic" + }, + "tiv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid A", + "level8": "Tiv-Evand", + "level9": "Tiv-Iyive-Otanga" + }, + "tix": { + "level0": "Kiowa-Tanoan", + "level1": "Tiwa-Piro", + "level2": "Tiwa" + }, + "tiy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bilic" + }, + "tiz": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Southern Shanic", + "level11": "Wuding-Yuanyang Tai" + }, + "tja": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Bassa-Klao", + "level5": "Klao-Tajuasohn" + }, + "tjg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Barito-Mahakam" + }, + "tji": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Tujia" + }, + "tjj": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Albatross Bay", + "level4": "Anguthimri-Yangathimri-Yuputhimri", + "level5": "Anguthimri-Yangathimri" + }, + "tjl": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern 
Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Mogaung" + }, + "tjn": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Jogo-Jeri", + "level6": "Jogo" + }, + "tjo": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Northern Saharan Oasis Berber", + "level5": "Ouargli-Oued Righ" + }, + "tjp": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Unclassified Wati" + }, + "tjs": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Tujia" + }, + "tju": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda" + }, + "tka": { + "level0": "Unattested" + }, + "tkb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Unclassified Tharu" + }, + "tkd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Timor", + "level3": "Kemak-Tukudede" + }, + "tke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic" + }, + "tkf": { + "level0": "Unattested", + "level1": "Tupian (Unattested)" + }, + "tkg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic", + "level7": "South 
West-Central Malagasic" + }, + "tkk": { + "level0": "Bookkeeping" + }, + "tkl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Ellicean", + "level9": "Pukapukic", + "level10": "Samoan-Tokelauan" + }, + "tkn": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami", + "level4": "Nuclear Amami", + "level5": "Okinoerabu-Tokunoshima" + }, + "tkp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian" + }, + "tkq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Ogonoid", + "level5": "East Ogonoid", + "level6": "Tai-Kana" + }, + "tkr": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Western Samur" + }, + "tks": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic", + "level10": "Ramand-Karaj" + }, + "tkt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Eastern Tharu" + }, + "tku": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central 
Totonacan", + "level3": "Northern Totonacan", + "level4": "Necaxan" + }, + "tkv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Korap linkage" + }, + "tkw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro" + }, + "tkx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Tangko-Nakai" + }, + "tkz": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric" + }, + "tla": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Tepehuan", + "level4": "Southern Tepehuan" + }, + "tlb": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Tobelo-Tugutil" + }, + "tlc": { + "level0": "Totonacan", + "level1": "Totonac" + }, + "tld": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sangiric", + "level3": "Northern Sangiric" + }, + "tle": { + "level0": "Bookkeeping" + }, + "tlf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok", + "level7": "Tifal-Telefol" + }, + "tlg": { + "level0": "Namla-Tofanma" + }, + "tlh": { + "level0": "Artificial Language" + }, + "tli": { + "level0": "Athabaskan-Eyak-Tlingit" + }, + "tlj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": 
"East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara" + }, + "tlk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "tll": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Tetelaic" + }, + "tlm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo" + }, + "tln": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Torajic" + }, + "tlo": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Nding-Tasomi" + }, + "tlp": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan" + }, + "tlq": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Angkuic", + "level5": "Southern Angkuic" + }, + "tlr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Southeast Guadalcanal" + }, + "tls": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "East Santo", + "level9": "Southeast Santo" + }, + "tlt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Eastern Littoral Piru Bay" + }, + "tlu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Ambonic", + "level8": "Northeast Ambon" + }, + "tlv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Taliaboic" + }, + "tlw": { + "level0": "Bookkeeping" + }, + "tlx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus II", + "level9": "Likum-Levei" + }, + "tly": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic" + }, + "tlz": { + "level0": "Bookkeeping" + }, + "tma": { + "level0": "Tamaic", + "level1": "Tama-Sungor-Miisiirii", + "level2": "Tama-Sungor" + }, + "tmb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": 
"Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula" + }, + "tmc": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.1", + "level5": "Sumrayic", + "level6": "Ndam-Tumak" + }, + "tmd": { + "level0": "Piawi" + }, + "tme": { + "level0": "Unattested" + }, + "tmf": { + "level0": "Lengua-Mascoy", + "level1": "Eastern Enlhet-Enenlhet" + }, + "tmg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic", + "level13": "South Castilic", + "level14": "Ternate-Zamboanga-Cavite" + }, + "tmi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "East Santo", + "level9": "Mafea-Tutuba" + }, + "tmj": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Samarokena-Airoran" + }, + "tmk": { + "level0": "Bookkeeping" + }, + "tml": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat", + "level4": "Citak Asmat" + }, + "tmm": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "White Tai" + }, + "tmn": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Tamanic-Bugis", + "level5": "Tamanic" + }, + "tmo": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "South Aslian", + "level3": "Semelai-Semaq" + }, + "tmq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Ali-Tumleo" + }, + "tmr": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic" + }, + "tms": { + "level0": "Katla-Tima" + }, + "tmt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Cape Cumberland" + }, + "tmu": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "Central Tariku" + }, + "tmv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Mongala", + "level11": "Motemboic" + }, + "tmw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + 
"tmx": { + "level0": "Bookkeeping" + }, + "tmy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage" + }, + "tmz": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku" + }, + "tna": { + "level0": "Pano-Tacanan", + "level1": "Tacanan", + "level2": "Takanik-Chamik", + "level3": "Takanik" + }, + "tnb": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic", + "level4": "Tunebo" + }, + "tnc": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "South Eastern Tucanoan" + }, + "tnd": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic", + "level4": "Tunebo" + }, + "tnf": { + "level0": "Bookkeeping" + }, + "tng": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 2" + }, + "tnh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kaukombaran" + }, + "tni": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea" + }, + "tnj": { + "level0": "Bookkeeping" + }, + "tnk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Tanna", + "level7": "Southern Tanna" + }, + "tnl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + 
"level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Tanna", + "level7": "Northern Tanna linkage" + }, + "tnm": { + "level0": "Sentanic", + "level1": "Nuclear Sentanic" + }, + "tnn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Tanna", + "level7": "Northern Tanna linkage", + "level8": "Whitesands-North Tanna linkage" + }, + "tno": { + "level0": "Pano-Tacanan", + "level1": "Tacanan", + "level2": "Takanik-Chamik", + "level3": "Takanik", + "level4": "Araona-Toromono" + }, + "tnp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Tanna", + "level7": "Northern Tanna linkage", + "level8": "Whitesands-North Tanna linkage" + }, + "tnq": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Antillean Arawakan" + }, + "tnr": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Tenda", + "level3": "Bassari-Bedik-Bapen", + "level4": "Bedik-Bapen" + }, + "tns": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "St. 
Matthias" + }, + "tnt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Minahasan", + "level3": "North Minahasan" + }, + "tnu": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P" + }, + "tnv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Southeastern Bengali" + }, + "tnw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Minahasan" + }, + "tnx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro" + }, + "tny": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Unclassified Northeast Savanna Bantu", + "level9": "Bende-Tongwe" + }, + "tnz": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Maniqic" + }, + "tob": { + "level0": "Guaicuruan", + "level1": "Guaicuru del Sur", + "level2": "Qom", + "level3": "Pilaga-Toba" + }, + "toc": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan", + "level3": "Lowland-Sierra Totonacan", + "level4": "Sierra Totonacan" + }, + "tod": { + "level0": "Mande", + "level1": "Western Mande", + "level2": 
"Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Loma" + }, + "toe": { + "level0": "Bookkeeping" + }, + "tof": { + "level0": "Eastern Trans-Fly" + }, + "tog": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Tumbukic" + }, + "toh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Tsonga-Copi", + "level12": "Chopi (S.60)" + }, + "toi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe", + "level9": "Central Eastern Botatwe", + "level10": "Kafue" + }, + "toj": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Chujean" + }, + "tok": { + "level0": "Artificial Language" + }, + "tol": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "Oregon Athabaskan" + }, + "tom": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Minahasan", + "level3": "North Minahasan", + "level4": "Northeast Minahasan" + }, + "ton": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Tongic" + }, 
+ "too": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan", + "level3": "Northern Totonacan" + }, + "top": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan", + "level3": "Lowland-Sierra Totonacan" + }, + "toq": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Teso-Turkana", + "level4": "Turkanic" + }, + "tor": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic" + }, + "tos": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan", + "level3": "Lowland-Sierra Totonacan", + "level4": "Sierra Totonacan" + }, + "tot": { + "level0": "Bookkeeping" + }, + "tou": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Cuoi" + }, + "tov": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic", + "level10": "Taromic" + }, + "tow": { + "level0": "Kiowa-Tanoan" + }, + "tox": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Western Trukic", + "level10": "Sonsorol-Tobi" + }, + "toy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio" + }, + "toz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": 
"Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Unclassified Mbum" + }, + "tpa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Eastern Taupota", + "level12": "Taupota-Waiema" + }, + "tpc": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Subtiaba-Tlapanec", + "level4": "Mephaa" + }, + "tpe": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Dimasa-Kokborok", + "level5": "Tipperic" + }, + "tpf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Kaptiau-Tarpia" + }, + "tpg": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "East Alor" + }, + "tpi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Pacific Creole English", + "level12": "Early Melanesian Pidgin" + }, + "tpj": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup 
I.B", + "level8": "Chiriguanic" + }, + "tpl": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Subtiaba-Tlapanec", + "level4": "Mephaa", + "level5": "North-Central Mephaa", + "level6": "West-Central Mephaa" + }, + "tpm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Chakalic", + "level11": "Chakali-Tamprusi-Vagala", + "level12": "Chakali-Tamprusi" + }, + "tpn": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup III" + }, + "tpo": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Red Tai", + "level11": "Tai Muong" + }, + "tpp": { + "level0": "Totonacan", + "level1": "Tepehua" + }, + "tpr": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Tuparic", + "level3": "Nuclear Tuparic", + "level4": "Wayoro-Tupari" + }, + "tpt": { + "level0": "Totonacan", + "level1": "Tepehua" + }, + "tpu": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Tampuon-Bahnar" + }, + "tpv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + 
"level10": "Central Trukic", + "level11": "Satawalese-Carolinian", + "level12": "Macro-Carolinian", + "level13": "Murilo-Fanapanges" + }, + "tpx": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Subtiaba-Tlapanec", + "level4": "Mephaa", + "level5": "North-Central Mephaa", + "level6": "West-Central Mephaa" + }, + "tpz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Saposa-Tinputz", + "level13": "Tinputzic" + }, + "tqb": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV", + "level6": "Tupi-Guarani Subgroup IV.B" + }, + "tql": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "tqm": { + "level0": "Doso-Turumsa" + }, + "tqn": { + "level0": "Sahaptian", + "level1": "Sahaptin", + "level2": "Southern Sahaptin" + }, + "tqo": { + "level0": "Eleman", + "level1": "Eastern Eleman" + }, + "tqp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage" + }, + "tqq": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": 
"Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Dabarre-Tunni" + }, + "tqr": { + "level0": "Narrow Talodi", + "level1": "Lumun-Torona" + }, + "tqt": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan" + }, + "tra": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Unclassified Kohistani" + }, + "trb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Kairiruic linkage", + "level9": "Kaiep-Terebu" + }, + "trc": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Trique" + }, + "trd": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric" + }, + "tre": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru" + }, + "trf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Barbados-Eustatius", + "level15": "Barbados-Trinidad" + }, + "trg": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": 
"West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Trans-Zab" + }, + "trh": { + "level0": "Dagan" + }, + "tri": { + "level0": "Cariban", + "level1": "Guianan", + "level2": "Taranoan", + "level3": "Tiriyoan" + }, + "trj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Birgit-Mogum-Toram" + }, + "trl": { + "level0": "Unclassifiable" + }, + "trm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Nuristani", + "level4": "Nuristani Kalasha-Tregami" + }, + "trn": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Bolivian Arawakan", + "level3": "Mojeno-Paunaca", + "level4": "Moje\u00f1o" + }, + "tro": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Kolhrengic", + "level5": "Tarao-Chothe" + }, + "trp": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Dimasa-Kokborok", + "level5": "Tipperic" + }, + "trq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Trique" + }, + "trs": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Trique" + }, + "trt": { + "level0": "Geelvink Bay", + "level1": "Burate-Wate" + }, + "tru": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + 
"level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "Turoyo-Mlahso" + }, + "trv": { + "level0": "Austronesian", + "level1": "Atayalic" + }, + "trw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Dir-Swat Kohistani" + }, + "trx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Bidayuh" + }, + "try": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Unclassified Sukaphic" + }, + "trz": { + "level0": "Chapacuran", + "level1": "Moreic-Waric", + "level2": "Moreic" + }, + "tsa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Njebi (B.50)", + "level20": "Ndjavi A" + }, + "tsb": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland 
East Cushitic", + "level5": "Transversal Lowland East Cushitic", + "level6": "Dullay" + }, + "tsc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Tsonga-Copi", + "level12": "Tswa-Ronga (S.50)" + }, + "tsd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "North Greek" + }, + "tse": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Italian Sign" + }, + "tsf": { + "level0": "Bookkeeping" + }, + "tsg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "South Bisayan", + "level6": "Butuan-Tausug" + }, + "tsh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Sharwa-Tsuvan" + }, + "tsi": { + "level0": "Tsimshian" + }, + "tsj": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Tshanglic" + }, + "tsk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Kham-Hor" + }, + "tsl": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai" + }, + "tsm": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "tsn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": 
"Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Western Sotho-Tswana", + "level12": "Central Sotho-Tswana" + }, + "tso": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Tsonga-Copi", + "level12": "Tswa-Ronga (S.50)", + "level13": "Tsongan" + }, + "tsp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Tusia" + }, + "tsq": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "tsr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Southwest Santo" + }, + "tss": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "JSLic" + }, + "tst": { + "level0": "Songhay", + "level1": "Eastern Songhay" + }, + "tsu": { + "level0": "Austronesian", + "level1": "Tsouic" + }, + "tsv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Southern Okani" + }, + "tsw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": 
"Kambari-Cicipu", + "level6": "Kambaric", + "level7": "East Kambaric" + }, + "tsx": { + "level0": "Anim", + "level1": "Inland Gulf of Papua", + "level2": "West Inland Gulf of Papua" + }, + "tsy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "tsz": { + "level0": "Tarascan" + }, + "tta": { + "level0": "Siouan", + "level1": "Ohio Valley Siouan" + }, + "ttb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Dakoid", + "level6": "Tiba-Dong" + }, + "ttc": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Mamean", + "level4": "Mamean" + }, + "tte": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "ttf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "West Mbam (A.40)" + }, + "ttg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Lower Baram", + "level6": "Central Lower Baram A" + }, + "tth": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Ta'oihic", + "level3": "Ong-Ta'oih" + }, + "tti": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Jayapura Bay", + "level8": "Eastern Jayapura 
Bay" + }, + "ttj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara", + "level12": "Nkore-Kiga-Nyoro-Tooro", + "level13": "Nyoro-Tooro" + }, + "ttk": { + "level0": "Barbacoan", + "level1": "Coconucan" + }, + "ttl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Western Botatwe", + "level9": "Machili" + }, + "ttm": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tutchone" + }, + "ttn": { + "level0": "Pauwasi", + "level1": "Western Pauwasi" + }, + "tto": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Ta'oihic", + "level3": "Ong-Ta'oih" + }, + "ttp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Pamona-Tombelala" + }, + "ttq": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Tuareg", + "level3": "Southern Tuareg" + }, + "ttr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Teraic", + "level5": "Western Tera" + }, + "tts": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Lao-Thai", + "level10": 
"Sakon Nakhon" + }, + "ttt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Caucasian Tat" + }, + "ttu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Mono-Uruavan" + }, + "ttv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Koro-Lele-Nali-Titan" + }, + "ttw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Lowland Kenyah", + "level6": "Western Lowland Kenyah-Penan" + }, + "ttx": { + "level0": "Bookkeeping" + }, + "tty": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku" + }, + "ttz": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Kyirong-Kagate" + }, + "tua": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg", + "level2": "Mandi-Muniwara" + }, + "tub": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan" + }, + "tuc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Western Ngero", + "level9": "Tuam" + }, + "tue": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Pisamira-Yuruti", + "level5": "Tuyuca-Yuruti" + }, + "tuf": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic", + "level4": "Tunebo" + }, + "tug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Riverine Bua" + }, + "tuh": { + "level0": "Taulil-Butam" + }, + "tui": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Tupuri-Mundang-Mambai" + }, + "tuj": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Tobelo-Tugutil" + }, + "tuk": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "East Oghuz" + }, + "tul": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic", + "level8": "Tula-Ma-Yebu", + "level9": "Nuclear Tulaic" + }, + "tum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Tumbukic" + }, + "tuo": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern 
Tucanoan", + "level3": "Eastern Eastern Tucanoan I" + }, + "tuq": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Tebu" + }, + "tur": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "West Oghuz" + }, + "tus": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian", + "level2": "Tuscarora-Nottoway" + }, + "tuu": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "Oregon Athabaskan", + "level5": "Rogue River" + }, + "tuv": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Teso-Turkana", + "level4": "Turkanic" + }, + "tux": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa" + }, + "tuy": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Central Kalenjin", + "level4": "Plateau Central Kalenjin" + }, + "tuz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Kirma-Tyurama" + }, + "tva": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Choiseul", + "level10": "West Choiseul" + }, + "tvd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Kambari-Cicipu", + "level6": "Kambaric", + "level7": "East Kambaric" + }, + "tve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": 
"Southwest Maluku", + "level4": "Teun-Nila-Serua" + }, + "tvi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zeemic", + "level8": "Nuclear Zeemic" + }, + "tvk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym" + }, + "tvl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Ellicean" + }, + "tvm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Southwest Babar" + }, + "tvn": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic", + "level6": "Nuclear Mranmaic" + }, + "tvo": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Ternatean" + }, + "tvs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Pare-Taveta" + }, + "tvt": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "North Patkaian", + "level4": "Noctean", + "level5": "Tutsic" + }, + "tvu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + 
"level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "West Mbam (A.40)", + "level11": "Mandi-Nyokon" + }, + "tvw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili" + }, + "tvy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Luso-Asian Creole" + }, + "twa": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish" + }, + "twb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "South Mangyan", + "level4": "Buhid-Taubuid", + "level5": "Batangan" + }, + "twc": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.1", + "level5": "Ngizim-Southwestern Bade", + "level6": "Shira-Southwestern Bade", + "level7": "Shira" + }, + "twe": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Pantar", + "level5": "Teiwa-Sar" + }, + "twf": { + "level0": "Kiowa-Tanoan", + "level1": "Tiwa-Piro", + "level2": "Tiwa", + "level3": "Taos-Picuris" + }, + "twg": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Kaera-Straits", + "level5": "Blagaric" + }, + "twh": 
{ + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "White Tai" + }, + "twl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Core Shona", + "level10": "Plateau Shona", + "level11": "Central Shona" + }, + "twm": { + "level0": "Bookkeeping" + }, + "twn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila", + "level11": "Eastern Mambila" + }, + "two": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Northern Sotho", + "level12": "Sepedic" + }, + "twp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Kurti-Kele-Ere" + }, + "twq": { + "level0": "Songhay", + "level1": "Northwest Songhay", + "level2": "Northern Songhay" + }, + "twr": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio", 
+ "level3": "Tarahumaran", + "level4": "Unclassified Tarahumaran" + }, + "twt": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV", + "level6": "Tupi-Guarani Subgroup IV.B" + }, + "twu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "Nuclear Rote", + "level5": "Central East Rote" + }, + "tww": { + "level0": "Walioic" + }, + "twx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Core Shona", + "level10": "Plateau Shona" + }, + "twy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "North East Greater Barito" + }, + "txa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic" + }, + "txb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Tokharian" + }, + "txc": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan" + }, + "txe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tolitoli" + }, + "txg": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Gyalrongic", + "level5": "West Gyalrongic", + "level6": "Horpa" + }, + "txh": { + "level0": "Indo-European", + "level1": "Unclassified Indo-European" + }, + "txi": { + "level0": "Cariban", + "level1": "Pekodian", + "level2": "Xinguan" + }, + "txj": { + "level0": 
"Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanembuic" + }, + "txm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Northern Tomini" + }, + "txn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru" + }, + "txo": { + "level0": "Sino-Tibetan", + "level1": "Dhimal-Lhokpu-Toto" + }, + "txq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "Nuclear Rote" + }, + "txr": { + "level0": "Unclassifiable" + }, + "txs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Minahasan", + "level3": "North Minahasan", + "level4": "Northeast Minahasan" + }, + "txt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat", + "level4": "Citak Asmat" + }, + "txu": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je" + }, + "txx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic" + }, + "txy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic", + "level7": "South West-Central Malagasic", + "level8": "Nuclear South West-Central Malagasic", + "level9": "Inland-Western Malagasic", + "level10": "Bara-Tanosy" + }, + "tya": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Unclassified Rai Coast" + }, + "tye": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + 
"level4": "Busan", + "level5": "Kyenga-Shanga" + }, + "tyh": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram", + "level3": "Pramic" + }, + "tyi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie" + }, + "tyj": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Red Tai", + "level11": "Tai Muong" + }, + "tyl": { + "level0": "Bookkeeping" + }, + "tyn": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Ndeiram" + }, + "typ": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Alaya-Athima", + "level3": "Thaypanic" + }, + "tyr": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Red Tai" + }, + "tys": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai" + }, + "tyt": { + "level0": "Tai-Kadai", + 
"level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Red Tai" + }, + "tyu": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "Ost-Kxoe", + "level4": "Tshwa Khoe" + }, + "tyv": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Sayan-Yenisei Turkic", + "level4": "Sayan" + }, + "tyx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie" + }, + "tyy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Bijimic-Sur-Shall", + "level6": "Kwangic" + }, + "tyz": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai" + }, + "tza": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "tzb": { + "level0": "Bookkeeping" + }, + "tzc": { + "level0": "Bookkeeping" + }, + "tze": { + "level0": "Bookkeeping" + }, + "tzh": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Tzeltalan" + }, + "tzj": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean", + 
"level5": "Cakchiquel-Tzutujil" + }, + "tzl": { + "level0": "Artificial Language" + }, + "tzm": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Kabyle-Atlas Berber", + "level3": "Atlas Berber" + }, + "tzn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Wetar-Atauro", + "level4": "Wetar", + "level5": "Perai-Tugun-Aputai" + }, + "tzo": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Tzeltalan" + }, + "tzs": { + "level0": "Bookkeeping" + }, + "tzt": { + "level0": "Bookkeeping" + }, + "tzu": { + "level0": "Bookkeeping" + }, + "tzx": { + "level0": "Lower Sepik", + "level1": "Karawarian" + }, + "tzz": { + "level0": "Bookkeeping" + }, + "uam": { + "level0": "Unclassifiable" + }, + "uan": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P" + }, + "uar": { + "level0": "Eleman", + "level1": "Eastern Eleman" + }, + "uba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic", + "level7": "Bukpic" + }, + "ubi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3" + }, + "ubm": { + "level0": "Bookkeeping" + }, + "ubr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage" + }, + "ubu": { + 
"level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Hagen", + "level3": "Aua-Gawil" + }, + "uby": { + "level0": "Abkhaz-Adyge" + }, + "uda": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross", + "level7": "Oroic", + "level8": "Enwang-Uda" + }, + "ude": { + "level0": "Tungusic", + "level1": "Northeastern Tungusic", + "level2": "Central-Eastern Tungusic", + "level3": "Oroch-Udihe" + }, + "udg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Irula-Muduga", + "level8": "Muduga-Palu" + }, + "udi": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Eastern Samur", + "level5": "Udi-Aghwan" + }, + "udj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Ujir-Kola-Kompane" + }, + "udl": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Tokombere", + "level7": "Madaic" + }, + "udm": { + "level0": "Uralic", + "level1": "Permian" + }, + "udu": { + "level0": "Koman", + "level1": "Central Koman", + "level2": "Komo-Uduk" + }, + "ues": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan", + "level9": "Munic", + "level10": "Western Munic" + }, + "ufi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + 
"level4": "Ufim-Rawa-Nahu" + }, + "uga": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + "level6": "Ugarito-Phoenician" + }, + "ugb": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Paman Kuku" + }, + "uge": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia" + }, + "ugh": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Dargwic" + }, + "ugn": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "ugo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese" + }, + "ugy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "uha": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Osse" + }, + "uig": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan", + "level4": "Modern Turkestan", + "level5": "Uyghuric" + }, + "uis": { + "level0": "South Bougainville", + "level1": "Buinic", + "level2": "Buin" + }, + "uiv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid A", + "level8": "Tiv-Evand", + "level9": 
"Tiv-Iyive-Otanga" + }, + "uji": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Jilic-Eggonic", + "level5": "Jilic" + }, + "uka": { + "level0": "South Bird's Head Family" + }, + "ukg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Yarawata-Parawen-Ukuriguma" + }, + "ukh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic", + "level10": "Mpiemo-Ukhwejo" + }, + "ukl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic", + "level3": "Nuclear RSLic", + "level4": "Central RSLic" + }, + "ukp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic", + "level7": "Bukpic" + }, + "ukq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic" + }, + "ukr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "East Slavic", + "level5": "Ukrainian-Rusyn" + }, + "uks": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "uku": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Osse", + "level7": "Ukue-Ehueun" + }, + "ukv": { + "level0": 
"Nilotic", + "level1": "Eastern Nilotic", + "level2": "Barian", + "level3": "Nuclear Barian" + }, + "ukw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + "level4": "Nuclear Igboid", + "level5": "Central-Northern Igbo" + }, + "uky": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Southwest Pama", + "level3": "Upper Southwest Paman" + }, + "ula": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Shiroro" + }, + "ulb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede", + "level8": "Southeastern Ede", + "level9": "Nuclear Yoruba" + }, + "ulc": { + "level0": "Tungusic", + "level1": "Central-Western Tungusic", + "level2": "Ulchaic" + }, + "uli": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Western Trukic" + }, + "ulk": { + "level0": "Eastern Trans-Fly" + }, + "ull": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "ulm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Pitu Ulunna Salu", + "level6": "Matangnga-Aralle-Tabulahan" + }, + "uln": { + "level0": "Indo-European", + "level1": "Classical 
Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Upper Franconian", + "level10": "Global German" + }, + "ulu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Highland Kenyah", + "level6": "Upper Pujungan" + }, + "ulw": { + "level0": "Misumalpan", + "level1": "Sumalpan", + "level2": "Sumuic" + }, + "uly": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Polci-Luri", + "level8": "Polcic" + }, + "uma": { + "level0": "Sahaptian", + "level1": "Sahaptin", + "level2": "Southern Sahaptin" + }, + "umb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene" + }, + "umd": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Northeastern Pama", + "level4": "Umbindhamuic" + }, + "umg": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Lamalamic", + "level3": "Coastal Lamalamic" + }, + "umi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Bukat-Ukit-Beketan-Lugat-Lisum" + }, + "umm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central 
Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Ubaghara-Kohumono", + "level8": "Kohumonoic" + }, + "umn": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southeastern Patkaian" + }, + "umo": { + "level0": "Bororoan" + }, + "ump": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Northeastern Pama", + "level4": "Umpilaic" + }, + "ums": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Southern Tomini" + }, + "umu": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Delawaran", + "level5": "Common Delaware" + }, + "una": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Watut" + }, + "une": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Afenmai-Bendel", + "level7": "Uneme-Yekhee" + }, + "ung": { + "level0": "Worrorran" + }, + "uni": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Barupu Lagoon" + }, + "unk": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xaray", + "level4": "Parecis-Nawe" + }, + "unm": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Delawaran", + "level5": "Common Delaware" + }, + "unn": { + "level0": "Pama-Nyungan", + "level1": "Ganaic" + }, + "unr": { + "level0": "Austroasiatic", + "level1": 
"Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Ho-Mundari" + }, + "unu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "unx": { + "level0": "Bookkeeping" + }, + "unz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Common Kaili" + }, + "uok": { + "level0": "Bookkeeping" + }, + "uon": { + "level0": "Austronesian", + "level1": "Northwest Formosan" + }, + "upi": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic", + "level3": "Waina-Punda" + }, + "upv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage" + }, + "urb": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Guaja-Kaapor-Ava" + }, + "urc": { + "level0": "Giimbiyu", + "level1": "Urninganggic" + }, + "urd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Hindustani" + }, + "ure": { + "level0": "Uru-Chipaya" + }, + "urf": { + "level0": "Bookkeeping" + }, + "urg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + 
"level2": "Rai Coast", + "level3": "Peka", + "level4": "Urigina-Danaru" + }, + "urh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Southwestern Edoid" + }, + "uri": { + "level0": "Nuclear Torricelli" + }, + "urk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric", + "level6": "Northeastern Peninsular Malay" + }, + "url": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu" + }, + "urm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok", + "level7": "Tifal-Telefol", + "level8": "Tifalic" + }, + "urn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Yamdena-Onin", + "level5": "Oninic" + }, + "uro": { + "level0": "Baining" + }, + "urp": { + "level0": "Unclassifiable" + }, + "urr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "urt": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat" + }, + "uru": { + "level0": "Tupian", + "level1": "Purubora-Ramarama", + "level2": "Ramarama" + }, + "urv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New 
Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Mono-Uruavan" + }, + "urw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Peka" + }, + "urx": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg", + "level2": "Elepi-Kamasau-Marienberg" + }, + "ury": { + "level0": "Tor-Orya" + }, + "urz": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Nuclear Kawahiva", + "level8": "Central Kawahiva", + "level9": "Amondava-Uru-Eu-Wau-Wau" + }, + "usa": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": "Auyana" + }, + "ush": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic", + "level8": "Kohistanic Shina" + }, + "usi": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Dimasa-Kokborok", + "level5": "Tipperic" + }, + "usk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross" + }, + "usp": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean" + }, + "usu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Nuru", + "level4": "Erimaic" + }, + "uta": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + 
"level6": "Central Tivoid", + "level7": "Central Tivoid A", + "level8": "Tiv-Evand", + "level9": "Tiv-Iyive-Otanga" + }, + "ute": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Southern Numic" + }, + "utp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro", + "level6": "Utupua" + }, + "utr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya", + "level5": "Etulo-Idoma" + }, + "utu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Silopic", + "level6": "Silopi-Utu" + }, + "uum": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak", + "level6": "Crimean Tatar-Urum" + }, + "uur": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Erromanga" + }, + "uuu": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Angkuic" + }, + "uve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Vanuatu-Loyalty Outliers" + }, + "uvh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap" + }, + "uvl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + 
"level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Mengenic" + }, + "uwa": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Paman Kuku" + }, + "uya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Agoi-Doko-Iyoniyong" + }, + "uzn": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan", + "level4": "Modern Turkestan", + "level5": "Uzbek" + }, + "uzs": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan", + "level4": "Modern Turkestan", + "level5": "Uzbek" + }, + "vaa": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "vae": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Nduga-Luto" + }, + "vaf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic", + "level10": "Vafsic" + }, + "vag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": 
"Sisaala-Chakali", + "level10": "Chakalic", + "level11": "Chakali-Tamprusi-Vagala" + }, + "vah": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi" + }, + "vai": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Vai-Kono" + }, + "vaj": { + "level0": "Kxa", + "level1": "Ju-Kung" + }, + "val": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage" + }, + "vam": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Skouic", + "level3": "Eastern Skouic", + "level4": "West Coast Skouic" + }, + "van": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "West Palai" + }, + "vao": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Northern Malakula", + "level8": "North Coast Malakula", + "level9": "Botovro-Vovo-Vao" + }, + "vap": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic", + "level6": "Gangte-Vaiphei" + }, + "var": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio" + }, + "vas": { + "level0": 
"Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Vasave-Noiri" + }, + "vau": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya", + "level14": "Bantu D33", + "level15": "Vanuma-Nyali" + }, + "vav": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi" + }, + "vay": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Northwestern Kiranti" + }, + "vbb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Masela-South Babar" + }, + "vec": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian" + }, + "ved": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": 
"Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Dhivehi-Sinhala", + "level6": "Sinhalaic" + }, + "vem": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Lamang-Hdi" + }, + "ven": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu" + }, + "veo": { + "level0": "Chumashan", + "level1": "Southern Chumashan", + "level2": "Central Chumashan" + }, + "vep": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Ladogan", + "level6": "East Ladoga" + }, + "ver": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Northern Samba-Duru", + "level7": "Vere-Gimme", + "level8": "Vere" + }, + "vgr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic", + "level10": "Western Gujaratic" + }, + "vgt": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Dutch-Belgian Sign", + "level4": "Belgian Sign" + }, + "vic": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": 
"Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Barbados-Eustatius" + }, + "vid": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu", + "level11": "Vidunda-Sagala" + }, + "vie": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Viet-Muong" + }, + "vif": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic" + }, + "vig": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur" + }, + "vin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu" + }, + "vis": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": 
"Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "vit": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Unclassified Narrow Grassfields" + }, + "viv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage", + "level9": "Bwaidoka-Iduna" + }, + "vka": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Ngarluma-Kariyarra" + }, + "vki": { + "level0": "Bookkeeping" + }, + "vkk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "South Sumatra Malay" + }, + "vkl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "vkm": { + "level0": "Kamakanan", + "level1": "Nuclear Kamakanan" + }, + "vkn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Unclassified Western Benue-Congo Plateau" + }, + "vko": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western 
Bungku-Tolaki", + "level8": "West Coast Bungku-Tolaki" + }, + "vkp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Indo-Portuguesic", + "level15": "Northern Indo-Portuguesic" + }, + "vkt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "vku": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Yindjibarndi-Kurrama" + }, + "vky": { + "level0": "Bookkeeping" + }, + "vkz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid" + }, + "vlp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Cape Cumberland" + }, + "vlr": { + "level0": "Bookkeeping" + }, + "vls": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Southwestern Dutch" + }, + "vma": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central 
Ngayarda" + }, + "vmb": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Finasleigh Pama" + }, + "vmc": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec" + }, + "vmd": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "South-Western Dravidian", + "level4": "Koraga" + }, + "vme": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Masela-South Babar" + }, + "vmf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Upper Franconian", + "level10": "Greater East Franconian" + }, + "vmg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Patpatar-Minigir-Tolai", + "level9": "Minigir-Tolai" + }, + "vmh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic" + }, + "vmi": { + "level0": "Worrorran", + "level1": "Northern Worrorran", + "level2": "Forrest River" + }, + "vmj": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + 
"level5": "Mixtec", + "level6": "Coast Mixtec" + }, + "vmk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "vml": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Kartu-Nhanda", + "level3": "Kartu" + }, + "vmm": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec" + }, + "vmp": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Valley Mazatec", + "level6": "Ayautlic", + "level7": "Northern Baja Mazatec" + }, + "vmq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northeastern Alta Mixtec" + }, + "vmr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic" + }, + "vms": { + "level0": "Unattested" + }, + "vmu": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic", + "level2": "Yalandyic" + }, + "vmv": { + "level0": "Maiduan" + }, + "vmw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "vmx": { + "level0": 
"Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec" + }, + "vmy": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Valley Mazatec", + "level6": "Ayautlic" + }, + "vmz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Central Mazatec" + }, + "vnk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro" + }, + "vnm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Central-Western Malakula" + }, + "vnp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Cape Cumberland" + }, + "vol": { + "level0": "Artificial Language" + }, + "vor": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bena-Mboi", + "level5": "Bena", + "level6": "Yungur-Voro" + }, + "vot": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "Central Finnic" + }, + "vra": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Lemerig-Veraa" + }, + "vro": { + "level0": "Uralic", + "level1": "Finnic" + }, + "vrs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Choiseul", + "level10": "West Choiseul" + }, + "vrt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula" + }, + "vsi": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic", + "level3": "Nuclear RSLic", + "level4": "Central RSLic" + }, + "vsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Spanish Sign" + }, + "vsv": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Spanish Sign", + "level3": "Nuclear Spanish Sign" + }, + "vto": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor", + "level3": "Betaf-Vitou" + }, + "vum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic 
Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Ngubi-Sangu-Sira-Punu", + "level23": "Sangu-Sira-Punu", + "level24": "Punu-Vungu" + }, + "vun": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga", + "level11": "Central Kilimanjaro" + }, + "vut": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Vutic", + "level10": "Vute-Wawa" + }, + "vwa": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Wa-Lawa", + "level6": "Nuclear Waic" + }, + "waa": { + "level0": "Sahaptian", + "level1": "Sahaptin", + "level2": "Northern Sahaptin" + }, + "wab": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Eastern Bel" + }, + "wac": { + "level0": "Chinookan" + }, + "wad": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Ansus-Ambai" + }, + "wae": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": 
"Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Alemannic", + "level10": "South Alemannic" + }, + "waf": { + "level0": "Unattested" + }, + "wag": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Eastern Taupota", + "level12": "Taupota-Waiema" + }, + "wah": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut" + }, + "wai": { + "level0": "Unattested", + "level1": "Tor-Orya (Unattested)" + }, + "waj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Tairora" + }, + "wal": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo" + }, + "wam": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Southern New England Algonquian" + }, + "wan": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Wan-Mwan" + }, + "wao": { + "level0": "Yuki-Wappo" + }, + "wap": { + "level0": "Arawakan", + "level1": "Negro-Roraima", + "level2": "Pidjanan", + "level3": "Wapishanan", + "level4": "Wapishana-Atorai" + }, + "war": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central 
Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Warayan", + "level7": "Samar-Waray" + }, + "wat": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage" + }, + "wau": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xinguan Arawak", + "level4": "Waura-Mehinaku-Kustenau", + "level5": "Waura-Mehinaku" + }, + "wav": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Waka-Yendang-Teme", + "level8": "Waka-Yandang" + }, + "waw": { + "level0": "Cariban", + "level1": "Parukotoan", + "level2": "Waiwaian" + }, + "wax": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ottilien", + "level3": "Watam-Kaian" + }, + "way": { + "level0": "Cariban", + "level1": "Guianan", + "level2": "Wayanaic" + }, + "waz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Upper Markham", + "level9": "Mountain Upper Markham" + }, + "wbb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Eastern Yapen" + }, + "wbe": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Doutai-Kai-Waritai" + }, + "wbf": { + 
"level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Samu" + }, + "wbh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika", + "level10": "Fipaic", + "level11": "Maluwawaru" + }, + "wbi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Wanji-Sangu" + }, + "wbj": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic", + "level3": "Greater West Rift South Cushitic", + "level4": "West Rift South Cushitic", + "level5": "Northern West Rift South Cushitic" + }, + "wbk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Nuristani", + "level4": "Nuristani Kalasha-Tregami" + }, + "wbl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Saka-Wakhi" + }, + "wbm": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Wa-Lawa", + "level6": "Nuclear Waic" + }, + "wbp": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Yapa" + }, + "wbq": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Teluguic" + }, + "wbr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": 
"Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "wbt": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Martuwangkic", + "level4": "Warnman-Wangka" + }, + "wbv": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Kartu-Nhanda", + "level3": "Kartu" + }, + "wbw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Ansus-Ambai" + }, + "wca": { + "level0": "Yanomamic", + "level1": "Ninam-Yanomam-Yaroame", + "level2": "Yanomam-Yaroame", + "level3": "Yanomam-Yanimamo" + }, + "wci": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe", + "level5": "Kpesi-Waci" + }, + "wdd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Njebi (B.50)", + "level20": "Ndjavi B" + }, + "wdg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Osum-Wadaginam-Pomoikan" + }, + "wdu": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric" + }, + "wea": { + "level0": "Sino-Tibetan", + "level1": 
"Karenic", + "level2": "Southern Karen", + "level3": "Sgaw" + }, + "wec": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn", + "level6": "Guere" + }, + "wed": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Wedauic" + }, + "weh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring", + "level10": "Aghemic", + "level11": "Aghem-Weh" + }, + "wei": { + "level0": "Anim", + "level1": "Tirio", + "level2": "Nuclear Tirio" + }, + "wem": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "weo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers" + }, + "wep": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German", + "level8": "Low German", + "level9": "West Low German" + }, + "wer": { + "level0": "Kunimaipan", + "level1": "Weric" + }, + "wes": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", 
+ "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "West African Creole English", + "level13": "Coastal Nigerian Krio", + "level14": "Nigeria-Cameroon Creole English" + }, + "wet": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Wetar-Atauro", + "level4": "Wetar", + "level5": "Perai-Tugun-Aputai", + "level6": "Perai-Aputai" + }, + "weu": { + "level0": "Bookkeeping" + }, + "wew": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Wewewa-Laboya" + }, + "wfg": { + "level0": "Pauwasi", + "level1": "Eastern Pauwasi" + }, + "wga": { + "level0": "Pama-Nyungan", + "level1": "Ngarna", + "level2": "Southern Ngarna", + "level3": "Ngarru" + }, + "wgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "wgg": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Palku", + "level3": "Arabana-Wangganguru" + }, + "wgi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Wahgic" + }, + "wgo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera" + }, + "wgu": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura" + }, + "wgw": { + "level0": "Bookkeeping" + }, + "wgy": { + "level0": "Pama-Nyungan" + }, + "wha": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Nunusaku", + "level3": "Patakai-Manusela", + "level4": "Manusela-Huaulu" + }, + "whg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Wahgic" + }, + "whk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Lowland Kenyah" + }, + "whu": { + "level0": "Bookkeeping" + }, + "wib": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Tusia" + }, + "wic": { + "level0": "Caddoan", + "level1": "Northern Caddoan" + }, + "wie": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Ep-Meanha-Keyenganh" + }, + "wif": { + "level0": "Unattested", + "level1": "Pama-Nyungan (Unattested)" + }, + "wig": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Ngatharra-Ngathana-Iinychanya" + }, + "wih": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Ep-Meanha-Keyenganh" + }, + "wii": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic" + }, + "wij": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Mungkanic", + "level7": "Mungkan-Mungkanhu" + }, + "wik": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Ngatharra-Ngathana-Iinychanya" + }, + "wil": { + "level0": "Worrorran", + "level1": "Northern Worrorran" + }, + "wim": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": 
"Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Mungkanic", + "level7": "Mungkan-Mungkanhu" + }, + "win": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Winnebago-Chiwere" + }, + "wir": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Unclassified Kawahiva" + }, + "wit": { + "level0": "Wintuan" + }, + "wiv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Bali-Vitu" + }, + "wiw": { + "level0": "Bookkeeping" + }, + "wiy": { + "level0": "Algic" + }, + "wja": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja" + }, + "wji": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Central West Chadic B.2", + "level7": "Warji-Gala-Kariya" + }, + "wka": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic", + "level3": "Greater West Rift South Cushitic" + }, + "wkb": { + "level0": "Bookkeeping" + }, + "wkd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Sobeic" + }, + "wkl": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": 
"Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid", + "level10": "Kalanadic" + }, + "wku": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid", + "level10": "Kalanadic" + }, + "wkw": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "North Coast Pama-Nyungan", + "level3": "Waka-Kabic", + "level4": "Western Waka-Kabic" + }, + "wla": { + "level0": "Walioic", + "level1": "Pai-Sinen-Walio" + }, + "wlc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Comorian Bantu" + }, + "wle": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Harari-East Gurage", + "level6": "Silte-Wolane" + }, + "wlg": { + "level0": "Gunwinyguan" + }, + "wlh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Timor" + }, + "wli": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Sahuan", + "level3": "Nuclear Sahuan", + "level4": "Sahu-Waioli" + }, + "wlk": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "California Athabaskan" + }, + "wll": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Western Kordofan Nubian" + }, + "wln": { + "level0": "Indo-European", + "level1": "Classical 
Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil" + }, + "wlo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Southern Kaili-Wolio", + "level5": "Island Kaili-Wolio", + "level6": "Wolio-Kamaru" + }, + "wlr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Southwest Santo" + }, + "wls": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "East Uvean-Niuafo'ou" + }, + "wlu": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "North Coast Pama-Nyungan", + "level3": "Waka-Kabic", + "level4": "Western Waka-Kabic" + }, + "wlv": { + "level0": "Mataguayan", + "level1": "Mataguayo II", + "level2": "Wichi" + }, + "wlw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central Dani", + "level3": "Grand Valley Dani", + "level4": "Walakic" + }, + "wlx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear 
Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "Central-South Dagaric", + "level16": "South Dagaric" + }, + "wly": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Southern Kiranti", + "level6": "Bantawic" + }, + "wma": { + "level0": "Unattested" + }, + "wmb": { + "level0": "Mirndi", + "level1": "Ngurlun" + }, + "wmc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Wamas-Samosa-Murupi-Mosimo" + }, + "wmd": { + "level0": "Nambiquaran", + "level1": "Nambikwara Complex", + "level2": "Northern Nambiquaran" + }, + "wme": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Chaurasiya" + }, + "wmg": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Muya" + }, + "wmh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Kawaimina" + }, + "wmi": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Finasleigh Pama" + }, + "wmm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Masenrempulu" + }, + "wmn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone" + }, + "wmo": { + "level0": "Nuclear Torricelli" + }, + "wms": { + 
"level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Dumut", + "level6": "Ketum-Wambon" + }, + "wmt": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Western Ngumpin" + }, + "wmw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Sabaki-Swahili" + }, + "wmx": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Serra Hills", + "level3": "Rawo-Main Serra" + }, + "wnb": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Tibor" + }, + "wnc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Wantoatic", + "level4": "Wantoat-Awara" + }, + "wnd": { + "level0": "Mangarrayi-Maran", + "level1": "Maran" + }, + "wne": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Pashto" + }, + "wng": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Ndeiram" + }, + "wni": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + 
"level11": "Comorian Bantu", + "level12": "Shindzwani-Shimaore" + }, + "wnk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Central-East Sumbanese", + "level7": "Central Sumbanese" + }, + "wnm": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Palku", + "level3": "Pitta-Pitta" + }, + "wno": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani" + }, + "wnp": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei" + }, + "wnu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Karian-Usan-Yaben" + }, + "wny": { + "level0": "Garrwan" + }, + "woa": { + "level0": "Northern Daly" + }, + "wob": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee" + }, + "woc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage", + "level9": "Kis-Wogeo" + }, + "wod": { + "level0": "Nuclear Trans New Guinea", + "level1": "Paniai Lakes", + "level2": "Mee-Wodani" + }, + "woe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic" + }, + "wof": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Wolofic" + }, + "wog": { + "level0": "Sepik", + "level1": 
"Iwam-Wogamus", + "level2": "Wogamusin-Chenapian" + }, + "woi": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "Central Alor", + "level4": "Abuic" + }, + "wok": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru" + }, + "wol": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Wolofic" + }, + "wom": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Sambaic", + "level8": "Samba-Leko-Perema-Mumbake", + "level9": "Perema-Mumbake" + }, + "won": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Bushoong-Wongo-Lele" + }, + "woo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Central Aru" + }, + "wor": { + "level0": "Geelvink Bay" + }, + "wos": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Ambulas-Hanga-Hundi" + }, + "wow": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "woy": { + "level0": "Unattested" + }, + "wpc": { + "level0": "Saliban", + "level1": "Maco-Piaroa" + }, + "wrb": { + "level0": "Pama-Nyungan", + "level1": 
"Ngarna", + "level2": "Southern Ngarna", + "level3": "Thawa" + }, + "wrd": { + "level0": "Bookkeeping" + }, + "wre": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "wrg": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Northern Maric", + "level5": "Warungu-Gugu Badhun" + }, + "wrh": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Wiradhuric" + }, + "wri": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Mantharta", + "level4": "Wariyangga-Dhargari" + }, + "wrk": { + "level0": "Garrwan" + }, + "wrl": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Yapa" + }, + "wrm": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic" + }, + "wrn": { + "level0": "Heibanic", + "level1": "Eastern Heibanic" + }, + "wro": { + "level0": "Worrorran", + "level1": "Western Worrorran" + }, + "wrp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea" + }, + "wrr": { + "level0": "Yangmanic" + }, + "wrs": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic" + }, + "wru": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western Bungku-Tolaki", + "level8": "West Coast Bungku-Tolaki" + }, + "wrv": { + "level0": "Suki-Gogodala", + "level1": "Gogodalic", + "level2": "Ari-Waruna" + }, + "wrw": { + "level0": "Pama-Nyungan", + "level1": "Unclassified Pama-Nyungan" + }, + "wrx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic", 
+ "level6": "Waerana-Razong" + }, + "wry": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani" + }, + "wrz": { + "level0": "Gunwinyguan", + "level1": "Western Gunwinyguan", + "level2": "Warrayic" + }, + "wsa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Lower Mamberamo" + }, + "wsg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi", + "level5": "Southwest Gondi", + "level6": "Southern Gondi" + }, + "wsi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo" + }, + "wsk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Kowan" + }, + "wsr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": "Awa-Oweina" + }, + "wss": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Akanic" + }, + "wsu": { + "level0": "Unattested" + }, + "wsv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": 
"Kohistani" + }, + "wtf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Evapia" + }, + "wth": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Kulin-Bunganditj", + "level4": "Kulin", + "level5": "Nuclear Kulin" + }, + "wtk": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Nuclear Central Sepik Hill", + "level4": "Kapriman-Watakataui" + }, + "wtm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewati-Gojri" + }, + "wtw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Southern Kaili-Wolio" + }, + "wua": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama" + }, + "wub": { + "level0": "Worrorran", + "level1": "Northern Worrorran" + }, + "wud": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "wuh": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Northern Chinese", + "level5": "Mandarinic" + }, + "wul": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Ngalik-Nduga" + }, + "wum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Samayic", + "level9": "Ndasa-Wumbvu" + }, + 
"wun": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika" + }, + "wur": { + "level0": "Marrku-Wurrugu" + }, + "wut": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Skouic", + "level3": "Eastern Skouic", + "level4": "West Coast Skouic" + }, + "wuu": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Wu-Hui Chinese" + }, + "wuv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Western Admiralty Islands" + }, + "wux": { + "level0": "Limilngan-Wulna" + }, + "wuy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Maya-Matbat", + "level6": "Raja Ampat Maya" + }, + "wwa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Oti-Volta Oriental", + "level10": "Waama-Tayari-Ditammari" + }, + "wwb": { + "level0": "Unclassifiable" + }, + "wwo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "wwr": { + "level0": "Nyulnyulan", + "level1": "Eastern Nyulnyulan", + "level2": "Nyikinic" + }, + "www": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Vutic", + "level10": "Vute-Wawa" + }, + "wxa": { + "level0": "Sino-Tibetan", + "level1": "Sinitic" + }, + "wya": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "wyb": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Wiradhuric" + }, + "wyi": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Kulin-Bunganditj", + "level4": "Kulin", + "level5": "Nuclear Kulin" + }, + "wym": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "East Middle German", + "level8": "Schlesisch-Wilmesau" + }, + "wyr": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Tuparic", + "level3": "Nuclear Tuparic", + "level4": "Wayoro-Tupari" + }, + "wyy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Western Fijian" + }, + "xaa": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic", + "level7": "Moroccan-Andalusian Arabic" + }, + "xab": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Alumic", + "level5": "Hasha-Sambe" + }, + "xac": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Tiwa-Boro", + "level5": "Bodo-Mech-Kachari" + }, + "xag": { + "level0": "Nakh-Daghestanian", 
+ "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Eastern Samur", + "level5": "Udi-Aghwan" + }, + "xah": { + "level0": "Bookkeeping" + }, + "xai": { + "level0": "Unclassifiable" + }, + "xal": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Mongolian" + }, + "xam": { + "level0": "Tuu", + "level1": "!Ui" + }, + "xan": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "Agaw", + "level3": "Northern-Eastern-Western Agaw", + "level4": "Northeastern Agaw" + }, + "xao": { + "level0": "Bookkeeping" + }, + "xap": { + "level0": "Muskogean", + "level1": "Alabaman-Koasati" + }, + "xas": { + "level0": "Uralic", + "level1": "Samoyedic", + "level2": "Kamas-Selkup" + }, + "xat": { + "level0": "Katukinan" + }, + "xau": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Kwerbaic" + }, + "xav": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Central Je" + }, + "xaw": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Southern Numic" + }, + "xay": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic", + "level6": "Rejang-Makaham Kayan" + }, + "xba": { + "level0": "Bookkeeping" + }, + "xbc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB" + }, + "xbe": { + "level0": "Pama-Nyungan", + "level1": "East Queensland Border Pama Nyungan", + "level2": "Yugambalic", + "level3": "Yugambal-Bigambal" + }, + "xbg": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Kulin-Bunganditj", + "level4": 
"Warrnambool-Bunganditj" + }, + "xbi": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Kombio-Yambes", + "level3": "Kombioic" + }, + "xbo": { + "level0": "Turkic", + "level1": "Bolgar" + }, + "xbr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Central-East Sumbanese" + }, + "xbw": { + "level0": "Unclassifiable" + }, + "xbx": { + "level0": "Bookkeeping" + }, + "xcc": { + "level0": "Unclassifiable" + }, + "xce": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic" + }, + "xcg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Cisalpine Celtic" + }, + "xch": { + "level0": "Chimakuan" + }, + "xcl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Armenic" + }, + "xco": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Sogdic-Ossetic", + "level6": "Sogdic" + }, + "xcr": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Lyco-Carian", + "level6": "Milyan-Carian" + }, + "xct": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan" + }, + "xcv": { + "level0": "Yukaghir", + "level1": "Kolymic" + }, + "xda": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Kuri", + "level5": "Sydney-Hawkesbury" + }, + "xdc": { + "level0": "Indo-European", + "level1": "Unclassified Indo-European" + }, + "xdk": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South 
Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Kuri", + "level5": "Sydney-Hawkesbury" + }, + "xdo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia" + }, + "xdq": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Dargwic", + "level3": "South Dargwa" + }, + "xdy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic" + }, + "xeb": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "East Semitic" + }, + "xed": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Lamang-Hdi" + }, + "xeg": { + "level0": "Tuu", + "level1": "!Ui", + "level2": "Eastern !Ui" + }, + "xel": { + "level0": "Eastern Jebel", + "level1": "Aka-Kelo-Molo" + }, + "xem": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Southern Land Dayak" + }, + "xer": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Central Je" + }, + "xes": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Evapia", + "level4": "Nuclear Evapia", + "level5": "Kesawai-Wia" + }, + "xet": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I" + }, + "xeu": { + "level0": "Eleman", + "level1": "Western Eleman" + }, + "xfa": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan" + }, + "xga": { + 
"level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Continental Transalpine Celtic", + "level6": "Unclassified Continental Transalpine Celtic" + }, + "xgb": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Unclassified Nwa-Ben" + }, + "xgd": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Gudang-Northeast Paman" + }, + "xgf": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan", + "level3": "Serran" + }, + "xgm": { + "level0": "Pama-Nyungan", + "level1": "Rockhampton-Gladstone" + }, + "xgu": { + "level0": "Worrorran", + "level1": "Western Worrorran" + }, + "xgw": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Guwa-Yanda" + }, + "xhd": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic" + }, + "xhe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic" + }, + "xho": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Nguni (S.40)", + "level12": "Nuclear Nguni", + "level13": "Southern Ndebele-Lowland" + }, + "xhu": { + "level0": "Hurro-Urartian" + }, + "xhv": { + "level0": "Bookkeeping" + }, + "xii": { + "level0": "Khoe-Kwadi", + 
"level1": "Khoe", + "level2": "Khoekhoe", + "level3": "South Khoekhoe" + }, + "xil": { + "level0": "Unclassifiable" + }, + "xip": { + "level0": "Unattested", + "level1": "Pano-Tacanan (Unattested)" + }, + "xir": { + "level0": "Arawakan", + "level1": "Negro-Roraima", + "level2": "Bahuanaic" + }, + "xiv": { + "level0": "Unattested" + }, + "xiy": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Jurunic" + }, + "xjb": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Greater Bandjalangic", + "level4": "Bandjalangic", + "level5": "Coastal Bandjalang" + }, + "xka": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic", + "level8": "Western Shinaic", + "level9": "Dangari" + }, + "xkb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Western Ede", + "level8": "Southwestern Ede" + }, + "xkc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic" + }, + "xkd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic" + }, + "xke": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Muller-Schwaner", + "level6": "Hovongan-Kereho" + }, + "xkf": 
{ + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Phobjib-Chali-Bumthangic", + "level4": "Chali-Bumthangic", + "level5": "Bumthangic" + }, + "xkg": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding", + "level9": "Kita-Kagoro" + }, + "xkh": { + "level0": "Unattested", + "level1": "Cariban (Unattested)" + }, + "xki": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "xkj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic", + "level10": "Khalkhalic" + }, + "xkk": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Lamamic" + }, + "xkl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Highland Kenyah" + }, + "xkm": { + "level0": "Bookkeeping" + }, + "xkn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic" + }, + "xko": { + "level0": "Bookkeeping" + }, + "xkp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic", + "level10": "Taromic" + }, + "xkq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "xkr": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Central Je" + }, + "xks": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Butonic", + "level9": "East Buton" + }, + "xkt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Mossi-Farefare", + "level14": "Mossic" + }, + "xku": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kamba-Kunyi" + }, + "xkv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern 
Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Western Sotho-Tswana" + }, + "xkw": { + "level0": "Lepki-Murkim-Kembra" + }, + "xkx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Pasismanua" + }, + "xky": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Highland Kenyah", + "level6": "Upper Pujungan" + }, + "xkz": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Phobjib-Chali-Bumthangic", + "level4": "Chali-Bumthangic", + "level5": "Bumthangic" + }, + "xla": { + "level0": "Kamula-Elevala" + }, + "xlb": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Delawaran", + "level5": "Mahican-Woronoco-Pojassick" + }, + "xlc": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Lyco-Carian", + "level6": "Lyco-Sidetic" + }, + "xld": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian" + }, + "xle": { + "level0": "Unclassifiable" + }, + "xlg": { + "level0": "Unclassifiable" + }, + "xlo": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Southern New England Algonquian" + }, + "xlp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Cisalpine Celtic" + }, + "xls": { + 
"level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Unclassified Italic" + }, + "xlu": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Luvian" + }, + "xly": { + "level0": "Unclassifiable" + }, + "xmb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan" + }, + "xmc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "xmd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic" + }, + "xmf": { + "level0": "Kartvelian", + "level1": "Georgian-Zan", + "level2": "Zan" + }, + "xmg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke" + }, + "xmh": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Paman Kuku" + }, + "xmi": { + "level0": "Unattested" + }, + "xmj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Meridional" + }, + "xml": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "Malaysian Sign" + }, + "xmm": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay", + "level7": "Manadoic Malay" + }, + "xmo": { + "level0": "Unattested", + "level1": "Tupian (Unattested)" + }, + "xmp": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Paman Kuku" + }, + "xmq": { + "level0": "Bookkeeping" + }, + "xms": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "xmt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Maya-Matbat" + }, + "xmu": { + "level0": "Eastern Daly" + }, + "xmv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic", + "level7": "Northern Malagasic" + }, + "xmw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic", + "level7": "Northern Malagasic", + "level8": "Tsimihety-Betsimisaraka" + }, + "xmx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Salawati-Batta" + }, + "xmy": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Mayabic" + }, + "xmz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": 
"Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "xna": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian" + }, + "xnb": { + "level0": "Austronesian", + "level1": "Tsouic", + "level2": "Kanakanavu-Saaroa" + }, + "xng": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic" + }, + "xnh": { + "level0": "Bookkeeping" + }, + "xnj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Manda-Ngoni", + "level10": "Tanzania-Mozambique Ngoni" + }, + "xnm": { + "level0": "Nyulnyulan", + "level1": "Eastern Nyulnyulan", + "level2": "Unclassified Eastern Nyulnyulan" + }, + "xnn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Bontok-Kankanay", + "level8": "Kankanay", + "level9": "Maeng-Northern Kankanay" + }, + "xnq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Manda-Ngoni", + "level10": "Tanzania-Mozambique Ngoni" + }, + "xnr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + 
"level8": "Kangri-Dogri" + }, + "xns": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Kinnauric" + }, + "xny": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda" + }, + "xod": { + "level0": "South Bird's Head Family", + "level1": "East South Bird's Head" + }, + "xog": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "North Nyanza", + "level11": "Soga-Kenyi" + }, + "xoi": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan", + "level3": "Unclassified Tamolan" + }, + "xok": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Southern Je", + "level3": "Kaingang-Xokleng" + }, + "xom": { + "level0": "Koman", + "level1": "Central Koman", + "level2": "Komo-Uduk" + }, + "xon": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma B", + "level13": "Konkomba-Gangam" + }, + "xop": { + "level0": "Lower Sepik", + "level1": "Nor" + }, + "xor": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mayoruna Branch", + "level3": "Mayo Group", + "level4": "Matses subgroup" + }, + "xow": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": 
"Tibor", + "level7": "Nuclear Tibor" + }, + "xpa": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Pirriya-Kungkari" + }, + "xpb": { + "level0": "North-Eastern Tasmanian" + }, + "xpc": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Unclassified Kipchak" + }, + "xpe": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Kpelle" + }, + "xpf": { + "level0": "South-Eastern Tasmanian" + }, + "xpg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian" + }, + "xph": { + "level0": "North-Eastern Tasmanian" + }, + "xpi": { + "level0": "Unclassifiable" + }, + "xpk": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mayoruna Branch", + "level3": "Mayo Group", + "level4": "Matses subgroup" + }, + "xpl": { + "level0": "Western Tasmanian" + }, + "xpm": { + "level0": "Yeniseian" + }, + "xpn": { + "level0": "Unclassifiable" + }, + "xpo": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec" + }, + "xpr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian" + }, + "xps": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Unclassified Luvic" + }, + "xpu": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + "level6": "Ugarito-Phoenician", + "level7": "Phoenician-Punic" + }, + "xpw": { + "level0": "Western Tasmanian", + "level1": "Western Coastal Tasmanian" + }, + "xpx": { + "level0": "Western Tasmanian", + "level1": 
"Western Coastal Tasmanian" + }, + "xpz": { + "level0": "South-Eastern Tasmanian" + }, + "xqt": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic" + }, + "xrb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "Karaboro" + }, + "xre": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je", + "level5": "Eastern Timbira" + }, + "xrn": { + "level0": "Yeniseian" + }, + "xrr": { + "level0": "Unclassifiable" + }, + "xrt": { + "level0": "Unclassifiable" + }, + "xru": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Marithielic" + }, + "xrw": { + "level0": "Sepik", + "level1": "Ram", + "level2": "Pouye-Karawa" + }, + "xsa": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic" + }, + "xsb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Tina-Bolinao" + }, + "xsd": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Lyco-Carian", + "level6": "Lyco-Sidetic" + }, + "xse": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro" + }, + "xsh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Hyamic" + }, + "xsi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz 
linkage" + }, + "xsk": { + "level0": "Bookkeeping" + }, + "xsl": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Slaveyic", + "level5": "Slave" + }, + "xsm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi", + "level8": "Nuna-Kasem" + }, + "xsn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos", + "level10": "Chokobo-Lemoro-Sanga", + "level11": "Lemoro-Sanga" + }, + "xso": { + "level0": "Unclassifiable" + }, + "xsp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Silopic", + "level6": "Silopi-Utu" + }, + "xsq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "xsr": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Sherpa-Jirel", + "level9": "Sherpaic" + }, + "xss": { + "level0": "Bookkeeping" + }, + "xst": { + "level0": "Bookkeeping" + }, + "xsu": { + "level0": "Yanomamic" + }, + "xsy": { + "level0": "Austronesian", + "level1": "Northwest Formosan" + }, + "xta": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": 
"Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Nuclear Guerrero Mixtec" + }, + "xtb": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northern Baja Mixtec" + }, + "xtc": { + "level0": "Kadugli-Krongo", + "level1": "Central-Western Kadugli-Krongo", + "level2": "Katcha-Kadugli-Miri-Kanga" + }, + "xtd": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec" + }, + "xte": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Eastern Mek" + }, + "xtg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Continental Transalpine Celtic" + }, + "xti": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic", + "level9": "Sinicahua-Tijaltepec" + }, + "xtj": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Tlaxiacic", + "level8": "Yucuane-Teita" + }, + "xtl": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic", + "level9": "Sinicahua-Tijaltepec" + }, + "xtm": { + "level0": 
"Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Tlaxiacic" + }, + "xtn": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec" + }, + "xto": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Tokharian" + }, + "xtp": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec", + "level9": "Sindihuic" + }, + "xtq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Saka-Wakhi", + "level5": "Saka" + }, + "xts": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec", + "level9": "Sindihuic" + }, + "xtt": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Tlaxiacic" + }, + "xtu": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northern Alta Mixtec" + }, + "xtv": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": 
"Yuin" + }, + "xtw": { + "level0": "Nambiquaran", + "level1": "Nambikwara Complex", + "level2": "Northern Nambiquaran", + "level3": "Roosevelt" + }, + "xty": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Nuclear Guerrero Mixtec", + "level8": "Southwestern Guerrero Mixtec" + }, + "xtz": { + "level0": "Bookkeeping" + }, + "xua": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid" + }, + "xub": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid" + }, + "xuf": { + "level0": "Bookkeeping" + }, + "xug": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Okinawa" + }, + "xuj": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid" + }, + "xum": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Sabellic" + }, + "xuo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Central Mbum" + }, + "xup": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "Oregon Athabaskan" + }, + "xur": { + "level0": "Hurro-Urartian" + }, + "xut": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Norman Pama", + "level3": "Kuthant-Gurdjar" + }, + "xuu": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "West-Kxoe", + 
"level4": "Kxoe-Ani" + }, + "xve": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic" + }, + "xwc": { + "level0": "Siouan", + "level1": "Catawban" + }, + "xwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "xwg": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southeast Surmic" + }, + "xwl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "xwr": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Kwerbaic" + }, + "xxb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo" + }, + "xxk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Eastern Central Flores", + "level8": "Nage-Keo" + }, + "xxr": { + "level0": "Nuclear-Macro-Je", + "level1": "Maxakali-Borum", + "level2": "Maxakalian", + "level3": "Nuclear Maxakalian", + "level4": "Unclassified Nuclear Maxakalian" + }, + "xya": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "North Coast Pama-Nyungan", + "level3": "Gumbaynggiric" + }, + "xyb": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric", + "level5": "Bidyaric" + }, + "xyl": { + "level0": "Unattested", + "level1": "Nambiquaran (Unattested)" + }, + "xyy": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Eastern Victoria" + }, + "xzh": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West 
Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Pithauragarh", + "level5": "Darma-Byangsi-Chaudangsi", + "level6": "Darma-Byangsi", + "level7": "Zhangzhungic" + }, + "yaa": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano", + "level5": "Yaminawa Complex" + }, + "yab": { + "level0": "Naduhup", + "level1": "Eastern Naduhup", + "level2": "Hup-Yuhup" + }, + "yac": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Ngalik-Nduga", + "level3": "Yalic" + }, + "yad": { + "level0": "Peba-Yagua" + }, + "yaf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Yaka-Suku" + }, + "yah": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Eastern Iranian", + "level5": "Shughni-Yazgulami" + }, + "yai": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Sogdic-Ossetic", + "level6": "Sogdic", + "level7": "Sogdian-Yagnobi" + }, + "yaj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic" + }, + "yak": { + "level0": "Sahaptian", + "level1": "Sahaptin", + "level2": "Northern Sahaptin" + }, + "yal": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + 
"level3": "Central Mande", + "level4": "Susu-Yalunka" + }, + "yam": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe" + }, + "yan": { + "level0": "Misumalpan", + "level1": "Sumalpan", + "level2": "Sumuic" + }, + "yao": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Ruvuma", + "level9": "Yaoic" + }, + "yap": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Yapesic" + }, + "yaq": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Cahitan" + }, + "yar": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku", + "level3": "Mapoyo-Yawarana" + }, + "yas": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa", + "level10": "Mbure-Yambassa", + "level11": "Yambassa (A.60)", + "level12": "Mmala-Elip-Gunu", + "level13": "Elip-Gunu" + }, + "yat": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "West Mbam (A.40)" + }, + "yav": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow 
Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa", + "level10": "Mbure-Yambassa", + "level11": "Yambassa (A.60)" + }, + "yaw": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xinguan Arawak" + }, + "yax": { + "level0": "Bookkeeping" + }, + "yay": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Ubaghara-Kohumono", + "level8": "Kohumonoic" + }, + "yaz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Lokoic" + }, + "yba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya", + "level5": "Etulo-Idoma", + "level6": "Nuclear Idoma" + }, + "ybb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke", + "level11": "Bamboutos" + }, + "ybd": { + "level0": "Bookkeeping" + }, + "ybe": { + "level0": "Turkic", + "level1": "Common Turkic" + }, + "ybh": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar", + "level6": "Yakkha-Athpariyic" + }, + "ybi": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Upper Arun", + "level6": "Lohorung-Yamphu", + 
"level7": "Yamphuic" + }, + "ybj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Alumic", + "level5": "Hasha-Sambe" + }, + "ybk": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji", + "level11": "Nuclear Core Muji", + "level12": "Bokha-Phuma" + }, + "ybl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic" + }, + "ybm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Karian-Usan-Yaben" + }, + "ybn": { + "level0": "Arawakan", + "level1": "Medio Rio Negro", + "level2": "Marauia-Castana" + }, + "ybo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Greater Yaganon", + "level4": "Yaganon" + }, + "ybx": { + "level0": "Walioic" + }, + "yby": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria" + }, + "ych": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu", + "level11": "Unclassified Nasu-Gepu" + }, + "ycl": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": 
"Lipo-Lolopo" + }, + "ycn": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Caqueta" + }, + "ycp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Ha-Ya", + "level8": "Akhaic" + }, + "ycr": { + "level0": "Japonic", + "level1": "Japanesic", + "level2": "Japan-Taiwan Japanese" + }, + "yda": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Guwa-Yanda" + }, + "ydd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "East Middle German", + "level8": "Schlesisch-Wilmesau" + }, + "yde": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Yangum-Ambrak", + "level5": "Yangum" + }, + "ydg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Eastern Iranian", + "level5": "Yidgha-Munji" + }, + "ydk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Rempic" + }, + "yds": { + "level0": "Bookkeeping" + }, + "yea": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid", + "level10": "Ravulic" + }, + "yec": { + "level0": "Mixed Language", + "level1": "German-Yiddish-Romani-Rotwelsch" + }, + "yee": { + "level0": "Lower Sepik", + "level1": "Karawarian" + }, + 
"yei": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila", + "level11": "Njerup" + }, + "yej": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek", + "level6": "Koineic Greek", + "level7": "Modern Koineic Greek" + }, + "yel": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic" + }, + "yen": { + "level0": "Bookkeeping" + }, + "yer": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Yangkam-Tarok-Pe", + "level6": "Tarok-Pe" + }, + "yes": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Koroic" + }, + "yeu": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Yerukula-Korava-Kaikadi" + }, + "yev": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "West Palai", + "level3": "Agi-Yeri" + }, + "yey": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu" + }, + "ygl": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Yangum-Ambrak", + "level5": "Yangum" + }, + "ygm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup", + "level5": "Degenanic" + }, + "ygp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu" + }, + "ygr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria" + }, + "ygs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "ygu": { + "level0": "Unattested", + "level1": "Mangarrayi-Maran (Unattested)" + }, + "ygw": { + "level0": "Angan", + "level1": "Nuclear Angan" + }, + "yha": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Southern Kra" + }, + "yhd": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Eastern Arabic", + "level7": "Qeltu" + }, + "yhl": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Hlepho-Phukha" + }, + "yia": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan" + }, + "yib": { + "level0": "Bookkeeping" + }, 
+ "yif": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Unclassified Nisoid" + }, + "yig": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nesu" + }, + "yih": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Upper Franconian", + "level10": "Greater East Franconian" + }, + "yii": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic", + "level2": "Yidinic" + }, + "yij": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Yindjibarndi-Kurrama" + }, + "yik": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Lalo", + "level10": "Greater Lalo", + "level11": "Core Lalo" + }, + "yil": { + "level0": "Pama-Nyungan", + "level1": "Ngarna", + "level2": "Southern Ngarna", + "level3": "Ngarru" + }, + "yim": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga", + "level4": "Yimchingric" + }, + "yin": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "West Palaungic", + "level4": "Riang" + }, + "yio": 
{ + "level0": "Bookkeeping" + }, + "yip": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish" + }, + "yiq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Lipo-Lolopo", + "level7": "Lipo-Micha" + }, + "yir": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu" + }, + "yis": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Yau-Yis" + }, + "yit": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Lalo", + "level10": "Greater Lalo", + "level11": "Core Lalo", + "level12": "Unclassified Core Lalu" + }, + "yiu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid" + }, + "yiv": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nisu-Nyisu", + "level8": "Nisu", + "level9": "Nuclear Nisu", + "level10": "Northern Nisu" + }, + "yix": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Axioid", + "level7": "Sani-Axi-Azhe", + "level8": "Sani-Axi" + }, + "yiy": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Southwest 
Pama", + "level3": "Coastal Southwest Paman" + }, + "yiz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Axioid", + "level7": "Sani-Axi-Azhe", + "level8": "Sani-Axi" + }, + "yka": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw" + }, + "ykg": { + "level0": "Yukaghir" + }, + "ykh": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic" + }, + "yki": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Lower Mamberamo", + "level5": "Yoke-Pauwi" + }, + "ykk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Wedauic" + }, + "ykl": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Hlepho-Phukha", + "level9": "Khlula-Zokhuo" + }, + "ykm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Ali-Tumleo" + }, + "ykn": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": 
"Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Kuansi-Kuamasi-Sonaga", + "level10": "Kuansi-Kuamasi" + }, + "yko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Bengaic", + "level9": "Yasa-Kombe" + }, + "ykr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean" + }, + "ykt": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Mondzish" + }, + "yku": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Kuansi-Kuamasi-Sonaga", + "level10": "Kuansi-Kuamasi" + }, + "yky": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic" + }, + "yla": { + "level0": "Keram", + "level1": "Ulmapo" + }, + "ylg": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Manambu-Yalaku" + }, + "yli": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Ngalik-Nduga", + "level3": "Yalic" + }, + "yll": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Ningil-Yil" + }, + "ylm": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": 
"Lipo-Lolopo", + "level7": "Unclassified Lipo-Lolopo" + }, + "yln": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Eastern Kra", + "level3": "Buyang", + "level4": "Northern Buyang" + }, + "ylo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Unclassified Lisoid" + }, + "ylr": { + "level0": "Pama-Nyungan", + "level1": "Kalkatungic" + }, + "ylu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham", + "level9": "Busu" + }, + "yly": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Extreme Northern New Caledonian", + "level9": "Nyalayu" + }, + "yma": { + "level0": "Bookkeeping" + }, + "ymb": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Kombio-Yambes" + }, + "ymc": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji", + "level11": "Nuclear Core Muji", + "level12": "Northern-Southern Muji" + }, + "ymd": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Ha-Ya", + "level8": "Akhaic" + }, + "yme": { + "level0": "Peba-Yagua", + "level1": 
"Peba-Yameo" + }, + "ymg": { + "level0": "Bookkeeping" + }, + "ymh": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Lipo-Lolopo", + "level7": "Unclassified Lipo-Lolopo", + "level8": "Southwestern Lolo" + }, + "ymi": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji" + }, + "ymj": { + "level0": "Bookkeeping" + }, + "ymk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Ruvuma", + "level9": "Makonde-Makwe" + }, + "yml": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage", + "level9": "Iamalelic" + }, + "ymm": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana" + }, + "ymn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi" + }, + "ymo": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Yangum-Ambrak", + 
"level5": "Yangum" + }, + "ymp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Misim-Yamap" + }, + "ymq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji" + }, + "ymr": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Malasa-Eravallan" + }, + "ymx": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji", + "level11": "Nuclear Core Muji", + "level12": "Northern-Southern Muji" + }, + "ymz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji", + "level11": "Nuclear Core Muji" + }, + "yna": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": 
"Unclassified Nuclear Nisoid" + }, + "ynd": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Western Central Karnic", + "level4": "Yandruwandhic" + }, + "yne": { + "level0": "Bookkeeping" + }, + "yng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka", + "level8": "Monzomboic", + "level9": "Kpala-Bakpa" + }, + "ynh": { + "level0": "Bookkeeping" + }, + "ynk": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Yupik" + }, + "ynl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Nuru" + }, + "yno": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Northern Shanic", + "level12": "Sipsongpannic" + }, + "ynq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Waka-Yendang-Teme", + "level8": "Waka-Yandang" + }, + "yns": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie" + }, + "ynu": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "South Eastern Tucanoan" + }, + "yob": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Oumic", + "level9": "Magoric" + }, + "yog": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Gaddangic" + }, + "yoi": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Southern Ryukyu", + "level3": "Macro-Yaeyama" + }, + "yok": { + "level0": "Yokutsan", + "level1": "General Yokuts", + "level2": "Nim Yokuts" + }, + "yol": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English" + }, + "yom": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo" + }, + "yon": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Lowland Ok", + "level6": "Division A Lowland Ok" + }, + "yor": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": 
"Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede", + "level8": "Southeastern Ede", + "level9": "Nuclear Yoruba", + "level10": "Lucumi-Yoruba" + }, + "yos": { + "level0": "Bookkeeping" + }, + "yot": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Bali-Kpasam" + }, + "yox": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami" + }, + "yoy": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Lao-Thai", + "level10": "Sakon Nakhon" + }, + "ypa": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Upriver Riverine Phula" + }, + "ypb": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Ani-Labo" + }, + "ypg": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Upriver Riverine Phula", + "level8": "Pholic" + }, + "yph": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Downriver Riverine Phula", + "level8": 
"Phupha-Alugu" + }, + "ypl": { + "level0": "Bookkeeping" + }, + "ypm": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji", + "level11": "Nuclear Core Muji", + "level12": "Bokha-Phuma" + }, + "ypn": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Ani-Labo" + }, + "ypo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Upriver Riverine Phula", + "level8": "Pholic" + }, + "ypp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Downriver Riverine Phula", + "level8": "Phupa-Phuza" + }, + "ypw": { + "level0": "Bookkeeping" + }, + "ypz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Downriver Riverine Phula", + "level8": "Phupa-Phuza" + }, + "yrb": { + "level0": "Yareban", + "level1": "Yareba-Bariji-Nawaru" + }, + "yre": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan", + "level4": "Guro-Dan", + "level5": "Guro-Yaoure" + }, + "yri": { + "level0": "Bookkeeping" + }, + "yrk": { + "level0": "Uralic", + "level1": "Samoyedic", + "level2": "Enets-Nenets", + "level3": "Nenets" + 
}, + "yrl": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup III" + }, + "yrn": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Eastern Kra", + "level3": "Buyang" + }, + "yro": { + "level0": "Yanomamic", + "level1": "Ninam-Yanomam-Yaroame", + "level2": "Yanomam-Yaroame" + }, + "yrs": { + "level0": "Bookkeeping" + }, + "yrw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Yarawata-Parawen-Ukuriguma" + }, + "ysd": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Kazhouish" + }, + "ysg": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Kuansi-Kuamasi-Sonaga" + }, + "ysl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Yugoslav Sign" + }, + "ysm": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Myanmar Sign" + }, + "ysn": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Axioid", + "level7": "Sani-Axi-Azhe" + }, + "yso": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Unclassified Southeastern Ngwi" + }, + "ysp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + 
"level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Lipo-Lolopo", + "level7": "Unclassified Lipo-Lolopo", + "level8": "Southwestern Lolo" + }, + "ysr": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo" + }, + "yss": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mayo-Pasi" + }, + "ysy": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu" + }, + "yta": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu" + }, + "ytl": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid" + }, + "ytp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji" + }, + "ytw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna", + "level4": "Unclassified Yupna" + }, + "yua": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Yucatecan", + "level3": "Nuclear Yucatecan", + "level4": "Yucatec-Lacandon" + }, + "yub": { + "level0": "Pama-Nyungan", + "level1": "East Queensland Border Pama Nyungan", + "level2": "Yugambalic", + "level3": "Yugambal-Bigambal" + }, + "yud": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + 
"level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "yue": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Yue-Pinghua" + }, + "yuf": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "Pai" + }, + "yug": { + "level0": "Yeniseian", + "level1": "Northern Yeniseian" + }, + "yui": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Pisamira-Yuruti", + "level5": "Tuyuca-Yuruti" + }, + "yuj": { + "level0": "Pauwasi", + "level1": "Eastern Pauwasi" + }, + "yuk": { + "level0": "Yuki-Wappo" + }, + "yul": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental" + }, + "yum": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "River Yuman" + }, + "yun": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bena-Mboi", + "level5": "Bena", + "level6": "Yungur-Voro" + }, + "yup": { + "level0": "Cariban", + "level1": "Opon-Yukpan", + "level2": "Yukpan" + }, + "yuq": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup II", + "level7": "Warazu-Sirionoid", + "level8": "Sirionoid" + }, + "yur": { + "level0": "Algic" + }, + "yus": { + "level0": "Bookkeeping" + }, + "yut": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna", + "level4": "Kewieng-Bonkiman-Nokopo" + }, + "yuu": { + "level0": "Bookkeeping" + }, + "yuw": { + "level0": "Nuclear Trans New Guinea", 
+ "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Uruwa" + }, + "yux": { + "level0": "Yukaghir", + "level1": "Kolymic" + }, + "yuy": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Southern Periphery Mongolic" + }, + "yva": { + "level0": "Yawa-Saweru" + }, + "yvt": { + "level0": "Arawakan", + "level1": "Alto Orinoco", + "level2": "Parenic" + }, + "ywa": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mayo-Pasi", + "level3": "Yimin-Bel" + }, + "ywg": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Panytyima-Yinhawangka" + }, + "ywl": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Lalo", + "level10": "Greater Lalo", + "level11": "Core Lalo", + "level12": "Central-Western Lalo" + }, + "ywm": { + "level0": "Bookkeeping" + }, + "ywn": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano", + "level5": "Yaminawa Complex" + }, + "ywq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu" + }, + "ywr": { + "level0": "Nyulnyulan", + "level1": "Eastern Nyulnyulan", + "level2": "Yawuric" + }, + "ywt": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": 
"Lalo", + "level10": "Greater Lalo", + "level11": "Core Lalo", + "level12": "Central-Western Lalo" + }, + "ywu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nesu" + }, + "yww": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Western Central Karnic", + "level4": "Yandruwandhic" + }, + "yxm": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama" + }, + "yym": { + "level0": "Bookkeeping" + }, + "yyu": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Yau-Yis" + }, + "yyz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu", + "level11": "Unclassified Nasu-Gepu" + }, + "yzg": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Eastern Kra", + "level3": "Buyang", + "level4": "Northern Buyang" + }, + "yzk": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Hlepho-Phukha", + "level9": "Khlula-Zokhuo" + }, + "zaa": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Sierra Juarezic" + }, + "zab": { + "level0": "Otomanguean", + "level1": 
"Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec" + }, + "zac": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec", + "level9": "Extended Ocotepec Zapotec" + }, + "zad": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Cajonosic" + }, + "zae": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Sierra Juarezic" + }, + "zaf": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zag": { + "level0": "Saharan", + "level1": "Eastern Saharan" + }, + "zah": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Guruntumic", + "level7": "Tala-Sho-Zangwal", + "level8": "Tala-Zamwar" + }, + "zai": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core 
Zapotec", + "level8": "Western Valley Zapotec", + "level9": "Extended Ocotepec Zapotec" + }, + "zaj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu", + "level11": "Central East Ruvu", + "level12": "Kutu-Zaramo" + }, + "zak": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "South Mara", + "level12": "Southwest Mara" + }, + "zal": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Nusoish" + }, + "zam": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Miahuatecano", + "level8": "Miahuateco" + }, + "zao": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Miahuatecano", + "level8": "Miahuateco" + }, + "zaq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Sierra Juarezic" + }, + "zar": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", 
+ "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Rinconic" + }, + "zas": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zat": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Cajonosic" + }, + "zau": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Western Archaic Tibetan", + "level5": "Kenhatic" + }, + "zav": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Cajonosic" + }, + "zaw": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zax": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o" + }, + "zay": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "East Ometo" + }, + "zaz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": 
"Southwest South Bauchi", + "level7": "Zakse-Saya" + }, + "zbc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Berawan", + "level6": "Central-East Berawan" + }, + "zbe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Berawan", + "level6": "Central-East Berawan" + }, + "zbl": { + "level0": "Artificial Language" + }, + "zbt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Western Saluan-Banggai", + "level7": "Saluanic", + "level8": "Batui-Saluan" + }, + "zbu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Gejic" + }, + "zbw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Berawan" + }, + "zca": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano" + }, + "zch": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Hongshui He", + "level8": "Western Hongshui He" + }, + "zdj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", 
+ "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Comorian Bantu" + }, + "zea": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Southwestern Dutch", + "level9": "Zeeuwic" + }, + "zeg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mumeng", + "level10": "Zenag-Patep" + }, + "zeh": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Hongshui He", + "level8": "Western Hongshui He" + }, + "zem": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zeemic", + "level8": "Nuclear Zeemic" + }, + "zen": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Western Berber" + }, + "zga": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Kinga-Magoma" + }, + "zgb": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": 
"Northern Daic", + "level6": "Northern Tai" + }, + "zgm": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Debao-Jingxi-Nung" + }, + "zgn": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Yei Zhuang" + }, + "zgr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Oumic", + "level9": "Magoric" + }, + "zhb": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic" + }, + "zhd": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai" + }, + "zhi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Hyamic", + "level7": "Zhiric" + }, + "zhn": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Debao-Jingxi-Nung" + }, + "zhw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring", + "level10": "Aghemic" + }, + "zia": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + 
"level3": "North Binanderean" + }, + "zib": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "zik": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Boazi" + }, + "zil": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Mende-Bandi", + "level6": "Bandi-Zialo" + }, + "zim": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "South Masa" + }, + "zin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza" + }, + "ziw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu", + "level11": "Seuta", + "level12": "Zigua-Nguu" + }, + "ziz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Gude-Jimi-Zizilivakan" + }, + "zka": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan", + "level9": "Munic" + }, + "zkg": { + "level0": "Unclassifiable" + }, + "zko": { + "level0": "Yeniseian" + }, + "zkp": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Southern Je", + "level3": "Kaingang-Xokleng", + "level4": "Kaingangic" + }, + "zkr": { + "level0": "Sino-Tibetan", + 
"level1": "Kman-Meyor" + }, + "zkt": { + "level0": "Mongolic-Khitan" + }, + "zku": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Southern Thura-Yura" + }, + "zla": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde" + }, + "zlj": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Lianshan-Liujiang" + }, + "zlm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "zln": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Lianshan-Liujiang" + }, + "zlq": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Hongshui He" + }, + "zlu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Polci-Luri", + "level8": "Polcic", + "level9": "Zulic" + }, + "zma": { + "level0": "Western Daly", + "level1": "Maranunggu-Ame-Manda", + "level2": "Ame-Manda" + }, + "zmb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + 
"level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Songola-Binja" + }, + "zmc": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric", + "level5": "Margany-Gunya" + }, + "zmd": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Marithielic" + }, + "zme": { + "level0": "Giimbiyu" + }, + "zmf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie" + }, + "zmg": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Maringarr-Matige" + }, + "zmh": { + "level0": "Baining", + "level1": "Unclassified Baining" + }, + "zmi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Kerinci-Minangkabau", + "level7": "Minangkabauic" + }, + "zmj": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Marithielic" + }, + "zmk": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric" + }, + "zml": { + "level0": "Eastern Daly" + }, + "zmm": { + "level0": "Western Daly", + "level1": "Bringen" + }, + "zmn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ngomic", + "level8": "Nuclear Ngomic" + }, + "zmo": { + "level0": "Eastern Jebel", + "level1": "Aka-Kelo-Molo" + }, + "zmp": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended" + }, + "zmq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Mitukuic" + }, + "zmr": { + "level0": "Western Daly", + "level1": "Maranunggu-Ame-Manda" + }, + "zms": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic" + }, + "zmt": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Maringarr-Matige" + }, + "zmu": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Muruwaric" + }, + "zmv": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Lamalamic" + }, + "zmw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya", + "level14": "Bantu D33", + "level15": "Budu-Ndaka-Mbo", + "level16": "Ndaka-Mbo" + }, + "zmx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": 
"Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Impfondoic" + }, + "zmy": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Marithielic" + }, + "zmz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic" + }, + "zna": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": "Goulaic", + "level7": "Zan-Kulaalic" + }, + "zne": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Zandic", + "level6": "Zande-Nzakara" + }, + "zng": { + "level0": "Austroasiatic", + "level1": "Mangic" + }, + "znk": { + "level0": "Unattested", + "level1": "Iwaidjan Proper (Unattested)" + }, + "zns": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Boghomic", + "level7": "Kir-Mangas" + }, + "zoc": { + "level0": "Mixe-Zoque", + "level1": "Zoque", + "level2": "Chiapas-Jitotolteco Zoque", + "level3": "Chiapas Zoque" + }, + "zoh": { + "level0": "Mixe-Zoque", + "level1": "Zoque" + }, + "zom": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic" + }, + "zoo": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zoq": { + "level0": 
"Mixe-Zoque", + "level1": "Zoque", + "level2": "Gulf Zoque", + "level3": "Texistepec-Ayapa Zoque" + }, + "zor": { + "level0": "Mixe-Zoque", + "level1": "Zoque", + "level2": "Chiapas-Jitotolteco Zoque", + "level3": "Chiapas Zoque" + }, + "zos": { + "level0": "Mixe-Zoque", + "level1": "Zoque", + "level2": "Chiapas-Jitotolteco Zoque", + "level3": "Chiapas Zoque" + }, + "zpa": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Transyautepecan", + "level9": "Northeast Yautepec" + }, + "zpb": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Miahuatecano" + }, + "zpc": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec" + }, + "zpd": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Sierra Juarezic" + }, + "zpe": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Transyautepecan", + "level9": "Northeast Tehuantepec" + }, + "zpf": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + 
"level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zpg": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Transyautepecan", + "level9": "Northeast Tehuantepec" + }, + "zph": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "West Zapotec" + }, + "zpi": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o" + }, + "zpj": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Transyautepecan", + "level9": "Northeast Yautepec" + }, + "zpk": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano", + "level7": "Amatecano" + }, + "zpl": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "West Zapotec", + "level6": "West-Central West Zapotec" + }, + "zpm": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o", + "level9": 
"Mixtepec-Quioquitani-Quieri Zapotec" + }, + "zpn": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec", + "level9": "Extended Ocotepec Zapotec", + "level10": "Tilquiapanic" + }, + "zpo": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano", + "level7": "Amatecano" + }, + "zpp": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "West Zapotec", + "level6": "West-Central West Zapotec" + }, + "zpq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Cajonosic" + }, + "zpr": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o" + }, + "zps": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano", + "level7": "Coatecano", + "level8": "Coatlan-Loxicha Zapotec" + }, + "zpt": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano", + "level7": "Coatecano" + }, + "zpu": { + 
"level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Cajonosic" + }, + "zpv": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec" + }, + "zpw": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Papabuco" + }, + "zpx": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano", + "level7": "Coatecano", + "level8": "Coatlan-Loxicha Zapotec" + }, + "zpy": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zpz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Papabuco" + }, + "zqe": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Yei Zhuang" + }, + "zrg": { + "level0": "Bookkeeping" + }, + "zrn": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Mubic" + }, + "zro": { + "level0": 
"Zaparoan", + "level1": "Zaparo-Abishira" + }, + "zrp": { + "level0": "Bookkeeping" + }, + "zrs": { + "level0": "Mairasic" + }, + "zsa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Upper Markham", + "level9": "Mountain Upper Markham" + }, + "zsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "zsm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Standard Malay-Indonesian" + }, + "zsu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Upper Markham", + "level9": "Mountain Upper Markham" + }, + "ztc": { + "level0": "Bookkeeping" + }, + "zte": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Papabuco" + }, + "ztg": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o" + }, + "ztl": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o" + }, + "ztm": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": 
"Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Miahuatecano", + "level8": "Miahuateco" + }, + "ztn": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "ztp": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Miahuatecano", + "level8": "Miahuateco" + }, + "ztq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o", + "level9": "Mixtepec-Quioquitani-Quieri Zapotec" + }, + "zts": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec", + "level9": "Extended Ocotepec Zapotec", + "level10": "Tilquiapanic" + }, + "ztt": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "ztu": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec" + }, + "ztx": 
{ + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec", + "level9": "Extended Ocotepec Zapotec" + }, + "zty": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Rinconic" + }, + "zuh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Gahuku" + }, + "zul": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Nguni (S.40)", + "level12": "Nuclear Nguni", + "level13": "Southern Ndebele-Lowland" + }, + "zum": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian" + }, + "zuy": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "North Masa", + "level4": "Unclassified North Masa" + }, + "zwa": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Harari-East Gurage" + }, + "zyb": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": 
"Yongnan-Yongbei" + }, + "zyg": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Debao-Jingxi-Nung" + }, + "zyj": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai" + }, + "zyn": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Yongnan-Yongbei" + }, + "zyp": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic", + "level5": "Nuclear Maraic" + }, + "zzj": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic" + } +} \ No newline at end of file diff --git a/mteb/languages.py b/mteb/languages.py index f7f6477503..9b170a707f 100644 --- a/mteb/languages.py +++ b/mteb/languages.py @@ -17,7 +17,7 @@ # Language mappings path_to_lang_codes = Path(__file__).parent / "iso_639_3_to_language.json" path_to_lang_scripts = Path(__file__).parent / "iso_15924_to_script.json" - +path_to_lang_fam = Path(__file__).parent / "language_family.json" with path_to_lang_codes.open("r") as f: ISO_TO_LANGUAGE = json.load(f) @@ -25,6 +25,11 @@ with path_to_lang_scripts.open("r") as f: ISO_TO_SCRIPT = json.load(f) +with path_to_lang_fam.open("r") as f: + ISO_TO_FAM = json.load(f) + +ISO_TO_FAM_LEVEL0 = {k: v["level0"] for k, v in ISO_TO_FAM.items()} + @dataclass class LanguageScripts: diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index 8a5eb961c1..c51dc7a502 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -81,7 +81,7 @@ def update_task_info(task_names: str) -> gr.DataFrame: return gr.DataFrame(df, datatype=["markdown"] + ["str"] 
* (len(df.columns) - 1)) -all_results = load_results().filter_models() +all_results = load_results().join_revisions().filter_models() # Model sizes in million parameters min_model_size, max_model_size = 0, 10_000 @@ -316,12 +316,13 @@ def update_scores( domains=domains, ) lower, upper = model_size - # Multiplying by millions - lower = lower * 1e6 - upper = upper * 1e6 # Setting to None, when the user doesn't specify anything if (lower == min_model_size) and (upper == max_model_size): lower, upper = None, None + else: + # Multiplying by millions + lower = lower * 1e6 + upper = upper * 1e6 benchmark_results = benchmark_results.filter_models( open_weights=availability, use_instructions=instructions, diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py index c965a7f682..9856493c74 100644 --- a/mteb/leaderboard/table.py +++ b/mteb/leaderboard/table.py @@ -32,7 +32,7 @@ def format_scores(score: float) -> float: def format_n_parameters(n_parameters) -> str: if (n_parameters is None) or (not int(n_parameters)): - return "" + return "Unknown" n_thousand = int(n_parameters // 1e3) if n_thousand < 1: return str(int(n_parameters)) @@ -46,9 +46,7 @@ def format_n_parameters(n_parameters) -> str: def split_on_capital(s: str) -> str: """Splits on capital letters and joins with spaces""" - if all(c.isupper() for c in s): - return s - return " ".join(re.findall("[A-Z][^A-Z]*", s)) + return " ".join(re.findall(r"[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)", s)) def get_column_widths(df: pd.DataFrame) -> list[str]: @@ -59,9 +57,12 @@ def get_column_widths(df: pd.DataFrame) -> list[str]: value_lengths = [len(f"{value:.2f}") for value in df[column_name]] else: value_lengths = [len(str(value)) for value in df[column_name]] - max_length = max(max(column_word_lengths), max(value_lengths)) - n_pixels = 25 + (max_length * 10) - widths.append(f"{n_pixels}px") + try: + max_length = max(max(column_word_lengths), max(value_lengths)) + n_pixels = 35 + (max_length * 12.5) + 
widths.append(f"{n_pixels}px") + except Exception: + widths.append("50px") return widths @@ -98,6 +99,21 @@ def get_means_per_types(df: pd.DataFrame) -> pd.DataFrame: return pd.DataFrame.from_records(records) +def failsafe_get_model_meta(model_name): + try: + return get_model_meta(model_name) + except Exception: + return None + + +def format_max_tokens(max_tokens: float | None) -> str: + if max_tokens is None: + return "Unknown" + if max_tokens == np.inf: + return "Infinite" + return str(int(max_tokens)) + + def scores_to_tables( scores_long: list[dict], search_query: str | None = None ) -> tuple[gr.DataFrame, gr.DataFrame]: @@ -131,18 +147,18 @@ def scores_to_tables( joint_table.insert(1, "mean_by_task_type", typed_mean) joint_table["borda_rank"] = get_borda_rank(per_task) joint_table = joint_table.reset_index() - joint_table = joint_table.drop(columns=["model_revision"]) - model_metas = joint_table["model_name"].map(get_model_meta) + model_metas = joint_table["model_name"].map(failsafe_get_model_meta) + joint_table = joint_table[model_metas.notna()] joint_table["model_link"] = model_metas.map(lambda m: m.reference) joint_table.insert( 1, "Max Tokens", - model_metas.map(lambda m: str(int(m.max_tokens)) if m.max_tokens else ""), + model_metas.map(lambda m: format_max_tokens(m.max_tokens)), ) joint_table.insert( 1, "Embedding Dimensions", - model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else ""), + model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"), ) joint_table.insert( 1, @@ -150,14 +166,22 @@ def scores_to_tables( model_metas.map(lambda m: format_n_parameters(m.n_parameters)), ) joint_table = joint_table.sort_values("borda_rank", ascending=True) + per_task = per_task.loc[ + joint_table.set_index(["model_name", "model_revision"]).index + ] + joint_table = joint_table.drop(columns=["model_revision"]) # Removing HF organization from model joint_table["model_name"] = joint_table["model_name"].map( lambda name: 
name.split("/")[-1] ) # Adding markdown link to model names - joint_table["model_name"] = ( - "[" + joint_table["model_name"] + "](" + joint_table.pop("model_link") + ")" + name_w_link = ( + "[" + joint_table["model_name"] + "](" + joint_table["model_link"] + ")" + ) + joint_table["model_name"] = joint_table["model_name"].mask( + joint_table["model_link"].notna(), name_w_link ) + joint_table = joint_table.drop(columns=["model_link"]) joint_table = joint_table.rename( columns={ "model_name": "Model", @@ -176,6 +200,7 @@ def scores_to_tables( ) joint_table.insert(0, "Rank (Borda)", joint_table.pop("borda_rank")) column_widths = get_column_widths(joint_table) + task_column_widths = get_column_widths(per_task) # overriding for model name column_widths[1] = "250px" column_types = get_column_types(joint_table) @@ -198,9 +223,12 @@ def scores_to_tables( return ( gr.DataFrame( joint_table_style, - # column_widths=column_widths, + column_widths=column_widths, datatype=column_types, - # wrap=True, + interactive=False, + wrap=True, + ), + gr.DataFrame( + per_task_style, column_widths=task_column_widths, interactive=False ), - gr.DataFrame(per_task_style), ) diff --git a/mteb/load_results/benchmark_results.py b/mteb/load_results/benchmark_results.py index bf3fa5fe92..25a332e2cc 100644 --- a/mteb/load_results/benchmark_results.py +++ b/mteb/load_results/benchmark_results.py @@ -1,12 +1,15 @@ from __future__ import annotations import json +import warnings from collections import defaultdict from collections.abc import Iterable from pathlib import Path from typing import Any, Callable, Literal import numpy as np +import pandas as pd +from packaging.version import InvalidVersion, Version from pydantic import BaseModel, ConfigDict from mteb.abstasks.AbsTask import AbsTask, ScoresDict @@ -89,36 +92,45 @@ def get_scores( format: Literal["wide", "long"] = "wide", ) -> dict | list: if format == "wide": - scores = { - res.task_name: res.get_score( - splits=splits, - languages=languages, 
- scripts=scripts, - getter=getter, - aggregation=aggregation, - ) - for res in self.task_results - } - return scores - if format == "long": - entries = [] - for task_res in self.task_results: - entry = dict( # noqa - model_name=self.model_name, - model_revision=self.model_revision, - task_name=task_res.task_name, - score=task_res.get_score( + scores = {} + for res in self.task_results: + try: + scores[res.task_name] = res.get_score( splits=splits, languages=languages, + scripts=scripts, getter=getter, aggregation=aggregation, - ), - mteb_version=task_res.mteb_version, - dataset_revision=task_res.dataset_revision, - evaluation_time=task_res.evaluation_time, - kg_co2_emissions=task_res.kg_co2_emissions, - ) - entries.append(entry) + ) + except Exception as e: + warnings.warn( + f"Couldn't get scores for {res.task_name} due to {e}." + ) + return scores + if format == "long": + entries = [] + for task_res in self.task_results: + try: + entry = dict( # noqa + model_name=self.model_name, + model_revision=self.model_revision, + task_name=task_res.task_name, + score=task_res.get_score( + splits=splits, + languages=languages, + getter=getter, + aggregation=aggregation, + ), + mteb_version=task_res.mteb_version, + dataset_revision=task_res.dataset_revision, + evaluation_time=task_res.evaluation_time, + kg_co2_emissions=task_res.kg_co2_emissions, + ) + entries.append(entry) + except Exception as e: + warnings.warn( + f"Couldn't get scores for {task_res.task_name} due to {e}." 
+ ) return entries def __iter__(self): @@ -198,6 +210,8 @@ def filter_models( n_parameters_range: tuple[int | None, int | None] = (None, None), use_instructions: bool | None = None, ) -> BenchmarkResults: + # if model_names is None: + # model_names = [model_res.model_name for model_res in self] model_metas = get_model_metas( model_names=model_names, languages=languages, @@ -206,13 +220,64 @@ def filter_models( n_parameters_range=n_parameters_range, use_instructions=use_instructions, ) - model_revision_pairs = {(meta.name, meta.revision) for meta in model_metas} + models = {meta.name for meta in model_metas} + # model_revision_pairs = {(meta.name, meta.revision) for meta in model_metas} new_model_results = [] for model_res in self: - if (model_res.model_name, model_res.model_revision) in model_revision_pairs: + if model_res.model_name in models: new_model_results.append(model_res) return type(self).model_construct(model_results=new_model_results) + def join_revisions(self): + def parse_version(version_str: str) -> Version | None: + try: + return Version(version_str) + except (InvalidVersion, TypeError): + return None + + def keep_best(group: pd.DataFrame) -> pd.DataFrame: + is_main_revision = group["revision"] == group["main_revision"] + if is_main_revision.sum() == 1: + return group[is_main_revision] + if group["mteb_version"].notna().any(): + group = group.dropna(subset=["mteb_version"]) + group = group.sort_values("mteb_version", ascending=False) + return group.head(n=1) + return group.head(n=1) + + records = [] + for model_result in self: + for task_result in model_result: + records.append( + dict( + model=model_result.model_name, + revision=model_result.model_revision, + task_name=task_result.task_name, + mteb_version=task_result.mteb_version, + task_result=task_result, + ) + ) + task_df = pd.DataFrame.from_records(records) + model_to_main_revision = { + meta.name: meta.revision for meta in get_model_metas() + } + task_df["main_revision"] = 
task_df["model"].map(model_to_main_revision) + task_df["mteb_version"] = task_df["mteb_version"].map(parse_version) + task_df = ( + task_df.groupby(["model", "task_name"]) + .apply(keep_best) + .reset_index(drop=True) + ) + model_results = [] + for (model, model_revision), group in task_df.groupby(["model", "revision"]): + model_result = ModelResult.model_construct( + model_name=model, + model_revision=model_revision, + task_results=list(group["task_result"]), + ) + model_results.append(model_result) + return BenchmarkResults.model_construct(model_results=model_results) + def get_scores( self, splits: list[Split] | None = None, @@ -225,33 +290,43 @@ def get_scores( entries = [] if format == "wide": for model_res in self: - model_scores = model_res.get_scores( - splits=splits, - languages=languages, - scripts=scripts, - getter=getter, - aggregation=aggregation, - format="wide", - ) - entries.append( - { - "model": model_res.model_name, - "revision": model_res.model_revision, - **model_scores, - } - ) - if format == "long": - for model_res in self: - entries.extend( - model_res.get_scores( + try: + model_scores = model_res.get_scores( splits=splits, languages=languages, scripts=scripts, getter=getter, aggregation=aggregation, - format="long", + format="wide", + ) + entries.append( + { + "model": model_res.model_name, + "revision": model_res.model_revision, + **model_scores, + } + ) + except Exception as e: + warnings.warn( + f"Couldn't get scores for {model_res.model_name}({model_res.model_revision}), due to: {e}" + ) + if format == "long": + for model_res in self: + try: + entries.extend( + model_res.get_scores( + splits=splits, + languages=languages, + scripts=scripts, + getter=getter, + aggregation=aggregation, + format="long", + ) + ) + except Exception as e: + warnings.warn( + f"Couldn't get scores for {model_res.model_name}({model_res.model_revision}), due to: {e}" ) - ) return entries def __iter__(self): diff --git a/mteb/load_results/task_results.py 
b/mteb/load_results/task_results.py index 8f587fd72b..965de9d415 100644 --- a/mteb/load_results/task_results.py +++ b/mteb/load_results/task_results.py @@ -514,12 +514,14 @@ def validate_and_filter_scores(self, task: AbsTask | None = None) -> AbsTask: new_scores[split].append(_scores) seen_subsets.add(_scores["hf_subset"]) if seen_subsets != hf_subsets: - raise ValueError( - f"Missing subsets {hf_subsets - seen_subsets} for split {split}" + logger.warning( + f"{task.metadata.name}: Missing subsets {hf_subsets - seen_subsets} for split {split}" ) seen_splits.add(split) if seen_splits != set(splits): - raise ValueError(f"Missing splits {set(splits) - seen_splits}") + logger.warning( + f"{task.metadata.name}: Missing splits {set(splits) - seen_splits}" + ) new_res = {**self.to_dict(), "scores": new_scores} new_res = TaskResult.from_validated(**new_res) return new_res diff --git a/mteb/model_meta.py b/mteb/model_meta.py index df6ac598fb..38b77432eb 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -25,6 +25,7 @@ "TensorFlow", "API", "Tevatron", + "NumPy", ] DISTANCE_METRICS = Literal["cosine", "dot"] @@ -75,6 +76,11 @@ class ModelMeta(BaseModel): zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models are evaluated non-zero-shot unless specified otherwise. citation: The citation for the model. This is a bibtex string. + training_datasets: A dictionary of datasets that the model was trained on. Names should be names as their appear in `mteb` for example + {"ArguAna": ["test"]} if the model is trained on the ArguAna test set. This field is used to determine if a model generalizes zero-shot to + a benchmark as well as mark dataset contaminations. + adapted_from: Name of the model from which this model is adapted from. For quantizations, fine-tunes, long doc extensions, etc. + superseded_by: Name of the model that supersedes this model, e.g. nvidia/NV-Embed-v2 supersedes v1. 
""" model_config = ConfigDict(extra="forbid") @@ -86,7 +92,7 @@ class ModelMeta(BaseModel): loader: Callable[..., Encoder] | None = None n_parameters: int | None = None memory_usage: float | None = None - max_tokens: int | None = None + max_tokens: float | None = None embed_dim: int | None = None license: str | None = None open_weights: bool | None = None @@ -96,7 +102,9 @@ class ModelMeta(BaseModel): reference: STR_URL | None = None similarity_fn_name: DISTANCE_METRICS | None = None use_instructions: bool | None = None - zero_shot_benchmarks: list[str] | None = None + training_datasets: dict[str, list[str]] | None = None + adapted_from: str | None = None + superseded_by: str | None = None citation: str | None = None def to_dict(self): diff --git a/mteb/models/__init__.py b/mteb/models/__init__.py index 3804aebcd8..ce63e85798 100644 --- a/mteb/models/__init__.py +++ b/mteb/models/__init__.py @@ -1,7 +1,5 @@ from __future__ import annotations -import logging - from mteb.models.overview import ( MODEL_REGISTRY, ModelMeta, @@ -11,8 +9,7 @@ model_meta_from_sentence_transformers, ) -logger = logging.getLogger(__name__) - +from .sentence_transformer_wrapper import SentenceTransformerWrapper __all__ = [ "MODEL_REGISTRY", @@ -21,4 +18,5 @@ "get_model_meta", "get_model_metas", "model_meta_from_sentence_transformers", + "SentenceTransformerWrapper", ] diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py index b0be125891..ce1db29bbd 100644 --- a/mteb/models/arctic_models.py +++ b/mteb/models/arctic_models.py @@ -22,11 +22,13 @@ n_parameters=109_000_000, memory_usage=None, max_tokens=512, - embed_dim=256, + embed_dim=768, license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5", - similarity_fn_name="cosine_similarity", + similarity_fn_name="cosine", use_instructions=False, + adapted_from=None, + superseded_by=None, citation="""@misc{merrick2024embeddingclusteringdataimprove, title={Embedding And Clustering Your Data 
Can Improve Contrastive Pretraining}, author={Luke Merrick}, @@ -37,3 +39,127 @@ url={https://arxiv.org/abs/2407.18887}, }""", ) + + +arctic_embed_xs = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-xs", + revision="742da4f66e1823b5b4dbe6c320a1375a1fd85f9e", + ), + name="Snowflake/snowflake-arctic-embed-xs", + revision="742da4f66e1823b5b4dbe6c320a1375a1fd85f9e", + release_date="2024-07-08", # initial commit of hf model. + languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=22_600_000, + memory_usage=None, + max_tokens=512, + embed_dim=384, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-xs", + similarity_fn_name="cosine", + use_instructions=False, + adapted_from="sentence-transformers/all-MiniLM-L6-v2", + superseded_by=None, +) + + +arctic_embed_s = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-s", + revision="d3c1d2d433dd0fdc8e9ca01331a5f225639e798f", + ), + name="Snowflake/snowflake-arctic-embed-s", + revision="d3c1d2d433dd0fdc8e9ca01331a5f225639e798f", + release_date="2024-04-12", # initial commit of hf model. 
+ languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=32_200_000, + memory_usage=None, + max_tokens=512, + embed_dim=384, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-s", + similarity_fn_name="cosine", + use_instructions=False, + adapted_from="intfloat/e5-small-unsupervised", + superseded_by=None, +) + + +arctic_embed_m = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-m", + revision="cc17beacbac32366782584c8752220405a0f3f40", + ), + name="Snowflake/snowflake-arctic-embed-m", + revision="cc17beacbac32366782584c8752220405a0f3f40", + release_date="2024-04-12", # initial commit of hf model. + languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=109_000_000, + memory_usage=None, + max_tokens=512, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m", + similarity_fn_name="cosine", + use_instructions=False, + adapted_from="intfloat/e5-base-unsupervised", + superseded_by="Snowflake/snowflake-arctic-embed-m-v1.5", +) + +arctic_embed_m_long = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-m-long", + revision="89d0f6ab196eead40b90cb6f9fefec01a908d2d1", + ), + name="Snowflake/snowflake-arctic-embed-m-long", + revision="89d0f6ab196eead40b90cb6f9fefec01a908d2d1", + release_date="2024-04-12", # initial commit of hf model. 
+ languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=109_000_000, + memory_usage=None, + max_tokens=2048, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-long", + similarity_fn_name="cosine", + use_instructions=False, + adapted_from="nomic-ai/nomic-embed-text-v1-unsupervised", + superseded_by=None, +) + + +arctic_embed_l = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-l", + revision="9a9e5834d2e89cdd8bb72b64111dde496e4fe78c", + ), + name="Snowflake/snowflake-arctic-embed-l", + revision="9a9e5834d2e89cdd8bb72b64111dde496e4fe78c", + release_date="2024-04-12", # initial commit of hf model. + languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=109_000_000, + memory_usage=None, + max_tokens=512, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-l", + similarity_fn_name="cosine", + use_instructions=False, + adapted_from="intfloat/e5-base-unsupervised", + superseded_by=None, +) diff --git a/mteb/models/bm25.py b/mteb/models/bm25.py index 1848b9e4e4..7d1161cdde 100644 --- a/mteb/models/bm25.py +++ b/mteb/models/bm25.py @@ -17,7 +17,7 @@ def bm25_loader(**kwargs): import Stemmer except ImportError: raise ImportError( - "bm25s or Stemmer is not installed. Please install it with `pip install bm25s Stemmer`." + "bm25s or Stemmer is not installed. Please install it with `pip install bm25s PyStemmer`." 
) class BM25Search(DRESModel, Wrapper): @@ -58,7 +58,17 @@ def search( ) -> dict[str, dict[str, float]]: logger.info("Encoding Corpus...") corpus_ids = list(corpus.keys()) - corpus_with_ids = [{"doc_id": cid, **corpus[cid]} for cid in corpus_ids] + corpus_with_ids = [ + { + "doc_id": cid, + **( + {"text": corpus[cid]} + if isinstance(corpus[cid], str) + else corpus[cid] + ), + } + for cid in corpus_ids + ] corpus_texts = [ "\n".join([doc.get("title", ""), doc["text"]]) diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index 26eb5e92ed..3f07a0d23b 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -5,12 +5,125 @@ import numpy as np import torch +import tqdm from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta from .wrapper import Wrapper +supported_languages = [ + "afr-Latn", + "amh-Ethi", + "ara-Arab", + "asm-Beng", + "aze-Latn", + "bel-Cyrl", + "bul-Cyrl", + "ben-Beng", + "bod-Tibt", + "bos-Latn", + "cat-Latn", + "ceb-Latn", + "cos-Latn", + "ces-Latn", + "cym-Latn", + "dan-Latn", + "deu-Latn", + "ell-Grek", + "eng-Latn", + "epo-Latn", + "spa-Latn", + "est-Latn", + "eus-Latn", + "fas-Arab", + "fin-Latn", + "fra-Latn", + "fry-Latn", + "gle-Latn", + "gla-Latn", + "glg-Latn", + "guj-Gujr", + "hau-Latn", + "haw-Latn", + "heb-Hebr", + "hin-Deva", + "hmn-Latn", + "hrv-Latn", + "hat-Latn", + "hun-Latn", + "hye-Armn", + "ind-Latn", + "ibo-Latn", + "isl-Latn", + "ita-Latn", + "jpn-Jpan", + "jav-Latn", + "kat-Geor", + "kaz-Cyrl", + "khm-Khmr", + "kan-Knda", + "kor-Kore", + "kur-Arab", + "kir-Cyrl", + "lat-Latn", + "ltz-Latn", + "lao-Laoo", + "lit-Latn", + "lav-Latn", + "mlg-Latn", + "mri-Latn", + "mkd-Cyrl", + "mal-Mlym", + "mon-Cyrl", + "mar-Deva", + "msa-Latn", + "mlt-Latn", + "mya-Mymr", + "nep-Deva", + "nld-Latn", + "nor-Latn", + "nya-Latn", + "ori-Orya", + "pan-Guru", + "pol-Latn", + "por-Latn", + "ron-Latn", + "rus-Cyrl", + "kin-Latn", + "sin-Sinh", + "slk-Latn", + "slv-Latn", + 
"smo-Latn", + "sna-Latn", + "som-Latn", + "sqi-Latn", + "srp-Cyrl", + "sot-Latn", + "sun-Latn", + "swe-Latn", + "swa-Latn", + "tam-Taml", + "tel-Telu", + "tgk-Cyrl", + "tha-Thai", + "tuk-Latn", + "tgl-Latn", + "tur-Latn", + "tat-Cyrl", + "uig-Arab", + "ukr-Cyrl", + "urd-Arab", + "uzb-Latn", + "vie-Latn", + "wol-Latn", + "xho-Latn", + "yid-Hebr", + "yor-Latn", + "zho-Hans", + "zul-Latn", +] + # Implementation follows https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/blob/main/src/seb/registered_models/cohere_models.py class CohereTextEmbeddingModel(Wrapper): @@ -28,25 +141,43 @@ def __init__( ) def _embed( - self, sentences: list[str], cohere_task_type: str, retries: int = 5 + self, + sentences: list[str], + cohere_task_type: str, + show_progress_bar: bool = False, + retries: int = 5, ) -> torch.Tensor: import cohere # type: ignore + max_batch_size = 256 + + batches = [ + sentences[i : i + max_batch_size] + for i in range(0, len(sentences), max_batch_size) + ] + client = cohere.Client() - while retries > 0: # Cohere's API is not always reliable - try: - response = client.embed( - texts=list(sentences), - model=self.model_name, - input_type=cohere_task_type, - ) - break - except Exception as e: - print(f"Retrying... {retries} retries left.") - retries -= 1 - if retries == 0: - raise e - return torch.tensor(response.embeddings) + + all_embeddings = [] + + for batch in tqdm.tqdm(batches, leave=False, disable=not show_progress_bar): + while retries > 0: # Cohere's API is not always reliable + try: + response = client.embed( + texts=batch, + model=self.model_name, + input_type=cohere_task_type, + ) + break + except Exception as e: + print(f"Retrying... 
{retries} retries left.") + retries -= 1 + if retries == 0: + raise e + + all_embeddings.extend(torch.tensor(response.embeddings).numpy()) + + return np.array(all_embeddings) def encode( self, @@ -56,13 +187,24 @@ def encode( prompt_type: PromptType | None = None, **kwargs: Any, ) -> np.ndarray: - cohere_task_type = self.get_prompt_name( - self.model_prompts, task_name, prompt_type - ) + prompt_name = self.get_prompt_name(self.model_prompts, task_name, prompt_type) + cohere_task_type = self.model_prompts.get(prompt_name) + if cohere_task_type is None: # search_document is recommended if unknown (https://cohere.com/blog/introducing-embed-v3) cohere_task_type = "search_document" - return self._embed(sentences, cohere_task_type=cohere_task_type).numpy() + + show_progress_bar = ( + False + if "show_progress_bar" not in kwargs + else kwargs.pop("show_progress_bar") + ) + + return self._embed( + sentences, + cohere_task_type=cohere_task_type, + show_progress_bar=show_progress_bar, + ) model_prompts = { @@ -79,15 +221,16 @@ def encode( model_name="embed-multilingual-v3.0", model_prompts=model_prompts, ), - name="embed-multilingual-v3.0", - languages=[], # Unknown, but support >100 languages + name="Cohere/Cohere-embed-multilingual-v3.0", + languages=supported_languages, open_weights=False, revision="1", release_date="2023-11-02", n_parameters=None, memory_usage=None, max_tokens=None, - embed_dim=1024, + embed_dim=512, + reference="https://cohere.com/blog/introducing-embed-v3", license=None, similarity_fn_name="cosine", framework=["API"], @@ -97,20 +240,65 @@ def encode( cohere_eng_3 = ModelMeta( loader=partial( CohereTextEmbeddingModel, - model_name="embed-multilingual-v3.0", + model_name="embed-english-v3.0", model_prompts=model_prompts, ), - name="embed-english-v3.0", + name="Cohere/Cohere-embed-english-v3.0", languages=["eng-Latn"], open_weights=False, + reference="https://cohere.com/blog/introducing-embed-v3", revision="1", release_date="2023-11-02", 
n_parameters=None, memory_usage=None, - max_tokens=None, + max_tokens=512, embed_dim=1024, license=None, similarity_fn_name="cosine", framework=["API"], use_instructions=False, ) + +cohere_mult_light_3 = ModelMeta( + loader=partial( + CohereTextEmbeddingModel, + model_name="embed-multilingual-light-v3.0", + model_prompts=model_prompts, + ), + name="Cohere/Cohere-embed-multilingual-light-v3.0", + languages=supported_languages, + open_weights=False, + revision="1", + reference="https://cohere.com/blog/introducing-embed-v3", + release_date="2023-11-02", + n_parameters=None, + memory_usage=None, + max_tokens=512, + embed_dim=384, + license=None, + similarity_fn_name="cosine", + framework=["API"], + use_instructions=False, +) + +cohere_eng_light_3 = ModelMeta( + loader=partial( + CohereTextEmbeddingModel, + model_name="embed-english-light-v3.0", + model_prompts=model_prompts, + ), + name="Cohere/Cohere-embed-english-light-v3.0", + languages=["eng-Latn"], + open_weights=False, + reference="https://cohere.com/blog/introducing-embed-v3", + revision="1", + release_date="2023-11-02", + n_parameters=None, + memory_usage=None, + max_tokens=512, + embed_dim=384, + license=None, + similarity_fn_name="cosine", + framework=["API"], + use_instructions=False, +) diff --git a/mteb/models/e5_models.py b/mteb/models/e5_models.py index 1a8a3db41c..3bce039f02 100644 --- a/mteb/models/e5_models.py +++ b/mteb/models/e5_models.py @@ -261,15 +261,17 @@ open_weights=True, revision="1c644c92ad3ba1efdad3f1451a637716616a20e8", release_date=E5_PAPER_RELEASE_DATE, - n_parameters=278_000_000, + n_parameters=109_000_000, memory_usage=None, embed_dim=768, license="mit", - max_tokens=514, + max_tokens=512, reference="https://huggingface.co/intfloat/e5-base-v2", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + superseded_by=None, + adapted_from=None, citation=E5_CITATION, ) @@ -285,7 +287,7 @@ open_weights=True, 
revision="b322e09026e4ea05f42beadf4d661fb4e101d311", release_date=E5_PAPER_RELEASE_DATE, - n_parameters=560_000_000, + n_parameters=335_000_000, memory_usage=None, embed_dim=1024, license="mit", @@ -294,5 +296,59 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + superseded_by=None, + adapted_from=None, + citation=E5_CITATION, +) + +e5_large = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="intfloat/e5-large", + revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81", + model_prompts=model_prompts, + ), + name="intfloat/e5-large", + languages=["eng-Latn"], + open_weights=True, + revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81", + release_date="2022-12-26", + n_parameters=335_000_000, + memory_usage=None, + embed_dim=1024, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/intfloat/e5-large", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, + superseded_by="intfloat/e5-large-v2", + adapted_from=None, + citation=E5_CITATION, +) + +e5_base = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="intfloat/e5-base", + revision="b533fe4636f4a2507c08ddab40644d20b0006d6a", + model_prompts=model_prompts, + ), + name="intfloat/e5-base", + languages=["eng-Latn"], + open_weights=True, + revision="b533fe4636f4a2507c08ddab40644d20b0006d6a", + release_date="2022-12-26", + n_parameters=109_000_000, + memory_usage=None, + embed_dim=768, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/intfloat/e5-base", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, + superseded_by="intfloat/e5-base-v2", + adapted_from=None, citation=E5_CITATION, ) diff --git a/mteb/models/google_models.py b/mteb/models/google_models.py index 688680abc4..4fcd21ae6e 100644 --- a/mteb/models/google_models.py +++ b/mteb/models/google_models.py @@ 
-4,12 +4,44 @@ from typing import Any import numpy as np +import tqdm from mteb.encoder_interface import Encoder, PromptType from mteb.model_meta import ModelMeta from .wrapper import Wrapper +MULTILINGUAL_EVALUATED_LANGUAGES = [ + "arb_Arab", + "ben_Beng", + "eng_Latn", + "spa_Latn", + "deu_Latn", + "pes_Arab", + "fin_Latn", + "fra_Latn", + "hin_Deva", + "ind_Latn", + "jpn_Jpan", + "kor_Hang", + "rus_Cyrl", + "swh_Latn", + "tel_Telu", + "tha_Thai", + "yor_Latn", + "zho_Hant", + "zho_Hans", +] + +MODEL_PROMPTS = { + "Classification": "CLASSIFICATION", + "MultilabelClassification": "CLASSIFICATION", + "Clustering": "CLUSTERING", + "STS": "SIMILARITY", + PromptType.query.value: "RETRIEVAL_QUERY", + PromptType.passage.value: "RETRIEVAL_DOCUMENT", +} + class GoogleTextEmbeddingModel(Encoder, Wrapper): def __init__( @@ -28,6 +60,7 @@ def _embed( self, texts: list[str], google_task_type: str | None = None, + show_progress_bar: bool = False, titles: list[str] | None = None, dimensionality: int | None = 768, ) -> list[list[float]]: @@ -54,14 +87,28 @@ def _embed( inputs = [ TextEmbeddingInput(text, task_type=google_task_type) for text in texts ] + kwargs = {"output_dimensionality": dimensionality} if dimensionality else {} - try: - embeddings = model.get_embeddings(inputs, **kwargs) - # Except the very rare google.api_core.exceptions.InternalServerError - except Exception as e: - print("Retrying once after error:", e) - embeddings = model.get_embeddings(inputs, **kwargs) - return np.asarray([embedding.values for embedding in embeddings]) + + max_batch_size = 16 ## Vertex API limits the number of instances per call to 250, but there is also a limit of tokens involved. Let's be conservative and set it to 16 by default. TODO: in a future PR, leverage the CountTokens API to get the optimum batch size for each request. 
+ batches = [ + inputs[i : i + max_batch_size] + for i in range(0, len(inputs), max_batch_size) + ] + + all_embeddings = [] + + for batch in tqdm.tqdm(batches, leave=False, disable=not show_progress_bar): + try: + embeddings_batch = model.get_embeddings(batch, **kwargs) + # Except the very rare google.api_core.exceptions.InternalServerError + except Exception as e: + print("Retrying once after error:", e) + embeddings_batch = model.get_embeddings(batch, **kwargs) + + all_embeddings.extend([embedding.values for embedding in embeddings_batch]) + + return np.asarray(all_embeddings) def encode( self, @@ -70,31 +117,75 @@ def encode( prompt_type: PromptType | None = None, **kwargs: Any, ) -> np.ndarray: - google_task_type = self.get_prompt_name( - self.model_prompts, task_name, prompt_type + prompt_name = self.get_prompt_name(self.model_prompts, task_name, prompt_type) + google_task_type = self.model_prompts.get(prompt_name) + + show_progress_bar = ( + False + if "show_progress_bar" not in kwargs + else kwargs.pop("show_progress_bar") + ) + + return self._embed( + sentences, + google_task_type=google_task_type, + show_progress_bar=show_progress_bar, ) - return self._embed(sentences, google_task_type=google_task_type) -name = "text-embedding-004" -google_emb_004 = ModelMeta( +google_text_emb_004 = ModelMeta( loader=partial( GoogleTextEmbeddingModel, - model_name=name, - model_prompts={ - "Classification": "CLASSIFICATION", - "MultilabelClassification": "CLASSIFICATION", - "Clustering": "CLUSTERING", - "STS": "SIMILARITY", - PromptType.query.value: "RETRIEVAL_QUERY", - PromptType.passage.value: "RETRIEVAL_DOCUMENT", - }, + model_name="text-embedding-004", + model_prompts=MODEL_PROMPTS, ), - name=name, + name="google/text-embedding-004", languages=["eng-Latn"], open_weights=False, revision="1", # revision is intended for implementation - release_date=None, # couldnt figure this out + release_date="2024-05-14", + n_parameters=None, + memory_usage=None, + max_tokens=2048, + 
embed_dim=768, + license=None, + similarity_fn_name="cosine", # assumed + framework=["API"], + use_instructions=True, +) + +google_text_emb_005 = ModelMeta( + loader=partial( + GoogleTextEmbeddingModel, + model_name="text-embedding-005", + model_prompts=MODEL_PROMPTS, + ), + name="google/text-embedding-005", + languages=["eng-Latn"], + open_weights=False, + revision="1", # revision is intended for implementation + release_date="2024-11-18", + n_parameters=None, + memory_usage=None, + max_tokens=2048, + embed_dim=768, + license=None, + similarity_fn_name="cosine", # assumed + framework=["API"], + use_instructions=True, +) + +google_text_multilingual_emb_002 = ModelMeta( + loader=partial( + GoogleTextEmbeddingModel, + model_name="text-multilingual-embedding-002", + model_prompts=MODEL_PROMPTS, + ), + name="google/text-multilingual-embedding-002", + languages=MULTILINGUAL_EVALUATED_LANGUAGES, # From the list of evaluated languages in https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#supported_text_languages + open_weights=False, + revision="1", # revision is intended for implementation + release_date="2024-05-14", n_parameters=None, memory_usage=None, max_tokens=2048, diff --git a/mteb/models/model2vec_models.py b/mteb/models/model2vec_models.py new file mode 100644 index 0000000000..e0109be125 --- /dev/null +++ b/mteb/models/model2vec_models.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +import logging +from collections.abc import Sequence +from functools import partial +from typing import Any + +import numpy as np + +from mteb.model_meta import ModelMeta + +from .wrapper import Wrapper + +logger = logging.getLogger(__name__) + + +class Model2VecWrapper(Wrapper): + def __init__( + self, + model_name: str, + **kwargs, + ) -> None: + """Wrapper for Model2Vec models. + + Args: + model_name: The Model2Vec model to load from HuggingFace Hub. + **kwargs: Additional arguments to pass to the wrapper. 
+ """ + try: + from model2vec import StaticModel + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`." + ) from e + + self.model_name = model_name + self.static_model = StaticModel.from_pretrained(self.model_name) + + def encode( + self, + sentences: Sequence[str], + **kwargs: Any, + ) -> np.ndarray: + """Encodes the given sentences using the encoder. + + Args: + sentences: The sentences to encode. + **kwargs: Additional arguments to pass to the encoder. + + Returns: + The encoded sentences. + """ + return self.static_model.encode(sentences) + + +m2v_base_glove_subword = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/M2V_base_glove_subword", + ), + name="minishlab/M2V_base_glove_subword", + languages=["eng_Latn"], + open_weights=True, + revision="5f4f5ca159b7321a8b39739bba0794fa0debddf4", + release_date="2024-09-21", + n_parameters=103 * 1e6, + max_tokens=np.inf, # Theoretically infinite + embed_dim=256, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/M2V_base_glove_subword", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) + + +m2v_base_glove = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/M2V_base_glove", + ), + name="minishlab/M2V_base_glove", + languages=["eng_Latn"], + open_weights=True, + revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2b", + release_date="2024-09-21", + n_parameters=102 * 1e6, + max_tokens=np.inf, + embed_dim=256, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/M2V_base_glove", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) + +m2v_base_output = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/M2V_base_output", + ), + 
name="minishlab/M2V_base_output", + languages=["eng_Latn"], + open_weights=True, + revision="02460ae401a22b09d2c6652e23371398329551e2", + release_date="2024-09-21", + n_parameters=7.56 * 1e6, + max_tokens=np.inf, + embed_dim=256, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/M2V_base_output", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) + +m2v_multilingual_output = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/M2V_multilingual_output", + ), + name="minishlab/M2V_multilingual_output", + languages=["eng_Latn"], + open_weights=True, + revision="2cf4ec4e1f51aeca6c55cf9b93097d00711a6305", + release_date="2024-09-21", + n_parameters=128 * 1e6, + max_tokens=np.inf, + embed_dim=256, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/M2V_multilingual_output", + use_instructions=False, + adapted_from="sentence-transformers/LaBSE", + superseded_by=None, +) + +potion_base_2m = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/potion-base-2M", + ), + name="minishlab/potion-base-2M", + languages=["eng_Latn"], + open_weights=True, + revision="86db093558fbced2072b929eb1690bce5272bd4b", + release_date="2024-10-29", + n_parameters=2 * 1e6, + max_tokens=np.inf, + embed_dim=64, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/potion-base-2M", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) + +potion_base_4m = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/potion-base-4M", + ), + name="minishlab/potion-base-4M", + languages=["eng_Latn"], + open_weights=True, + revision="81b1802ada41afcd0987a37dc15e569c9fa76f04", + release_date="2024-10-29", + n_parameters=3.78 * 1e6, + max_tokens=np.inf, + embed_dim=128, + license="mit", + 
similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/potion-base-4M", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) + +potion_base_8m = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/potion-base-8M", + ), + name="minishlab/potion-base-8M", + languages=["eng_Latn"], + open_weights=True, + revision="dcbec7aa2d52fc76754ac6291803feedd8c619ce", + release_date="2024-10-29", + n_parameters=7.56 * 1e6, + max_tokens=np.inf, + embed_dim=256, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/potion-base-8M", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) diff --git a/mteb/models/nomic_models.py b/mteb/models/nomic_models.py index 203e00f743..87c0112978 100644 --- a/mteb/models/nomic_models.py +++ b/mteb/models/nomic_models.py @@ -99,6 +99,17 @@ def encode( # type: ignore revision="b0753ae76394dd36bcfb912a46018088bca48be0", release_date="2024-02-10", # first commit citation=NOMIC_CITATION, + n_parameters=137_000_000, + memory_usage=None, + max_tokens=8192, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, + adapted_from=None, + superseded_by=None, ) nomic_embed_v1 = ModelMeta( @@ -124,4 +135,59 @@ def encode( # type: ignore framework=["Sentence Transformers", "PyTorch"], use_instructions=True, citation=NOMIC_CITATION, + adapted_from=None, + superseded_by="nomic-ai/nomic-embed-text-v1.5", +) + +nomic_embed_v1_ablated = ModelMeta( + loader=partial( # type: ignore + NomicWrapper, + trust_remote_code=True, + model_name="nomic-ai/nomic-embed-text-v1-ablated", + revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f", + model_prompts=model_prompts, + ), + name="nomic-ai/nomic-embed-text-v1-ablated", + 
languages=["eng-Latn"], + open_weights=True, + revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f", + release_date="2024-01-15", # first commit + n_parameters=None, + memory_usage=None, + max_tokens=8192, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1-ablated", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, + adapted_from=None, + superseded_by=None, +) + + +nomic_embed_v1_ablated = ModelMeta( + loader=partial( # type: ignore + NomicWrapper, + trust_remote_code=True, + model_name="nomic-ai/nomic-embed-text-v1-unsupervised", + revision="b53d557b15ae63852847c222d336c1609eced93c", + model_prompts=model_prompts, + ), + name="nomic-ai/nomic-embed-text-v1-unsupervised", + languages=["eng-Latn"], + open_weights=True, + revision="b53d557b15ae63852847c222d336c1609eced93c", + release_date="2024-01-15", # first commit + n_parameters=None, + memory_usage=None, + max_tokens=8192, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1-unsupervised", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, + adapted_from=None, + superseded_by=None, ) diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index 50967e898b..adf96fbe4e 100644 --- a/mteb/models/openai_models.py +++ b/mteb/models/openai_models.py @@ -5,6 +5,7 @@ from typing import Any import numpy as np +import tqdm from mteb.model_meta import ModelMeta from mteb.requires_package import requires_package @@ -15,16 +16,37 @@ class OpenAIWrapper(Wrapper): - def __init__(self, model_name: str, embed_dim: int | None = None, **kwargs) -> None: + def __init__( + self, + model_name: str, + max_tokens: int, + tokenizer_name: str = "cl100k_base", # since all models use this tokenizer now + embed_dim: int | None = None, + **kwargs, + ) -> None: + """Wrapper for OpenAIs embedding API. 
+ To handle documents larger than 8192 tokens, we truncate the document to the specified sequence length. + """ requires_package(self, "openai", "Openai text embedding") from openai import OpenAI + requires_package(self, "tiktoken", "Tiktoken package") + import tiktoken + self._client = OpenAI() self._model_name = model_name self._embed_dim = embed_dim + self._max_tokens = max_tokens + self._encoding = tiktoken.get_encoding(tokenizer_name) + + def truncate_text_tokens(self, text): + """Truncate a string to have `max_tokens` according to the given encoding.""" + truncated_sentence = self._encoding.encode(text)[: self._max_tokens] + return self._encoding.decode(truncated_sentence) def encode(self, sentences: list[str], **kwargs: Any) -> np.ndarray: requires_package(self, "openai", "Openai text embedding") + from openai import NotGiven if self._model_name == "text-embedding-ada-002" and self._embed_dim is not None: @@ -32,21 +54,59 @@ def encode(self, sentences: list[str], **kwargs: Any) -> np.ndarray: "Reducing embedding size available only for text-embedding-3-* models" ) + trimmed_sentences = [] + for sentence in sentences: + encoded_sentence = self._encoding.encode(sentence) + if len(encoded_sentence) > self._max_tokens: + truncated_sentence = self.truncate_text_tokens(sentence) + trimmed_sentences.append(truncated_sentence) + else: + trimmed_sentences.append(sentence) + max_batch_size = 2048 sublists = [ - sentences[i : i + max_batch_size] - for i in range(0, len(sentences), max_batch_size) + trimmed_sentences[i : i + max_batch_size] + for i in range(0, len(trimmed_sentences), max_batch_size) ] + show_progress_bar = ( + False + if "show_progress_bar" not in kwargs + else kwargs.pop("show_progress_bar") + ) + all_embeddings = [] - for sublist in sublists: - response = self._client.embeddings.create( - input=sublist, - model=self._model_name, - encoding_format="float", - dimensions=self._embed_dim or NotGiven(), - ) + for sublist in tqdm.tqdm(sublists, leave=False, 
disable=not show_progress_bar): + try: + response = self._client.embeddings.create( + input=sublist, + model=self._model_name, + encoding_format="float", + dimensions=self._embed_dim or NotGiven(), + ) + except Exception as e: + # Sleep due to too many requests + logger.info("Sleeping for 10 seconds due to error", e) + import time + + time.sleep(10) + try: + response = self._client.embeddings.create( + input=sublist, + model=self._model_name, + encoding_format="float", + dimensions=self._embed_dim or NotGiven(), + ) + except Exception as e: + logger.info("Sleeping for 60 seconds due to error", e) + time.sleep(60) + response = self._client.embeddings.create( + input=sublist, + model=self._model_name, + encoding_format="float", + dimensions=self._embed_dim or NotGiven(), + ) all_embeddings.extend(self._to_numpy(response)) return np.array(all_embeddings) @@ -56,11 +116,16 @@ def _to_numpy(self, embedding_response) -> np.ndarray: text_embedding_3_small = ModelMeta( - name="text-embedding-3-small", - revision="1", + name="openai/text-embedding-3-small", + revision="2", release_date="2024-01-25", languages=None, # supported languages not specified - loader=partial(OpenAIWrapper, model_name="text-embedding-3-small"), + loader=partial( + OpenAIWrapper, + model_name="text-embedding-3-small", + tokenizer_name="cl100k_base", + max_tokens=8192, + ), max_tokens=8191, embed_dim=1536, open_weights=False, @@ -73,25 +138,37 @@ def _to_numpy(self, embedding_response) -> np.ndarray: use_instructions=False, ) text_embedding_3_large = ModelMeta( - name="text-embedding-3-large", - revision="1", + name="openai/text-embedding-3-large", + revision="2", release_date="2024-01-25", languages=None, # supported languages not specified - loader=partial(OpenAIWrapper, model_name="text-embedding-3-large"), + loader=partial( + OpenAIWrapper, + model_name="text-embedding-3-large", + tokenizer_name="cl100k_base", + max_tokens=8192, + ), max_tokens=8191, embed_dim=3072, open_weights=False, + 
reference="https://openai.com/index/new-embedding-models-and-api-updates/", framework=["API"], use_instructions=False, n_parameters=None, memory_usage=None, ) text_embedding_ada_002 = ModelMeta( - name="text-embedding-ada-002", - revision="1", + name="openai/text-embedding-ada-002", + revision="2", release_date="2022-12-15", languages=None, # supported languages not specified - loader=partial(OpenAIWrapper, model_name="text-embedding-ada-002"), + loader=partial( + OpenAIWrapper, + model_name="text-embedding-ada-002", + tokenizer_name="cl100k_base", + max_tokens=8192, + ), + reference="https://openai.com/index/new-and-improved-embedding-model/", max_tokens=8191, embed_dim=1536, open_weights=False, diff --git a/mteb/models/overview.py b/mteb/models/overview.py index 91b84e38d8..7418ee98fa 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -2,13 +2,16 @@ import logging from collections.abc import Iterable +from functools import lru_cache from typing import Any +from huggingface_hub import ModelCard from sentence_transformers import SentenceTransformer from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta from mteb.models import ( + arctic_models, bge_models, bm25, cohere_models, @@ -19,6 +22,7 @@ gte_models, jina_models, llm2vec_models, + model2vec_models, mxbai_models, nomic_models, openai_models, @@ -37,6 +41,7 @@ logger = logging.getLogger(__name__) model_modules = [ + arctic_models, bge_models, bm25, cohere_models, @@ -47,6 +52,7 @@ gte_models, llm2vec_models, mxbai_models, + model2vec_models, nomic_models, openai_models, ru_sentence_models, @@ -152,21 +158,46 @@ def get_model_meta(model_name: str, revision: str | None = None) -> ModelMeta: return MODEL_REGISTRY[model_name] else: # assume it is a sentence-transformers model logger.info( - "Model not found in model registry, assuming it is a sentence-transformers model." + "Model not found in model registry, assuming it is on HF Hub model." 
) logger.info( - f"Attempting to extract metadata by loading the model ({model_name}) using sentence-transformers." + f"Attempting to extract metadata by loading the model ({model_name}) using HuggingFace." ) - model = SentenceTransformer( - model_name, revision=revision, trust_remote_code=True - ) - meta = model_meta_from_sentence_transformers(model) - + meta = model_meta_from_hf_hub(model_name) meta.revision = revision meta.name = model_name return meta +@lru_cache +def model_meta_from_hf_hub(model_name: str) -> ModelMeta: + try: + card = ModelCard.load(model_name) + card_data = card.data.to_dict() + frameworks = ["PyTorch"] + if card_data.get("library_name", None) == "sentence-transformers": + frameworks.append("Sentence Transformers") + return ModelMeta( + name=model_name, + revision=None, + # TODO + release_date=None, + # TODO: We need a mapping between conflicting language codes + languages=None, + license=card_data.get("license", None), + framework=frameworks, + public_training_data=bool(card_data.get("datasets", None)), + ) + except Exception as e: + logger.warning(f"Failed to extract metadata from model: {e}.") + return ModelMeta( + name=None, + revision=None, + languages=None, + release_date=None, + ) + + def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMeta: try: name = ( diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index 9c0c7f4bcc..815ab137da 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -81,11 +81,13 @@ memory_usage=None, embed_dim=384, license="apache-2.0", - max_tokens=512, + max_tokens=256, reference="https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + superseded_by=None, + adapted_from=None, citation=SBERT_CITATION, ) @@ -104,6 +106,8 @@ similarity_fn_name="cosine", 
framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + superseded_by=None, + adapted_from=None, citation=SBERT_CITATION, ) @@ -122,6 +126,8 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + superseded_by=None, + adapted_from=None, citation=SBERT_CITATION, ) @@ -140,6 +146,151 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + superseded_by=None, + adapted_from=None, + citation="""@misc{feng2022languageagnosticbertsentenceembedding, + title={Language-agnostic BERT Sentence Embedding}, + author={Fangxiaoyu Feng and Yinfei Yang and Daniel Cer and Naveen Arivazhagan and Wei Wang}, + year={2022}, + eprint={2007.01852}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2007.01852}, + }""", +) + +multi_qa_MiniLM_L6_cos_v1 = ModelMeta( + name="sentence-transformer/multi-qa-MiniLM-L6-cos-v1", + languages=["eng-Latn"], + open_weights=True, + revision="b207367332321f8e44f96e224ef15bc607f4dbf0", # can be any + release_date="2021-08-30", + n_parameters=22_700_000, + memory_usage=None, + embed_dim=384, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by=None, + adapted_from=None, +) + +all_mpnet_base_v2 = ModelMeta( + name="sentence-transformers/all-mpnet-base-v2", + languages=["eng-Latn"], + open_weights=True, + revision="9a3225965996d404b775526de6dbfe85d3368642", # can be any + release_date="2021-08-30", + n_parameters=109_000_000, + memory_usage=None, + embed_dim=768, + license="apache-2.0", + max_tokens=384, + reference="https://huggingface.co/sentence-transformers/all-mpnet-base-v2", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by=None, + 
adapted_from=None, +) + +jina_embeddings_v2_base_en = ModelMeta( + name="jinaai/jina-embeddings-v2-base-en", + languages=["eng-Latn"], + open_weights=True, + revision="6e85f575bc273f1fd840a658067d0157933c83f0", # can be any + release_date="2023-09-27", + n_parameters=137_000_000, + memory_usage=None, + embed_dim=768, + license="apache-2.0", + max_tokens=8192, + reference="https://huggingface.co/jinaai/jina-embeddings-v2-base-en", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by=None, + adapted_from=None, +) + +jina_embeddings_v2_small_en = ModelMeta( + name="jinaai/jina-embeddings-v2-small-en", + languages=["eng-Latn"], + open_weights=True, + revision="", # can be any + release_date="2023-09-27", + n_parameters=32_700_000, + memory_usage=None, + embed_dim=512, + license="apache-2.0", + max_tokens=8192, + reference="https://huggingface.co/jinaai/jina-embeddings-v2-small-en", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by=None, + adapted_from=None, +) + +jina_embedding_b_en_v1 = ModelMeta( + name="jinaai/jina-embedding-b-en-v1", + languages=["eng-Latn"], + open_weights=True, + revision="aa0645035294a8c0607ce5bb700aba982cdff32c", # can be any + release_date="2023-07-07", + n_parameters=110_000_000, + memory_usage=None, + embed_dim=768, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/jinaai/jina-embedding-b-en-v1", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by="jinaai/jina-embeddings-v2-base-en", + adapted_from=None, +) + +jina_embedding_s_en_v1 = ModelMeta( + name="jinaai/jina-embedding-s-en-v1", + languages=["eng-Latn"], + open_weights=True, + revision="c1fed70aa4823a640f1a7150a276e4d3b08dce08", # can be any + release_date="2023-07-07", + n_parameters=35_000_000, + memory_usage=None, + embed_dim=512, + 
license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/jinaai/jina-embedding-s-en-v1", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by="jinaai/jina-embeddings-v2-small-en", + adapted_from=None, +) + + +all_MiniLM_L12_v2 = ModelMeta( + name="sentence-transformers/all-MiniLM-L12-v2", + languages=["eng-Latn"], + open_weights=True, + revision="364dd28d28dcd3359b537f3cf1f5348ba679da62", # can be any + release_date="2021-08-30", + n_parameters=33_400_000, + memory_usage=None, + embed_dim=384, + license="apache-2.0", + max_tokens=256, + reference="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by=None, + adapted_from=None, citation="""@misc{feng2022languageagnosticbertsentenceembedding, title={Language-agnostic BERT Sentence Embedding}, author={Fangxiaoyu Feng and Yinfei Yang and Daniel Cer and Naveen Arivazhagan and Wei Wang}, diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index ea6b25bde1..9f42808b37 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -141,7 +141,7 @@ def _batched_encode( } voyage_large_2_instruct = ModelMeta( - name="voyage-large-2-instruct", + name="voyageai/voyage-large-2-instruct", revision="1", release_date="2024-05-05", languages=None, # supported languages not specified @@ -163,7 +163,7 @@ def _batched_encode( ) voyage_finance_2 = ModelMeta( - name="voyage-finance-2", + name="voyageai/voyage-finance-2", revision="1", release_date="2024-05-30", languages=None, # supported languages not specified @@ -185,7 +185,7 @@ def _batched_encode( ) voyage_law_2 = ModelMeta( - name="voyage-law-2", + name="voyageai/voyage-law-2", revision="1", release_date="2024-04-15", languages=None, # supported languages not specified @@ -207,7 +207,7 @@ def _batched_encode( ) voyage_code_2 
= ModelMeta( - name="voyage-code-2", + name="voyageai/voyage-code-2", revision="1", release_date="2024-01-23", languages=None, # supported languages not specified @@ -251,7 +251,7 @@ def _batched_encode( ) voyage_2 = ModelMeta( - name="voyage-2", + name="voyageai/voyage-2", revision="1", release_date="2023-10-29", languages=None, # supported languages not specified @@ -272,7 +272,7 @@ def _batched_encode( use_instructions=False, ) voyage_multilingual_2 = ModelMeta( - name="voyage-multilingual-2", + name="voyageai/voyage-multilingual-2", revision="1", release_date="2024-06-10", languages=None, # supported languages not specified @@ -292,3 +292,47 @@ def _batched_encode( framework=["API"], use_instructions=False, ) + +voyage_3 = ModelMeta( + name="voyageai/voyage-3", + revision="1", + release_date="2024-09-18", + languages=None, # supported languages not specified + loader=partial( + VoyageWrapper, + model_name="voyage-3", + model_prompts=model_prompts, + ), + max_tokens=32000, + embed_dim=1024, + open_weights=False, + n_parameters=None, + memory_usage=None, + license=None, + reference="https://blog.voyageai.com/2024/09/18/voyage-3/", + similarity_fn_name="cosine", + framework=["API"], + use_instructions=False, +) + +voyage_3_lite = ModelMeta( + name="voyageai/voyage-3-lite", + revision="1", + release_date="2024-09-18", + languages=None, # supported languages not specified + loader=partial( + VoyageWrapper, + model_name="voyage-3-lite", + model_prompts=model_prompts, + ), + max_tokens=32000, + embed_dim=512, + open_weights=False, + n_parameters=None, + memory_usage=None, + license=None, + reference="https://blog.voyageai.com/2024/09/18/voyage-3/", + similarity_fn_name="cosine", + framework=["API"], + use_instructions=False, +) diff --git a/mteb/overview.py b/mteb/overview.py index 91723ec4df..b3a61e73ec 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -69,7 +69,7 @@ def check_is_valid_language(lang: str) -> None: ) -def filter_superseeded_datasets(tasks: 
list[AbsTask]) -> list[AbsTask]: +def filter_superseded_datasets(tasks: list[AbsTask]) -> list[AbsTask]: return [t for t in tasks if t.superseded_by is None] @@ -242,7 +242,7 @@ def get_tasks( task_types: list[TASK_TYPE] | None = None, categories: list[TASK_CATEGORY] | None = None, tasks: list[str] | None = None, - exclude_superseeded: bool = True, + exclude_superseded: bool = True, eval_splits: list[str] | None = None, ) -> MTEBTasks: """Get a list of tasks based on the specified filters. @@ -257,7 +257,7 @@ def get_tasks( categories: A list of task categories these include "s2s" (sentence to sentence), "s2p" (sentence to paragraph) and "p2p" (paragraph to paragraph). tasks: A list of task names to include. If None, all tasks which pass the filters are included. - exclude_superseeded: A boolean flag to exclude datasets which are superseeded by another. + exclude_superseded: A boolean flag to exclude datasets which are superseded by another. eval_splits: A list of evaluation splits to include. If None, all splits are included. 
Returns: @@ -266,7 +266,7 @@ def get_tasks( Examples: >>> get_tasks(languages=["eng", "deu"], script=["Latn"], domains=["Legal"]) >>> get_tasks(languages=["eng"], script=["Latn"], task_types=["Classification"]) - >>> get_tasks(languages=["eng"], script=["Latn"], task_types=["Clustering"], exclude_superseeded=False) + >>> get_tasks(languages=["eng"], script=["Latn"], task_types=["Clustering"], exclude_superseded=False) >>> get_tasks(languages=["eng"], tasks=["WikipediaRetrievalMultilingual"], eval_splits=["test"]) """ if tasks: @@ -290,8 +290,8 @@ def get_tasks( _tasks = filter_tasks_by_task_types(_tasks, task_types) if categories: _tasks = filter_task_by_categories(_tasks, categories) - if exclude_superseeded: - _tasks = filter_superseeded_datasets(_tasks) + if exclude_superseded: + _tasks = filter_superseded_datasets(_tasks) return MTEBTasks(_tasks) @@ -320,9 +320,7 @@ def get_task( if task_name not in TASKS_REGISTRY: close_matches = difflib.get_close_matches(task_name, TASKS_REGISTRY.keys()) if close_matches: - suggestion = ( - f"KeyError: '{task_name}' not found. Did you mean: {close_matches[0]}?" - ) + suggestion = f"KeyError: '{task_name}' not found. Did you mean: '{close_matches[0]}'?" else: suggestion = ( f"KeyError: '{task_name}' not found and no similar keys were found." 
diff --git a/mteb/tasks/BitextMining/__init__.py b/mteb/tasks/BitextMining/__init__.py index c176077215..f43f53a49a 100644 --- a/mteb/tasks/BitextMining/__init__.py +++ b/mteb/tasks/BitextMining/__init__.py @@ -1,24 +1,51 @@ from __future__ import annotations -from .dan.BornholmskBitextMining import * -from .kat.TbilisiCityHallBitextMining import * -from .multilingual.BibleNLPBitextMining import * -from .multilingual.BUCCBitextMining import * -from .multilingual.BUCCBitextMiningFast import * -from .multilingual.DiaBLaBitextMining import * -from .multilingual.FloresBitextMining import * -from .multilingual.IN22ConvBitextMining import * -from .multilingual.IN22GenBitextMining import * -from .multilingual.IndicGenBenchFloresBitextMining import * -from .multilingual.IWSLT2017BitextMining import * -from .multilingual.LinceMTBitextMining import * -from .multilingual.NollySentiBitextMining import * -from .multilingual.NorwegianCourtsBitextMining import * -from .multilingual.NTREXBitextMining import * -from .multilingual.NusaTranslationBitextMining import * -from .multilingual.NusaXBitextMining import * -from .multilingual.PhincBitextMining import * -from .multilingual.RomaTalesBitextMining import * -from .multilingual.TatoebaBitextMining import * -from .srn.SRNCorpusBitextMining import * -from .vie.VieMedEVBitextMining import * +from .dan import BornholmBitextMining +from .kat import TbilisiCityHallBitextMining +from .multilingual import ( + BibleNLPBitextMining, + BUCCBitextMining, + BUCCBitextMiningFast, + DiaBLaBitextMining, + FloresBitextMining, + IN22ConvBitextMining, + IN22GenBitextMining, + IndicGenBenchFloresBitextMining, + IWSLT2017BitextMining, + LinceMTBitextMining, + NollySentiBitextMining, + NorwegianCourtsBitextMining, + NTREXBitextMining, + NusaTranslationBitextMining, + NusaXBitextMining, + PhincBitextMining, + RomaTalesBitextMining, + TatoebaBitextMining, +) +from .srn import SRNCorpusBitextMining +from .vie import VieMedEVBitextMining + +__all__ = [ + 
"TbilisiCityHallBitextMining", + "VieMedEVBitextMining", + "BornholmBitextMining", + "SRNCorpusBitextMining", + "IN22ConvBitextMining", + "IN22GenBitextMining", + "BUCCBitextMining", + "LinceMTBitextMining", + "NusaTranslationBitextMining", + "DiaBLaBitextMining", + "NTREXBitextMining", + "IndicGenBenchFloresBitextMining", + "NollySentiBitextMining", + "BUCCBitextMiningFast", + "PhincBitextMining", + "TatoebaBitextMining", + "NusaXBitextMining", + "IWSLT2017BitextMining", + "BibleNLPBitextMining", + "FloresBitextMining", + "RomaTalesBitextMining", + "NorwegianCourtsBitextMining", +] diff --git a/mteb/tasks/BitextMining/dan/__init__.py b/mteb/tasks/BitextMining/dan/__init__.py index e69de29bb2..00f3bbf4aa 100644 --- a/mteb/tasks/BitextMining/dan/__init__.py +++ b/mteb/tasks/BitextMining/dan/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .BornholmskBitextMining import BornholmBitextMining + +__all__ = ["BornholmBitextMining"] diff --git a/mteb/tasks/BitextMining/kat/__init__.py b/mteb/tasks/BitextMining/kat/__init__.py index e69de29bb2..808630021d 100644 --- a/mteb/tasks/BitextMining/kat/__init__.py +++ b/mteb/tasks/BitextMining/kat/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TbilisiCityHallBitextMining import TbilisiCityHallBitextMining + +__all__ = ["TbilisiCityHallBitextMining"] diff --git a/mteb/tasks/BitextMining/multilingual/__init__.py b/mteb/tasks/BitextMining/multilingual/__init__.py index e69de29bb2..e7c6ca70f6 100644 --- a/mteb/tasks/BitextMining/multilingual/__init__.py +++ b/mteb/tasks/BitextMining/multilingual/__init__.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from .BibleNLPBitextMining import BibleNLPBitextMining +from .BUCCBitextMining import BUCCBitextMining +from .BUCCBitextMiningFast import BUCCBitextMiningFast +from .DiaBLaBitextMining import DiaBLaBitextMining +from .FloresBitextMining import FloresBitextMining +from .IN22ConvBitextMining import IN22ConvBitextMining +from 
.IN22GenBitextMining import IN22GenBitextMining +from .IndicGenBenchFloresBitextMining import IndicGenBenchFloresBitextMining +from .IWSLT2017BitextMining import IWSLT2017BitextMining +from .LinceMTBitextMining import LinceMTBitextMining +from .NollySentiBitextMining import NollySentiBitextMining +from .NorwegianCourtsBitextMining import NorwegianCourtsBitextMining +from .NTREXBitextMining import NTREXBitextMining +from .NusaTranslationBitextMining import NusaTranslationBitextMining +from .NusaXBitextMining import NusaXBitextMining +from .PhincBitextMining import PhincBitextMining +from .RomaTalesBitextMining import RomaTalesBitextMining +from .TatoebaBitextMining import TatoebaBitextMining + +__all__ = [ + "IN22ConvBitextMining", + "IN22GenBitextMining", + "BUCCBitextMining", + "LinceMTBitextMining", + "NusaTranslationBitextMining", + "DiaBLaBitextMining", + "NTREXBitextMining", + "IndicGenBenchFloresBitextMining", + "NollySentiBitextMining", + "BUCCBitextMiningFast", + "PhincBitextMining", + "TatoebaBitextMining", + "NusaXBitextMining", + "IWSLT2017BitextMining", + "BibleNLPBitextMining", + "FloresBitextMining", + "RomaTalesBitextMining", + "NorwegianCourtsBitextMining", +] diff --git a/mteb/tasks/BitextMining/srn/__init__.py b/mteb/tasks/BitextMining/srn/__init__.py index e69de29bb2..b3d0401a96 100644 --- a/mteb/tasks/BitextMining/srn/__init__.py +++ b/mteb/tasks/BitextMining/srn/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SRNCorpusBitextMining import SRNCorpusBitextMining + +__all__ = ["SRNCorpusBitextMining"] diff --git a/mteb/tasks/BitextMining/vie/__init__.py b/mteb/tasks/BitextMining/vie/__init__.py index e69de29bb2..2ca8d5290d 100644 --- a/mteb/tasks/BitextMining/vie/__init__.py +++ b/mteb/tasks/BitextMining/vie/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .VieMedEVBitextMining import VieMedEVBitextMining + +__all__ = ["VieMedEVBitextMining"] diff --git a/mteb/tasks/Classification/__init__.py 
b/mteb/tasks/Classification/__init__.py index 3e80ae2181..ca9f8bd64d 100644 --- a/mteb/tasks/Classification/__init__.py +++ b/mteb/tasks/Classification/__init__.py @@ -1,144 +1,543 @@ from __future__ import annotations -from .ara.AJGT import * -from .ara.HotelReviewSentimentClassification import * -from .ara.OnlineStoreReviewSentimentClassification import * -from .ara.RestaurantReviewSentimentClassification import * -from .ara.TweetEmotionClassification import * -from .ara.TweetSarcasmClassification import * -from .ben.BengaliDocumentClassification import * -from .ben.BengaliHateSpeechClassification import * -from .ben.BengaliSentimentAnalysis import * -from .bul.BulgarianStoreReviewSentimentClassfication import * -from .ces.CSFDCZMovieReviewSentimentClassification import * -from .ces.CzechProductReviewSentimentClassification import * -from .ces.CzechSoMeSentimentClassification import * -from .ces.CzechSubjectivityClassification import * -from .dan.AngryTweetsClassification import * -from .dan.DanishPoliticalCommentsClassification import * -from .dan.DKHateClassification import * -from .dan.LccSentimentClassification import * -from .deu.GermanPoliticiansTwitterSentimentClassification import * -from .deu.TenKGnadClassification import * -from .ell.GreekLegalCodeClassification import * -from .eng.AmazonPolarityClassification import * -from .eng.ArxivClassification import * -from .eng.Banking77Classification import * -from .eng.DBpediaClassification import * -from .eng.EmotionClassification import * -from .eng.FinancialPhrasebankClassification import * -from .eng.FrenkEnClassification import * -from .eng.ImdbClassification import * -from .eng.LegalBenchClassification import * -from .eng.NewsClassification import * -from .eng.PatentClassification import * -from .eng.PoemSentimentClassification import * -from .eng.ToxicChatClassification import * -from .eng.ToxicConversationsClassification import * -from .eng.TweetSentimentExtractionClassification import * -from 
.eng.TweetTopicSingleClassification import * -from .eng.YahooAnswersTopicsClassification import * -from .eng.YelpReviewFullClassification import * -from .est.estonian_valence import * -from .fas.PersianFoodSentimentClassification import * -from .fil.FilipinoHateSpeechClassification import * -from .fil.FilipinoShopeeReviewsClassification import * -from .fin.FinToxicityClassification import * -from .fra.FrenchBookReviews import * -from .fra.MovieReviewSentimentClassification import * -from .guj.GujaratiNewsClassification import * -from .heb.HebrewSentimentAnalysis import * -from .hin.HindiDiscourseClassification import * -from .hin.SentimentAnalysisHindi import * -from .hrv.FrenkHrClassification import * -from .ind.IndonesianIdClickbaitClassification import * -from .ind.IndonesianMongabayConservationClassification import * -from .ita.ItaCaseholdClassification import * -from .ita.ItalianLinguistAcceptabilityClassification import * -from .jav.JavaneseIMDBClassification import * -from .jpn.WRIMEClassification import * -from .kan.KannadaNewsClassification import * -from .kor.KlueTC import * -from .kor.KorFin import * -from .kor.KorHateClassification import * -from .kor.KorSarcasmClassification import * -from .kur.KurdishSentimentClassification import * -from .mal.MalayalamNewsClassification import * -from .mar.MarathiNewsClassification import * -from .mkd.MacedonianTweetSentimentClassification import * -from .multilingual.AfriSentiClassification import * -from .multilingual.AfriSentiLangClassification import * -from .multilingual.AmazonCounterfactualClassification import * -from .multilingual.AmazonReviewsClassification import * -from .multilingual.CataloniaTweetClassification import * -from .multilingual.CyrillicTurkicLangClassification import * -from .multilingual.HinDialectClassification import * -from .multilingual.IndicLangClassification import * -from .multilingual.IndicNLPNewsClassification import * -from .multilingual.IndicSentimentClassification import * -from 
.multilingual.LanguageClassification import * -from .multilingual.MasakhaNEWSClassification import * -from .multilingual.MassiveIntentClassification import * -from .multilingual.MassiveScenarioClassification import * -from .multilingual.MTOPDomainClassification import * -from .multilingual.MTOPIntentClassification import * -from .multilingual.MultiHateClassification import * -from .multilingual.MultilingualSentimentClassification import * -from .multilingual.NaijaSenti import * -from .multilingual.NordicLangClassification import * -from .multilingual.NusaParagraphEmotionClassification import * -from .multilingual.NusaParagraphTopicClassification import * -from .multilingual.NusaXSenti import * -from .multilingual.ScalaClassification import * -from .multilingual.SIB200Classification import * -from .multilingual.SouthAfricanLangClassification import * -from .multilingual.SwissJudgementClassification import * -from .multilingual.TurkicClassification import * -from .multilingual.TweetSentimentClassification import * -from .mya.MyanmarNews import * -from .nep.NepaliNewsClassification import * -from .nld.DutchBookReviewSentimentClassification import * -from .nob.NoRecClassification import * -from .nob.NorwegianParliamentClassification import * -from .ory.OdiaNewsClassification import * -from .pan.PunjabiNewsClassification import * -from .pol.PolishClassification import * -from .por.HateSpeechPortugueseClassification import * -from .ron.Moroco import * -from .ron.RomanianReviewsSentiment import * -from .ron.RomanianSentimentClassification import * -from .rus.GeoreviewClassification import * -from .rus.HeadlineClassification import * -from .rus.InappropriatenessClassification import * -from .rus.KinopoiskClassification import * -from .rus.RuReviewsClassification import * -from .rus.RuSciBenchGRNTIClassification import * -from .rus.RuSciBenchOECDClassification import * -from .san.SanskritShlokasClassification import * -from .sin.SinhalaNewsClassification import * -from 
.sin.SinhalaNewsSourceClassification import * -from .slk.CSFDSKMovieReviewSentimentClassification import * -from .slk.SlovakHateSpeechClassification import * -from .slv.FrenkSlClassification import * -from .spa.SpanishNewsClassification import * -from .spa.SpanishSentimentClassification import * -from .ssw.SiswatiNewsClassification import * -from .svk.SlovakMovieReviewSentimentClassification import * -from .swa.SwahiliNewsClassification import * -from .swe.DalajClassification import * -from .swe.SwedishSentimentClassification import * -from .swe.SweRecClassification import * -from .tam.TamilNewsClassification import * -from .tel.TeluguAndhraJyotiNewsClassification import * -from .tha.WisesightSentimentClassification import * -from .tsn.TswanaNewsClassification import * -from .tur.TurkishMovieSentimentClassification import * -from .tur.TurkishProductSentimentClassification import * -from .ukr.UkrFormalityClassification import * -from .urd.UrduRomanSentimentClassification import * -from .vie.VieStudentFeedbackClassification import * -from .zho.CMTEBClassification import * -from .zho.YueOpenriceReviewClassification import ( - YueOpenriceReviewClassification, # noqa: F401 +from .ara import ( + AJGT, + HotelReviewSentimentClassification, + OnlineStoreReviewSentimentClassification, + RestaurantReviewSentimentClassification, + TweetEmotionClassification, + TweetSarcasmClassification, ) -from .zul.IsiZuluNewsClassification import * +from .ben import ( + BengaliDocumentClassification, + BengaliHateSpeechClassification, + BengaliSentimentAnalysis, +) +from .bul import BulgarianStoreReviewSentimentClassfication +from .ces import ( + CSFDCZMovieReviewSentimentClassification, + CzechProductReviewSentimentClassification, + CzechSoMeSentimentClassification, + CzechSubjectivityClassification, +) +from .dan import ( + AngryTweetsClassification, + DanishPoliticalCommentsClassification, + DdiscoCohesionClassification, + DKHateClassification, + LccSentimentClassification, +) +from 
.deu import GermanPoliticiansTwitterSentimentClassification, TenKGnadClassification +from .ell import GreekLegalCodeClassification +from .eng import ( + AmazonPolarityClassification, + ArxivClassification, + Banking77Classification, + CanadaTaxCourtOutcomesLegalBenchClassification, + ContractNLIConfidentialityOfAgreementLegalBenchClassification, + ContractNLIExplicitIdentificationLegalBenchClassification, + ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification, + ContractNLILimitedUseLegalBenchClassification, + ContractNLINoLicensingLegalBenchClassification, + ContractNLINoticeOnCompelledDisclosureLegalBenchClassification, + ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification, + ContractNLIPermissibleCopyLegalBenchClassification, + ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification, + ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification, + ContractNLIReturnOfConfidentialInformationLegalBenchClassification, + ContractNLISharingWithEmployeesLegalBenchClassification, + ContractNLISharingWithThirdPartiesLegalBenchClassification, + ContractNLISurvivalOfObligationsLegalBenchClassification, + CorporateLobbyingLegalBenchClassification, + CUADAffiliateLicenseLicenseeLegalBenchClassification, + CUADAffiliateLicenseLicensorLegalBenchClassification, + CUADAntiAssignmentLegalBenchClassification, + CUADAuditRightsLegalBenchClassification, + CUADCapOnLiabilityLegalBenchClassification, + CUADChangeOfControlLegalBenchClassification, + CUADCompetitiveRestrictionExceptionLegalBenchClassification, + CUADCovenantNotToSueLegalBenchClassification, + CUADEffectiveDateLegalBenchClassification, + CUADExclusivityLegalBenchClassification, + CUADExpirationDateLegalBenchClassification, + CUADGoverningLawLegalBenchClassification, + CUADInsuranceLegalBenchClassification, + CUADIPOwnershipAssignmentLegalBenchClassification, + CUADIrrevocableOrPerpetualLicenseLegalBenchClassification, + 
CUADJointIPOwnershipLegalBenchClassification, + CUADLicenseGrantLegalBenchClassification, + CUADLiquidatedDamagesLegalBenchClassification, + CUADMinimumCommitmentLegalBenchClassification, + CUADMostFavoredNationLegalBenchClassification, + CUADNonCompeteLegalBenchClassification, + CUADNonDisparagementLegalBenchClassification, + CUADNonTransferableLicenseLegalBenchClassification, + CUADNoSolicitOfCustomersLegalBenchClassification, + CUADNoSolicitOfEmployeesLegalBenchClassification, + CUADNoticePeriodToTerminateRenewalLegalBenchClassification, + CUADPostTerminationServicesLegalBenchClassification, + CUADPriceRestrictionsLegalBenchClassification, + CUADRenewalTermLegalBenchClassification, + CUADRevenueProfitSharingLegalBenchClassification, + CUADRofrRofoRofnLegalBenchClassification, + CUADSourceCodeEscrowLegalBenchClassification, + CUADTerminationForConvenienceLegalBenchClassification, + CUADThirdPartyBeneficiaryLegalBenchClassification, + CUADUncappedLiabilityLegalBenchClassification, + CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification, + CUADVolumeRestrictionLegalBenchClassification, + CUADWarrantyDurationLegalBenchClassification, + DBpediaClassification, + DefinitionClassificationLegalBenchClassification, + Diversity1LegalBenchClassification, + Diversity2LegalBenchClassification, + Diversity3LegalBenchClassification, + Diversity4LegalBenchClassification, + Diversity5LegalBenchClassification, + Diversity6LegalBenchClassification, + EmotionClassification, + FinancialPhrasebankClassification, + FrenkEnClassification, + FunctionOfDecisionSectionLegalBenchClassification, + ImdbClassification, + InsurancePolicyInterpretationLegalBenchClassification, + InternationalCitizenshipQuestionsLegalBenchClassification, + JCrewBlockerLegalBenchClassification, + LearnedHandsBenefitsLegalBenchClassification, + LearnedHandsBusinessLegalBenchClassification, + LearnedHandsConsumerLegalBenchClassification, + LearnedHandsCourtsLegalBenchClassification, + 
LearnedHandsCrimeLegalBenchClassification, + LearnedHandsDivorceLegalBenchClassification, + LearnedHandsDomesticViolenceLegalBenchClassification, + LearnedHandsEducationLegalBenchClassification, + LearnedHandsEmploymentLegalBenchClassification, + LearnedHandsEstatesLegalBenchClassification, + LearnedHandsFamilyLegalBenchClassification, + LearnedHandsHealthLegalBenchClassification, + LearnedHandsHousingLegalBenchClassification, + LearnedHandsImmigrationLegalBenchClassification, + LearnedHandsTortsLegalBenchClassification, + LearnedHandsTrafficLegalBenchClassification, + LegalReasoningCausalityLegalBenchClassification, + MAUDLegalBenchClassification, + NewsClassification, + NYSJudicialEthicsLegalBenchClassification, + OPP115DataRetentionLegalBenchClassification, + OPP115DataSecurityLegalBenchClassification, + OPP115DoNotTrackLegalBenchClassification, + OPP115FirstPartyCollectionUseLegalBenchClassification, + OPP115InternationalAndSpecificAudiencesLegalBenchClassification, + OPP115PolicyChangeLegalBenchClassification, + OPP115ThirdPartySharingCollectionLegalBenchClassification, + OPP115UserAccessEditAndDeletionLegalBenchClassification, + OPP115UserChoiceControlLegalBenchClassification, + OralArgumentQuestionPurposeLegalBenchClassification, + OverrulingLegalBenchClassification, + PatentClassification, + PersonalJurisdictionLegalBenchClassification, + PoemSentimentClassification, + PROALegalBenchClassification, + SCDBPAccountabilityLegalBenchClassification, + SCDBPAuditsLegalBenchClassification, + SCDBPCertificationLegalBenchClassification, + SCDBPTrainingLegalBenchClassification, + SCDBPVerificationLegalBenchClassification, + SCDDAccountabilityLegalBenchClassification, + SCDDAuditsLegalBenchClassification, + SCDDCertificationLegalBenchClassification, + SCDDTrainingLegalBenchClassification, + SCDDVerificationLegalBenchClassification, + TelemarketingSalesRuleLegalBenchClassification, + TextualismToolDictionariesLegalBenchClassification, + 
TextualismToolPlainLegalBenchClassification, + ToxicChatClassification, + ToxicConversationsClassification, + TweetSentimentExtractionClassification, + TweetTopicSingleClassification, + UCCVCommonLawLegalBenchClassification, + UnfairTOSLegalBenchClassification, + YahooAnswersTopicsClassification, + YelpReviewFullClassification, +) +from .est import EstonianValenceClassification +from .fas import PersianFoodSentimentClassification +from .fil import FilipinoHateSpeechClassification, FilipinoShopeeReviewsClassification +from .fin import FinToxicityClassification +from .fra import FrenchBookReviews, MovieReviewSentimentClassification +from .guj import GujaratiNewsClassification +from .heb import HebrewSentimentAnalysis +from .hin import HindiDiscourseClassification, SentimentAnalysisHindi +from .hrv import FrenkHrClassification +from .ind import ( + IndonesianIdClickbaitClassification, + IndonesianMongabayConservationClassification, +) +from .ita import ItaCaseholdClassification, ItalianLinguisticAcceptabilityClassification +from .jav import JavaneseIMDBClassification +from .jpn import WRIMEClassification +from .kan import KannadaNewsClassification +from .kat import GeorgianSentimentClassification +from .kor import KlueTC, KorFin, KorHateClassification, KorSarcasmClassification +from .kur import KurdishSentimentClassification +from .mal import MalayalamNewsClassification +from .mar import MarathiNewsClassification +from .mkd import MacedonianTweetSentimentClassification +from .multilingual import ( + AfriSentiClassification, + AfriSentiLangClassification, + AmazonCounterfactualClassification, + AmazonReviewsClassification, + CataloniaTweetClassification, + CyrillicTurkicLangClassification, + HinDialectClassification, + IndicLangClassification, + IndicNLPNewsClassification, + IndicSentimentClassification, + LanguageClassification, + MasakhaNEWSClassification, + MassiveIntentClassification, + MassiveScenarioClassification, + MTOPDomainClassification, + 
MTOPIntentClassification, + MultiHateClassification, + MultilingualSentimentClassification, + NaijaSenti, + NordicLangClassification, + NusaParagraphEmotionClassification, + NusaParagraphTopicClassification, + NusaXSentiClassification, + ScalaClassification, + SIB200Classification, + SouthAfricanLangClassification, + SwissJudgementClassification, + TurkicClassification, + TweetSentimentClassification, +) +from .mya import MyanmarNews +from .nep import NepaliNewsClassification +from .nld import DutchBookReviewSentimentClassification +from .nob import NoRecClassification, NorwegianParliamentClassification +from .ory import OdiaNewsClassification +from .pan import PunjabiNewsClassification +from .pol import ( + AllegroReviewsClassification, + CbdClassification, + PacClassification, + PolEmo2InClassification, + PolEmo2OutClassification, +) +from .por import HateSpeechPortugueseClassification +from .ron import Moroco, RomanianReviewsSentiment, RomanianSentimentClassification +from .rus import ( + GeoreviewClassification, + HeadlineClassification, + InappropriatenessClassification, + KinopoiskClassification, + RuReviewsClassification, + RuSciBenchGRNTIClassification, + RuSciBenchOECDClassification, +) +from .san import SanskritShlokasClassification +from .sin import SinhalaNewsClassification, SinhalaNewsSourceClassification +from .slk import ( + CSFDSKMovieReviewSentimentClassification, + SlovakHateSpeechClassification, +) +from .slv import FrenkSlClassification +from .spa import SpanishNewsClassification, SpanishSentimentClassification +from .ssw import SiswatiNewsClassification +from .svk import SlovakMovieReviewSentimentClassification +from .swa import SwahiliNewsClassification +from .swe import ( + DalajClassification, + SwedishSentimentClassification, + SweRecClassification, +) +from .tam import TamilNewsClassification +from .tel import TeluguAndhraJyotiNewsClassification +from .tha import WisesightSentimentClassification, WongnaiReviewsClassification +from .tsn 
import TswanaNewsClassification +from .tur import ( + TurkishMovieSentimentClassification, + TurkishProductSentimentClassification, +) +from .ukr import UkrFormalityClassification +from .urd import UrduRomanSentimentClassification +from .vie import VieStudentFeedbackClassification +from .zho import ( + IFlyTek, + JDReview, + MultilingualSentiment, + OnlineShopping, + TNews, + Waimai, + YueOpenriceReviewClassification, +) +from .zul import IsiZuluNewsClassification + +__all__ = [ + "TeluguAndhraJyotiNewsClassification", + "IFlyTek", + "JDReview", + "MultilingualSentiment", + "OnlineShopping", + "TNews", + "Waimai", + "YueOpenriceReviewClassification", + "HateSpeechPortugueseClassification", + "SpanishNewsClassification", + "SpanishSentimentClassification", + "AllegroReviewsClassification", + "CbdClassification", + "PacClassification", + "PolEmo2InClassification", + "PolEmo2OutClassification", + "BulgarianStoreReviewSentimentClassfication", + "KurdishSentimentClassification", + "ItaCaseholdClassification", + "ItalianLinguisticAcceptabilityClassification", + "GeorgianSentimentClassification", + "DalajClassification", + "SweRecClassification", + "SwedishSentimentClassification", + "CSFDSKMovieReviewSentimentClassification", + "SlovakHateSpeechClassification", + "NorwegianParliamentClassification", + "NoRecClassification", + "FilipinoHateSpeechClassification", + "FilipinoShopeeReviewsClassification", + "MarathiNewsClassification", + "IndonesianIdClickbaitClassification", + "IndonesianMongabayConservationClassification", + "UrduRomanSentimentClassification", + "MacedonianTweetSentimentClassification", + "FrenkSlClassification", + "SwahiliNewsClassification", + "FinToxicityClassification", + "KannadaNewsClassification", + "TenKGnadClassification", + "GermanPoliticiansTwitterSentimentClassification", + "PunjabiNewsClassification", + "TswanaNewsClassification", + "TweetSarcasmClassification", + "TweetEmotionClassification", + "RestaurantReviewSentimentClassification", + 
"HotelReviewSentimentClassification", + "OnlineStoreReviewSentimentClassification", + "AJGT", + "TurkishProductSentimentClassification", + "TurkishMovieSentimentClassification", + "NepaliNewsClassification", + "VieStudentFeedbackClassification", + "DutchBookReviewSentimentClassification", + "SiswatiNewsClassification", + "UkrFormalityClassification", + "SanskritShlokasClassification", + "SlovakMovieReviewSentimentClassification", + "AngryTweetsClassification", + "DdiscoCohesionClassification", + "DanishPoliticalCommentsClassification", + "DKHateClassification", + "LccSentimentClassification", + "TamilNewsClassification", + "CSFDCZMovieReviewSentimentClassification", + "CzechSubjectivityClassification", + "CzechProductReviewSentimentClassification", + "CzechSoMeSentimentClassification", + "EstonianValenceClassification", + "MyanmarNews", + "JavaneseIMDBClassification", + "YahooAnswersTopicsClassification", + "CUADAffiliateLicenseLicenseeLegalBenchClassification", + "CUADAffiliateLicenseLicensorLegalBenchClassification", + "CUADAntiAssignmentLegalBenchClassification", + "CUADAuditRightsLegalBenchClassification", + "CUADCapOnLiabilityLegalBenchClassification", + "CUADChangeOfControlLegalBenchClassification", + "CUADCompetitiveRestrictionExceptionLegalBenchClassification", + "CUADCovenantNotToSueLegalBenchClassification", + "CUADEffectiveDateLegalBenchClassification", + "CUADExclusivityLegalBenchClassification", + "CUADExpirationDateLegalBenchClassification", + "CUADGoverningLawLegalBenchClassification", + "CUADIPOwnershipAssignmentLegalBenchClassification", + "CUADInsuranceLegalBenchClassification", + "CUADIrrevocableOrPerpetualLicenseLegalBenchClassification", + "CUADJointIPOwnershipLegalBenchClassification", + "CUADLicenseGrantLegalBenchClassification", + "CUADLiquidatedDamagesLegalBenchClassification", + "CUADMinimumCommitmentLegalBenchClassification", + "CUADMostFavoredNationLegalBenchClassification", + "CUADNoSolicitOfCustomersLegalBenchClassification", + 
"CUADNoSolicitOfEmployeesLegalBenchClassification", + "CUADNonCompeteLegalBenchClassification", + "CUADNonDisparagementLegalBenchClassification", + "CUADNonTransferableLicenseLegalBenchClassification", + "CUADNoticePeriodToTerminateRenewalLegalBenchClassification", + "CUADPostTerminationServicesLegalBenchClassification", + "CUADPriceRestrictionsLegalBenchClassification", + "CUADRenewalTermLegalBenchClassification", + "CUADRevenueProfitSharingLegalBenchClassification", + "CUADRofrRofoRofnLegalBenchClassification", + "CUADSourceCodeEscrowLegalBenchClassification", + "CUADTerminationForConvenienceLegalBenchClassification", + "CUADThirdPartyBeneficiaryLegalBenchClassification", + "CUADUncappedLiabilityLegalBenchClassification", + "CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification", + "CUADVolumeRestrictionLegalBenchClassification", + "CUADWarrantyDurationLegalBenchClassification", + "CanadaTaxCourtOutcomesLegalBenchClassification", + "ContractNLIConfidentialityOfAgreementLegalBenchClassification", + "ContractNLIExplicitIdentificationLegalBenchClassification", + "ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification", + "ContractNLILimitedUseLegalBenchClassification", + "ContractNLINoLicensingLegalBenchClassification", + "ContractNLINoticeOnCompelledDisclosureLegalBenchClassification", + "ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissibleCopyLegalBenchClassification", + "ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification", + "ContractNLIReturnOfConfidentialInformationLegalBenchClassification", + "ContractNLISharingWithEmployeesLegalBenchClassification", + "ContractNLISharingWithThirdPartiesLegalBenchClassification", + "ContractNLISurvivalOfObligationsLegalBenchClassification", + "CorporateLobbyingLegalBenchClassification", + "DefinitionClassificationLegalBenchClassification", + 
"Diversity1LegalBenchClassification", + "Diversity2LegalBenchClassification", + "Diversity3LegalBenchClassification", + "Diversity4LegalBenchClassification", + "Diversity5LegalBenchClassification", + "Diversity6LegalBenchClassification", + "FunctionOfDecisionSectionLegalBenchClassification", + "InsurancePolicyInterpretationLegalBenchClassification", + "InternationalCitizenshipQuestionsLegalBenchClassification", + "JCrewBlockerLegalBenchClassification", + "LearnedHandsBenefitsLegalBenchClassification", + "LearnedHandsBusinessLegalBenchClassification", + "LearnedHandsConsumerLegalBenchClassification", + "LearnedHandsCourtsLegalBenchClassification", + "LearnedHandsCrimeLegalBenchClassification", + "LearnedHandsDivorceLegalBenchClassification", + "LearnedHandsDomesticViolenceLegalBenchClassification", + "LearnedHandsEducationLegalBenchClassification", + "LearnedHandsEmploymentLegalBenchClassification", + "LearnedHandsEstatesLegalBenchClassification", + "LearnedHandsFamilyLegalBenchClassification", + "LearnedHandsHealthLegalBenchClassification", + "LearnedHandsHousingLegalBenchClassification", + "LearnedHandsImmigrationLegalBenchClassification", + "LearnedHandsTortsLegalBenchClassification", + "LearnedHandsTrafficLegalBenchClassification", + "LegalReasoningCausalityLegalBenchClassification", + "MAUDLegalBenchClassification", + "NYSJudicialEthicsLegalBenchClassification", + "OPP115DataRetentionLegalBenchClassification", + "OPP115DataSecurityLegalBenchClassification", + "OPP115DoNotTrackLegalBenchClassification", + "OPP115FirstPartyCollectionUseLegalBenchClassification", + "OPP115InternationalAndSpecificAudiencesLegalBenchClassification", + "OPP115PolicyChangeLegalBenchClassification", + "OPP115ThirdPartySharingCollectionLegalBenchClassification", + "OPP115UserAccessEditAndDeletionLegalBenchClassification", + "OPP115UserChoiceControlLegalBenchClassification", + "OralArgumentQuestionPurposeLegalBenchClassification", + "OverrulingLegalBenchClassification", + 
"PROALegalBenchClassification", + "PersonalJurisdictionLegalBenchClassification", + "SCDBPAccountabilityLegalBenchClassification", + "SCDBPAuditsLegalBenchClassification", + "SCDBPCertificationLegalBenchClassification", + "SCDBPTrainingLegalBenchClassification", + "SCDBPVerificationLegalBenchClassification", + "SCDDAccountabilityLegalBenchClassification", + "SCDDAuditsLegalBenchClassification", + "SCDDCertificationLegalBenchClassification", + "SCDDTrainingLegalBenchClassification", + "SCDDVerificationLegalBenchClassification", + "TelemarketingSalesRuleLegalBenchClassification", + "TextualismToolDictionariesLegalBenchClassification", + "TextualismToolPlainLegalBenchClassification", + "UCCVCommonLawLegalBenchClassification", + "UnfairTOSLegalBenchClassification", + "FinancialPhrasebankClassification", + "DBpediaClassification", + "FrenkEnClassification", + "TweetTopicSingleClassification", + "AmazonPolarityClassification", + "NewsClassification", + "ToxicChatClassification", + "YelpReviewFullClassification", + "ToxicConversationsClassification", + "TweetSentimentExtractionClassification", + "PatentClassification", + "ImdbClassification", + "ArxivClassification", + "EmotionClassification", + "PoemSentimentClassification", + "Banking77Classification", + "PersianFoodSentimentClassification", + "HebrewSentimentAnalysis", + "BengaliSentimentAnalysis", + "BengaliDocumentClassification", + "BengaliHateSpeechClassification", + "SinhalaNewsSourceClassification", + "SinhalaNewsClassification", + "WisesightSentimentClassification", + "WongnaiReviewsClassification", + "WRIMEClassification", + "RomanianReviewsSentiment", + "Moroco", + "RomanianSentimentClassification", + "KorSarcasmClassification", + "KorHateClassification", + "KorFin", + "KlueTC", + "IndicLangClassification", + "SouthAfricanLangClassification", + "SwissJudgementClassification", + "AmazonReviewsClassification", + "NaijaSenti", + "TurkicClassification", + "ScalaClassification", + 
"MultilingualSentimentClassification", + "SIB200Classification", + "NordicLangClassification", + "NusaParagraphTopicClassification", + "CyrillicTurkicLangClassification", + "IndicNLPNewsClassification", + "MassiveScenarioClassification", + "MTOPIntentClassification", + "NusaParagraphEmotionClassification", + "MultiHateClassification", + "AfriSentiClassification", + "IndicSentimentClassification", + "LanguageClassification", + "AfriSentiLangClassification", + "NusaXSentiClassification", + "MTOPDomainClassification", + "HinDialectClassification", + "CataloniaTweetClassification", + "TweetSentimentClassification", + "MassiveIntentClassification", + "AmazonCounterfactualClassification", + "MasakhaNEWSClassification", + "GujaratiNewsClassification", + "IsiZuluNewsClassification", + "KinopoiskClassification", + "HeadlineClassification", + "InappropriatenessClassification", + "RuSciBenchGRNTIClassification", + "RuSciBenchOECDClassification", + "RuReviewsClassification", + "GeoreviewClassification", + "OdiaNewsClassification", + "GreekLegalCodeClassification", + "MovieReviewSentimentClassification", + "FrenchBookReviews", + "FrenkHrClassification", + "HindiDiscourseClassification", + "SentimentAnalysisHindi", + "MalayalamNewsClassification", +] diff --git a/mteb/tasks/Classification/ara/__init__.py b/mteb/tasks/Classification/ara/__init__.py index e69de29bb2..f23b02d135 100644 --- a/mteb/tasks/Classification/ara/__init__.py +++ b/mteb/tasks/Classification/ara/__init__.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from .AJGT import AJGT +from .HotelReviewSentimentClassification import HotelReviewSentimentClassification +from .OnlineStoreReviewSentimentClassification import ( + OnlineStoreReviewSentimentClassification, +) +from .RestaurantReviewSentimentClassification import ( + RestaurantReviewSentimentClassification, +) +from .TweetEmotionClassification import TweetEmotionClassification +from .TweetSarcasmClassification import TweetSarcasmClassification + 
+__all__ = [ + "TweetSarcasmClassification", + "TweetEmotionClassification", + "RestaurantReviewSentimentClassification", + "HotelReviewSentimentClassification", + "OnlineStoreReviewSentimentClassification", + "AJGT", +] diff --git a/mteb/tasks/Classification/ben/__init__.py b/mteb/tasks/Classification/ben/__init__.py index e69de29bb2..ae96c8b0b3 100644 --- a/mteb/tasks/Classification/ben/__init__.py +++ b/mteb/tasks/Classification/ben/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .BengaliDocumentClassification import BengaliDocumentClassification +from .BengaliHateSpeechClassification import BengaliHateSpeechClassification +from .BengaliSentimentAnalysis import BengaliSentimentAnalysis + +__all__ = [ + "BengaliSentimentAnalysis", + "BengaliDocumentClassification", + "BengaliHateSpeechClassification", +] diff --git a/mteb/tasks/Classification/bul/__init__.py b/mteb/tasks/Classification/bul/__init__.py index e69de29bb2..5a5d0fbb3b 100644 --- a/mteb/tasks/Classification/bul/__init__.py +++ b/mteb/tasks/Classification/bul/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .BulgarianStoreReviewSentimentClassfication import ( + BulgarianStoreReviewSentimentClassfication, +) + +__all__ = ["BulgarianStoreReviewSentimentClassfication"] diff --git a/mteb/tasks/Classification/ces/__init__.py b/mteb/tasks/Classification/ces/__init__.py index e69de29bb2..e3f0adaa87 100644 --- a/mteb/tasks/Classification/ces/__init__.py +++ b/mteb/tasks/Classification/ces/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .CSFDCZMovieReviewSentimentClassification import ( + CSFDCZMovieReviewSentimentClassification, +) +from .CzechProductReviewSentimentClassification import ( + CzechProductReviewSentimentClassification, +) +from .CzechSoMeSentimentClassification import CzechSoMeSentimentClassification +from .CzechSubjectivityClassification import CzechSubjectivityClassification + +__all__ = [ + 
"CSFDCZMovieReviewSentimentClassification", + "CzechSubjectivityClassification", + "CzechProductReviewSentimentClassification", + "CzechSoMeSentimentClassification", +] diff --git a/mteb/tasks/Classification/dan/__init__.py b/mteb/tasks/Classification/dan/__init__.py index e69de29bb2..f47c1a67fd 100644 --- a/mteb/tasks/Classification/dan/__init__.py +++ b/mteb/tasks/Classification/dan/__init__.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from .AngryTweetsClassification import AngryTweetsClassification +from .DanishPoliticalCommentsClassification import DanishPoliticalCommentsClassification +from .DdiscoCohesionClassification import DdiscoCohesionClassification +from .DKHateClassification import DKHateClassification +from .LccSentimentClassification import LccSentimentClassification + +__all__ = [ + "AngryTweetsClassification", + "DdiscoCohesionClassification", + "DanishPoliticalCommentsClassification", + "DKHateClassification", + "LccSentimentClassification", +] diff --git a/mteb/tasks/Classification/deu/__init__.py b/mteb/tasks/Classification/deu/__init__.py index e69de29bb2..673aafd746 100644 --- a/mteb/tasks/Classification/deu/__init__.py +++ b/mteb/tasks/Classification/deu/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .GermanPoliticiansTwitterSentimentClassification import ( + GermanPoliticiansTwitterSentimentClassification, +) +from .TenKGnadClassification import TenKGnadClassification + +__all__ = ["TenKGnadClassification", "GermanPoliticiansTwitterSentimentClassification"] diff --git a/mteb/tasks/Classification/ell/__init__.py b/mteb/tasks/Classification/ell/__init__.py index e69de29bb2..6c9f66d55f 100644 --- a/mteb/tasks/Classification/ell/__init__.py +++ b/mteb/tasks/Classification/ell/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GreekLegalCodeClassification import GreekLegalCodeClassification + +__all__ = ["GreekLegalCodeClassification"] diff --git 
a/mteb/tasks/Classification/eng/__init__.py b/mteb/tasks/Classification/eng/__init__.py index e69de29bb2..01b996a96d 100644 --- a/mteb/tasks/Classification/eng/__init__.py +++ b/mteb/tasks/Classification/eng/__init__.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +from .AmazonPolarityClassification import AmazonPolarityClassification +from .ArxivClassification import ArxivClassification +from .Banking77Classification import Banking77Classification +from .DBpediaClassification import DBpediaClassification +from .EmotionClassification import EmotionClassification +from .FinancialPhrasebankClassification import FinancialPhrasebankClassification +from .FrenkEnClassification import FrenkEnClassification +from .ImdbClassification import ImdbClassification +from .LegalBenchClassification import ( + CanadaTaxCourtOutcomesLegalBenchClassification, + ContractNLIConfidentialityOfAgreementLegalBenchClassification, + ContractNLIExplicitIdentificationLegalBenchClassification, + ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification, + ContractNLILimitedUseLegalBenchClassification, + ContractNLINoLicensingLegalBenchClassification, + ContractNLINoticeOnCompelledDisclosureLegalBenchClassification, + ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification, + ContractNLIPermissibleCopyLegalBenchClassification, + ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification, + ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification, + ContractNLIReturnOfConfidentialInformationLegalBenchClassification, + ContractNLISharingWithEmployeesLegalBenchClassification, + ContractNLISharingWithThirdPartiesLegalBenchClassification, + ContractNLISurvivalOfObligationsLegalBenchClassification, + CorporateLobbyingLegalBenchClassification, + CUADAffiliateLicenseLicenseeLegalBenchClassification, + CUADAffiliateLicenseLicensorLegalBenchClassification, + CUADAntiAssignmentLegalBenchClassification, + 
CUADAuditRightsLegalBenchClassification, + CUADCapOnLiabilityLegalBenchClassification, + CUADChangeOfControlLegalBenchClassification, + CUADCompetitiveRestrictionExceptionLegalBenchClassification, + CUADCovenantNotToSueLegalBenchClassification, + CUADEffectiveDateLegalBenchClassification, + CUADExclusivityLegalBenchClassification, + CUADExpirationDateLegalBenchClassification, + CUADGoverningLawLegalBenchClassification, + CUADInsuranceLegalBenchClassification, + CUADIPOwnershipAssignmentLegalBenchClassification, + CUADIrrevocableOrPerpetualLicenseLegalBenchClassification, + CUADJointIPOwnershipLegalBenchClassification, + CUADLicenseGrantLegalBenchClassification, + CUADLiquidatedDamagesLegalBenchClassification, + CUADMinimumCommitmentLegalBenchClassification, + CUADMostFavoredNationLegalBenchClassification, + CUADNonCompeteLegalBenchClassification, + CUADNonDisparagementLegalBenchClassification, + CUADNonTransferableLicenseLegalBenchClassification, + CUADNoSolicitOfCustomersLegalBenchClassification, + CUADNoSolicitOfEmployeesLegalBenchClassification, + CUADNoticePeriodToTerminateRenewalLegalBenchClassification, + CUADPostTerminationServicesLegalBenchClassification, + CUADPriceRestrictionsLegalBenchClassification, + CUADRenewalTermLegalBenchClassification, + CUADRevenueProfitSharingLegalBenchClassification, + CUADRofrRofoRofnLegalBenchClassification, + CUADSourceCodeEscrowLegalBenchClassification, + CUADTerminationForConvenienceLegalBenchClassification, + CUADThirdPartyBeneficiaryLegalBenchClassification, + CUADUncappedLiabilityLegalBenchClassification, + CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification, + CUADVolumeRestrictionLegalBenchClassification, + CUADWarrantyDurationLegalBenchClassification, + DefinitionClassificationLegalBenchClassification, + Diversity1LegalBenchClassification, + Diversity2LegalBenchClassification, + Diversity3LegalBenchClassification, + Diversity4LegalBenchClassification, + Diversity5LegalBenchClassification, + 
Diversity6LegalBenchClassification, + FunctionOfDecisionSectionLegalBenchClassification, + InsurancePolicyInterpretationLegalBenchClassification, + InternationalCitizenshipQuestionsLegalBenchClassification, + JCrewBlockerLegalBenchClassification, + LearnedHandsBenefitsLegalBenchClassification, + LearnedHandsBusinessLegalBenchClassification, + LearnedHandsConsumerLegalBenchClassification, + LearnedHandsCourtsLegalBenchClassification, + LearnedHandsCrimeLegalBenchClassification, + LearnedHandsDivorceLegalBenchClassification, + LearnedHandsDomesticViolenceLegalBenchClassification, + LearnedHandsEducationLegalBenchClassification, + LearnedHandsEmploymentLegalBenchClassification, + LearnedHandsEstatesLegalBenchClassification, + LearnedHandsFamilyLegalBenchClassification, + LearnedHandsHealthLegalBenchClassification, + LearnedHandsHousingLegalBenchClassification, + LearnedHandsImmigrationLegalBenchClassification, + LearnedHandsTortsLegalBenchClassification, + LearnedHandsTrafficLegalBenchClassification, + LegalReasoningCausalityLegalBenchClassification, + MAUDLegalBenchClassification, + NYSJudicialEthicsLegalBenchClassification, + OPP115DataRetentionLegalBenchClassification, + OPP115DataSecurityLegalBenchClassification, + OPP115DoNotTrackLegalBenchClassification, + OPP115FirstPartyCollectionUseLegalBenchClassification, + OPP115InternationalAndSpecificAudiencesLegalBenchClassification, + OPP115PolicyChangeLegalBenchClassification, + OPP115ThirdPartySharingCollectionLegalBenchClassification, + OPP115UserAccessEditAndDeletionLegalBenchClassification, + OPP115UserChoiceControlLegalBenchClassification, + OralArgumentQuestionPurposeLegalBenchClassification, + OverrulingLegalBenchClassification, + PersonalJurisdictionLegalBenchClassification, + PROALegalBenchClassification, + SCDBPAccountabilityLegalBenchClassification, + SCDBPAuditsLegalBenchClassification, + SCDBPCertificationLegalBenchClassification, + SCDBPTrainingLegalBenchClassification, + 
SCDBPVerificationLegalBenchClassification, + SCDDAccountabilityLegalBenchClassification, + SCDDAuditsLegalBenchClassification, + SCDDCertificationLegalBenchClassification, + SCDDTrainingLegalBenchClassification, + SCDDVerificationLegalBenchClassification, + TelemarketingSalesRuleLegalBenchClassification, + TextualismToolDictionariesLegalBenchClassification, + TextualismToolPlainLegalBenchClassification, + UCCVCommonLawLegalBenchClassification, + UnfairTOSLegalBenchClassification, +) +from .NewsClassification import NewsClassification +from .PatentClassification import PatentClassification +from .PoemSentimentClassification import PoemSentimentClassification +from .ToxicChatClassification import ToxicChatClassification +from .ToxicConversationsClassification import ToxicConversationsClassification +from .TweetSentimentExtractionClassification import ( + TweetSentimentExtractionClassification, +) +from .TweetTopicSingleClassification import TweetTopicSingleClassification +from .YahooAnswersTopicsClassification import YahooAnswersTopicsClassification +from .YelpReviewFullClassification import YelpReviewFullClassification + +__all__ = [ + "YahooAnswersTopicsClassification", + "CUADAffiliateLicenseLicenseeLegalBenchClassification", + "CUADAffiliateLicenseLicensorLegalBenchClassification", + "CUADAntiAssignmentLegalBenchClassification", + "CUADAuditRightsLegalBenchClassification", + "CUADCapOnLiabilityLegalBenchClassification", + "CUADChangeOfControlLegalBenchClassification", + "CUADCompetitiveRestrictionExceptionLegalBenchClassification", + "CUADCovenantNotToSueLegalBenchClassification", + "CUADEffectiveDateLegalBenchClassification", + "CUADExclusivityLegalBenchClassification", + "CUADExpirationDateLegalBenchClassification", + "CUADGoverningLawLegalBenchClassification", + "CUADIPOwnershipAssignmentLegalBenchClassification", + "CUADInsuranceLegalBenchClassification", + "CUADIrrevocableOrPerpetualLicenseLegalBenchClassification", + 
"CUADJointIPOwnershipLegalBenchClassification", + "CUADLicenseGrantLegalBenchClassification", + "CUADLiquidatedDamagesLegalBenchClassification", + "CUADMinimumCommitmentLegalBenchClassification", + "CUADMostFavoredNationLegalBenchClassification", + "CUADNoSolicitOfCustomersLegalBenchClassification", + "CUADNoSolicitOfEmployeesLegalBenchClassification", + "CUADNonCompeteLegalBenchClassification", + "CUADNonDisparagementLegalBenchClassification", + "CUADNonTransferableLicenseLegalBenchClassification", + "CUADNoticePeriodToTerminateRenewalLegalBenchClassification", + "CUADPostTerminationServicesLegalBenchClassification", + "CUADPriceRestrictionsLegalBenchClassification", + "CUADRenewalTermLegalBenchClassification", + "CUADRevenueProfitSharingLegalBenchClassification", + "CUADRofrRofoRofnLegalBenchClassification", + "CUADSourceCodeEscrowLegalBenchClassification", + "CUADTerminationForConvenienceLegalBenchClassification", + "CUADThirdPartyBeneficiaryLegalBenchClassification", + "CUADUncappedLiabilityLegalBenchClassification", + "CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification", + "CUADVolumeRestrictionLegalBenchClassification", + "CUADWarrantyDurationLegalBenchClassification", + "CanadaTaxCourtOutcomesLegalBenchClassification", + "ContractNLIConfidentialityOfAgreementLegalBenchClassification", + "ContractNLIExplicitIdentificationLegalBenchClassification", + "ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification", + "ContractNLILimitedUseLegalBenchClassification", + "ContractNLINoLicensingLegalBenchClassification", + "ContractNLINoticeOnCompelledDisclosureLegalBenchClassification", + "ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissibleCopyLegalBenchClassification", + "ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification", + 
"ContractNLIReturnOfConfidentialInformationLegalBenchClassification", + "ContractNLISharingWithEmployeesLegalBenchClassification", + "ContractNLISharingWithThirdPartiesLegalBenchClassification", + "ContractNLISurvivalOfObligationsLegalBenchClassification", + "CorporateLobbyingLegalBenchClassification", + "DefinitionClassificationLegalBenchClassification", + "Diversity1LegalBenchClassification", + "Diversity2LegalBenchClassification", + "Diversity3LegalBenchClassification", + "Diversity4LegalBenchClassification", + "Diversity5LegalBenchClassification", + "Diversity6LegalBenchClassification", + "FunctionOfDecisionSectionLegalBenchClassification", + "InsurancePolicyInterpretationLegalBenchClassification", + "InternationalCitizenshipQuestionsLegalBenchClassification", + "JCrewBlockerLegalBenchClassification", + "LearnedHandsBenefitsLegalBenchClassification", + "LearnedHandsBusinessLegalBenchClassification", + "LearnedHandsConsumerLegalBenchClassification", + "LearnedHandsCourtsLegalBenchClassification", + "LearnedHandsCrimeLegalBenchClassification", + "LearnedHandsDivorceLegalBenchClassification", + "LearnedHandsDomesticViolenceLegalBenchClassification", + "LearnedHandsEducationLegalBenchClassification", + "LearnedHandsEmploymentLegalBenchClassification", + "LearnedHandsEstatesLegalBenchClassification", + "LearnedHandsFamilyLegalBenchClassification", + "LearnedHandsHealthLegalBenchClassification", + "LearnedHandsHousingLegalBenchClassification", + "LearnedHandsImmigrationLegalBenchClassification", + "LearnedHandsTortsLegalBenchClassification", + "LearnedHandsTrafficLegalBenchClassification", + "LegalReasoningCausalityLegalBenchClassification", + "MAUDLegalBenchClassification", + "NYSJudicialEthicsLegalBenchClassification", + "OPP115DataRetentionLegalBenchClassification", + "OPP115DataSecurityLegalBenchClassification", + "OPP115DoNotTrackLegalBenchClassification", + "OPP115FirstPartyCollectionUseLegalBenchClassification", + 
"OPP115InternationalAndSpecificAudiencesLegalBenchClassification", + "OPP115PolicyChangeLegalBenchClassification", + "OPP115ThirdPartySharingCollectionLegalBenchClassification", + "OPP115UserAccessEditAndDeletionLegalBenchClassification", + "OPP115UserChoiceControlLegalBenchClassification", + "OralArgumentQuestionPurposeLegalBenchClassification", + "OverrulingLegalBenchClassification", + "PROALegalBenchClassification", + "PersonalJurisdictionLegalBenchClassification", + "SCDBPAccountabilityLegalBenchClassification", + "SCDBPAuditsLegalBenchClassification", + "SCDBPCertificationLegalBenchClassification", + "SCDBPTrainingLegalBenchClassification", + "SCDBPVerificationLegalBenchClassification", + "SCDDAccountabilityLegalBenchClassification", + "SCDDAuditsLegalBenchClassification", + "SCDDCertificationLegalBenchClassification", + "SCDDTrainingLegalBenchClassification", + "SCDDVerificationLegalBenchClassification", + "TelemarketingSalesRuleLegalBenchClassification", + "TextualismToolDictionariesLegalBenchClassification", + "TextualismToolPlainLegalBenchClassification", + "UCCVCommonLawLegalBenchClassification", + "UnfairTOSLegalBenchClassification", + "FinancialPhrasebankClassification", + "DBpediaClassification", + "FrenkEnClassification", + "TweetTopicSingleClassification", + "AmazonPolarityClassification", + "NewsClassification", + "ToxicChatClassification", + "YelpReviewFullClassification", + "ToxicConversationsClassification", + "TweetSentimentExtractionClassification", + "PatentClassification", + "ImdbClassification", + "ArxivClassification", + "EmotionClassification", + "PoemSentimentClassification", + "Banking77Classification", +] diff --git a/mteb/tasks/Classification/est/__init__.py b/mteb/tasks/Classification/est/__init__.py new file mode 100644 index 0000000000..ba49956beb --- /dev/null +++ b/mteb/tasks/Classification/est/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .estonian_valence import EstonianValenceClassification + +__all__ 
= ["EstonianValenceClassification"] diff --git a/mteb/tasks/Classification/fas/__init__.py b/mteb/tasks/Classification/fas/__init__.py index e69de29bb2..1f35246357 100644 --- a/mteb/tasks/Classification/fas/__init__.py +++ b/mteb/tasks/Classification/fas/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .PersianFoodSentimentClassification import PersianFoodSentimentClassification + +__all__ = ["PersianFoodSentimentClassification"] diff --git a/mteb/tasks/Classification/fil/__init__.py b/mteb/tasks/Classification/fil/__init__.py index e69de29bb2..bdc37399f2 100644 --- a/mteb/tasks/Classification/fil/__init__.py +++ b/mteb/tasks/Classification/fil/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .FilipinoHateSpeechClassification import FilipinoHateSpeechClassification +from .FilipinoShopeeReviewsClassification import FilipinoShopeeReviewsClassification + +__all__ = ["FilipinoHateSpeechClassification", "FilipinoShopeeReviewsClassification"] diff --git a/mteb/tasks/Classification/fin/__init__.py b/mteb/tasks/Classification/fin/__init__.py index e69de29bb2..c1d2cf4208 100644 --- a/mteb/tasks/Classification/fin/__init__.py +++ b/mteb/tasks/Classification/fin/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FinToxicityClassification import FinToxicityClassification + +__all__ = ["FinToxicityClassification"] diff --git a/mteb/tasks/Classification/fra/__init__.py b/mteb/tasks/Classification/fra/__init__.py index e69de29bb2..cff9213baf 100644 --- a/mteb/tasks/Classification/fra/__init__.py +++ b/mteb/tasks/Classification/fra/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .FrenchBookReviews import FrenchBookReviews +from .MovieReviewSentimentClassification import MovieReviewSentimentClassification + +__all__ = ["MovieReviewSentimentClassification", "FrenchBookReviews"] diff --git a/mteb/tasks/Classification/guj/__init__.py b/mteb/tasks/Classification/guj/__init__.py index 
e69de29bb2..211eb3bf86 100644 --- a/mteb/tasks/Classification/guj/__init__.py +++ b/mteb/tasks/Classification/guj/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GujaratiNewsClassification import GujaratiNewsClassification + +__all__ = ["GujaratiNewsClassification"] diff --git a/mteb/tasks/Classification/heb/__init__.py b/mteb/tasks/Classification/heb/__init__.py index e69de29bb2..1e6a039dd2 100644 --- a/mteb/tasks/Classification/heb/__init__.py +++ b/mteb/tasks/Classification/heb/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .HebrewSentimentAnalysis import HebrewSentimentAnalysis + +__all__ = ["HebrewSentimentAnalysis"] diff --git a/mteb/tasks/Classification/hin/__init__.py b/mteb/tasks/Classification/hin/__init__.py index e69de29bb2..da7a5dee51 100644 --- a/mteb/tasks/Classification/hin/__init__.py +++ b/mteb/tasks/Classification/hin/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .HindiDiscourseClassification import HindiDiscourseClassification +from .SentimentAnalysisHindi import SentimentAnalysisHindi + +__all__ = ["HindiDiscourseClassification", "SentimentAnalysisHindi"] diff --git a/mteb/tasks/Classification/hrv/__init__.py b/mteb/tasks/Classification/hrv/__init__.py index e69de29bb2..07be541615 100644 --- a/mteb/tasks/Classification/hrv/__init__.py +++ b/mteb/tasks/Classification/hrv/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FrenkHrClassification import FrenkHrClassification + +__all__ = ["FrenkHrClassification"] diff --git a/mteb/tasks/Classification/ind/__init__.py b/mteb/tasks/Classification/ind/__init__.py index e69de29bb2..e1efdfa7ce 100644 --- a/mteb/tasks/Classification/ind/__init__.py +++ b/mteb/tasks/Classification/ind/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .IndonesianIdClickbaitClassification import IndonesianIdClickbaitClassification +from .IndonesianMongabayConservationClassification import ( + 
IndonesianMongabayConservationClassification, +) + +__all__ = [ + "IndonesianIdClickbaitClassification", + "IndonesianMongabayConservationClassification", +] diff --git a/mteb/tasks/Classification/ita/__init__.py b/mteb/tasks/Classification/ita/__init__.py index e69de29bb2..4f987f9bfc 100644 --- a/mteb/tasks/Classification/ita/__init__.py +++ b/mteb/tasks/Classification/ita/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .ItaCaseholdClassification import ItaCaseholdClassification +from .ItalianLinguistAcceptabilityClassification import ( + ItalianLinguisticAcceptabilityClassification, +) + +__all__ = ["ItaCaseholdClassification", "ItalianLinguisticAcceptabilityClassification"] diff --git a/mteb/tasks/Classification/jav/__init__.py b/mteb/tasks/Classification/jav/__init__.py index e69de29bb2..417dae6e1f 100644 --- a/mteb/tasks/Classification/jav/__init__.py +++ b/mteb/tasks/Classification/jav/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .JavaneseIMDBClassification import JavaneseIMDBClassification + +__all__ = ["JavaneseIMDBClassification"] diff --git a/mteb/tasks/Classification/jpn/__init__.py b/mteb/tasks/Classification/jpn/__init__.py new file mode 100644 index 0000000000..6eca935f2f --- /dev/null +++ b/mteb/tasks/Classification/jpn/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .WRIMEClassification import WRIMEClassification + +__all__ = ["WRIMEClassification"] diff --git a/mteb/tasks/Classification/kan/__init__.py b/mteb/tasks/Classification/kan/__init__.py index e69de29bb2..faa9d78ba1 100644 --- a/mteb/tasks/Classification/kan/__init__.py +++ b/mteb/tasks/Classification/kan/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .KannadaNewsClassification import KannadaNewsClassification + +__all__ = ["KannadaNewsClassification"] diff --git a/mteb/tasks/Classification/kat/__init__.py b/mteb/tasks/Classification/kat/__init__.py index e69de29bb2..c26b90d67d 100644 --- 
a/mteb/tasks/Classification/kat/__init__.py +++ b/mteb/tasks/Classification/kat/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GeorgianSentimentClassification import GeorgianSentimentClassification + +__all__ = ["GeorgianSentimentClassification"] diff --git a/mteb/tasks/Classification/kor/__init__.py b/mteb/tasks/Classification/kor/__init__.py index e69de29bb2..bb6d78deb1 100644 --- a/mteb/tasks/Classification/kor/__init__.py +++ b/mteb/tasks/Classification/kor/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .KlueTC import KlueTC +from .KorFin import KorFin +from .KorHateClassification import KorHateClassification +from .KorSarcasmClassification import KorSarcasmClassification + +__all__ = ["KorSarcasmClassification", "KorHateClassification", "KorFin", "KlueTC"] diff --git a/mteb/tasks/Classification/kur/__init__.py b/mteb/tasks/Classification/kur/__init__.py index e69de29bb2..e72b9aa6a0 100644 --- a/mteb/tasks/Classification/kur/__init__.py +++ b/mteb/tasks/Classification/kur/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .KurdishSentimentClassification import KurdishSentimentClassification + +__all__ = ["KurdishSentimentClassification"] diff --git a/mteb/tasks/Classification/mal/__init__.py b/mteb/tasks/Classification/mal/__init__.py index e69de29bb2..2ba5994626 100644 --- a/mteb/tasks/Classification/mal/__init__.py +++ b/mteb/tasks/Classification/mal/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MalayalamNewsClassification import MalayalamNewsClassification + +__all__ = ["MalayalamNewsClassification"] diff --git a/mteb/tasks/Classification/mar/__init__.py b/mteb/tasks/Classification/mar/__init__.py index e69de29bb2..3d34c2776a 100644 --- a/mteb/tasks/Classification/mar/__init__.py +++ b/mteb/tasks/Classification/mar/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MarathiNewsClassification import MarathiNewsClassification + +__all__ = 
["MarathiNewsClassification"] diff --git a/mteb/tasks/Classification/mkd/__init__.py b/mteb/tasks/Classification/mkd/__init__.py index e69de29bb2..cf4c140af8 100644 --- a/mteb/tasks/Classification/mkd/__init__.py +++ b/mteb/tasks/Classification/mkd/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .MacedonianTweetSentimentClassification import ( + MacedonianTweetSentimentClassification, +) + +__all__ = ["MacedonianTweetSentimentClassification"] diff --git a/mteb/tasks/Classification/multilingual/__init__.py b/mteb/tasks/Classification/multilingual/__init__.py index e69de29bb2..440ac7055b 100644 --- a/mteb/tasks/Classification/multilingual/__init__.py +++ b/mteb/tasks/Classification/multilingual/__init__.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from .AfriSentiClassification import AfriSentiClassification +from .AfriSentiLangClassification import AfriSentiLangClassification +from .AmazonCounterfactualClassification import AmazonCounterfactualClassification +from .AmazonReviewsClassification import AmazonReviewsClassification +from .CataloniaTweetClassification import CataloniaTweetClassification +from .CyrillicTurkicLangClassification import CyrillicTurkicLangClassification +from .HinDialectClassification import HinDialectClassification +from .IndicLangClassification import IndicLangClassification +from .IndicNLPNewsClassification import IndicNLPNewsClassification +from .IndicSentimentClassification import IndicSentimentClassification +from .LanguageClassification import LanguageClassification +from .MasakhaNEWSClassification import MasakhaNEWSClassification +from .MassiveIntentClassification import MassiveIntentClassification +from .MassiveScenarioClassification import MassiveScenarioClassification +from .MTOPDomainClassification import MTOPDomainClassification +from .MTOPIntentClassification import MTOPIntentClassification +from .MultiHateClassification import MultiHateClassification +from .MultilingualSentimentClassification 
import MultilingualSentimentClassification +from .NaijaSenti import NaijaSenti +from .NordicLangClassification import NordicLangClassification +from .NusaParagraphEmotionClassification import NusaParagraphEmotionClassification +from .NusaParagraphTopicClassification import NusaParagraphTopicClassification +from .NusaXSenti import NusaXSentiClassification +from .ScalaClassification import ScalaClassification +from .SIB200Classification import SIB200Classification +from .SouthAfricanLangClassification import SouthAfricanLangClassification +from .SwissJudgementClassification import SwissJudgementClassification +from .TurkicClassification import TurkicClassification +from .TweetSentimentClassification import TweetSentimentClassification + +__all__ = [ + "IndicLangClassification", + "SouthAfricanLangClassification", + "SwissJudgementClassification", + "AmazonReviewsClassification", + "NaijaSenti", + "TurkicClassification", + "ScalaClassification", + "MultilingualSentimentClassification", + "SIB200Classification", + "NordicLangClassification", + "NusaParagraphTopicClassification", + "CyrillicTurkicLangClassification", + "IndicNLPNewsClassification", + "MassiveScenarioClassification", + "MTOPIntentClassification", + "NusaParagraphEmotionClassification", + "MultiHateClassification", + "AfriSentiClassification", + "IndicSentimentClassification", + "LanguageClassification", + "AfriSentiLangClassification", + "NusaXSentiClassification", + "MTOPDomainClassification", + "HinDialectClassification", + "CataloniaTweetClassification", + "TweetSentimentClassification", + "MassiveIntentClassification", + "AmazonCounterfactualClassification", + "MasakhaNEWSClassification", +] diff --git a/mteb/tasks/Classification/mya/__init__.py b/mteb/tasks/Classification/mya/__init__.py index e69de29bb2..a209ee2a3e 100644 --- a/mteb/tasks/Classification/mya/__init__.py +++ b/mteb/tasks/Classification/mya/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MyanmarNews import 
MyanmarNews + +__all__ = ["MyanmarNews"] diff --git a/mteb/tasks/Classification/nep/__init__.py b/mteb/tasks/Classification/nep/__init__.py index e69de29bb2..b18cae3209 100644 --- a/mteb/tasks/Classification/nep/__init__.py +++ b/mteb/tasks/Classification/nep/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .NepaliNewsClassification import NepaliNewsClassification + +__all__ = ["NepaliNewsClassification"] diff --git a/mteb/tasks/Classification/nld/__init__.py b/mteb/tasks/Classification/nld/__init__.py index e69de29bb2..17fae09dda 100644 --- a/mteb/tasks/Classification/nld/__init__.py +++ b/mteb/tasks/Classification/nld/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .DutchBookReviewSentimentClassification import ( + DutchBookReviewSentimentClassification, +) + +__all__ = ["DutchBookReviewSentimentClassification"] diff --git a/mteb/tasks/Classification/nob/__init__.py b/mteb/tasks/Classification/nob/__init__.py index e69de29bb2..5a545c2346 100644 --- a/mteb/tasks/Classification/nob/__init__.py +++ b/mteb/tasks/Classification/nob/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .NoRecClassification import NoRecClassification +from .NorwegianParliamentClassification import NorwegianParliamentClassification + +__all__ = ["NorwegianParliamentClassification", "NoRecClassification"] diff --git a/mteb/tasks/Classification/ory/__init__.py b/mteb/tasks/Classification/ory/__init__.py new file mode 100644 index 0000000000..775f171ff0 --- /dev/null +++ b/mteb/tasks/Classification/ory/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .OdiaNewsClassification import OdiaNewsClassification + +__all__ = ["OdiaNewsClassification"] diff --git a/mteb/tasks/Classification/pan/__init__.py b/mteb/tasks/Classification/pan/__init__.py new file mode 100644 index 0000000000..cfedf3155f --- /dev/null +++ b/mteb/tasks/Classification/pan/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + 
+from .PunjabiNewsClassification import PunjabiNewsClassification + +__all__ = ["PunjabiNewsClassification"] diff --git a/mteb/tasks/Classification/pol/__init__.py b/mteb/tasks/Classification/pol/__init__.py index e69de29bb2..a3531bea1b 100644 --- a/mteb/tasks/Classification/pol/__init__.py +++ b/mteb/tasks/Classification/pol/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .PolishClassification import ( + AllegroReviewsClassification, + CbdClassification, + PacClassification, + PolEmo2InClassification, + PolEmo2OutClassification, +) + +__all__ = [ + "AllegroReviewsClassification", + "CbdClassification", + "PacClassification", + "PolEmo2InClassification", + "PolEmo2OutClassification", +] diff --git a/mteb/tasks/Classification/por/__init__.py b/mteb/tasks/Classification/por/__init__.py index e69de29bb2..4d202d5117 100644 --- a/mteb/tasks/Classification/por/__init__.py +++ b/mteb/tasks/Classification/por/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .HateSpeechPortugueseClassification import HateSpeechPortugueseClassification + +__all__ = ["HateSpeechPortugueseClassification"] diff --git a/mteb/tasks/Classification/ron/__init__.py b/mteb/tasks/Classification/ron/__init__.py index e69de29bb2..f8473c0c9c 100644 --- a/mteb/tasks/Classification/ron/__init__.py +++ b/mteb/tasks/Classification/ron/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .Moroco import Moroco +from .RomanianReviewsSentiment import RomanianReviewsSentiment +from .RomanianSentimentClassification import RomanianSentimentClassification + +__all__ = ["RomanianReviewsSentiment", "Moroco", "RomanianSentimentClassification"] diff --git a/mteb/tasks/Classification/rus/__init__.py b/mteb/tasks/Classification/rus/__init__.py index e69de29bb2..2fada5e4e7 100644 --- a/mteb/tasks/Classification/rus/__init__.py +++ b/mteb/tasks/Classification/rus/__init__.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from 
.GeoreviewClassification import GeoreviewClassification +from .HeadlineClassification import HeadlineClassification +from .InappropriatenessClassification import InappropriatenessClassification +from .KinopoiskClassification import KinopoiskClassification +from .RuReviewsClassification import RuReviewsClassification +from .RuSciBenchGRNTIClassification import RuSciBenchGRNTIClassification +from .RuSciBenchOECDClassification import RuSciBenchOECDClassification + +__all__ = [ + "KinopoiskClassification", + "HeadlineClassification", + "InappropriatenessClassification", + "RuSciBenchGRNTIClassification", + "RuSciBenchOECDClassification", + "RuReviewsClassification", + "GeoreviewClassification", +] diff --git a/mteb/tasks/Classification/san/__init__.py b/mteb/tasks/Classification/san/__init__.py index e69de29bb2..2ef456ae7d 100644 --- a/mteb/tasks/Classification/san/__init__.py +++ b/mteb/tasks/Classification/san/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SanskritShlokasClassification import SanskritShlokasClassification + +__all__ = ["SanskritShlokasClassification"] diff --git a/mteb/tasks/Classification/sin/__init__.py b/mteb/tasks/Classification/sin/__init__.py index e69de29bb2..ee4b6c1cd3 100644 --- a/mteb/tasks/Classification/sin/__init__.py +++ b/mteb/tasks/Classification/sin/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .SinhalaNewsClassification import SinhalaNewsClassification +from .SinhalaNewsSourceClassification import SinhalaNewsSourceClassification + +__all__ = ["SinhalaNewsSourceClassification", "SinhalaNewsClassification"] diff --git a/mteb/tasks/Classification/slk/__init__.py b/mteb/tasks/Classification/slk/__init__.py new file mode 100644 index 0000000000..d58f1a2b33 --- /dev/null +++ b/mteb/tasks/Classification/slk/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .CSFDSKMovieReviewSentimentClassification import ( + CSFDSKMovieReviewSentimentClassification, +) +from 
.SlovakHateSpeechClassification import SlovakHateSpeechClassification + +__all__ = ["CSFDSKMovieReviewSentimentClassification", "SlovakHateSpeechClassification"] diff --git a/mteb/tasks/Classification/slv/__init__.py b/mteb/tasks/Classification/slv/__init__.py index e69de29bb2..57605d8b60 100644 --- a/mteb/tasks/Classification/slv/__init__.py +++ b/mteb/tasks/Classification/slv/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FrenkSlClassification import FrenkSlClassification + +__all__ = ["FrenkSlClassification"] diff --git a/mteb/tasks/Classification/spa/__init__.py b/mteb/tasks/Classification/spa/__init__.py index e69de29bb2..3150b2ddcd 100644 --- a/mteb/tasks/Classification/spa/__init__.py +++ b/mteb/tasks/Classification/spa/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .SpanishNewsClassification import SpanishNewsClassification +from .SpanishSentimentClassification import SpanishSentimentClassification + +__all__ = ["SpanishNewsClassification", "SpanishSentimentClassification"] diff --git a/mteb/tasks/Classification/ssw/__init__.py b/mteb/tasks/Classification/ssw/__init__.py index e69de29bb2..8de706169c 100644 --- a/mteb/tasks/Classification/ssw/__init__.py +++ b/mteb/tasks/Classification/ssw/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SiswatiNewsClassification import SiswatiNewsClassification + +__all__ = ["SiswatiNewsClassification"] diff --git a/mteb/tasks/Classification/svk/__init__.py b/mteb/tasks/Classification/svk/__init__.py index e69de29bb2..9c0c6608e4 100644 --- a/mteb/tasks/Classification/svk/__init__.py +++ b/mteb/tasks/Classification/svk/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .SlovakMovieReviewSentimentClassification import ( + SlovakMovieReviewSentimentClassification, +) + +__all__ = ["SlovakMovieReviewSentimentClassification"] diff --git a/mteb/tasks/Classification/swa/__init__.py b/mteb/tasks/Classification/swa/__init__.py index 
e69de29bb2..f9b04ebc77 100644 --- a/mteb/tasks/Classification/swa/__init__.py +++ b/mteb/tasks/Classification/swa/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SwahiliNewsClassification import SwahiliNewsClassification + +__all__ = ["SwahiliNewsClassification"] diff --git a/mteb/tasks/Classification/swe/__init__.py b/mteb/tasks/Classification/swe/__init__.py index e69de29bb2..73c69f1fb2 100644 --- a/mteb/tasks/Classification/swe/__init__.py +++ b/mteb/tasks/Classification/swe/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .DalajClassification import DalajClassification +from .SwedishSentimentClassification import SwedishSentimentClassification +from .SweRecClassification import SweRecClassification + +__all__ = [ + "DalajClassification", + "SweRecClassification", + "SwedishSentimentClassification", +] diff --git a/mteb/tasks/Classification/tam/__init__.py b/mteb/tasks/Classification/tam/__init__.py index e69de29bb2..ac93b4e50e 100644 --- a/mteb/tasks/Classification/tam/__init__.py +++ b/mteb/tasks/Classification/tam/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TamilNewsClassification import TamilNewsClassification + +__all__ = ["TamilNewsClassification"] diff --git a/mteb/tasks/Classification/tel/__init__.py b/mteb/tasks/Classification/tel/__init__.py index e69de29bb2..ccd1ef814a 100644 --- a/mteb/tasks/Classification/tel/__init__.py +++ b/mteb/tasks/Classification/tel/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TeluguAndhraJyotiNewsClassification import TeluguAndhraJyotiNewsClassification + +__all__ = ["TeluguAndhraJyotiNewsClassification"] diff --git a/mteb/tasks/Classification/tha/WongnaiReviewsClassification .py b/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py similarity index 90% rename from mteb/tasks/Classification/tha/WongnaiReviewsClassification .py rename to mteb/tasks/Classification/tha/WongnaiReviewsClassification.py index 
1a0bfb0834..ab00a052a3 100644 --- a/mteb/tasks/Classification/tha/WongnaiReviewsClassification .py +++ b/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py @@ -6,12 +6,12 @@ class WongnaiReviewsClassification(AbsTaskClassification): metadata = TaskMetadata( - name="WongnaiReviewsClassification ", + name="WongnaiReviewsClassification", description="Wongnai features over 200,000 restaurants, beauty salons, and spas across Thailand on its platform, with detailed information about each merchant and user reviews. In this dataset there are 5 classes corressponding each star rating", reference="https://github.com/wongnai/wongnai-corpus", dataset={ - "path": "wongnai_reviews", - "revision": "e708d4545d7ab10dd2c6b5b5b2a72ca28685dae2", + "path": "Wongnai/wongnai_reviews", + "revision": "cd351eb26093aa4b232a2390a0da35e7fab21655", }, type="Classification", category="p2p", @@ -23,7 +23,7 @@ class WongnaiReviewsClassification(AbsTaskClassification): dialect=[], domains=["Reviews", "Written"], task_subtypes=[], - license="LGPL-3.0", + license="lgpl-3.0", annotations_creators="derived", sample_creation="found", bibtex_citation=""" diff --git a/mteb/tasks/Classification/tha/__init__.py b/mteb/tasks/Classification/tha/__init__.py index e69de29bb2..16df75e134 100644 --- a/mteb/tasks/Classification/tha/__init__.py +++ b/mteb/tasks/Classification/tha/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .WisesightSentimentClassification import WisesightSentimentClassification +from .WongnaiReviewsClassification import WongnaiReviewsClassification + +__all__ = ["WisesightSentimentClassification", "WongnaiReviewsClassification"] diff --git a/mteb/tasks/Classification/tsn/__init__.py b/mteb/tasks/Classification/tsn/__init__.py index e69de29bb2..f767bb4aae 100644 --- a/mteb/tasks/Classification/tsn/__init__.py +++ b/mteb/tasks/Classification/tsn/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TswanaNewsClassification import 
TswanaNewsClassification + +__all__ = ["TswanaNewsClassification"] diff --git a/mteb/tasks/Classification/tur/__init__.py b/mteb/tasks/Classification/tur/__init__.py index 8b13789179..c5859194ad 100644 --- a/mteb/tasks/Classification/tur/__init__.py +++ b/mteb/tasks/Classification/tur/__init__.py @@ -1 +1,9 @@ +from __future__ import annotations +from .TurkishMovieSentimentClassification import TurkishMovieSentimentClassification +from .TurkishProductSentimentClassification import TurkishProductSentimentClassification + +__all__ = [ + "TurkishProductSentimentClassification", + "TurkishMovieSentimentClassification", +] diff --git a/mteb/tasks/Classification/ukr/__init__.py b/mteb/tasks/Classification/ukr/__init__.py index e69de29bb2..094a590c33 100644 --- a/mteb/tasks/Classification/ukr/__init__.py +++ b/mteb/tasks/Classification/ukr/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .UkrFormalityClassification import UkrFormalityClassification + +__all__ = ["UkrFormalityClassification"] diff --git a/mteb/tasks/Classification/urd/__init__.py b/mteb/tasks/Classification/urd/__init__.py index e69de29bb2..7d5b5eab14 100644 --- a/mteb/tasks/Classification/urd/__init__.py +++ b/mteb/tasks/Classification/urd/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .UrduRomanSentimentClassification import UrduRomanSentimentClassification + +__all__ = ["UrduRomanSentimentClassification"] diff --git a/mteb/tasks/Classification/vie/__init__.py b/mteb/tasks/Classification/vie/__init__.py index e69de29bb2..bbf7e9d2de 100644 --- a/mteb/tasks/Classification/vie/__init__.py +++ b/mteb/tasks/Classification/vie/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .VieStudentFeedbackClassification import VieStudentFeedbackClassification + +__all__ = ["VieStudentFeedbackClassification"] diff --git a/mteb/tasks/Classification/zho/__init__.py b/mteb/tasks/Classification/zho/__init__.py index e69de29bb2..a0b55bf883 100644 --- 
a/mteb/tasks/Classification/zho/__init__.py +++ b/mteb/tasks/Classification/zho/__init__.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from .CMTEBClassification import ( + IFlyTek, + JDReview, + MultilingualSentiment, + OnlineShopping, + TNews, + Waimai, +) +from .YueOpenriceReviewClassification import YueOpenriceReviewClassification + +__all__ = [ + "IFlyTek", + "JDReview", + "MultilingualSentiment", + "OnlineShopping", + "TNews", + "Waimai", + "YueOpenriceReviewClassification", +] diff --git a/mteb/tasks/Classification/zul/__init__.py b/mteb/tasks/Classification/zul/__init__.py index e69de29bb2..1217351e0e 100644 --- a/mteb/tasks/Classification/zul/__init__.py +++ b/mteb/tasks/Classification/zul/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .IsiZuluNewsClassification import IsiZuluNewsClassification + +__all__ = ["IsiZuluNewsClassification"] diff --git a/mteb/tasks/Clustering/__init__.py b/mteb/tasks/Clustering/__init__.py index 014796a4cb..f6b683669f 100644 --- a/mteb/tasks/Clustering/__init__.py +++ b/mteb/tasks/Clustering/__init__.py @@ -1,45 +1,179 @@ from __future__ import annotations -from .deu.BlurbsClusteringP2P import * -from .deu.BlurbsClusteringS2S import * -from .deu.TenKGnadClusteringP2P import * -from .deu.TenKGnadClusteringS2S import * -from .eng.ArxivClusteringP2P import * -from .eng.ArxivClusteringS2S import * -from .eng.ArXivHierarchicalClustering import * -from .eng.BigPatentClustering import * -from .eng.BiorxivClusteringP2P import * -from .eng.BiorxivClusteringS2S import * -from .eng.MedrxivClusteringP2P import * -from .eng.MedrxivClusteringS2S import * -from .eng.RedditClustering import * -from .eng.RedditClusteringP2P import * -from .eng.StackExchangeClustering import * -from .eng.StackExchangeClusteringP2P import * -from .eng.TwentyNewsgroupsClustering import * -from .eng.WikiCitiesClustering import * -from .fra.AlloProfClusteringP2P import * -from .fra.AlloProfClusteringS2S import * -from 
.fra.HALClusteringS2S import * -from .jpn.LivedoorNewsClustering import * -from .jpn.MewsC16JaClustering import * -from .multilingual.IndicReviewsClusteringP2P import * -from .multilingual.MasakhaNEWSClusteringP2P import * -from .multilingual.MasakhaNEWSClusteringS2S import * -from .multilingual.MLSUMClusteringP2P import * -from .multilingual.MLSUMClusteringS2S import * -from .multilingual.SIB200ClusteringS2S import * -from .multilingual.WikiClusteringP2P import * -from .nob.snl_clustering import * -from .nob.SNLHierarchicalClustering import * -from .nob.vg_clustering import * -from .nob.VGHierarchicalClustering import * -from .pol.PolishClustering import * -from .rom.RomaniBibleClustering import * -from .rus.GeoreviewClusteringP2P import * -from .rus.RuSciBenchGRNTIClusteringP2P import * -from .rus.RuSciBenchOECDClusteringP2P import * -from .spa.SpanishNewsClusteringP2P import * -from .swe.swedn_clustering import * -from .swe.SwednClustering import * -from .zho.CMTEBClustering import * +from .deu import ( + BlurbsClusteringP2P, + BlurbsClusteringP2PFast, + BlurbsClusteringS2S, + BlurbsClusteringS2SFast, + TenKGnadClusteringP2P, + TenKGnadClusteringP2PFast, + TenKGnadClusteringS2S, + TenKGnadClusteringS2SFast, +) +from .eng import ( + ArxivClusteringP2P, + ArxivClusteringP2PFast, + ArxivClusteringS2S, + ArXivHierarchicalClusteringP2P, + ArXivHierarchicalClusteringS2S, + BigPatentClustering, + BigPatentClusteringFast, + BiorxivClusteringP2P, + BiorxivClusteringP2PFast, + BiorxivClusteringS2S, + BiorxivClusteringS2SFast, + MedrxivClusteringP2P, + MedrxivClusteringP2PFast, + MedrxivClusteringS2S, + MedrxivClusteringS2SFast, + RedditClustering, + RedditClusteringP2P, + RedditFastClusteringP2P, + RedditFastClusteringS2S, + StackExchangeClustering, + StackExchangeClusteringFast, + StackExchangeClusteringP2P, + StackExchangeClusteringP2PFast, + TwentyNewsgroupsClustering, + TwentyNewsgroupsClusteringFast, + WikiCitiesClustering, +) +from .fra import ( + 
AlloProfClusteringP2P, + AlloProfClusteringP2PFast, + AlloProfClusteringS2S, + AlloProfClusteringS2SFast, + HALClusteringS2S, + HALClusteringS2SFast, +) +from .jpn import LivedoorNewsClustering, LivedoorNewsClusteringv2, MewsC16JaClustering +from .multilingual import ( + IndicReviewsClusteringP2P, + MasakhaNEWSClusteringP2P, + MasakhaNEWSClusteringS2S, + MLSUMClusteringP2P, + MLSUMClusteringP2PFast, + MLSUMClusteringS2S, + MLSUMClusteringS2SFast, + SIB200ClusteringFast, + WikiClusteringFastP2P, + WikiClusteringP2P, +) +from .nob import ( + SNLClustering, + SNLHierarchicalClusteringP2P, + SNLHierarchicalClusteringS2S, + VGClustering, + VGHierarchicalClusteringP2P, + VGHierarchicalClusteringS2S, +) +from .pol import ( + EightTagsClustering, + EightTagsClusteringFast, + PlscClusteringP2P, + PlscClusteringP2PFast, + PlscClusteringS2S, + PlscClusteringS2SFast, +) +from .rom import RomaniBibleClustering +from .rus import ( + GeoreviewClusteringP2P, + RuSciBenchGRNTIClusteringP2P, + RuSciBenchOECDClusteringP2P, +) +from .spa import SpanishNewsClusteringP2P +from .swe import SwednClustering, SwednClusteringFastS2S, SwednClusteringP2P +from .zho import ( + CLSClusteringFastP2P, + CLSClusteringFastS2S, + CLSClusteringP2P, + CLSClusteringS2S, + ThuNewsClusteringFastP2P, + ThuNewsClusteringFastS2S, + ThuNewsClusteringP2P, + ThuNewsClusteringS2S, +) + +__all__ = [ + "CLSClusteringFastP2P", + "CLSClusteringFastS2S", + "CLSClusteringP2P", + "CLSClusteringS2S", + "ThuNewsClusteringFastP2P", + "ThuNewsClusteringFastS2S", + "ThuNewsClusteringP2P", + "ThuNewsClusteringS2S", + "SpanishNewsClusteringP2P", + "EightTagsClustering", + "EightTagsClusteringFast", + "PlscClusteringP2P", + "PlscClusteringP2PFast", + "PlscClusteringS2S", + "PlscClusteringS2SFast", + "SwednClustering", + "SwednClusteringFastS2S", + "SwednClusteringP2P", + "VGClustering", + "SNLHierarchicalClusteringP2P", + "SNLHierarchicalClusteringS2S", + "SNLClustering", + "VGHierarchicalClusteringP2P", + 
"VGHierarchicalClusteringS2S", + "BlurbsClusteringS2S", + "BlurbsClusteringS2SFast", + "TenKGnadClusteringP2P", + "TenKGnadClusteringP2PFast", + "TenKGnadClusteringS2S", + "TenKGnadClusteringS2SFast", + "BlurbsClusteringP2P", + "BlurbsClusteringP2PFast", + "RomaniBibleClustering", + "MedrxivClusteringS2S", + "MedrxivClusteringS2SFast", + "BiorxivClusteringS2S", + "BiorxivClusteringS2SFast", + "StackExchangeClustering", + "StackExchangeClusteringFast", + "RedditClustering", + "RedditFastClusteringS2S", + "ArxivClusteringS2S", + "ArxivClusteringP2P", + "ArxivClusteringP2PFast", + "MedrxivClusteringP2P", + "MedrxivClusteringP2PFast", + "WikiCitiesClustering", + "BiorxivClusteringP2P", + "BiorxivClusteringP2PFast", + "TwentyNewsgroupsClustering", + "TwentyNewsgroupsClusteringFast", + "ArXivHierarchicalClusteringP2P", + "ArXivHierarchicalClusteringS2S", + "BigPatentClustering", + "BigPatentClusteringFast", + "StackExchangeClusteringP2P", + "StackExchangeClusteringP2PFast", + "RedditClusteringP2P", + "RedditFastClusteringP2P", + "LivedoorNewsClustering", + "LivedoorNewsClusteringv2", + "MewsC16JaClustering", + "WikiClusteringFastP2P", + "WikiClusteringP2P", + "MLSUMClusteringS2S", + "MLSUMClusteringS2SFast", + "MasakhaNEWSClusteringS2S", + "MLSUMClusteringP2P", + "MLSUMClusteringP2PFast", + "IndicReviewsClusteringP2P", + "SIB200ClusteringFast", + "MasakhaNEWSClusteringP2P", + "GeoreviewClusteringP2P", + "RuSciBenchGRNTIClusteringP2P", + "RuSciBenchOECDClusteringP2P", + "HALClusteringS2S", + "HALClusteringS2SFast", + "AlloProfClusteringS2S", + "AlloProfClusteringS2SFast", + "AlloProfClusteringP2P", + "AlloProfClusteringP2PFast", +] diff --git a/mteb/tasks/Clustering/deu/__init__.py b/mteb/tasks/Clustering/deu/__init__.py index e69de29bb2..67ebc77c34 100644 --- a/mteb/tasks/Clustering/deu/__init__.py +++ b/mteb/tasks/Clustering/deu/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .BlurbsClusteringP2P import BlurbsClusteringP2P, 
BlurbsClusteringP2PFast +from .BlurbsClusteringS2S import BlurbsClusteringS2S, BlurbsClusteringS2SFast +from .TenKGnadClusteringP2P import TenKGnadClusteringP2P, TenKGnadClusteringP2PFast +from .TenKGnadClusteringS2S import TenKGnadClusteringS2S, TenKGnadClusteringS2SFast + +__all__ = [ + "BlurbsClusteringS2S", + "BlurbsClusteringS2SFast", + "TenKGnadClusteringP2P", + "TenKGnadClusteringP2PFast", + "TenKGnadClusteringS2S", + "TenKGnadClusteringS2SFast", + "BlurbsClusteringP2P", + "BlurbsClusteringP2PFast", +] diff --git a/mteb/tasks/Clustering/eng/__init__.py b/mteb/tasks/Clustering/eng/__init__.py index e69de29bb2..39773845ba 100644 --- a/mteb/tasks/Clustering/eng/__init__.py +++ b/mteb/tasks/Clustering/eng/__init__.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from .ArxivClusteringP2P import ArxivClusteringP2P, ArxivClusteringP2PFast +from .ArxivClusteringS2S import ArxivClusteringS2S +from .ArXivHierarchicalClustering import ( + ArXivHierarchicalClusteringP2P, + ArXivHierarchicalClusteringS2S, +) +from .BigPatentClustering import BigPatentClustering, BigPatentClusteringFast +from .BiorxivClusteringP2P import BiorxivClusteringP2P, BiorxivClusteringP2PFast +from .BiorxivClusteringS2S import BiorxivClusteringS2S, BiorxivClusteringS2SFast +from .MedrxivClusteringP2P import MedrxivClusteringP2P, MedrxivClusteringP2PFast +from .MedrxivClusteringS2S import MedrxivClusteringS2S, MedrxivClusteringS2SFast +from .RedditClustering import RedditClustering, RedditFastClusteringS2S +from .RedditClusteringP2P import RedditClusteringP2P, RedditFastClusteringP2P +from .StackExchangeClustering import ( + StackExchangeClustering, + StackExchangeClusteringFast, +) +from .StackExchangeClusteringP2P import ( + StackExchangeClusteringP2P, + StackExchangeClusteringP2PFast, +) +from .TwentyNewsgroupsClustering import ( + TwentyNewsgroupsClustering, + TwentyNewsgroupsClusteringFast, +) +from .WikiCitiesClustering import WikiCitiesClustering + +__all__ = [ + 
"MedrxivClusteringS2S", + "MedrxivClusteringS2SFast", + "BiorxivClusteringS2S", + "BiorxivClusteringS2SFast", + "StackExchangeClustering", + "StackExchangeClusteringFast", + "RedditClustering", + "RedditFastClusteringS2S", + "ArxivClusteringS2S", + "ArxivClusteringP2P", + "ArxivClusteringP2PFast", + "MedrxivClusteringP2P", + "MedrxivClusteringP2PFast", + "WikiCitiesClustering", + "BiorxivClusteringP2P", + "BiorxivClusteringP2PFast", + "TwentyNewsgroupsClustering", + "TwentyNewsgroupsClusteringFast", + "ArXivHierarchicalClusteringP2P", + "ArXivHierarchicalClusteringS2S", + "BigPatentClustering", + "BigPatentClusteringFast", + "StackExchangeClusteringP2P", + "StackExchangeClusteringP2PFast", + "RedditClusteringP2P", + "RedditFastClusteringP2P", +] diff --git a/mteb/tasks/Clustering/fra/__init__.py b/mteb/tasks/Clustering/fra/__init__.py index e69de29bb2..cdc9f26fd4 100644 --- a/mteb/tasks/Clustering/fra/__init__.py +++ b/mteb/tasks/Clustering/fra/__init__.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from .AlloProfClusteringP2P import AlloProfClusteringP2P, AlloProfClusteringP2PFast +from .AlloProfClusteringS2S import AlloProfClusteringS2S, AlloProfClusteringS2SFast +from .HALClusteringS2S import HALClusteringS2S, HALClusteringS2SFast + +__all__ = [ + "HALClusteringS2S", + "HALClusteringS2SFast", + "AlloProfClusteringS2S", + "AlloProfClusteringS2SFast", + "AlloProfClusteringP2P", + "AlloProfClusteringP2PFast", +] diff --git a/mteb/tasks/Clustering/jpn/__init__.py b/mteb/tasks/Clustering/jpn/__init__.py index e69de29bb2..033d1c6173 100644 --- a/mteb/tasks/Clustering/jpn/__init__.py +++ b/mteb/tasks/Clustering/jpn/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .LivedoorNewsClustering import LivedoorNewsClustering, LivedoorNewsClusteringv2 +from .MewsC16JaClustering import MewsC16JaClustering + +__all__ = ["LivedoorNewsClustering", "LivedoorNewsClusteringv2", "MewsC16JaClustering"] diff --git 
a/mteb/tasks/Clustering/multilingual/__init__.py b/mteb/tasks/Clustering/multilingual/__init__.py index e69de29bb2..b6e78fa790 100644 --- a/mteb/tasks/Clustering/multilingual/__init__.py +++ b/mteb/tasks/Clustering/multilingual/__init__.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from .IndicReviewsClusteringP2P import IndicReviewsClusteringP2P +from .MasakhaNEWSClusteringP2P import MasakhaNEWSClusteringP2P +from .MasakhaNEWSClusteringS2S import MasakhaNEWSClusteringS2S +from .MLSUMClusteringP2P import MLSUMClusteringP2P, MLSUMClusteringP2PFast +from .MLSUMClusteringS2S import MLSUMClusteringS2S, MLSUMClusteringS2SFast +from .SIB200ClusteringS2S import SIB200ClusteringFast +from .WikiClusteringP2P import WikiClusteringFastP2P, WikiClusteringP2P + +__all__ = [ + "WikiClusteringFastP2P", + "WikiClusteringP2P", + "MLSUMClusteringS2S", + "MLSUMClusteringS2SFast", + "MasakhaNEWSClusteringS2S", + "MLSUMClusteringP2P", + "MLSUMClusteringP2PFast", + "IndicReviewsClusteringP2P", + "SIB200ClusteringFast", + "MasakhaNEWSClusteringP2P", +] diff --git a/mteb/tasks/Clustering/nob/__init__.py b/mteb/tasks/Clustering/nob/__init__.py index e69de29bb2..1ebda31080 100644 --- a/mteb/tasks/Clustering/nob/__init__.py +++ b/mteb/tasks/Clustering/nob/__init__.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from .snl_clustering import SNLClustering +from .SNLHierarchicalClustering import ( + SNLHierarchicalClusteringP2P, + SNLHierarchicalClusteringS2S, +) +from .vg_clustering import VGClustering +from .VGHierarchicalClustering import ( + VGHierarchicalClusteringP2P, + VGHierarchicalClusteringS2S, +) + +__all__ = [ + "VGClustering", + "SNLHierarchicalClusteringP2P", + "SNLHierarchicalClusteringS2S", + "SNLClustering", + "VGHierarchicalClusteringP2P", + "VGHierarchicalClusteringS2S", +] diff --git a/mteb/tasks/Clustering/pol/__init__.py b/mteb/tasks/Clustering/pol/__init__.py index e69de29bb2..6b19d6cdb6 100644 --- a/mteb/tasks/Clustering/pol/__init__.py +++ 
b/mteb/tasks/Clustering/pol/__init__.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from .PolishClustering import ( + EightTagsClustering, + EightTagsClusteringFast, + PlscClusteringP2P, + PlscClusteringP2PFast, + PlscClusteringS2S, + PlscClusteringS2SFast, +) + +__all__ = [ + "EightTagsClustering", + "EightTagsClusteringFast", + "PlscClusteringP2P", + "PlscClusteringP2PFast", + "PlscClusteringS2S", + "PlscClusteringS2SFast", +] diff --git a/mteb/tasks/Clustering/rom/__init__.py b/mteb/tasks/Clustering/rom/__init__.py new file mode 100644 index 0000000000..5ab88efd50 --- /dev/null +++ b/mteb/tasks/Clustering/rom/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .RomaniBibleClustering import RomaniBibleClustering + +__all__ = ["RomaniBibleClustering"] diff --git a/mteb/tasks/Clustering/rus/__init__.py b/mteb/tasks/Clustering/rus/__init__.py index e69de29bb2..295b1663fe 100644 --- a/mteb/tasks/Clustering/rus/__init__.py +++ b/mteb/tasks/Clustering/rus/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .GeoreviewClusteringP2P import GeoreviewClusteringP2P +from .RuSciBenchGRNTIClusteringP2P import RuSciBenchGRNTIClusteringP2P +from .RuSciBenchOECDClusteringP2P import RuSciBenchOECDClusteringP2P + +__all__ = [ + "GeoreviewClusteringP2P", + "RuSciBenchGRNTIClusteringP2P", + "RuSciBenchOECDClusteringP2P", +] diff --git a/mteb/tasks/Clustering/spa/__init__.py b/mteb/tasks/Clustering/spa/__init__.py index e69de29bb2..c67454d7f3 100644 --- a/mteb/tasks/Clustering/spa/__init__.py +++ b/mteb/tasks/Clustering/spa/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SpanishNewsClusteringP2P import SpanishNewsClusteringP2P + +__all__ = ["SpanishNewsClusteringP2P"] diff --git a/mteb/tasks/Clustering/swe/__init__.py b/mteb/tasks/Clustering/swe/__init__.py index e69de29bb2..47a7599596 100644 --- a/mteb/tasks/Clustering/swe/__init__.py +++ b/mteb/tasks/Clustering/swe/__init__.py @@ -0,0 +1,6 @@ +from 
__future__ import annotations + +from .swedn_clustering import SwednClustering +from .SwednClustering import SwednClusteringFastS2S, SwednClusteringP2P + +__all__ = ["SwednClustering", "SwednClusteringFastS2S", "SwednClusteringP2P"] diff --git a/mteb/tasks/Clustering/zho/__init__.py b/mteb/tasks/Clustering/zho/__init__.py index e69de29bb2..8aa2618aa1 100644 --- a/mteb/tasks/Clustering/zho/__init__.py +++ b/mteb/tasks/Clustering/zho/__init__.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from .CMTEBClustering import ( + CLSClusteringFastP2P, + CLSClusteringFastS2S, + CLSClusteringP2P, + CLSClusteringS2S, + ThuNewsClusteringFastP2P, + ThuNewsClusteringFastS2S, + ThuNewsClusteringP2P, + ThuNewsClusteringS2S, +) + +__all__ = [ + "CLSClusteringFastP2P", + "CLSClusteringFastS2S", + "CLSClusteringP2P", + "CLSClusteringS2S", + "ThuNewsClusteringFastP2P", + "ThuNewsClusteringFastS2S", + "ThuNewsClusteringP2P", + "ThuNewsClusteringS2S", +] diff --git a/mteb/tasks/InstructionReranking/__init__.py b/mteb/tasks/InstructionReranking/__init__.py index f5e812247d..82523ac4ca 100644 --- a/mteb/tasks/InstructionReranking/__init__.py +++ b/mteb/tasks/InstructionReranking/__init__.py @@ -1,6 +1,16 @@ from __future__ import annotations -from .eng.Core17InstructionRetrieval import * -from .eng.News21InstructionRetrieval import * -from .eng.Robust04InstructionRetrieval import * -from .multilingual.mFollowIR import * +from .eng import ( + Core17InstructionRetrieval, + News21InstructionRetrieval, + Robust04InstructionRetrieval, +) +from .multilingual import mFollowIR, mFollowIRCrossLingual + +__all__ = [ + "News21InstructionRetrieval", + "Core17InstructionRetrieval", + "Robust04InstructionRetrieval", + "mFollowIR", + "mFollowIRCrossLingual", +] diff --git a/mteb/tasks/InstructionReranking/eng/__init__.py b/mteb/tasks/InstructionReranking/eng/__init__.py index e69de29bb2..ad30bb4ea2 100644 --- a/mteb/tasks/InstructionReranking/eng/__init__.py +++ 
b/mteb/tasks/InstructionReranking/eng/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .Core17InstructionRetrieval import Core17InstructionRetrieval +from .News21InstructionRetrieval import News21InstructionRetrieval +from .Robust04InstructionRetrieval import Robust04InstructionRetrieval + +__all__ = [ + "News21InstructionRetrieval", + "Core17InstructionRetrieval", + "Robust04InstructionRetrieval", +] diff --git a/mteb/tasks/InstructionReranking/multilingual/__init__.py b/mteb/tasks/InstructionReranking/multilingual/__init__.py index e69de29bb2..a559987717 100644 --- a/mteb/tasks/InstructionReranking/multilingual/__init__.py +++ b/mteb/tasks/InstructionReranking/multilingual/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .mFollowIR import mFollowIR, mFollowIRCrossLingual + +__all__ = ["mFollowIR", "mFollowIRCrossLingual"] diff --git a/mteb/tasks/InstructionRetrieval/__init__.py b/mteb/tasks/InstructionRetrieval/__init__.py index c8454858dd..60d78aca14 100644 --- a/mteb/tasks/InstructionRetrieval/__init__.py +++ b/mteb/tasks/InstructionRetrieval/__init__.py @@ -1,3 +1,5 @@ from __future__ import annotations -from .eng.InstructIR import * +from .eng import InstructIR + +__all__ = ["InstructIR"] diff --git a/mteb/tasks/InstructionRetrieval/eng/__init__.py b/mteb/tasks/InstructionRetrieval/eng/__init__.py index e69de29bb2..83928dd647 100644 --- a/mteb/tasks/InstructionRetrieval/eng/__init__.py +++ b/mteb/tasks/InstructionRetrieval/eng/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .InstructIR import InstructIR + +__all__ = ["InstructIR"] diff --git a/mteb/tasks/InstructionRetrieval/multilingual/__init__.py b/mteb/tasks/InstructionRetrieval/multilingual/__init__.py new file mode 100644 index 0000000000..4d21ee8507 --- /dev/null +++ b/mteb/tasks/InstructionRetrieval/multilingual/__init__.py @@ -0,0 +1,3 @@ +from __future__ import annotations + +__all__ = [] diff --git 
a/mteb/tasks/MultiLabelClassification/__init__.py b/mteb/tasks/MultiLabelClassification/__init__.py index 0cf8c1bf6a..b27c4f8111 100644 --- a/mteb/tasks/MultiLabelClassification/__init__.py +++ b/mteb/tasks/MultiLabelClassification/__init__.py @@ -1,8 +1,16 @@ from __future__ import annotations -from .kor.KorHateSpeechMLClassification import * -from .mlt.MalteseNewsClassification import * -from .multilingual.MultiEURLEXMultilabelClassification import * -from .por.BrazilianToxicTweetsClassification import * -from .rus.CEDRClassification import * -from .rus.SensitiveTopicsClassification import * +from .kor import KorHateSpeechMLClassification +from .mlt import MalteseNewsClassification +from .multilingual import MultiEURLEXMultilabelClassification +from .por import BrazilianToxicTweetsClassification +from .rus import CEDRClassification, SensitiveTopicsClassification + +__all__ = [ + "BrazilianToxicTweetsClassification", + "MalteseNewsClassification", + "KorHateSpeechMLClassification", + "MultiEURLEXMultilabelClassification", + "SensitiveTopicsClassification", + "CEDRClassification", +] diff --git a/mteb/tasks/MultiLabelClassification/kor/__init__.py b/mteb/tasks/MultiLabelClassification/kor/__init__.py index e69de29bb2..02efbdca47 100644 --- a/mteb/tasks/MultiLabelClassification/kor/__init__.py +++ b/mteb/tasks/MultiLabelClassification/kor/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .KorHateSpeechMLClassification import KorHateSpeechMLClassification + +__all__ = ["KorHateSpeechMLClassification"] diff --git a/mteb/tasks/MultiLabelClassification/mlt/__init__.py b/mteb/tasks/MultiLabelClassification/mlt/__init__.py index e69de29bb2..c1aa8a2ff2 100644 --- a/mteb/tasks/MultiLabelClassification/mlt/__init__.py +++ b/mteb/tasks/MultiLabelClassification/mlt/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MalteseNewsClassification import MalteseNewsClassification + +__all__ = ["MalteseNewsClassification"] diff --git 
a/mteb/tasks/MultiLabelClassification/multilingual/__init__.py b/mteb/tasks/MultiLabelClassification/multilingual/__init__.py index e69de29bb2..235cac1550 100644 --- a/mteb/tasks/MultiLabelClassification/multilingual/__init__.py +++ b/mteb/tasks/MultiLabelClassification/multilingual/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MultiEURLEXMultilabelClassification import MultiEURLEXMultilabelClassification + +__all__ = ["MultiEURLEXMultilabelClassification"] diff --git a/mteb/tasks/MultiLabelClassification/por/__init__.py b/mteb/tasks/MultiLabelClassification/por/__init__.py index e69de29bb2..0e30038e8b 100644 --- a/mteb/tasks/MultiLabelClassification/por/__init__.py +++ b/mteb/tasks/MultiLabelClassification/por/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .BrazilianToxicTweetsClassification import BrazilianToxicTweetsClassification + +__all__ = ["BrazilianToxicTweetsClassification"] diff --git a/mteb/tasks/MultiLabelClassification/rus/__init__.py b/mteb/tasks/MultiLabelClassification/rus/__init__.py index e69de29bb2..18b6192a0b 100644 --- a/mteb/tasks/MultiLabelClassification/rus/__init__.py +++ b/mteb/tasks/MultiLabelClassification/rus/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .CEDRClassification import CEDRClassification +from .SensitiveTopicsClassification import SensitiveTopicsClassification + +__all__ = ["SensitiveTopicsClassification", "CEDRClassification"] diff --git a/mteb/tasks/PairClassification/__init__.py b/mteb/tasks/PairClassification/__init__.py index 1193728659..03bf5a90a8 100644 --- a/mteb/tasks/PairClassification/__init__.py +++ b/mteb/tasks/PairClassification/__init__.py @@ -1,26 +1,58 @@ from __future__ import annotations -from .ara.ArEntail import * -from .ces.CTKFactsNLI import * -from .deu.FalseFriendsDeEnPC import * -from .eng.LegalBenchPC import * -from .eng.SprintDuplicateQuestionsPC import * -from .eng.TwitterSemEval2015PC import * -from 
.eng.TwitterURLCorpusPC import * -from .fas.FarsTail import * -from .hye.ArmenianParaphrasePC import * -from .ind.IndoNLI import * -from .kor.KlueNLI import * -from .multilingual.IndicXnliPairClassification import ( - IndicXnliPairClassification as IndicXnliPairClassification, +from .ara import ArEntail +from .ces import CTKFactsNLI +from .deu import FalseFriendsDeEnPC +from .eng import ( + LegalBenchPC, + SprintDuplicateQuestionsPC, + TwitterSemEval2015PC, + TwitterURLCorpusPC, ) -from .multilingual.OpusparcusPC import * -from .multilingual.PawsXPairClassification import * -from .multilingual.RTE3 import * -from .multilingual.XNLI import * -from .multilingual.XStance import * -from .pol.PolishPC import * -from .por.Assin2RTE import * -from .por.SickBrPC import * -from .rus.TERRa import * -from .zho.CMTEBPairClassification import * +from .fas import FarsTail +from .hye import ArmenianParaphrasePC +from .ind import IndoNLI +from .kor import KlueNLI +from .multilingual import ( + RTE3, + XNLI, + XNLIV2, + IndicXnliPairClassification, + OpusparcusPC, + PawsXPairClassification, + XStance, +) +from .pol import CdscePC, PpcPC, PscPC, SickePLPC +from .por import Assin2RTE, SickBrPC +from .rus import TERRa +from .zho import Cmnli, Ocnli + +__all__ = [ + "Cmnli", + "Ocnli", + "Assin2RTE", + "SickBrPC", + "CdscePC", + "PpcPC", + "PscPC", + "SickePLPC", + "IndoNLI", + "FalseFriendsDeEnPC", + "ArEntail", + "ArmenianParaphrasePC", + "CTKFactsNLI", + "LegalBenchPC", + "TwitterSemEval2015PC", + "TwitterURLCorpusPC", + "SprintDuplicateQuestionsPC", + "FarsTail", + "KlueNLI", + "IndicXnliPairClassification", + "OpusparcusPC", + "PawsXPairClassification", + "RTE3", + "XStance", + "XNLI", + "XNLIV2", + "TERRa", +] diff --git a/mteb/tasks/PairClassification/ara/__init__.py b/mteb/tasks/PairClassification/ara/__init__.py new file mode 100644 index 0000000000..9619569bca --- /dev/null +++ b/mteb/tasks/PairClassification/ara/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations 
+ +from .ArEntail import ArEntail + +__all__ = ["ArEntail"] diff --git a/mteb/tasks/PairClassification/ces/__init__.py b/mteb/tasks/PairClassification/ces/__init__.py new file mode 100644 index 0000000000..82a0417518 --- /dev/null +++ b/mteb/tasks/PairClassification/ces/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .CTKFactsNLI import CTKFactsNLI + +__all__ = ["CTKFactsNLI"] diff --git a/mteb/tasks/PairClassification/deu/__init__.py b/mteb/tasks/PairClassification/deu/__init__.py index 8b13789179..27c07a9982 100644 --- a/mteb/tasks/PairClassification/deu/__init__.py +++ b/mteb/tasks/PairClassification/deu/__init__.py @@ -1 +1,5 @@ +from __future__ import annotations +from .FalseFriendsDeEnPC import FalseFriendsDeEnPC + +__all__ = ["FalseFriendsDeEnPC"] diff --git a/mteb/tasks/PairClassification/eng/__init__.py b/mteb/tasks/PairClassification/eng/__init__.py index e69de29bb2..5fdb9d7080 100644 --- a/mteb/tasks/PairClassification/eng/__init__.py +++ b/mteb/tasks/PairClassification/eng/__init__.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from .LegalBenchPC import LegalBenchPC +from .SprintDuplicateQuestionsPC import SprintDuplicateQuestionsPC +from .TwitterSemEval2015PC import TwitterSemEval2015PC +from .TwitterURLCorpusPC import TwitterURLCorpusPC + +__all__ = [ + "LegalBenchPC", + "TwitterSemEval2015PC", + "TwitterURLCorpusPC", + "SprintDuplicateQuestionsPC", +] diff --git a/mteb/tasks/PairClassification/fas/__init__.py b/mteb/tasks/PairClassification/fas/__init__.py new file mode 100644 index 0000000000..81e6cffc84 --- /dev/null +++ b/mteb/tasks/PairClassification/fas/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FarsTail import FarsTail + +__all__ = ["FarsTail"] diff --git a/mteb/tasks/PairClassification/hye/__init__.py b/mteb/tasks/PairClassification/hye/__init__.py index e69de29bb2..33ba14204e 100644 --- a/mteb/tasks/PairClassification/hye/__init__.py +++ 
b/mteb/tasks/PairClassification/hye/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .ArmenianParaphrasePC import ArmenianParaphrasePC + +__all__ = ["ArmenianParaphrasePC"] diff --git a/mteb/tasks/PairClassification/ind/__init__.py b/mteb/tasks/PairClassification/ind/__init__.py new file mode 100644 index 0000000000..73e2935997 --- /dev/null +++ b/mteb/tasks/PairClassification/ind/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .IndoNLI import IndoNLI + +__all__ = ["IndoNLI"] diff --git a/mteb/tasks/PairClassification/kor/__init__.py b/mteb/tasks/PairClassification/kor/__init__.py new file mode 100644 index 0000000000..83b58c5d97 --- /dev/null +++ b/mteb/tasks/PairClassification/kor/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .KlueNLI import KlueNLI + +__all__ = ["KlueNLI"] diff --git a/mteb/tasks/PairClassification/multilingual/XStance.py b/mteb/tasks/PairClassification/multilingual/XStance.py index e2c5037259..ca41208845 100644 --- a/mteb/tasks/PairClassification/multilingual/XStance.py +++ b/mteb/tasks/PairClassification/multilingual/XStance.py @@ -96,4 +96,21 @@ def convert_example(example): .to_dict() ) + self.dataset_transform() self.data_loaded = True + + def dataset_transform(self): + """Transform dataset into sentence-pair format""" + _dataset = {} + + for lang in self.metadata.eval_langs: + _dataset[lang] = {} + for split in self.metadata.eval_splits: + _dataset[lang][split] = [ + { + "sentence1": self.dataset[lang][split]["sentence1"], + "sentence2": self.dataset[lang][split]["sentence2"], + "labels": self.dataset[lang][split]["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/PairClassification/multilingual/__init__.py b/mteb/tasks/PairClassification/multilingual/__init__.py index e69de29bb2..a9ec6dc333 100644 --- a/mteb/tasks/PairClassification/multilingual/__init__.py +++ b/mteb/tasks/PairClassification/multilingual/__init__.py @@ -0,0 +1,18 @@ +from 
__future__ import annotations + +from .IndicXnliPairClassification import IndicXnliPairClassification +from .OpusparcusPC import OpusparcusPC +from .PawsXPairClassification import PawsXPairClassification +from .RTE3 import RTE3 +from .XNLI import XNLI, XNLIV2 +from .XStance import XStance + +__all__ = [ + "IndicXnliPairClassification", + "OpusparcusPC", + "PawsXPairClassification", + "RTE3", + "XStance", + "XNLI", + "XNLIV2", +] diff --git a/mteb/tasks/PairClassification/pol/__init__.py b/mteb/tasks/PairClassification/pol/__init__.py index e69de29bb2..661977b294 100644 --- a/mteb/tasks/PairClassification/pol/__init__.py +++ b/mteb/tasks/PairClassification/pol/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .PolishPC import CdscePC, PpcPC, PscPC, SickePLPC + +__all__ = ["CdscePC", "PpcPC", "PscPC", "SickePLPC"] diff --git a/mteb/tasks/PairClassification/por/__init__.py b/mteb/tasks/PairClassification/por/__init__.py new file mode 100644 index 0000000000..fa28ff584c --- /dev/null +++ b/mteb/tasks/PairClassification/por/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .Assin2RTE import Assin2RTE +from .SickBrPC import SickBrPC + +__all__ = ["Assin2RTE", "SickBrPC"] diff --git a/mteb/tasks/PairClassification/rus/__init__.py b/mteb/tasks/PairClassification/rus/__init__.py index e69de29bb2..71fcf2dfca 100644 --- a/mteb/tasks/PairClassification/rus/__init__.py +++ b/mteb/tasks/PairClassification/rus/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TERRa import TERRa + +__all__ = ["TERRa"] diff --git a/mteb/tasks/PairClassification/zho/__init__.py b/mteb/tasks/PairClassification/zho/__init__.py index e69de29bb2..7ad9656532 100644 --- a/mteb/tasks/PairClassification/zho/__init__.py +++ b/mteb/tasks/PairClassification/zho/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .CMTEBPairClassification import Cmnli, Ocnli + +__all__ = ["Cmnli", "Ocnli"] diff --git 
a/mteb/tasks/Reranking/__init__.py b/mteb/tasks/Reranking/__init__.py index 18dbd53f43..497e2751b8 100644 --- a/mteb/tasks/Reranking/__init__.py +++ b/mteb/tasks/Reranking/__init__.py @@ -1,16 +1,35 @@ from __future__ import annotations -from .eng.AskUbuntuDupQuestions import * -from .eng.MindSmallReranking import * -from .eng.NevIR import * -from .eng.SciDocsReranking import * -from .eng.StackOverflowDupQuestions import * -from .eng.WebLINXCandidatesReranking import * -from .fra.AlloprofReranking import * -from .fra.SyntecReranking import * -from .jpn.MMarcoReranking import * -from .multilingual.ESCIReranking import * -from .multilingual.MIRACLReranking import * -from .multilingual.WikipediaRerankingMultilingual import * -from .rus.RuBQReranking import * -from .zho.CMTEBReranking import * +from .eng import ( + AskUbuntuDupQuestions, + MindSmallReranking, + NevIR, + SciDocsReranking, + StackOverflowDupQuestions, + WebLINXCandidatesReranking, +) +from .fra import AlloprofReranking, SyntecReranking +from .jpn import VoyageMMarcoReranking +from .multilingual import ESCIReranking, MIRACLReranking, WikipediaRerankingMultilingual +from .rus import RuBQReranking +from .zho import CMedQAv1, CMedQAv2, MMarcoReranking, T2Reranking + +__all__ = [ + "CMedQAv1", + "CMedQAv2", + "MMarcoReranking", + "T2Reranking", + "AskUbuntuDupQuestions", + "WebLINXCandidatesReranking", + "StackOverflowDupQuestions", + "NevIR", + "MindSmallReranking", + "SciDocsReranking", + "VoyageMMarcoReranking", + "MIRACLReranking", + "ESCIReranking", + "WikipediaRerankingMultilingual", + "RuBQReranking", + "SyntecReranking", + "AlloprofReranking", +] diff --git a/mteb/tasks/Reranking/eng/__init__.py b/mteb/tasks/Reranking/eng/__init__.py index e69de29bb2..a9975a736d 100644 --- a/mteb/tasks/Reranking/eng/__init__.py +++ b/mteb/tasks/Reranking/eng/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .AskUbuntuDupQuestions import AskUbuntuDupQuestions +from .MindSmallReranking import 
MindSmallReranking +from .NevIR import NevIR +from .SciDocsReranking import SciDocsReranking +from .StackOverflowDupQuestions import StackOverflowDupQuestions +from .WebLINXCandidatesReranking import WebLINXCandidatesReranking + +__all__ = [ + "AskUbuntuDupQuestions", + "WebLINXCandidatesReranking", + "StackOverflowDupQuestions", + "NevIR", + "MindSmallReranking", + "SciDocsReranking", +] diff --git a/mteb/tasks/Reranking/fra/__init__.py b/mteb/tasks/Reranking/fra/__init__.py index e69de29bb2..a9f6cc9d64 100644 --- a/mteb/tasks/Reranking/fra/__init__.py +++ b/mteb/tasks/Reranking/fra/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .AlloprofReranking import AlloprofReranking +from .SyntecReranking import SyntecReranking + +__all__ = ["SyntecReranking", "AlloprofReranking"] diff --git a/mteb/tasks/Reranking/jpn/__init__.py b/mteb/tasks/Reranking/jpn/__init__.py new file mode 100644 index 0000000000..bb4df4f804 --- /dev/null +++ b/mteb/tasks/Reranking/jpn/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MMarcoReranking import VoyageMMarcoReranking + +__all__ = ["VoyageMMarcoReranking"] diff --git a/mteb/tasks/Reranking/multilingual/__init__.py b/mteb/tasks/Reranking/multilingual/__init__.py index e69de29bb2..67f1504333 100644 --- a/mteb/tasks/Reranking/multilingual/__init__.py +++ b/mteb/tasks/Reranking/multilingual/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .ESCIReranking import ESCIReranking +from .MIRACLReranking import MIRACLReranking +from .WikipediaRerankingMultilingual import WikipediaRerankingMultilingual + +__all__ = ["MIRACLReranking", "ESCIReranking", "WikipediaRerankingMultilingual"] diff --git a/mteb/tasks/Reranking/rus/__init__.py b/mteb/tasks/Reranking/rus/__init__.py index e69de29bb2..579e3727fa 100644 --- a/mteb/tasks/Reranking/rus/__init__.py +++ b/mteb/tasks/Reranking/rus/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .RuBQReranking import 
RuBQReranking + +__all__ = ["RuBQReranking"] diff --git a/mteb/tasks/Reranking/zho/__init__.py b/mteb/tasks/Reranking/zho/__init__.py index e69de29bb2..b9be2bdc26 100644 --- a/mteb/tasks/Reranking/zho/__init__.py +++ b/mteb/tasks/Reranking/zho/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .CMTEBReranking import CMedQAv1, CMedQAv2, MMarcoReranking, T2Reranking + +__all__ = ["CMedQAv1", "CMedQAv2", "MMarcoReranking", "T2Reranking"] diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index ca41d4354f..859e8d3a49 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -1,147 +1,348 @@ from __future__ import annotations -from .ara.SadeemQuestionRetrieval import * -from .code.AppsRetrieval import * -from .code.CodeEditSearchRetrieval import * -from .code.CodeFeedbackMTRetrieval import * -from .code.CodeFeedbackSTRetrieval import * -from .code.CodeSearchNetCCRetrieval import * -from .code.CodeSearchNetRetrieval import * -from .code.CodeTransOceanContestRetrieval import * -from .code.CodeTransOceanDLRetrieval import * -from .code.COIRCodeSearchNetRetrieval import * -from .code.CosQARetrieval import * -from .code.StackOverflowQARetrieval import * -from .code.SyntheticText2SqlRetrieval import * -from .dan.DanFeverRetrieval import * -from .dan.TV2Nordretrieval import * -from .dan.TwitterHjerneRetrieval import * -from .deu.GerDaLIRRetrieval import * -from .deu.GerDaLIRSmallRetrieval import * -from .deu.GermanDPRRetrieval import * -from .deu.GermanGovServiceRetrieval import * -from .deu.GermanQuADRetrieval import * -from .deu.LegalQuADRetrieval import * -from .ell.GreekCivicsQA import * -from .eng.AILACasedocsRetrieval import * -from .eng.AILAStatutesRetrieval import * -from .eng.AlphaNLIRetrieval import * -from .eng.ARCChallengeRetrieval import * -from .eng.ArguAnaRetrieval import * -from .eng.BrightRetrieval import * -from .eng.ClimateFEVERRetrieval import * -from 
.eng.CQADupstackAndroidRetrieval import * -from .eng.CQADupstackEnglishRetrieval import * -from .eng.CQADupstackGamingRetrieval import * -from .eng.CQADupstackGisRetrieval import * -from .eng.CQADupstackMathematicaRetrieval import * -from .eng.CQADupstackPhysicsRetrieval import * -from .eng.CQADupstackProgrammersRetrieval import * -from .eng.CQADupstackStatsRetrieval import * -from .eng.CQADupstackTexRetrieval import * -from .eng.CQADupstackUnixRetrieval import * -from .eng.CQADupstackWebmastersRetrieval import * -from .eng.CQADupstackWordpressRetrieval import * -from .eng.DBPediaRetrieval import * -from .eng.FaithDialRetrieval import * -from .eng.FeedbackQARetrieval import * -from .eng.FEVERRetrieval import * -from .eng.FiQA2018Retrieval import * -from .eng.HagridRetrieval import * -from .eng.HellaSwagRetrieval import * -from .eng.HotpotQARetrieval import * -from .eng.LegalBenchConsumerContractsQARetrieval import * -from .eng.LegalBenchCorporateLobbyingRetrieval import * -from .eng.LegalSummarizationRetrieval import * -from .eng.LEMBNarrativeQARetrieval import * -from .eng.LEMBNeedleRetrieval import * -from .eng.LEMBPasskeyRetrieval import * -from .eng.LEMBQMSumRetrieval import * -from .eng.LEMBSummScreenFDRetrieval import * -from .eng.LEMBWikimQARetrieval import * -from .eng.LitSearchRetrieval import * -from .eng.MedicalQARetrieval import * -from .eng.MLQuestions import * -from .eng.MSMARCORetrieval import * -from .eng.MSMARCOv2Retrieval import * -from .eng.NarrativeQARetrieval import * -from .eng.NFCorpusRetrieval import * -from .eng.NQRetrieval import * -from .eng.PiqaRetrieval import * -from .eng.QuailRetrieval import * -from .eng.QuoraRetrieval import * -from .eng.RARbCodeRetrieval import * -from .eng.RARbMathRetrieval import * -from .eng.SCIDOCSRetrieval import * -from .eng.SciFactRetrieval import * -from .eng.SiqaRetrieval import * -from .eng.SpartQARetrieval import * -from .eng.TempReasonL1Retrieval import * -from .eng.TempReasonL2ContextRetrieval import * 
-from .eng.TempReasonL2FactRetrieval import * -from .eng.TempReasonL2PureRetrieval import * -from .eng.TempReasonL3ContextRetrieval import * -from .eng.TempReasonL3FactRetrieval import * -from .eng.TempReasonL3PureRetrieval import * -from .eng.TopiOCQARetrieval import * -from .eng.Touche2020Retrieval import * -from .eng.TRECCOVIDRetrieval import * -from .eng.WinoGrandeRetrieval import * -from .est.estqa import * -from .fra.AlloprofRetrieval import * -from .fra.BSARDRetrieval import * -from .fra.FQuADRetrieval import * -from .fra.SyntecRetrieval import * -from .hun.HunSum2 import * -from .jpn.JaGovFaqsRetrieval import * -from .jpn.JaqketRetrieval import * -from .jpn.JaQuADRetrieval import * -from .jpn.NLPJournalAbsIntroRetrieval import * -from .jpn.NLPJournalTitleAbsRetrieval import * -from .jpn.NLPJournalTitleIntroRetrieval import * -from .kat.GeorgianFAQRetrieval import * -from .kor.AutoRAGRetrieval import * -from .kor.KoStrategyQA import * -from .multilingual.BelebeleRetrieval import * -from .multilingual.CrossLingualSemanticDiscriminationWMT19 import * -from .multilingual.CrossLingualSemanticDiscriminationWMT21 import * -from .multilingual.CUREv1Retrieval import * -from .multilingual.IndicQARetrieval import * -from .multilingual.MintakaRetrieval import * -from .multilingual.MIRACLRetrieval import * -from .multilingual.MLQARetrieval import * -from .multilingual.MrTidyRetrieval import * -from .multilingual.MultiLongDocRetrieval import * -from .multilingual.NeuCLIR2022Retrieval import * -from .multilingual.NeuCLIR2023Retrieval import * -from .multilingual.PublicHealthQARetrieval import * -from .multilingual.StatcanDialogueDatasetRetrieval import * -from .multilingual.WikipediaRetrievalMultilingual import * -from .multilingual.XMarketRetrieval import * -from .multilingual.XPQARetrieval import * -from .multilingual.XQuADRetrieval import * -from .nob.norquad import * -from .nob.snl_retrieval import * -from .pol.ArguAnaPLRetrieval import * -from .pol.DBPediaPLRetrieval 
import * -from .pol.FiQAPLRetrieval import * -from .pol.HotpotQAPLRetrieval import * -from .pol.MSMARCOPLRetrieval import * -from .pol.NFCorpusPLRetrieval import * -from .pol.NQPLRetrieval import * -from .pol.QuoraPLRetrieval import * -from .pol.SCIDOCSPLRetrieval import * -from .pol.SciFactPLRetrieval import * -from .pol.TRECCOVIDPLRetrieval import * -from .rus.RiaNewsRetrieval import * -from .rus.RuBQRetrieval import * -from .slk.SKQuadRetrieval import * -from .slk.SlovakSumRetrieval import * -from .spa.SpanishPassageRetrievalS2P import * -from .spa.SpanishPassageRetrievalS2S import * -from .swe.SwednRetrieval import * -from .swe.SweFaqRetrieval import * -from .tur.TurHistQuad import * -from .vie.VieQuADRetrieval import * -from .zho.CMTEBRetrieval import * -from .zho.LeCaRDv2Retrieval import * +from .ara import SadeemQuestionRetrieval +from .code import ( + AppsRetrieval, + CodeEditSearchRetrieval, + CodeFeedbackMT, + CodeFeedbackST, + CodeSearchNetCCRetrieval, + CodeSearchNetRetrieval, + CodeTransOceanContestRetrieval, + CodeTransOceanDLRetrieval, + COIRCodeSearchNetRetrieval, + CosQARetrieval, + StackOverflowQARetrieval, + SyntheticText2SQLRetrieval, +) +from .dan import DanFever, DanFeverRetrieval, TV2Nordretrieval, TwitterHjerneRetrieval +from .deu import ( + GerDaLIR, + GerDaLIRSmall, + GermanDPR, + GermanGovServiceRetrieval, + GermanQuADRetrieval, + LegalQuAD, +) +from .ell import GreekCivicsQA +from .eng import ( + FEVER, + MSMARCO, + NQ, + PIQA, + SCIDOCS, + SIQA, + TRECCOVID, + AILACasedocs, + AILAStatutes, + AlphaNLI, + ARCChallenge, + ArguAna, + BrightRetrieval, + ClimateFEVER, + ClimateFEVERHardNegatives, + CQADupstackAndroidRetrieval, + CQADupstackEnglishRetrieval, + CQADupstackGamingRetrieval, + CQADupstackGisRetrieval, + CQADupstackMathematicaRetrieval, + CQADupstackPhysicsRetrieval, + CQADupstackProgrammersRetrieval, + CQADupstackStatsRetrieval, + CQADupstackTexRetrieval, + CQADupstackUnixRetrieval, + CQADupstackWebmastersRetrieval, + 
CQADupstackWordpressRetrieval, + DBPedia, + DBPediaHardNegatives, + FaithDialRetrieval, + FeedbackQARetrieval, + FEVERHardNegatives, + FiQA2018, + HagridRetrieval, + HellaSwag, + HotpotQA, + HotpotQAHardNegatives, + LegalBenchConsumerContractsQA, + LegalBenchCorporateLobbying, + LegalSummarization, + LEMBNarrativeQARetrieval, + LEMBNeedleRetrieval, + LEMBPasskeyRetrieval, + LEMBQMSumRetrieval, + LEMBSummScreenFDRetrieval, + LEMBWikimQARetrieval, + LitSearchRetrieval, + MedicalQARetrieval, + MLQuestionsRetrieval, + MSMARCOHardNegatives, + MSMARCOv2, + NarrativeQARetrieval, + NFCorpus, + NQHardNegatives, + Quail, + QuoraRetrieval, + QuoraRetrievalHardNegatives, + RARbCode, + RARbMath, + SciFact, + SpartQA, + TempReasonL1, + TempReasonL2Context, + TempReasonL2Fact, + TempReasonL2Pure, + TempReasonL3Context, + TempReasonL3Fact, + TempReasonL3Pure, + TopiOCQARetrieval, + TopiOCQARetrievalHardNegatives, + Touche2020, + Touche2020v3Retrieval, + WinoGrande, +) +from .est import EstQA +from .fra import AlloprofRetrieval, BSARDRetrieval, FQuADRetrieval, SyntecRetrieval +from .hun import HunSum2AbstractiveRetrieval +from .jpn import ( + JaGovFaqsRetrieval, + JaqketRetrieval, + JaQuADRetrieval, + NLPJournalAbsIntroRetrieval, + NLPJournalTitleAbsRetrieval, + NLPJournalTitleIntroRetrieval, +) +from .kat import GeorgianFAQRetrieval +from .kor import AutoRAGRetrieval, KoStrategyQA +from .multilingual import ( + BelebeleRetrieval, + CrossLingualSemanticDiscriminationWMT19, + CrossLingualSemanticDiscriminationWMT21, + CUREv1Retrieval, + IndicQARetrieval, + MintakaRetrieval, + MIRACLRetrieval, + MIRACLRetrievalHardNegatives, + MLQARetrieval, + MrTidyRetrieval, + MultiLongDocRetrieval, + NeuCLIR2022Retrieval, + NeuCLIR2022RetrievalHardNegatives, + NeuCLIR2023Retrieval, + NeuCLIR2023RetrievalHardNegatives, + PublicHealthQARetrieval, + StatcanDialogueDatasetRetrieval, + WikipediaRetrievalMultilingual, + XMarket, + XPQARetrieval, + XQuADRetrieval, +) +from .nob import NorQuadRetrieval, 
SNLRetrieval +from .pol import ( + MSMARCOPL, + NQPL, + SCIDOCSPL, + TRECCOVIDPL, + ArguAnaPL, + DBPediaPL, + DBPediaPLHardNegatives, + FiQAPLRetrieval, + HotpotQAPL, + HotpotQAPLHardNegatives, + MSMARCOPLHardNegatives, + NFCorpusPL, + NQPLHardNegatives, + QuoraPLRetrieval, + QuoraPLRetrievalHardNegatives, + SciFactPL, +) +from .rus import RiaNewsRetrieval, RiaNewsRetrievalHardNegatives, RuBQRetrieval +from .slk import SKQuadRetrieval, SlovakSumRetrieval +from .spa import SpanishPassageRetrievalS2P, SpanishPassageRetrievalS2S +from .swe import SwednRetrieval, SweFaqRetrieval +from .tur import TurHistQuadRetrieval +from .vie import VieQuADRetrieval +from .zho import ( + CmedqaRetrieval, + CovidRetrieval, + DuRetrieval, + EcomRetrieval, + LeCaRDv2, + MedicalRetrieval, + MMarcoRetrieval, + T2Retrieval, + VideoRetrieval, +) + +__all__ = [ + "CmedqaRetrieval", + "CovidRetrieval", + "DuRetrieval", + "EcomRetrieval", + "MMarcoRetrieval", + "MedicalRetrieval", + "T2Retrieval", + "VideoRetrieval", + "LeCaRDv2", + "SpanishPassageRetrievalS2S", + "SpanishPassageRetrievalS2P", + "MSMARCOPL", + "MSMARCOPLHardNegatives", + "SCIDOCSPL", + "SciFactPL", + "ArguAnaPL", + "FiQAPLRetrieval", + "NFCorpusPL", + "QuoraPLRetrieval", + "QuoraPLRetrievalHardNegatives", + "TRECCOVIDPL", + "NQPL", + "NQPLHardNegatives", + "DBPediaPL", + "DBPediaPLHardNegatives", + "HotpotQAPL", + "HotpotQAPLHardNegatives", + "GeorgianFAQRetrieval", + "SwednRetrieval", + "SweFaqRetrieval", + "SlovakSumRetrieval", + "SKQuadRetrieval", + "SNLRetrieval", + "NorQuadRetrieval", + "GermanQuADRetrieval", + "GerDaLIRSmall", + "GermanDPR", + "GermanGovServiceRetrieval", + "LegalQuAD", + "GerDaLIR", + "SadeemQuestionRetrieval", + "TurHistQuadRetrieval", + "VieQuADRetrieval", + "DanFever", + "DanFeverRetrieval", + "TV2Nordretrieval", + "TwitterHjerneRetrieval", + "EstQA", + "Quail", + "Touche2020", + "Touche2020v3Retrieval", + "TempReasonL2Pure", + "LegalSummarization", + "NQ", + "NQHardNegatives", + "SIQA", + "MSMARCO", 
+ "MSMARCOHardNegatives", + "DBPedia", + "DBPediaHardNegatives", + "NarrativeQARetrieval", + "MSMARCOv2", + "CQADupstackTexRetrieval", + "TRECCOVID", + "WinoGrande", + "QuoraRetrieval", + "QuoraRetrievalHardNegatives", + "AlphaNLI", + "LEMBNeedleRetrieval", + "LEMBPasskeyRetrieval", + "CQADupstackAndroidRetrieval", + "TempReasonL2Context", + "ARCChallenge", + "LegalBenchCorporateLobbying", + "SCIDOCS", + "MedicalQARetrieval", + "RARbCode", + "LEMBQMSumRetrieval", + "TempReasonL3Context", + "AILAStatutes", + "TopiOCQARetrieval", + "TopiOCQARetrievalHardNegatives", + "ClimateFEVER", + "ClimateFEVERHardNegatives", + "CQADupstackWordpressRetrieval", + "CQADupstackEnglishRetrieval", + "CQADupstackStatsRetrieval", + "MLQuestionsRetrieval", + "TempReasonL2Fact", + "CQADupstackGamingRetrieval", + "CQADupstackWebmastersRetrieval", + "CQADupstackUnixRetrieval", + "TempReasonL3Pure", + "CQADupstackPhysicsRetrieval", + "FiQA2018", + "LitSearchRetrieval", + "FeedbackQARetrieval", + "HagridRetrieval", + "FaithDialRetrieval", + "SciFact", + "CQADupstackMathematicaRetrieval", + "RARbMath", + "HellaSwag", + "PIQA", + "SpartQA", + "BrightRetrieval", + "TempReasonL1", + "HotpotQA", + "HotpotQAHardNegatives", + "LegalBenchConsumerContractsQA", + "ArguAna", + "LEMBWikimQARetrieval", + "TempReasonL3Fact", + "FEVER", + "FEVERHardNegatives", + "CQADupstackGisRetrieval", + "AILACasedocs", + "NFCorpus", + "LEMBSummScreenFDRetrieval", + "LEMBNarrativeQARetrieval", + "CQADupstackProgrammersRetrieval", + "JaGovFaqsRetrieval", + "NLPJournalAbsIntroRetrieval", + "JaqketRetrieval", + "NLPJournalTitleAbsRetrieval", + "JaQuADRetrieval", + "NLPJournalTitleIntroRetrieval", + "HunSum2AbstractiveRetrieval", + "AutoRAGRetrieval", + "KoStrategyQA", + "WikipediaRetrievalMultilingual", + "MintakaRetrieval", + "PublicHealthQARetrieval", + "CrossLingualSemanticDiscriminationWMT19", + "MultiLongDocRetrieval", + "MIRACLRetrieval", + "MIRACLRetrievalHardNegatives", + "NeuCLIR2022Retrieval", + 
"NeuCLIR2022RetrievalHardNegatives", + "StatcanDialogueDatasetRetrieval", + "IndicQARetrieval", + "NeuCLIR2023Retrieval", + "NeuCLIR2023RetrievalHardNegatives", + "CrossLingualSemanticDiscriminationWMT21", + "XMarket", + "XPQARetrieval", + "BelebeleRetrieval", + "CUREv1Retrieval", + "MLQARetrieval", + "XQuADRetrieval", + "MrTidyRetrieval", + "CodeTransOceanContestRetrieval", + "CodeTransOceanDLRetrieval", + "CodeFeedbackMT", + "CodeSearchNetCCRetrieval", + "StackOverflowQARetrieval", + "CodeFeedbackST", + "CosQARetrieval", + "CodeEditSearchRetrieval", + "SyntheticText2SQLRetrieval", + "AppsRetrieval", + "CodeSearchNetRetrieval", + "COIRCodeSearchNetRetrieval", + "RiaNewsRetrieval", + "RiaNewsRetrievalHardNegatives", + "RuBQRetrieval", + "GreekCivicsQA", + "AlloprofRetrieval", + "BSARDRetrieval", + "SyntecRetrieval", + "FQuADRetrieval", +] diff --git a/mteb/tasks/Retrieval/ara/__init__.py b/mteb/tasks/Retrieval/ara/__init__.py index e69de29bb2..a015313def 100644 --- a/mteb/tasks/Retrieval/ara/__init__.py +++ b/mteb/tasks/Retrieval/ara/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SadeemQuestionRetrieval import SadeemQuestionRetrieval + +__all__ = ["SadeemQuestionRetrieval"] diff --git a/mteb/tasks/Retrieval/code/__init__.py b/mteb/tasks/Retrieval/code/__init__.py index e69de29bb2..bc2a2fad79 100644 --- a/mteb/tasks/Retrieval/code/__init__.py +++ b/mteb/tasks/Retrieval/code/__init__.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from .AppsRetrieval import AppsRetrieval +from .CodeEditSearchRetrieval import CodeEditSearchRetrieval +from .CodeFeedbackMTRetrieval import CodeFeedbackMT +from .CodeFeedbackSTRetrieval import CodeFeedbackST +from .CodeSearchNetCCRetrieval import CodeSearchNetCCRetrieval +from .CodeSearchNetRetrieval import CodeSearchNetRetrieval +from .CodeTransOceanContestRetrieval import CodeTransOceanContestRetrieval +from .CodeTransOceanDLRetrieval import CodeTransOceanDLRetrieval +from .COIRCodeSearchNetRetrieval 
import COIRCodeSearchNetRetrieval +from .CosQARetrieval import CosQARetrieval +from .StackOverflowQARetrieval import StackOverflowQARetrieval +from .SyntheticText2SqlRetrieval import SyntheticText2SQLRetrieval + +__all__ = [ + "CodeTransOceanContestRetrieval", + "CodeTransOceanDLRetrieval", + "CodeFeedbackMT", + "CodeSearchNetCCRetrieval", + "StackOverflowQARetrieval", + "CodeFeedbackST", + "CosQARetrieval", + "CodeEditSearchRetrieval", + "SyntheticText2SQLRetrieval", + "AppsRetrieval", + "CodeSearchNetRetrieval", + "COIRCodeSearchNetRetrieval", +] diff --git a/mteb/tasks/Retrieval/dan/__init__.py b/mteb/tasks/Retrieval/dan/__init__.py index e69de29bb2..c1f49a1368 100644 --- a/mteb/tasks/Retrieval/dan/__init__.py +++ b/mteb/tasks/Retrieval/dan/__init__.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from .DanFeverRetrieval import DanFever, DanFeverRetrieval +from .TV2Nordretrieval import TV2Nordretrieval +from .TwitterHjerneRetrieval import TwitterHjerneRetrieval + +__all__ = [ + "DanFever", + "DanFeverRetrieval", + "TV2Nordretrieval", + "TwitterHjerneRetrieval", +] diff --git a/mteb/tasks/Retrieval/deu/__init__.py b/mteb/tasks/Retrieval/deu/__init__.py index e69de29bb2..886efbecc9 100644 --- a/mteb/tasks/Retrieval/deu/__init__.py +++ b/mteb/tasks/Retrieval/deu/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .GerDaLIRRetrieval import GerDaLIR +from .GerDaLIRSmallRetrieval import GerDaLIRSmall +from .GermanDPRRetrieval import GermanDPR +from .GermanGovServiceRetrieval import GermanGovServiceRetrieval +from .GermanQuADRetrieval import GermanQuADRetrieval +from .LegalQuADRetrieval import LegalQuAD + +__all__ = [ + "GermanQuADRetrieval", + "GerDaLIRSmall", + "GermanDPR", + "GermanGovServiceRetrieval", + "LegalQuAD", + "GerDaLIR", +] diff --git a/mteb/tasks/Retrieval/ell/__init__.py b/mteb/tasks/Retrieval/ell/__init__.py index e69de29bb2..ac7ec59e4f 100644 --- a/mteb/tasks/Retrieval/ell/__init__.py +++ 
b/mteb/tasks/Retrieval/ell/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GreekCivicsQA import GreekCivicsQA + +__all__ = ["GreekCivicsQA"] diff --git a/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py b/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py index 38527d2483..77c0020aa0 100644 --- a/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py @@ -17,7 +17,7 @@ class DBPedia(AbsTaskRetrieval): type="Retrieval", category="s2p", modalities=["text"], - eval_splits=["dev", "test"], + eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", date=("2017-01-01", "2017-01-01"), # best guess: based on publication date diff --git a/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py index b2bdb31455..2a347d9a05 100644 --- a/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py @@ -20,7 +20,7 @@ class HotpotQA(AbsTaskRetrieval): type="Retrieval", category="s2p", modalities=["text"], - eval_splits=["train", "dev", "test"], + eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", date=("2018-01-01", "2018-12-31"), # best guess: based on publication date diff --git a/mteb/tasks/Retrieval/eng/__init__.py b/mteb/tasks/Retrieval/eng/__init__.py index e69de29bb2..47e2498709 100644 --- a/mteb/tasks/Retrieval/eng/__init__.py +++ b/mteb/tasks/Retrieval/eng/__init__.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +from .AILACasedocsRetrieval import AILACasedocs +from .AILAStatutesRetrieval import AILAStatutes +from .AlphaNLIRetrieval import AlphaNLI +from .ARCChallengeRetrieval import ARCChallenge +from .ArguAnaRetrieval import ArguAna +from .BrightRetrieval import BrightRetrieval +from .ClimateFEVERRetrieval import ClimateFEVER, ClimateFEVERHardNegatives +from .CQADupstackAndroidRetrieval import CQADupstackAndroidRetrieval +from .CQADupstackEnglishRetrieval import CQADupstackEnglishRetrieval +from 
.CQADupstackGamingRetrieval import CQADupstackGamingRetrieval +from .CQADupstackGisRetrieval import CQADupstackGisRetrieval +from .CQADupstackMathematicaRetrieval import CQADupstackMathematicaRetrieval +from .CQADupstackPhysicsRetrieval import CQADupstackPhysicsRetrieval +from .CQADupstackProgrammersRetrieval import CQADupstackProgrammersRetrieval +from .CQADupstackStatsRetrieval import CQADupstackStatsRetrieval +from .CQADupstackTexRetrieval import CQADupstackTexRetrieval +from .CQADupstackUnixRetrieval import CQADupstackUnixRetrieval +from .CQADupstackWebmastersRetrieval import CQADupstackWebmastersRetrieval +from .CQADupstackWordpressRetrieval import CQADupstackWordpressRetrieval +from .DBPediaRetrieval import DBPedia, DBPediaHardNegatives +from .FaithDialRetrieval import FaithDialRetrieval +from .FeedbackQARetrieval import FeedbackQARetrieval +from .FEVERRetrieval import FEVER, FEVERHardNegatives +from .FiQA2018Retrieval import FiQA2018 +from .HagridRetrieval import HagridRetrieval +from .HellaSwagRetrieval import HellaSwag +from .HotpotQARetrieval import HotpotQA, HotpotQAHardNegatives +from .LegalBenchConsumerContractsQARetrieval import LegalBenchConsumerContractsQA +from .LegalBenchCorporateLobbyingRetrieval import LegalBenchCorporateLobbying +from .LegalSummarizationRetrieval import LegalSummarization +from .LEMBNarrativeQARetrieval import LEMBNarrativeQARetrieval +from .LEMBNeedleRetrieval import LEMBNeedleRetrieval +from .LEMBPasskeyRetrieval import LEMBPasskeyRetrieval +from .LEMBQMSumRetrieval import LEMBQMSumRetrieval +from .LEMBSummScreenFDRetrieval import LEMBSummScreenFDRetrieval +from .LEMBWikimQARetrieval import LEMBWikimQARetrieval +from .LitSearchRetrieval import LitSearchRetrieval +from .MedicalQARetrieval import MedicalQARetrieval +from .MLQuestions import MLQuestionsRetrieval +from .MSMARCORetrieval import MSMARCO, MSMARCOHardNegatives +from .MSMARCOv2Retrieval import MSMARCOv2 +from .NarrativeQARetrieval import NarrativeQARetrieval +from 
.NFCorpusRetrieval import NFCorpus +from .NQRetrieval import NQ, NQHardNegatives +from .PiqaRetrieval import PIQA +from .QuailRetrieval import Quail +from .QuoraRetrieval import QuoraRetrieval, QuoraRetrievalHardNegatives +from .RARbCodeRetrieval import RARbCode +from .RARbMathRetrieval import RARbMath +from .SCIDOCSRetrieval import SCIDOCS +from .SciFactRetrieval import SciFact +from .SiqaRetrieval import SIQA +from .SpartQARetrieval import SpartQA +from .TempReasonL1Retrieval import TempReasonL1 +from .TempReasonL2ContextRetrieval import TempReasonL2Context +from .TempReasonL2FactRetrieval import TempReasonL2Fact +from .TempReasonL2PureRetrieval import TempReasonL2Pure +from .TempReasonL3ContextRetrieval import TempReasonL3Context +from .TempReasonL3FactRetrieval import TempReasonL3Fact +from .TempReasonL3PureRetrieval import TempReasonL3Pure +from .TopiOCQARetrieval import TopiOCQARetrieval, TopiOCQARetrievalHardNegatives +from .Touche2020Retrieval import Touche2020, Touche2020v3Retrieval +from .TRECCOVIDRetrieval import TRECCOVID +from .WinoGrandeRetrieval import WinoGrande + +__all__ = [ + "Quail", + "Touche2020", + "Touche2020v3Retrieval", + "TempReasonL2Pure", + "LegalSummarization", + "NQ", + "NQHardNegatives", + "SIQA", + "MSMARCO", + "MSMARCOHardNegatives", + "DBPedia", + "DBPediaHardNegatives", + "NarrativeQARetrieval", + "MSMARCOv2", + "CQADupstackTexRetrieval", + "TRECCOVID", + "WinoGrande", + "QuoraRetrieval", + "QuoraRetrievalHardNegatives", + "AlphaNLI", + "LEMBNeedleRetrieval", + "LEMBPasskeyRetrieval", + "CQADupstackAndroidRetrieval", + "TempReasonL2Context", + "ARCChallenge", + "LegalBenchCorporateLobbying", + "SCIDOCS", + "MedicalQARetrieval", + "RARbCode", + "LEMBQMSumRetrieval", + "TempReasonL3Context", + "AILAStatutes", + "TopiOCQARetrieval", + "TopiOCQARetrievalHardNegatives", + "ClimateFEVER", + "ClimateFEVERHardNegatives", + "CQADupstackWordpressRetrieval", + "CQADupstackEnglishRetrieval", + "CQADupstackStatsRetrieval", + 
"MLQuestionsRetrieval", + "TempReasonL2Fact", + "CQADupstackGamingRetrieval", + "CQADupstackWebmastersRetrieval", + "CQADupstackUnixRetrieval", + "TempReasonL3Pure", + "CQADupstackPhysicsRetrieval", + "FiQA2018", + "LitSearchRetrieval", + "FeedbackQARetrieval", + "HagridRetrieval", + "FaithDialRetrieval", + "SciFact", + "CQADupstackMathematicaRetrieval", + "RARbMath", + "HellaSwag", + "PIQA", + "SpartQA", + "BrightRetrieval", + "TempReasonL1", + "HotpotQA", + "HotpotQAHardNegatives", + "LegalBenchConsumerContractsQA", + "ArguAna", + "LEMBWikimQARetrieval", + "TempReasonL3Fact", + "FEVER", + "FEVERHardNegatives", + "CQADupstackGisRetrieval", + "AILACasedocs", + "NFCorpus", + "LEMBSummScreenFDRetrieval", + "LEMBNarrativeQARetrieval", + "CQADupstackProgrammersRetrieval", +] diff --git a/mteb/tasks/Retrieval/est/__init__.py b/mteb/tasks/Retrieval/est/__init__.py index e69de29bb2..45c701f6dc 100644 --- a/mteb/tasks/Retrieval/est/__init__.py +++ b/mteb/tasks/Retrieval/est/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .estqa import EstQA + +__all__ = ["EstQA"] diff --git a/mteb/tasks/Retrieval/fra/__init__.py b/mteb/tasks/Retrieval/fra/__init__.py index e69de29bb2..b3441c8122 100644 --- a/mteb/tasks/Retrieval/fra/__init__.py +++ b/mteb/tasks/Retrieval/fra/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .AlloprofRetrieval import AlloprofRetrieval +from .BSARDRetrieval import BSARDRetrieval +from .FQuADRetrieval import FQuADRetrieval +from .SyntecRetrieval import SyntecRetrieval + +__all__ = ["AlloprofRetrieval", "BSARDRetrieval", "SyntecRetrieval", "FQuADRetrieval"] diff --git a/mteb/tasks/Retrieval/hun/__init__.py b/mteb/tasks/Retrieval/hun/__init__.py new file mode 100644 index 0000000000..bd36e67f46 --- /dev/null +++ b/mteb/tasks/Retrieval/hun/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .HunSum2 import HunSum2AbstractiveRetrieval + +__all__ = ["HunSum2AbstractiveRetrieval"] diff --git 
a/mteb/tasks/Retrieval/jpn/__init__.py b/mteb/tasks/Retrieval/jpn/__init__.py new file mode 100644 index 0000000000..1296cb0a84 --- /dev/null +++ b/mteb/tasks/Retrieval/jpn/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .JaGovFaqsRetrieval import JaGovFaqsRetrieval +from .JaqketRetrieval import JaqketRetrieval +from .JaQuADRetrieval import JaQuADRetrieval +from .NLPJournalAbsIntroRetrieval import NLPJournalAbsIntroRetrieval +from .NLPJournalTitleAbsRetrieval import NLPJournalTitleAbsRetrieval +from .NLPJournalTitleIntroRetrieval import NLPJournalTitleIntroRetrieval + +__all__ = [ + "JaGovFaqsRetrieval", + "NLPJournalAbsIntroRetrieval", + "JaqketRetrieval", + "NLPJournalTitleAbsRetrieval", + "JaQuADRetrieval", + "NLPJournalTitleIntroRetrieval", +] diff --git a/mteb/tasks/Retrieval/kat/__init__.py b/mteb/tasks/Retrieval/kat/__init__.py index e69de29bb2..026c6dc22b 100644 --- a/mteb/tasks/Retrieval/kat/__init__.py +++ b/mteb/tasks/Retrieval/kat/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GeorgianFAQRetrieval import GeorgianFAQRetrieval + +__all__ = ["GeorgianFAQRetrieval"] diff --git a/mteb/tasks/Retrieval/kor/__init__.py b/mteb/tasks/Retrieval/kor/__init__.py index e69de29bb2..fa553802be 100644 --- a/mteb/tasks/Retrieval/kor/__init__.py +++ b/mteb/tasks/Retrieval/kor/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .AutoRAGRetrieval import AutoRAGRetrieval +from .KoStrategyQA import KoStrategyQA + +__all__ = ["AutoRAGRetrieval", "KoStrategyQA"] diff --git a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py index c22d15afc4..6f7d188b7b 100644 --- a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py @@ -43,6 +43,9 @@ def _load_publichealthqa_data( answer_ids = {answer: _id for _id, answer in enumerate(set(data["answer"]))} for row in data: + if 
row["question"] is None or row["answer"] is None: + # There are some questions and answers that are None in the original dataset, specifically in the Arabic subset. + continue question = row["question"] answer = row["answer"] query_id = f"Q{question_ids[question]}" diff --git a/mteb/tasks/Retrieval/multilingual/__init__.py b/mteb/tasks/Retrieval/multilingual/__init__.py index e69de29bb2..7a2c850ad3 100644 --- a/mteb/tasks/Retrieval/multilingual/__init__.py +++ b/mteb/tasks/Retrieval/multilingual/__init__.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from .BelebeleRetrieval import BelebeleRetrieval +from .CrossLingualSemanticDiscriminationWMT19 import ( + CrossLingualSemanticDiscriminationWMT19, +) +from .CrossLingualSemanticDiscriminationWMT21 import ( + CrossLingualSemanticDiscriminationWMT21, +) +from .CUREv1Retrieval import CUREv1Retrieval +from .IndicQARetrieval import IndicQARetrieval +from .MintakaRetrieval import MintakaRetrieval +from .MIRACLRetrieval import MIRACLRetrieval, MIRACLRetrievalHardNegatives +from .MLQARetrieval import MLQARetrieval +from .MrTidyRetrieval import MrTidyRetrieval +from .MultiLongDocRetrieval import MultiLongDocRetrieval +from .NeuCLIR2022Retrieval import ( + NeuCLIR2022Retrieval, + NeuCLIR2022RetrievalHardNegatives, +) +from .NeuCLIR2023Retrieval import ( + NeuCLIR2023Retrieval, + NeuCLIR2023RetrievalHardNegatives, +) +from .PublicHealthQARetrieval import PublicHealthQARetrieval +from .StatcanDialogueDatasetRetrieval import StatcanDialogueDatasetRetrieval +from .WikipediaRetrievalMultilingual import WikipediaRetrievalMultilingual +from .XMarketRetrieval import XMarket +from .XPQARetrieval import XPQARetrieval +from .XQuADRetrieval import XQuADRetrieval + +__all__ = [ + "WikipediaRetrievalMultilingual", + "MintakaRetrieval", + "PublicHealthQARetrieval", + "CrossLingualSemanticDiscriminationWMT19", + "MultiLongDocRetrieval", + "MIRACLRetrieval", + "MIRACLRetrievalHardNegatives", + "NeuCLIR2022Retrieval", + 
"NeuCLIR2022RetrievalHardNegatives", + "StatcanDialogueDatasetRetrieval", + "IndicQARetrieval", + "NeuCLIR2023Retrieval", + "NeuCLIR2023RetrievalHardNegatives", + "CrossLingualSemanticDiscriminationWMT21", + "XMarket", + "XPQARetrieval", + "BelebeleRetrieval", + "CUREv1Retrieval", + "MLQARetrieval", + "XQuADRetrieval", + "MrTidyRetrieval", +] diff --git a/mteb/tasks/Retrieval/nob/__init__.py b/mteb/tasks/Retrieval/nob/__init__.py index e69de29bb2..5429ec3c91 100644 --- a/mteb/tasks/Retrieval/nob/__init__.py +++ b/mteb/tasks/Retrieval/nob/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .norquad import NorQuadRetrieval +from .snl_retrieval import SNLRetrieval + +__all__ = ["SNLRetrieval", "NorQuadRetrieval"] diff --git a/mteb/tasks/Retrieval/pol/__init__.py b/mteb/tasks/Retrieval/pol/__init__.py index e69de29bb2..47579b9ae8 100644 --- a/mteb/tasks/Retrieval/pol/__init__.py +++ b/mteb/tasks/Retrieval/pol/__init__.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from .ArguAnaPLRetrieval import ArguAnaPL +from .DBPediaPLRetrieval import DBPediaPL, DBPediaPLHardNegatives +from .FiQAPLRetrieval import FiQAPLRetrieval +from .HotpotQAPLRetrieval import HotpotQAPL, HotpotQAPLHardNegatives +from .MSMARCOPLRetrieval import MSMARCOPL, MSMARCOPLHardNegatives +from .NFCorpusPLRetrieval import NFCorpusPL +from .NQPLRetrieval import NQPL, NQPLHardNegatives +from .QuoraPLRetrieval import QuoraPLRetrieval, QuoraPLRetrievalHardNegatives +from .SCIDOCSPLRetrieval import SCIDOCSPL +from .SciFactPLRetrieval import SciFactPL +from .TRECCOVIDPLRetrieval import TRECCOVIDPL + +__all__ = [ + "MSMARCOPL", + "MSMARCOPLHardNegatives", + "SCIDOCSPL", + "SciFactPL", + "ArguAnaPL", + "FiQAPLRetrieval", + "NFCorpusPL", + "QuoraPLRetrieval", + "QuoraPLRetrievalHardNegatives", + "TRECCOVIDPL", + "NQPL", + "NQPLHardNegatives", + "DBPediaPL", + "DBPediaPLHardNegatives", + "HotpotQAPL", + "HotpotQAPLHardNegatives", +] diff --git a/mteb/tasks/Retrieval/rus/__init__.py 
b/mteb/tasks/Retrieval/rus/__init__.py index e69de29bb2..1f1aedfe79 100644 --- a/mteb/tasks/Retrieval/rus/__init__.py +++ b/mteb/tasks/Retrieval/rus/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .RiaNewsRetrieval import RiaNewsRetrieval, RiaNewsRetrievalHardNegatives +from .RuBQRetrieval import RuBQRetrieval + +__all__ = ["RiaNewsRetrieval", "RiaNewsRetrievalHardNegatives", "RuBQRetrieval"] diff --git a/mteb/tasks/Retrieval/slk/__init__.py b/mteb/tasks/Retrieval/slk/__init__.py index e69de29bb2..2b24947f0d 100644 --- a/mteb/tasks/Retrieval/slk/__init__.py +++ b/mteb/tasks/Retrieval/slk/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .SKQuadRetrieval import SKQuadRetrieval +from .SlovakSumRetrieval import SlovakSumRetrieval + +__all__ = ["SlovakSumRetrieval", "SKQuadRetrieval"] diff --git a/mteb/tasks/Retrieval/spa/__init__.py b/mteb/tasks/Retrieval/spa/__init__.py index e69de29bb2..39f47471aa 100644 --- a/mteb/tasks/Retrieval/spa/__init__.py +++ b/mteb/tasks/Retrieval/spa/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .SpanishPassageRetrievalS2P import SpanishPassageRetrievalS2P +from .SpanishPassageRetrievalS2S import SpanishPassageRetrievalS2S + +__all__ = ["SpanishPassageRetrievalS2S", "SpanishPassageRetrievalS2P"] diff --git a/mteb/tasks/Retrieval/swe/__init__.py b/mteb/tasks/Retrieval/swe/__init__.py index e69de29bb2..3478b1dc03 100644 --- a/mteb/tasks/Retrieval/swe/__init__.py +++ b/mteb/tasks/Retrieval/swe/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .SwednRetrieval import SwednRetrieval +from .SweFaqRetrieval import SweFaqRetrieval + +__all__ = ["SwednRetrieval", "SweFaqRetrieval"] diff --git a/mteb/tasks/Retrieval/tur/__init__.py b/mteb/tasks/Retrieval/tur/__init__.py index e69de29bb2..434608ee7c 100644 --- a/mteb/tasks/Retrieval/tur/__init__.py +++ b/mteb/tasks/Retrieval/tur/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from 
.TurHistQuad import TurHistQuadRetrieval + +__all__ = ["TurHistQuadRetrieval"] diff --git a/mteb/tasks/Retrieval/vie/__init__.py b/mteb/tasks/Retrieval/vie/__init__.py index e69de29bb2..07a2d891c5 100644 --- a/mteb/tasks/Retrieval/vie/__init__.py +++ b/mteb/tasks/Retrieval/vie/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .VieQuADRetrieval import VieQuADRetrieval + +__all__ = ["VieQuADRetrieval"] diff --git a/mteb/tasks/Retrieval/zho/__init__.py b/mteb/tasks/Retrieval/zho/__init__.py index e69de29bb2..c4f399e542 100644 --- a/mteb/tasks/Retrieval/zho/__init__.py +++ b/mteb/tasks/Retrieval/zho/__init__.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from .CMTEBRetrieval import ( + CmedqaRetrieval, + CovidRetrieval, + DuRetrieval, + EcomRetrieval, + MedicalRetrieval, + MMarcoRetrieval, + T2Retrieval, + VideoRetrieval, +) +from .LeCaRDv2Retrieval import LeCaRDv2 + +__all__ = [ + "CmedqaRetrieval", + "CovidRetrieval", + "DuRetrieval", + "EcomRetrieval", + "MMarcoRetrieval", + "MedicalRetrieval", + "T2Retrieval", + "VideoRetrieval", + "LeCaRDv2", +] diff --git a/mteb/tasks/STS/__init__.py b/mteb/tasks/STS/__init__.py index b61b79b293..34be948cba 100644 --- a/mteb/tasks/STS/__init__.py +++ b/mteb/tasks/STS/__init__.py @@ -1,31 +1,72 @@ from __future__ import annotations -from .deu.GermanSTSBenchmarkSTS import * -from .eng.BiossesSTS import * -from .eng.SickrSTS import * -from .eng.STS12STS import * -from .eng.STS13STS import * -from .eng.STS14STS import * -from .eng.STS15STS import * -from .eng.STS16STS import * -from .eng.STSBenchmarkSTS import * -from .fao.FaroeseSTS import * -from .fin.FinParaSTS import * -from .fra.SickFrSTS import * -from .jpn.JSICK import * -from .jpn.JSTS import * -from .kor.KlueSTS import * -from .kor.KorSTS import * -from .multilingual.IndicCrosslingualSTS import * -from .multilingual.SemRel24STS import * -from .multilingual.STS17CrosslingualSTS import * -from .multilingual.STS22CrosslingualSTS import * -from 
.multilingual.STSBenchmarkMultilingualSTS import * -from .pol.PolishSTS import * -from .por.Assin2STS import * -from .por.SickBrSTS import * -from .ron.RonSTS import * -from .rus.RUParaPhraserSTS import * -from .rus.RuSTSBenchmarkSTS import * -from .spa.STSES import * -from .zho.CMTEBSTS import * +from .deu import GermanSTSBenchmarkSTS +from .eng import ( + STS12STS, + STS13STS, + STS14STS, + STS15STS, + STS16STS, + BiossesSTS, + SickrSTS, + STSBenchmarkSTS, +) +from .fao import FaroeseSTS +from .fin import FinParaSTS +from .fra import SickFrSTS +from .jpn import JSICK, JSTS +from .kor import KlueSTS, KorSTS +from .multilingual import ( + IndicCrosslingualSTS, + SemRel24STS, + STS17Crosslingual, + STS22CrosslingualSTS, + STS22CrosslingualSTSv2, + STSBenchmarkMultilingualSTS, +) +from .pol import CdscrSTS, SickrPLSTS +from .por import Assin2STS, SickBrSTS +from .ron import RonSTS +from .rus import RUParaPhraserSTS, RuSTSBenchmarkSTS +from .spa import STSES +from .zho import AFQMC, ATEC, BQ, LCQMC, PAWSX, QBQTC, STSB + +__all__ = [ + "AFQMC", + "ATEC", + "BQ", + "LCQMC", + "PAWSX", + "QBQTC", + "STSB", + "Assin2STS", + "SickBrSTS", + "STSES", + "CdscrSTS", + "SickrPLSTS", + "FinParaSTS", + "GermanSTSBenchmarkSTS", + "STS12STS", + "STS13STS", + "BiossesSTS", + "STS15STS", + "STSBenchmarkSTS", + "SickrSTS", + "STS16STS", + "STS14STS", + "FaroeseSTS", + "JSICK", + "JSTS", + "RonSTS", + "KorSTS", + "KlueSTS", + "IndicCrosslingualSTS", + "SemRel24STS", + "STS17Crosslingual", + "STS22CrosslingualSTS", + "STS22CrosslingualSTSv2", + "STSBenchmarkMultilingualSTS", + "RUParaPhraserSTS", + "RuSTSBenchmarkSTS", + "SickFrSTS", +] diff --git a/mteb/tasks/STS/deu/__init__.py b/mteb/tasks/STS/deu/__init__.py index e69de29bb2..0e996fbcbb 100644 --- a/mteb/tasks/STS/deu/__init__.py +++ b/mteb/tasks/STS/deu/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GermanSTSBenchmarkSTS import GermanSTSBenchmarkSTS + +__all__ = ["GermanSTSBenchmarkSTS"] diff --git 
a/mteb/tasks/STS/eng/SickrSTS.py b/mteb/tasks/STS/eng/SickrSTS.py index 1d636688de..1c93fff578 100644 --- a/mteb/tasks/STS/eng/SickrSTS.py +++ b/mteb/tasks/STS/eng/SickrSTS.py @@ -12,8 +12,8 @@ class SickrSTS(AbsTaskSTS): "path": "mteb/sickr-sts", "revision": "20a6d6f312dd54037fe07a32d58e5e168867909d", }, - description="Semantic Textual Similarity SICK-R dataset as described here:", - reference="https://aclanthology.org/2020.lrec-1.207", + description="Semantic Textual Similarity SICK-R dataset", + reference="https://aclanthology.org/L14-1314/", type="STS", category="s2s", modalities=["text"], @@ -21,41 +21,37 @@ class SickrSTS(AbsTaskSTS): eval_langs=["eng-Latn"], main_score="cosine_spearman", date=None, - domains=None, - task_subtypes=None, - license=None, - annotations_creators=None, + domains=["Web", "Written"], + task_subtypes=["Textual Entailment"], + license="cc-by-nc-sa-3.0", + annotations_creators="human-annotated", dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{dadas-etal-2020-evaluation, - title = "Evaluation of Sentence Representations in {P}olish", - author = "Dadas, Slawomir and - Pere{\l}kiewicz, Micha{\l} and - Po{\'s}wiata, Rafa{\l}", + bibtex_citation="""@inproceedings{marelli-etal-2014-sick, + title = "A {SICK} cure for the evaluation of compositional distributional semantic models", + author = "Marelli, Marco and + Menini, Stefano and + Baroni, Marco and + Bentivogli, Luisa and + Bernardi, Raffaella and + Zamparelli, Roberto", editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and Choukri, Khalid and - Cieri, Christopher and Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and + Loftsson, Hrafn and Maegaard, Bente and Mariani, Joseph and - Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios", - booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference", + booktitle = "Proceedings of the Ninth International Conference on 
Language Resources and Evaluation ({LREC}'14)", month = may, - year = "2020", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2020.lrec-1.207", - pages = "1674--1680", - abstract = "Methods for learning sentence representations have been actively developed in recent years. However, the lack of pre-trained models and datasets annotated at the sentence level has been a problem for low-resource languages such as Polish which led to less interest in applying these methods to language-specific tasks. In this study, we introduce two new Polish datasets for evaluating sentence embeddings and provide a comprehensive evaluation of eight sentence representation methods including Polish and multilingual models. We consider classic word embedding models, recently developed contextual embeddings and multilingual sentence encoders, showing strengths and weaknesses of specific approaches. We also examine different methods of aggregating word vectors into a single sentence vector.", - language = "English", - ISBN = "979-10-95546-34-4", + year = "2014", + address = "Reykjavik, Iceland", + publisher = "European Language Resources Association (ELRA)", + url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/363_Paper.pdf", + pages = "216--223", + abstract = "Shared and internationally recognized benchmarks are fundamental for the development of any computational system. We aim to help the research community working on compositional distributional semantic models (CDSMs) by providing SICK (Sentences Involving Compositional Knowldedge), a large size English benchmark tailored for them. 
SICK consists of about 10,000 English sentence pairs that include many examples of the lexical, syntactic and semantic phenomena that CDSMs are expected to account for, but do not require dealing with other aspects of existing sentential data sets (idiomatic multiword expressions, named entities, telegraphic language) that are not within the scope of CDSMs. By means of crowdsourcing techniques, each pair was annotated for two crucial semantic tasks: relatedness in meaning (with a 5-point rating scale as gold score) and entailment relation between the two elements (with three possible gold labels: entailment, contradiction, and neutral). The SICK data set was used in SemEval-2014 Task 1, and it freely available for research purposes.", }""", ) diff --git a/mteb/tasks/STS/eng/__init__.py b/mteb/tasks/STS/eng/__init__.py index e69de29bb2..fbf76028d8 100644 --- a/mteb/tasks/STS/eng/__init__.py +++ b/mteb/tasks/STS/eng/__init__.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from .BiossesSTS import BiossesSTS +from .SickrSTS import SickrSTS +from .STS12STS import STS12STS +from .STS13STS import STS13STS +from .STS14STS import STS14STS +from .STS15STS import STS15STS +from .STS16STS import STS16STS +from .STSBenchmarkSTS import STSBenchmarkSTS + +__all__ = [ + "STS12STS", + "STS13STS", + "BiossesSTS", + "STS15STS", + "STSBenchmarkSTS", + "SickrSTS", + "STS16STS", + "STS14STS", +] diff --git a/mteb/tasks/STS/fao/__init__.py b/mteb/tasks/STS/fao/__init__.py new file mode 100644 index 0000000000..1a53420b36 --- /dev/null +++ b/mteb/tasks/STS/fao/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FaroeseSTS import FaroeseSTS + +__all__ = ["FaroeseSTS"] diff --git a/mteb/tasks/STS/fin/__init__.py b/mteb/tasks/STS/fin/__init__.py index e69de29bb2..6c142b6f3d 100644 --- a/mteb/tasks/STS/fin/__init__.py +++ b/mteb/tasks/STS/fin/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FinParaSTS import FinParaSTS + +__all__ = 
["FinParaSTS"] diff --git a/mteb/tasks/STS/fra/__init__.py b/mteb/tasks/STS/fra/__init__.py index e69de29bb2..01dd563a0f 100644 --- a/mteb/tasks/STS/fra/__init__.py +++ b/mteb/tasks/STS/fra/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SickFrSTS import SickFrSTS + +__all__ = ["SickFrSTS"] diff --git a/mteb/tasks/STS/jpn/__init__.py b/mteb/tasks/STS/jpn/__init__.py new file mode 100644 index 0000000000..61734f3c03 --- /dev/null +++ b/mteb/tasks/STS/jpn/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .JSICK import JSICK +from .JSTS import JSTS + +__all__ = ["JSICK", "JSTS"] diff --git a/mteb/tasks/STS/kor/__init__.py b/mteb/tasks/STS/kor/__init__.py index e69de29bb2..187f787ea3 100644 --- a/mteb/tasks/STS/kor/__init__.py +++ b/mteb/tasks/STS/kor/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .KlueSTS import KlueSTS +from .KorSTS import KorSTS + +__all__ = ["KorSTS", "KlueSTS"] diff --git a/mteb/tasks/STS/multilingual/__init__.py b/mteb/tasks/STS/multilingual/__init__.py index e69de29bb2..e5aa138e14 100644 --- a/mteb/tasks/STS/multilingual/__init__.py +++ b/mteb/tasks/STS/multilingual/__init__.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from .IndicCrosslingualSTS import IndicCrosslingualSTS +from .SemRel24STS import SemRel24STS +from .STS17CrosslingualSTS import STS17Crosslingual +from .STS22CrosslingualSTS import STS22CrosslingualSTS, STS22CrosslingualSTSv2 +from .STSBenchmarkMultilingualSTS import STSBenchmarkMultilingualSTS + +__all__ = [ + "IndicCrosslingualSTS", + "SemRel24STS", + "STS17Crosslingual", + "STS22CrosslingualSTS", + "STS22CrosslingualSTSv2", + "STSBenchmarkMultilingualSTS", +] diff --git a/mteb/tasks/STS/pol/__init__.py b/mteb/tasks/STS/pol/__init__.py index e69de29bb2..b9975c7b4f 100644 --- a/mteb/tasks/STS/pol/__init__.py +++ b/mteb/tasks/STS/pol/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .PolishSTS import CdscrSTS, SickrPLSTS + 
+__all__ = ["CdscrSTS", "SickrPLSTS"] diff --git a/mteb/tasks/STS/por/__init__.py b/mteb/tasks/STS/por/__init__.py new file mode 100644 index 0000000000..bc6c92f1ea --- /dev/null +++ b/mteb/tasks/STS/por/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .Assin2STS import Assin2STS +from .SickBrSTS import SickBrSTS + +__all__ = ["Assin2STS", "SickBrSTS"] diff --git a/mteb/tasks/STS/ron/__init__.py b/mteb/tasks/STS/ron/__init__.py new file mode 100644 index 0000000000..cae594b55c --- /dev/null +++ b/mteb/tasks/STS/ron/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .RonSTS import RonSTS + +__all__ = ["RonSTS"] diff --git a/mteb/tasks/STS/rus/__init__.py b/mteb/tasks/STS/rus/__init__.py new file mode 100644 index 0000000000..9d778719f0 --- /dev/null +++ b/mteb/tasks/STS/rus/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .RUParaPhraserSTS import RUParaPhraserSTS +from .RuSTSBenchmarkSTS import RuSTSBenchmarkSTS + +__all__ = ["RUParaPhraserSTS", "RuSTSBenchmarkSTS"] diff --git a/mteb/tasks/STS/spa/__init__.py b/mteb/tasks/STS/spa/__init__.py index e69de29bb2..90ac2c3e75 100644 --- a/mteb/tasks/STS/spa/__init__.py +++ b/mteb/tasks/STS/spa/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .STSES import STSES + +__all__ = ["STSES"] diff --git a/mteb/tasks/STS/zho/__init__.py b/mteb/tasks/STS/zho/__init__.py index e69de29bb2..222d11ea6e 100644 --- a/mteb/tasks/STS/zho/__init__.py +++ b/mteb/tasks/STS/zho/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .CMTEBSTS import AFQMC, ATEC, BQ, LCQMC, PAWSX, QBQTC, STSB + +__all__ = ["AFQMC", "ATEC", "BQ", "LCQMC", "PAWSX", "QBQTC", "STSB"] diff --git a/mteb/tasks/SpeedTask/__init__.py b/mteb/tasks/SpeedTask/__init__.py index 5e9d2ce9bb..5ef332ae20 100644 --- a/mteb/tasks/SpeedTask/__init__.py +++ b/mteb/tasks/SpeedTask/__init__.py @@ -1,4 +1,6 @@ from __future__ import annotations -from .CPUSpeedTask import * -from 
.GPUSpeedTask import * +from .CPUSpeedTask import CPUSpeedTask +from .GPUSpeedTask import GPUSpeedTask + +__all__ = ["GPUSpeedTask", "CPUSpeedTask"] diff --git a/mteb/tasks/Summarization/__init__.py b/mteb/tasks/Summarization/__init__.py index 51c69766fe..0b8a424167 100644 --- a/mteb/tasks/Summarization/__init__.py +++ b/mteb/tasks/Summarization/__init__.py @@ -1,4 +1,11 @@ -from __future__ import annotations - -from .eng.SummEvalSummarization import * -from .fra.SummEvalFrSummarization import * +from __future__ import annotations + +from .eng import SummEvalSummarization, SummEvalSummarizationv2 +from .fra import SummEvalFrSummarization, SummEvalFrSummarizationv2 + +__all__ = [ + "SummEvalSummarization", + "SummEvalSummarizationv2", + "SummEvalFrSummarization", + "SummEvalFrSummarizationv2", +] diff --git a/mteb/tasks/Summarization/eng/__init__.py b/mteb/tasks/Summarization/eng/__init__.py index e69de29bb2..68ea2d1c73 100644 --- a/mteb/tasks/Summarization/eng/__init__.py +++ b/mteb/tasks/Summarization/eng/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SummEvalSummarization import SummEvalSummarization, SummEvalSummarizationv2 + +__all__ = ["SummEvalSummarization", "SummEvalSummarizationv2"] diff --git a/mteb/tasks/Summarization/fra/__init__.py b/mteb/tasks/Summarization/fra/__init__.py index e69de29bb2..8b8066ae4a 100644 --- a/mteb/tasks/Summarization/fra/__init__.py +++ b/mteb/tasks/Summarization/fra/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SummEvalFrSummarization import SummEvalFrSummarization, SummEvalFrSummarizationv2 + +__all__ = ["SummEvalFrSummarization", "SummEvalFrSummarizationv2"] diff --git a/mteb/tasks/__init__.py b/mteb/tasks/__init__.py index 72c357606f..6e09541e10 100644 --- a/mteb/tasks/__init__.py +++ b/mteb/tasks/__init__.py @@ -1,14 +1,1296 @@ from __future__ import annotations -from .BitextMining import * -from .Classification import * -from .Clustering import * -from 
.InstructionReranking import * -from .InstructionRetrieval import * -from .MultiLabelClassification import * -from .PairClassification import * -from .Reranking import * -from .Retrieval import * -from .SpeedTask import * -from .STS import * -from .Summarization import * +from .BitextMining import ( + BibleNLPBitextMining, + BornholmBitextMining, + BUCCBitextMining, + BUCCBitextMiningFast, + DiaBLaBitextMining, + FloresBitextMining, + IN22ConvBitextMining, + IN22GenBitextMining, + IndicGenBenchFloresBitextMining, + IWSLT2017BitextMining, + LinceMTBitextMining, + NollySentiBitextMining, + NorwegianCourtsBitextMining, + NTREXBitextMining, + NusaTranslationBitextMining, + NusaXBitextMining, + PhincBitextMining, + RomaTalesBitextMining, + SRNCorpusBitextMining, + TatoebaBitextMining, + TbilisiCityHallBitextMining, + VieMedEVBitextMining, +) +from .Classification import ( + AJGT, + AfriSentiClassification, + AfriSentiLangClassification, + AllegroReviewsClassification, + AmazonCounterfactualClassification, + AmazonPolarityClassification, + AmazonReviewsClassification, + AngryTweetsClassification, + ArxivClassification, + Banking77Classification, + BengaliDocumentClassification, + BengaliHateSpeechClassification, + BengaliSentimentAnalysis, + BulgarianStoreReviewSentimentClassfication, + CanadaTaxCourtOutcomesLegalBenchClassification, + CataloniaTweetClassification, + CbdClassification, + ContractNLIConfidentialityOfAgreementLegalBenchClassification, + ContractNLIExplicitIdentificationLegalBenchClassification, + ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification, + ContractNLILimitedUseLegalBenchClassification, + ContractNLINoLicensingLegalBenchClassification, + ContractNLINoticeOnCompelledDisclosureLegalBenchClassification, + ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification, + ContractNLIPermissibleCopyLegalBenchClassification, + ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification, + 
ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification, + ContractNLIReturnOfConfidentialInformationLegalBenchClassification, + ContractNLISharingWithEmployeesLegalBenchClassification, + ContractNLISharingWithThirdPartiesLegalBenchClassification, + ContractNLISurvivalOfObligationsLegalBenchClassification, + CorporateLobbyingLegalBenchClassification, + CSFDCZMovieReviewSentimentClassification, + CSFDSKMovieReviewSentimentClassification, + CUADAffiliateLicenseLicenseeLegalBenchClassification, + CUADAffiliateLicenseLicensorLegalBenchClassification, + CUADAntiAssignmentLegalBenchClassification, + CUADAuditRightsLegalBenchClassification, + CUADCapOnLiabilityLegalBenchClassification, + CUADChangeOfControlLegalBenchClassification, + CUADCompetitiveRestrictionExceptionLegalBenchClassification, + CUADCovenantNotToSueLegalBenchClassification, + CUADEffectiveDateLegalBenchClassification, + CUADExclusivityLegalBenchClassification, + CUADExpirationDateLegalBenchClassification, + CUADGoverningLawLegalBenchClassification, + CUADInsuranceLegalBenchClassification, + CUADIPOwnershipAssignmentLegalBenchClassification, + CUADIrrevocableOrPerpetualLicenseLegalBenchClassification, + CUADJointIPOwnershipLegalBenchClassification, + CUADLicenseGrantLegalBenchClassification, + CUADLiquidatedDamagesLegalBenchClassification, + CUADMinimumCommitmentLegalBenchClassification, + CUADMostFavoredNationLegalBenchClassification, + CUADNonCompeteLegalBenchClassification, + CUADNonDisparagementLegalBenchClassification, + CUADNonTransferableLicenseLegalBenchClassification, + CUADNoSolicitOfCustomersLegalBenchClassification, + CUADNoSolicitOfEmployeesLegalBenchClassification, + CUADNoticePeriodToTerminateRenewalLegalBenchClassification, + CUADPostTerminationServicesLegalBenchClassification, + CUADPriceRestrictionsLegalBenchClassification, + CUADRenewalTermLegalBenchClassification, + CUADRevenueProfitSharingLegalBenchClassification, + CUADRofrRofoRofnLegalBenchClassification, + 
CUADSourceCodeEscrowLegalBenchClassification, + CUADTerminationForConvenienceLegalBenchClassification, + CUADThirdPartyBeneficiaryLegalBenchClassification, + CUADUncappedLiabilityLegalBenchClassification, + CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification, + CUADVolumeRestrictionLegalBenchClassification, + CUADWarrantyDurationLegalBenchClassification, + CyrillicTurkicLangClassification, + CzechProductReviewSentimentClassification, + CzechSoMeSentimentClassification, + CzechSubjectivityClassification, + DalajClassification, + DanishPoliticalCommentsClassification, + DBpediaClassification, + DdiscoCohesionClassification, + DefinitionClassificationLegalBenchClassification, + Diversity1LegalBenchClassification, + Diversity2LegalBenchClassification, + Diversity3LegalBenchClassification, + Diversity4LegalBenchClassification, + Diversity5LegalBenchClassification, + Diversity6LegalBenchClassification, + DKHateClassification, + DutchBookReviewSentimentClassification, + EmotionClassification, + EstonianValenceClassification, + FilipinoHateSpeechClassification, + FilipinoShopeeReviewsClassification, + FinancialPhrasebankClassification, + FinToxicityClassification, + FrenchBookReviews, + FrenkEnClassification, + FrenkHrClassification, + FrenkSlClassification, + FunctionOfDecisionSectionLegalBenchClassification, + GeoreviewClassification, + GeorgianSentimentClassification, + GermanPoliticiansTwitterSentimentClassification, + GreekLegalCodeClassification, + GujaratiNewsClassification, + HateSpeechPortugueseClassification, + HeadlineClassification, + HebrewSentimentAnalysis, + HinDialectClassification, + HindiDiscourseClassification, + HotelReviewSentimentClassification, + IFlyTek, + ImdbClassification, + InappropriatenessClassification, + IndicLangClassification, + IndicNLPNewsClassification, + IndicSentimentClassification, + IndonesianIdClickbaitClassification, + IndonesianMongabayConservationClassification, + InsurancePolicyInterpretationLegalBenchClassification, + 
InternationalCitizenshipQuestionsLegalBenchClassification, + IsiZuluNewsClassification, + ItaCaseholdClassification, + ItalianLinguisticAcceptabilityClassification, + JavaneseIMDBClassification, + JCrewBlockerLegalBenchClassification, + JDReview, + KannadaNewsClassification, + KinopoiskClassification, + KlueTC, + KorFin, + KorHateClassification, + KorSarcasmClassification, + KurdishSentimentClassification, + LanguageClassification, + LccSentimentClassification, + LearnedHandsBenefitsLegalBenchClassification, + LearnedHandsBusinessLegalBenchClassification, + LearnedHandsConsumerLegalBenchClassification, + LearnedHandsCourtsLegalBenchClassification, + LearnedHandsCrimeLegalBenchClassification, + LearnedHandsDivorceLegalBenchClassification, + LearnedHandsDomesticViolenceLegalBenchClassification, + LearnedHandsEducationLegalBenchClassification, + LearnedHandsEmploymentLegalBenchClassification, + LearnedHandsEstatesLegalBenchClassification, + LearnedHandsFamilyLegalBenchClassification, + LearnedHandsHealthLegalBenchClassification, + LearnedHandsHousingLegalBenchClassification, + LearnedHandsImmigrationLegalBenchClassification, + LearnedHandsTortsLegalBenchClassification, + LearnedHandsTrafficLegalBenchClassification, + LegalReasoningCausalityLegalBenchClassification, + MacedonianTweetSentimentClassification, + MalayalamNewsClassification, + MarathiNewsClassification, + MasakhaNEWSClassification, + MassiveIntentClassification, + MassiveScenarioClassification, + MAUDLegalBenchClassification, + Moroco, + MovieReviewSentimentClassification, + MTOPDomainClassification, + MTOPIntentClassification, + MultiHateClassification, + MultilingualSentiment, + MultilingualSentimentClassification, + MyanmarNews, + NaijaSenti, + NepaliNewsClassification, + NewsClassification, + NordicLangClassification, + NoRecClassification, + NorwegianParliamentClassification, + NusaParagraphEmotionClassification, + NusaParagraphTopicClassification, + NusaXSentiClassification, + 
NYSJudicialEthicsLegalBenchClassification, + OdiaNewsClassification, + OnlineShopping, + OnlineStoreReviewSentimentClassification, + OPP115DataRetentionLegalBenchClassification, + OPP115DataSecurityLegalBenchClassification, + OPP115DoNotTrackLegalBenchClassification, + OPP115FirstPartyCollectionUseLegalBenchClassification, + OPP115InternationalAndSpecificAudiencesLegalBenchClassification, + OPP115PolicyChangeLegalBenchClassification, + OPP115ThirdPartySharingCollectionLegalBenchClassification, + OPP115UserAccessEditAndDeletionLegalBenchClassification, + OPP115UserChoiceControlLegalBenchClassification, + OralArgumentQuestionPurposeLegalBenchClassification, + OverrulingLegalBenchClassification, + PacClassification, + PatentClassification, + PersianFoodSentimentClassification, + PersonalJurisdictionLegalBenchClassification, + PoemSentimentClassification, + PolEmo2InClassification, + PolEmo2OutClassification, + PROALegalBenchClassification, + PunjabiNewsClassification, + RestaurantReviewSentimentClassification, + RomanianReviewsSentiment, + RomanianSentimentClassification, + RuReviewsClassification, + RuSciBenchGRNTIClassification, + RuSciBenchOECDClassification, + SanskritShlokasClassification, + ScalaClassification, + SCDBPAccountabilityLegalBenchClassification, + SCDBPAuditsLegalBenchClassification, + SCDBPCertificationLegalBenchClassification, + SCDBPTrainingLegalBenchClassification, + SCDBPVerificationLegalBenchClassification, + SCDDAccountabilityLegalBenchClassification, + SCDDAuditsLegalBenchClassification, + SCDDCertificationLegalBenchClassification, + SCDDTrainingLegalBenchClassification, + SCDDVerificationLegalBenchClassification, + SentimentAnalysisHindi, + SIB200Classification, + SinhalaNewsClassification, + SinhalaNewsSourceClassification, + SiswatiNewsClassification, + SlovakHateSpeechClassification, + SlovakMovieReviewSentimentClassification, + SouthAfricanLangClassification, + SpanishNewsClassification, + SpanishSentimentClassification, + 
SwahiliNewsClassification, + SwedishSentimentClassification, + SweRecClassification, + SwissJudgementClassification, + TamilNewsClassification, + TelemarketingSalesRuleLegalBenchClassification, + TeluguAndhraJyotiNewsClassification, + TenKGnadClassification, + TextualismToolDictionariesLegalBenchClassification, + TextualismToolPlainLegalBenchClassification, + TNews, + ToxicChatClassification, + ToxicConversationsClassification, + TswanaNewsClassification, + TurkicClassification, + TurkishMovieSentimentClassification, + TurkishProductSentimentClassification, + TweetEmotionClassification, + TweetSarcasmClassification, + TweetSentimentClassification, + TweetSentimentExtractionClassification, + TweetTopicSingleClassification, + UCCVCommonLawLegalBenchClassification, + UkrFormalityClassification, + UnfairTOSLegalBenchClassification, + UrduRomanSentimentClassification, + VieStudentFeedbackClassification, + Waimai, + WisesightSentimentClassification, + WongnaiReviewsClassification, + WRIMEClassification, + YahooAnswersTopicsClassification, + YelpReviewFullClassification, + YueOpenriceReviewClassification, +) +from .Clustering import ( + AlloProfClusteringP2P, + AlloProfClusteringP2PFast, + AlloProfClusteringS2S, + AlloProfClusteringS2SFast, + ArxivClusteringP2P, + ArxivClusteringP2PFast, + ArxivClusteringS2S, + ArXivHierarchicalClusteringP2P, + ArXivHierarchicalClusteringS2S, + BigPatentClustering, + BigPatentClusteringFast, + BiorxivClusteringP2P, + BiorxivClusteringP2PFast, + BiorxivClusteringS2S, + BiorxivClusteringS2SFast, + BlurbsClusteringP2P, + BlurbsClusteringP2PFast, + BlurbsClusteringS2S, + BlurbsClusteringS2SFast, + CLSClusteringFastP2P, + CLSClusteringFastS2S, + CLSClusteringP2P, + CLSClusteringS2S, + EightTagsClustering, + EightTagsClusteringFast, + GeoreviewClusteringP2P, + HALClusteringS2S, + HALClusteringS2SFast, + IndicReviewsClusteringP2P, + LivedoorNewsClustering, + LivedoorNewsClusteringv2, + MasakhaNEWSClusteringP2P, + MasakhaNEWSClusteringS2S, + 
MedrxivClusteringP2P, + MedrxivClusteringP2PFast, + MedrxivClusteringS2S, + MedrxivClusteringS2SFast, + MewsC16JaClustering, + MLSUMClusteringP2P, + MLSUMClusteringP2PFast, + MLSUMClusteringS2S, + MLSUMClusteringS2SFast, + PlscClusteringP2P, + PlscClusteringP2PFast, + PlscClusteringS2S, + PlscClusteringS2SFast, + RedditClustering, + RedditClusteringP2P, + RedditFastClusteringP2P, + RedditFastClusteringS2S, + RomaniBibleClustering, + RuSciBenchGRNTIClusteringP2P, + RuSciBenchOECDClusteringP2P, + SIB200ClusteringFast, + SNLClustering, + SNLHierarchicalClusteringP2P, + SNLHierarchicalClusteringS2S, + SpanishNewsClusteringP2P, + StackExchangeClustering, + StackExchangeClusteringFast, + StackExchangeClusteringP2P, + StackExchangeClusteringP2PFast, + SwednClustering, + SwednClusteringFastS2S, + SwednClusteringP2P, + TenKGnadClusteringP2P, + TenKGnadClusteringP2PFast, + TenKGnadClusteringS2S, + TenKGnadClusteringS2SFast, + ThuNewsClusteringFastP2P, + ThuNewsClusteringFastS2S, + ThuNewsClusteringP2P, + ThuNewsClusteringS2S, + TwentyNewsgroupsClustering, + TwentyNewsgroupsClusteringFast, + VGClustering, + VGHierarchicalClusteringP2P, + VGHierarchicalClusteringS2S, + WikiCitiesClustering, + WikiClusteringFastP2P, + WikiClusteringP2P, +) +from .InstructionReranking import ( + Core17InstructionRetrieval, + News21InstructionRetrieval, + Robust04InstructionRetrieval, + mFollowIR, + mFollowIRCrossLingual, +) +from .InstructionRetrieval import InstructIR +from .MultiLabelClassification import ( + BrazilianToxicTweetsClassification, + CEDRClassification, + KorHateSpeechMLClassification, + MalteseNewsClassification, + MultiEURLEXMultilabelClassification, + SensitiveTopicsClassification, +) +from .PairClassification import ( + RTE3, + XNLI, + XNLIV2, + ArEntail, + ArmenianParaphrasePC, + Assin2RTE, + CdscePC, + Cmnli, + CTKFactsNLI, + FalseFriendsDeEnPC, + FarsTail, + IndicXnliPairClassification, + IndoNLI, + KlueNLI, + LegalBenchPC, + Ocnli, + OpusparcusPC, + 
PawsXPairClassification, + PpcPC, + PscPC, + SickBrPC, + SickePLPC, + SprintDuplicateQuestionsPC, + TERRa, + TwitterSemEval2015PC, + TwitterURLCorpusPC, + XStance, +) +from .Reranking import ( + AlloprofReranking, + AskUbuntuDupQuestions, + CMedQAv1, + CMedQAv2, + ESCIReranking, + MindSmallReranking, + MIRACLReranking, + MMarcoReranking, + NevIR, + RuBQReranking, + SciDocsReranking, + StackOverflowDupQuestions, + SyntecReranking, + T2Reranking, + VoyageMMarcoReranking, + WebLINXCandidatesReranking, + WikipediaRerankingMultilingual, +) +from .Retrieval import ( + FEVER, + MSMARCO, + MSMARCOPL, + NQ, + NQPL, + PIQA, + SCIDOCS, + SCIDOCSPL, + SIQA, + TRECCOVID, + TRECCOVIDPL, + AILACasedocs, + AILAStatutes, + AlloprofRetrieval, + AlphaNLI, + AppsRetrieval, + ARCChallenge, + ArguAna, + ArguAnaPL, + AutoRAGRetrieval, + BelebeleRetrieval, + BrightRetrieval, + BSARDRetrieval, + ClimateFEVER, + ClimateFEVERHardNegatives, + CmedqaRetrieval, + CodeEditSearchRetrieval, + CodeFeedbackMT, + CodeFeedbackST, + CodeSearchNetCCRetrieval, + CodeSearchNetRetrieval, + CodeTransOceanContestRetrieval, + CodeTransOceanDLRetrieval, + COIRCodeSearchNetRetrieval, + CosQARetrieval, + CovidRetrieval, + CQADupstackAndroidRetrieval, + CQADupstackEnglishRetrieval, + CQADupstackGamingRetrieval, + CQADupstackGisRetrieval, + CQADupstackMathematicaRetrieval, + CQADupstackPhysicsRetrieval, + CQADupstackProgrammersRetrieval, + CQADupstackStatsRetrieval, + CQADupstackTexRetrieval, + CQADupstackUnixRetrieval, + CQADupstackWebmastersRetrieval, + CQADupstackWordpressRetrieval, + CrossLingualSemanticDiscriminationWMT19, + CrossLingualSemanticDiscriminationWMT21, + CUREv1Retrieval, + DanFever, + DanFeverRetrieval, + DBPedia, + DBPediaHardNegatives, + DBPediaPL, + DBPediaPLHardNegatives, + DuRetrieval, + EcomRetrieval, + EstQA, + FaithDialRetrieval, + FeedbackQARetrieval, + FEVERHardNegatives, + FiQA2018, + FiQAPLRetrieval, + FQuADRetrieval, + GeorgianFAQRetrieval, + GerDaLIR, + GerDaLIRSmall, + GermanDPR, + 
GermanGovServiceRetrieval, + GermanQuADRetrieval, + GreekCivicsQA, + HagridRetrieval, + HellaSwag, + HotpotQA, + HotpotQAHardNegatives, + HotpotQAPL, + HotpotQAPLHardNegatives, + HunSum2AbstractiveRetrieval, + IndicQARetrieval, + JaGovFaqsRetrieval, + JaqketRetrieval, + JaQuADRetrieval, + KoStrategyQA, + LeCaRDv2, + LegalBenchConsumerContractsQA, + LegalBenchCorporateLobbying, + LegalQuAD, + LegalSummarization, + LEMBNarrativeQARetrieval, + LEMBNeedleRetrieval, + LEMBPasskeyRetrieval, + LEMBQMSumRetrieval, + LEMBSummScreenFDRetrieval, + LEMBWikimQARetrieval, + LitSearchRetrieval, + MedicalQARetrieval, + MedicalRetrieval, + MintakaRetrieval, + MIRACLRetrieval, + MIRACLRetrievalHardNegatives, + MLQARetrieval, + MLQuestionsRetrieval, + MMarcoRetrieval, + MrTidyRetrieval, + MSMARCOHardNegatives, + MSMARCOPLHardNegatives, + MSMARCOv2, + MultiLongDocRetrieval, + NarrativeQARetrieval, + NeuCLIR2022Retrieval, + NeuCLIR2022RetrievalHardNegatives, + NeuCLIR2023Retrieval, + NeuCLIR2023RetrievalHardNegatives, + NFCorpus, + NFCorpusPL, + NLPJournalAbsIntroRetrieval, + NLPJournalTitleAbsRetrieval, + NLPJournalTitleIntroRetrieval, + NorQuadRetrieval, + NQHardNegatives, + NQPLHardNegatives, + PublicHealthQARetrieval, + Quail, + QuoraPLRetrieval, + QuoraPLRetrievalHardNegatives, + QuoraRetrieval, + QuoraRetrievalHardNegatives, + RARbCode, + RARbMath, + RiaNewsRetrieval, + RiaNewsRetrievalHardNegatives, + RuBQRetrieval, + SadeemQuestionRetrieval, + SciFact, + SciFactPL, + SKQuadRetrieval, + SlovakSumRetrieval, + SNLRetrieval, + SpanishPassageRetrievalS2P, + SpanishPassageRetrievalS2S, + SpartQA, + StackOverflowQARetrieval, + StatcanDialogueDatasetRetrieval, + SwednRetrieval, + SweFaqRetrieval, + SyntecRetrieval, + SyntheticText2SQLRetrieval, + T2Retrieval, + TempReasonL1, + TempReasonL2Context, + TempReasonL2Fact, + TempReasonL2Pure, + TempReasonL3Context, + TempReasonL3Fact, + TempReasonL3Pure, + TopiOCQARetrieval, + TopiOCQARetrievalHardNegatives, + Touche2020, + 
Touche2020v3Retrieval, + TurHistQuadRetrieval, + TV2Nordretrieval, + TwitterHjerneRetrieval, + VideoRetrieval, + VieQuADRetrieval, + WikipediaRetrievalMultilingual, + WinoGrande, + XMarket, + XPQARetrieval, + XQuADRetrieval, +) +from .SpeedTask import CPUSpeedTask, GPUSpeedTask +from .STS import ( + AFQMC, + ATEC, + BQ, + JSICK, + JSTS, + LCQMC, + PAWSX, + QBQTC, + STS12STS, + STS13STS, + STS14STS, + STS15STS, + STS16STS, + STSB, + STSES, + Assin2STS, + BiossesSTS, + CdscrSTS, + FaroeseSTS, + FinParaSTS, + GermanSTSBenchmarkSTS, + IndicCrosslingualSTS, + KlueSTS, + KorSTS, + RonSTS, + RUParaPhraserSTS, + RuSTSBenchmarkSTS, + SemRel24STS, + SickBrSTS, + SickFrSTS, + SickrPLSTS, + SickrSTS, + STS17Crosslingual, + STS22CrosslingualSTS, + STS22CrosslingualSTSv2, + STSBenchmarkMultilingualSTS, + STSBenchmarkSTS, +) +from .Summarization import ( + SummEvalFrSummarization, + SummEvalFrSummarizationv2, + SummEvalSummarization, + SummEvalSummarizationv2, +) + +__all__ = [ + "CLSClusteringFastP2P", + "CLSClusteringFastS2S", + "CLSClusteringP2P", + "CLSClusteringS2S", + "ThuNewsClusteringFastP2P", + "ThuNewsClusteringFastS2S", + "ThuNewsClusteringP2P", + "ThuNewsClusteringS2S", + "SpanishNewsClusteringP2P", + "EightTagsClustering", + "EightTagsClusteringFast", + "PlscClusteringP2P", + "PlscClusteringP2PFast", + "PlscClusteringS2S", + "PlscClusteringS2SFast", + "SwednClustering", + "SwednClusteringFastS2S", + "SwednClusteringP2P", + "VGClustering", + "SNLHierarchicalClusteringP2P", + "SNLHierarchicalClusteringS2S", + "SNLClustering", + "VGHierarchicalClusteringP2P", + "VGHierarchicalClusteringS2S", + "BlurbsClusteringS2S", + "BlurbsClusteringS2SFast", + "TenKGnadClusteringP2P", + "TenKGnadClusteringP2PFast", + "TenKGnadClusteringS2S", + "TenKGnadClusteringS2SFast", + "BlurbsClusteringP2P", + "BlurbsClusteringP2PFast", + "RomaniBibleClustering", + "MedrxivClusteringS2S", + "MedrxivClusteringS2SFast", + "BiorxivClusteringS2S", + "BiorxivClusteringS2SFast", + 
"StackExchangeClustering", + "StackExchangeClusteringFast", + "RedditClustering", + "RedditFastClusteringS2S", + "ArxivClusteringS2S", + "ArxivClusteringP2P", + "ArxivClusteringP2PFast", + "MedrxivClusteringP2P", + "MedrxivClusteringP2PFast", + "WikiCitiesClustering", + "BiorxivClusteringP2P", + "BiorxivClusteringP2PFast", + "TwentyNewsgroupsClustering", + "TwentyNewsgroupsClusteringFast", + "ArXivHierarchicalClusteringP2P", + "ArXivHierarchicalClusteringS2S", + "BigPatentClustering", + "BigPatentClusteringFast", + "StackExchangeClusteringP2P", + "StackExchangeClusteringP2PFast", + "RedditClusteringP2P", + "RedditFastClusteringP2P", + "LivedoorNewsClustering", + "LivedoorNewsClusteringv2", + "MewsC16JaClustering", + "WikiClusteringFastP2P", + "WikiClusteringP2P", + "MLSUMClusteringS2S", + "MLSUMClusteringS2SFast", + "MasakhaNEWSClusteringS2S", + "MLSUMClusteringP2P", + "MLSUMClusteringP2PFast", + "IndicReviewsClusteringP2P", + "SIB200ClusteringFast", + "MasakhaNEWSClusteringP2P", + "GeoreviewClusteringP2P", + "RuSciBenchGRNTIClusteringP2P", + "RuSciBenchOECDClusteringP2P", + "HALClusteringS2S", + "HALClusteringS2SFast", + "AlloProfClusteringS2S", + "AlloProfClusteringS2SFast", + "AlloProfClusteringP2P", + "AlloProfClusteringP2PFast", + "SummEvalSummarization", + "SummEvalSummarizationv2", + "SummEvalFrSummarization", + "SummEvalFrSummarizationv2", + "GPUSpeedTask", + "CPUSpeedTask", + "News21InstructionRetrieval", + "Core17InstructionRetrieval", + "Robust04InstructionRetrieval", + "mFollowIR", + "mFollowIRCrossLingual", + "BrazilianToxicTweetsClassification", + "MalteseNewsClassification", + "KorHateSpeechMLClassification", + "MultiEURLEXMultilabelClassification", + "SensitiveTopicsClassification", + "CEDRClassification", + "TbilisiCityHallBitextMining", + "VieMedEVBitextMining", + "BornholmBitextMining", + "SRNCorpusBitextMining", + "IN22ConvBitextMining", + "IN22GenBitextMining", + "BUCCBitextMining", + "LinceMTBitextMining", + "NusaTranslationBitextMining", + 
"DiaBLaBitextMining", + "NTREXBitextMining", + "IndicGenBenchFloresBitextMining", + "NollySentiBitextMining", + "BUCCBitextMiningFast", + "PhincBitextMining", + "TatoebaBitextMining", + "NusaXBitextMining", + "IWSLT2017BitextMining", + "BibleNLPBitextMining", + "FloresBitextMining", + "RomaTalesBitextMining", + "NorwegianCourtsBitextMining", + "Cmnli", + "Ocnli", + "Assin2RTE", + "SickBrPC", + "CdscePC", + "PpcPC", + "PscPC", + "SickePLPC", + "IndoNLI", + "FalseFriendsDeEnPC", + "ArEntail", + "ArmenianParaphrasePC", + "CTKFactsNLI", + "LegalBenchPC", + "TwitterSemEval2015PC", + "TwitterURLCorpusPC", + "SprintDuplicateQuestionsPC", + "FarsTail", + "KlueNLI", + "IndicXnliPairClassification", + "OpusparcusPC", + "PawsXPairClassification", + "RTE3", + "XStance", + "XNLI", + "XNLIV2", + "TERRa", + "TeluguAndhraJyotiNewsClassification", + "IFlyTek", + "JDReview", + "MultilingualSentiment", + "OnlineShopping", + "TNews", + "Waimai", + "YueOpenriceReviewClassification", + "HateSpeechPortugueseClassification", + "SpanishNewsClassification", + "SpanishSentimentClassification", + "AllegroReviewsClassification", + "CbdClassification", + "PacClassification", + "PolEmo2InClassification", + "PolEmo2OutClassification", + "BulgarianStoreReviewSentimentClassfication", + "KurdishSentimentClassification", + "ItaCaseholdClassification", + "ItalianLinguisticAcceptabilityClassification", + "GeorgianSentimentClassification", + "DalajClassification", + "SweRecClassification", + "SwedishSentimentClassification", + "CSFDSKMovieReviewSentimentClassification", + "SlovakHateSpeechClassification", + "NorwegianParliamentClassification", + "NoRecClassification", + "FilipinoHateSpeechClassification", + "FilipinoShopeeReviewsClassification", + "MarathiNewsClassification", + "IndonesianIdClickbaitClassification", + "IndonesianMongabayConservationClassification", + "UrduRomanSentimentClassification", + "MacedonianTweetSentimentClassification", + "FrenkSlClassification", + "SwahiliNewsClassification", 
+ "FinToxicityClassification", + "KannadaNewsClassification", + "TenKGnadClassification", + "GermanPoliticiansTwitterSentimentClassification", + "PunjabiNewsClassification", + "TswanaNewsClassification", + "TweetSarcasmClassification", + "TweetEmotionClassification", + "RestaurantReviewSentimentClassification", + "HotelReviewSentimentClassification", + "OnlineStoreReviewSentimentClassification", + "AJGT", + "TurkishProductSentimentClassification", + "TurkishMovieSentimentClassification", + "NepaliNewsClassification", + "VieStudentFeedbackClassification", + "DutchBookReviewSentimentClassification", + "SiswatiNewsClassification", + "UkrFormalityClassification", + "SanskritShlokasClassification", + "SlovakMovieReviewSentimentClassification", + "AngryTweetsClassification", + "DdiscoCohesionClassification", + "DanishPoliticalCommentsClassification", + "DKHateClassification", + "LccSentimentClassification", + "TamilNewsClassification", + "CSFDCZMovieReviewSentimentClassification", + "CzechSubjectivityClassification", + "CzechProductReviewSentimentClassification", + "CzechSoMeSentimentClassification", + "EstonianValenceClassification", + "MyanmarNews", + "JavaneseIMDBClassification", + "YahooAnswersTopicsClassification", + "CUADAffiliateLicenseLicenseeLegalBenchClassification", + "CUADAffiliateLicenseLicensorLegalBenchClassification", + "CUADAntiAssignmentLegalBenchClassification", + "CUADAuditRightsLegalBenchClassification", + "CUADCapOnLiabilityLegalBenchClassification", + "CUADChangeOfControlLegalBenchClassification", + "CUADCompetitiveRestrictionExceptionLegalBenchClassification", + "CUADCovenantNotToSueLegalBenchClassification", + "CUADEffectiveDateLegalBenchClassification", + "CUADExclusivityLegalBenchClassification", + "CUADExpirationDateLegalBenchClassification", + "CUADGoverningLawLegalBenchClassification", + "CUADIPOwnershipAssignmentLegalBenchClassification", + "CUADInsuranceLegalBenchClassification", + 
"CUADIrrevocableOrPerpetualLicenseLegalBenchClassification", + "CUADJointIPOwnershipLegalBenchClassification", + "CUADLicenseGrantLegalBenchClassification", + "CUADLiquidatedDamagesLegalBenchClassification", + "CUADMinimumCommitmentLegalBenchClassification", + "CUADMostFavoredNationLegalBenchClassification", + "CUADNoSolicitOfCustomersLegalBenchClassification", + "CUADNoSolicitOfEmployeesLegalBenchClassification", + "CUADNonCompeteLegalBenchClassification", + "CUADNonDisparagementLegalBenchClassification", + "CUADNonTransferableLicenseLegalBenchClassification", + "CUADNoticePeriodToTerminateRenewalLegalBenchClassification", + "CUADPostTerminationServicesLegalBenchClassification", + "CUADPriceRestrictionsLegalBenchClassification", + "CUADRenewalTermLegalBenchClassification", + "CUADRevenueProfitSharingLegalBenchClassification", + "CUADRofrRofoRofnLegalBenchClassification", + "CUADSourceCodeEscrowLegalBenchClassification", + "CUADTerminationForConvenienceLegalBenchClassification", + "CUADThirdPartyBeneficiaryLegalBenchClassification", + "CUADUncappedLiabilityLegalBenchClassification", + "CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification", + "CUADVolumeRestrictionLegalBenchClassification", + "CUADWarrantyDurationLegalBenchClassification", + "CanadaTaxCourtOutcomesLegalBenchClassification", + "ContractNLIConfidentialityOfAgreementLegalBenchClassification", + "ContractNLIExplicitIdentificationLegalBenchClassification", + "ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification", + "ContractNLILimitedUseLegalBenchClassification", + "ContractNLINoLicensingLegalBenchClassification", + "ContractNLINoticeOnCompelledDisclosureLegalBenchClassification", + "ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissibleCopyLegalBenchClassification", + "ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification", + 
"ContractNLIReturnOfConfidentialInformationLegalBenchClassification", + "ContractNLISharingWithEmployeesLegalBenchClassification", + "ContractNLISharingWithThirdPartiesLegalBenchClassification", + "ContractNLISurvivalOfObligationsLegalBenchClassification", + "CorporateLobbyingLegalBenchClassification", + "DefinitionClassificationLegalBenchClassification", + "Diversity1LegalBenchClassification", + "Diversity2LegalBenchClassification", + "Diversity3LegalBenchClassification", + "Diversity4LegalBenchClassification", + "Diversity5LegalBenchClassification", + "Diversity6LegalBenchClassification", + "FunctionOfDecisionSectionLegalBenchClassification", + "InsurancePolicyInterpretationLegalBenchClassification", + "InternationalCitizenshipQuestionsLegalBenchClassification", + "JCrewBlockerLegalBenchClassification", + "LearnedHandsBenefitsLegalBenchClassification", + "LearnedHandsBusinessLegalBenchClassification", + "LearnedHandsConsumerLegalBenchClassification", + "LearnedHandsCourtsLegalBenchClassification", + "LearnedHandsCrimeLegalBenchClassification", + "LearnedHandsDivorceLegalBenchClassification", + "LearnedHandsDomesticViolenceLegalBenchClassification", + "LearnedHandsEducationLegalBenchClassification", + "LearnedHandsEmploymentLegalBenchClassification", + "LearnedHandsEstatesLegalBenchClassification", + "LearnedHandsFamilyLegalBenchClassification", + "LearnedHandsHealthLegalBenchClassification", + "LearnedHandsHousingLegalBenchClassification", + "LearnedHandsImmigrationLegalBenchClassification", + "LearnedHandsTortsLegalBenchClassification", + "LearnedHandsTrafficLegalBenchClassification", + "LegalReasoningCausalityLegalBenchClassification", + "MAUDLegalBenchClassification", + "NYSJudicialEthicsLegalBenchClassification", + "OPP115DataRetentionLegalBenchClassification", + "OPP115DataSecurityLegalBenchClassification", + "OPP115DoNotTrackLegalBenchClassification", + "OPP115FirstPartyCollectionUseLegalBenchClassification", + 
"OPP115InternationalAndSpecificAudiencesLegalBenchClassification", + "OPP115PolicyChangeLegalBenchClassification", + "OPP115ThirdPartySharingCollectionLegalBenchClassification", + "OPP115UserAccessEditAndDeletionLegalBenchClassification", + "OPP115UserChoiceControlLegalBenchClassification", + "OralArgumentQuestionPurposeLegalBenchClassification", + "OverrulingLegalBenchClassification", + "PROALegalBenchClassification", + "PersonalJurisdictionLegalBenchClassification", + "SCDBPAccountabilityLegalBenchClassification", + "SCDBPAuditsLegalBenchClassification", + "SCDBPCertificationLegalBenchClassification", + "SCDBPTrainingLegalBenchClassification", + "SCDBPVerificationLegalBenchClassification", + "SCDDAccountabilityLegalBenchClassification", + "SCDDAuditsLegalBenchClassification", + "SCDDCertificationLegalBenchClassification", + "SCDDTrainingLegalBenchClassification", + "SCDDVerificationLegalBenchClassification", + "TelemarketingSalesRuleLegalBenchClassification", + "TextualismToolDictionariesLegalBenchClassification", + "TextualismToolPlainLegalBenchClassification", + "UCCVCommonLawLegalBenchClassification", + "UnfairTOSLegalBenchClassification", + "FinancialPhrasebankClassification", + "DBpediaClassification", + "FrenkEnClassification", + "TweetTopicSingleClassification", + "AmazonPolarityClassification", + "NewsClassification", + "ToxicChatClassification", + "YelpReviewFullClassification", + "ToxicConversationsClassification", + "TweetSentimentExtractionClassification", + "PatentClassification", + "ImdbClassification", + "ArxivClassification", + "EmotionClassification", + "PoemSentimentClassification", + "Banking77Classification", + "PersianFoodSentimentClassification", + "HebrewSentimentAnalysis", + "BengaliSentimentAnalysis", + "BengaliDocumentClassification", + "BengaliHateSpeechClassification", + "SinhalaNewsSourceClassification", + "SinhalaNewsClassification", + "WisesightSentimentClassification", + "WongnaiReviewsClassification", + "WRIMEClassification", + 
"RomanianReviewsSentiment", + "Moroco", + "RomanianSentimentClassification", + "KorSarcasmClassification", + "KorHateClassification", + "KorFin", + "KlueTC", + "IndicLangClassification", + "SouthAfricanLangClassification", + "SwissJudgementClassification", + "AmazonReviewsClassification", + "NaijaSenti", + "TurkicClassification", + "ScalaClassification", + "MultilingualSentimentClassification", + "SIB200Classification", + "NordicLangClassification", + "NusaParagraphTopicClassification", + "CyrillicTurkicLangClassification", + "IndicNLPNewsClassification", + "MassiveScenarioClassification", + "MTOPIntentClassification", + "NusaParagraphEmotionClassification", + "MultiHateClassification", + "AfriSentiClassification", + "IndicSentimentClassification", + "LanguageClassification", + "AfriSentiLangClassification", + "NusaXSentiClassification", + "MTOPDomainClassification", + "HinDialectClassification", + "CataloniaTweetClassification", + "TweetSentimentClassification", + "MassiveIntentClassification", + "AmazonCounterfactualClassification", + "MasakhaNEWSClassification", + "GujaratiNewsClassification", + "IsiZuluNewsClassification", + "KinopoiskClassification", + "HeadlineClassification", + "InappropriatenessClassification", + "RuSciBenchGRNTIClassification", + "RuSciBenchOECDClassification", + "RuReviewsClassification", + "GeoreviewClassification", + "OdiaNewsClassification", + "GreekLegalCodeClassification", + "MovieReviewSentimentClassification", + "FrenchBookReviews", + "FrenkHrClassification", + "HindiDiscourseClassification", + "SentimentAnalysisHindi", + "MalayalamNewsClassification", + "InstructIR", + "CmedqaRetrieval", + "CovidRetrieval", + "DuRetrieval", + "EcomRetrieval", + "MMarcoRetrieval", + "MedicalRetrieval", + "T2Retrieval", + "VideoRetrieval", + "LeCaRDv2", + "SpanishPassageRetrievalS2S", + "SpanishPassageRetrievalS2P", + "MSMARCOPL", + "MSMARCOPLHardNegatives", + "SCIDOCSPL", + "SciFactPL", + "ArguAnaPL", + "FiQAPLRetrieval", + "NFCorpusPL", + 
"QuoraPLRetrieval", + "QuoraPLRetrievalHardNegatives", + "TRECCOVIDPL", + "NQPL", + "NQPLHardNegatives", + "DBPediaPL", + "DBPediaPLHardNegatives", + "HotpotQAPL", + "HotpotQAPLHardNegatives", + "GeorgianFAQRetrieval", + "SwednRetrieval", + "SweFaqRetrieval", + "SlovakSumRetrieval", + "SKQuadRetrieval", + "SNLRetrieval", + "NorQuadRetrieval", + "GermanQuADRetrieval", + "GerDaLIRSmall", + "GermanDPR", + "GermanGovServiceRetrieval", + "LegalQuAD", + "GerDaLIR", + "SadeemQuestionRetrieval", + "TurHistQuadRetrieval", + "VieQuADRetrieval", + "DanFever", + "DanFeverRetrieval", + "TV2Nordretrieval", + "TwitterHjerneRetrieval", + "EstQA", + "Quail", + "Touche2020", + "Touche2020v3Retrieval", + "TempReasonL2Pure", + "LegalSummarization", + "NQ", + "NQHardNegatives", + "SIQA", + "MSMARCO", + "MSMARCOHardNegatives", + "DBPedia", + "DBPediaHardNegatives", + "NarrativeQARetrieval", + "MSMARCOv2", + "CQADupstackTexRetrieval", + "TRECCOVID", + "WinoGrande", + "QuoraRetrieval", + "QuoraRetrievalHardNegatives", + "AlphaNLI", + "LEMBNeedleRetrieval", + "LEMBPasskeyRetrieval", + "CQADupstackAndroidRetrieval", + "TempReasonL2Context", + "ARCChallenge", + "LegalBenchCorporateLobbying", + "SCIDOCS", + "MedicalQARetrieval", + "RARbCode", + "LEMBQMSumRetrieval", + "TempReasonL3Context", + "AILAStatutes", + "TopiOCQARetrieval", + "TopiOCQARetrievalHardNegatives", + "ClimateFEVER", + "ClimateFEVERHardNegatives", + "CQADupstackWordpressRetrieval", + "CQADupstackEnglishRetrieval", + "CQADupstackStatsRetrieval", + "MLQuestionsRetrieval", + "TempReasonL2Fact", + "CQADupstackGamingRetrieval", + "CQADupstackWebmastersRetrieval", + "CQADupstackUnixRetrieval", + "TempReasonL3Pure", + "CQADupstackPhysicsRetrieval", + "FiQA2018", + "LitSearchRetrieval", + "FeedbackQARetrieval", + "HagridRetrieval", + "FaithDialRetrieval", + "SciFact", + "CQADupstackMathematicaRetrieval", + "RARbMath", + "HellaSwag", + "PIQA", + "SpartQA", + "BrightRetrieval", + "TempReasonL1", + "HotpotQA", + "HotpotQAHardNegatives", 
+ "LegalBenchConsumerContractsQA", + "ArguAna", + "LEMBWikimQARetrieval", + "TempReasonL3Fact", + "FEVER", + "FEVERHardNegatives", + "CQADupstackGisRetrieval", + "AILACasedocs", + "NFCorpus", + "LEMBSummScreenFDRetrieval", + "LEMBNarrativeQARetrieval", + "CQADupstackProgrammersRetrieval", + "JaGovFaqsRetrieval", + "NLPJournalAbsIntroRetrieval", + "JaqketRetrieval", + "NLPJournalTitleAbsRetrieval", + "JaQuADRetrieval", + "NLPJournalTitleIntroRetrieval", + "HunSum2AbstractiveRetrieval", + "AutoRAGRetrieval", + "KoStrategyQA", + "WikipediaRetrievalMultilingual", + "MintakaRetrieval", + "PublicHealthQARetrieval", + "CrossLingualSemanticDiscriminationWMT19", + "MultiLongDocRetrieval", + "MIRACLRetrieval", + "MIRACLRetrievalHardNegatives", + "NeuCLIR2022Retrieval", + "NeuCLIR2022RetrievalHardNegatives", + "StatcanDialogueDatasetRetrieval", + "IndicQARetrieval", + "NeuCLIR2023Retrieval", + "NeuCLIR2023RetrievalHardNegatives", + "CrossLingualSemanticDiscriminationWMT21", + "XMarket", + "XPQARetrieval", + "BelebeleRetrieval", + "CUREv1Retrieval", + "MLQARetrieval", + "XQuADRetrieval", + "MrTidyRetrieval", + "CodeTransOceanContestRetrieval", + "CodeTransOceanDLRetrieval", + "CodeFeedbackMT", + "CodeSearchNetCCRetrieval", + "StackOverflowQARetrieval", + "CodeFeedbackST", + "CosQARetrieval", + "CodeEditSearchRetrieval", + "SyntheticText2SQLRetrieval", + "AppsRetrieval", + "CodeSearchNetRetrieval", + "COIRCodeSearchNetRetrieval", + "RiaNewsRetrieval", + "RiaNewsRetrievalHardNegatives", + "RuBQRetrieval", + "GreekCivicsQA", + "AlloprofRetrieval", + "BSARDRetrieval", + "SyntecRetrieval", + "FQuADRetrieval", + "AFQMC", + "ATEC", + "BQ", + "LCQMC", + "PAWSX", + "QBQTC", + "STSB", + "Assin2STS", + "SickBrSTS", + "STSES", + "CdscrSTS", + "SickrPLSTS", + "FinParaSTS", + "GermanSTSBenchmarkSTS", + "STS12STS", + "STS13STS", + "BiossesSTS", + "STS15STS", + "STSBenchmarkSTS", + "SickrSTS", + "STS16STS", + "STS14STS", + "FaroeseSTS", + "JSICK", + "JSTS", + "RonSTS", + "KorSTS", + 
"KlueSTS", + "IndicCrosslingualSTS", + "SemRel24STS", + "STS17Crosslingual", + "STS22CrosslingualSTS", + "STS22CrosslingualSTSv2", + "STSBenchmarkMultilingualSTS", + "RUParaPhraserSTS", + "RuSTSBenchmarkSTS", + "SickFrSTS", + "CMedQAv1", + "CMedQAv2", + "MMarcoReranking", + "T2Reranking", + "AskUbuntuDupQuestions", + "WebLINXCandidatesReranking", + "StackOverflowDupQuestions", + "NevIR", + "MindSmallReranking", + "SciDocsReranking", + "VoyageMMarcoReranking", + "MIRACLReranking", + "ESCIReranking", + "WikipediaRerankingMultilingual", + "RuBQReranking", + "SyntecReranking", + "AlloprofReranking", +] diff --git a/pyproject.toml b/pyproject.toml index 1ce9e09356..90abd35750 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.19.4" +version = "1.23.0" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ @@ -57,10 +57,12 @@ dev = ["ruff==0.6.4", # locked so we don't get PRs which fail only due to a lint "pytest", "pytest-xdist", "pytest-coverage"] speedtask = ["GPUtil>=1.4.0", "psutil>=5.9.8"] peft = ["peft>=0.11.0"] -leaderboard = ["gradio>=5.5.0", "gradio_rangeslider>=0.0.8"] +leaderboard = ["gradio>=5.7.1", "gradio_rangeslider>=0.0.8"] flagembedding = ["FlagEmbedding"] jina = ["einops>=0.8.0"] flash_attention = ["flash-attn>=2.6.3"] +openai = ["openai>=1.41.0", "tiktoken>=0.8.0"] +model2vec = ["model2vec>=0.3.0"] [tool.coverage.report] @@ -121,6 +123,7 @@ ignore = ["E501", # line too long "D107", # Missing docstring in __init__ "D205", # 1 blank line required between summary line and description "D415", # First line should end with a period + "C408", # don't use unecc. collection call, e.g. 
from __future__ import annotations

import json
import os
from pathlib import Path

from pyglottolog.api import Glottolog, lls
from tqdm import tqdm

# Local checkout of the Glottolog repository (download it before running).
# GLOTTOLOG_PATH overrides the default so the script is usable on other machines
# without editing the source.
GLOTTOLOG_PATH = os.environ.get("GLOTTOLOG_PATH", "/home/ubuntu/isaac/work/glottolog")

# JSON file holding the ISO code -> family-chain mapping; it is updated in place.
MAPPING_PATH = Path("language_family.json")

glottolog = Glottolog(GLOTTOLOG_PATH)


def get_languages_with_iso_by_languoid(languoid, level=0, prev_fam=None):
    """Record the family chain of every ISO-coded language below ``languoid``.

    The children of ``languoid`` are visited recursively. Each child whose level
    is ``"language"`` and that has an ISO code gets an entry in the module-level
    ``ISO2FAMILY`` mapping of the form ``{"level0": name, "level1": name, ...}``,
    listing the names of the languoids on the path walked so far. An existing
    entry is kept when it already has more levels than the new chain.

    Args:
        languoid: Node to expand. Anything that is not an ``lls.Languoid`` is
            ignored.
        level: Depth of ``languoid`` in the current walk; used to build the
            ``level{n}`` key.
        prev_fam: Chain collected for the ancestors of ``languoid``. It is
            copied, never mutated.

    Returns:
        None. Results are written to ``ISO2FAMILY``.
    """
    if not isinstance(languoid, lls.Languoid):
        return

    # The chain is identical for every child, so build it once per call instead
    # of once per child. It is a copy, so the caller's dict stays untouched, and
    # it is never modified after this point, so sharing it between sibling
    # entries of ISO2FAMILY is safe.
    current_fam = dict(prev_fam or {})
    current_fam[f"level{level}"] = languoid.name

    for descendant in languoid.children:
        level_name = descendant.level.name
        if level_name == "language":  # Direct languages
            iso_key = descendant.iso
            if not iso_key:
                continue
            # Keep the deepest chain seen so far for this ISO code.
            if len(ISO2FAMILY.get(iso_key, {})) > len(current_fam):
                continue
            ISO2FAMILY[iso_key] = current_fam
        elif level_name == "family":  # Subfamilies, recurse
            get_languages_with_iso_by_languoid(descendant, level + 1, current_fam)


all_languoids = list(glottolog.languoids())

# Start from the existing mapping when there is one so earlier entries are kept;
# on a first run build the mapping from scratch instead of failing.
if MAPPING_PATH.exists():
    with MAPPING_PATH.open("r", encoding="utf-8") as f:
        ISO2FAMILY = json.load(f)
else:
    ISO2FAMILY = {}

for languoid in tqdm(all_languoids, total=len(all_languoids)):
    get_languages_with_iso_by_languoid(languoid)

# Sort by ISO code so the written file is stable between runs.
ISO2FAMILY = dict(sorted(ISO2FAMILY.items()))

with MAPPING_PATH.open("w", encoding="utf-8") as f:
    json.dump(ISO2FAMILY, f, indent=3)
import ast
import importlib
import inspect
import os
import types
from pathlib import Path

# Adjust this import to the correct location of AbsTask.
from mteb.tasks import AbsTask

# Root of the task packages, relative to the directory the script is run from.
BASE_DIR = Path("../mteb/tasks")


def find_task_classes_in_module(full_module_name):
    """Import a module and return the names of its task classes.

    A class counts as a task when it is a subclass of ``AbsTask`` other than
    ``AbsTask`` itself, its name does not start with ``"AbsTask"``, and it is
    not named ``"MultilingualTask"``.

    Args:
        full_module_name: Dotted name of the module to import.

    Returns:
        The matching class names in the order ``inspect.getmembers`` yields
        them, or an empty list when the module cannot be imported.
    """
    try:
        mod = importlib.import_module(full_module_name)
    except ImportError as exc:
        # Best effort: keep going, but say so. A silently skipped module would
        # make its tasks disappear from the generated __init__.py unnoticed.
        print(f"Skipping {full_module_name}: import failed ({exc})")
        return []

    return [
        name
        for name, obj in inspect.getmembers(mod, inspect.isclass)
        # GenericAlias objects can pass the class check but make issubclass()
        # raise, so they are filtered out first.
        if not isinstance(obj, types.GenericAlias)
        and issubclass(obj, AbsTask)
        and obj is not AbsTask
        and not obj.__name__.startswith("AbsTask")
        and obj.__name__ != "MultilingualTask"
    ]


def parse_all_from_init(init_path):
    """Return the string entries of ``__all__`` declared in an ``__init__.py``.

    Only the first top-level assignment of the form ``__all__ = [...]`` is
    considered, and only string literals inside that list are returned.

    Args:
        init_path: ``pathlib.Path`` of the ``__init__.py`` file to inspect.

    Returns:
        The names listed in ``__all__``; an empty list when the file does not
        exist, has no such assignment, or assigns something other than a list
        literal.
    """
    if not init_path.is_file():
        return []

    tree = ast.parse(init_path.read_text(encoding="utf-8"))
    for node in tree.body:
        if not (isinstance(node, ast.Assign) and len(node.targets) == 1):
            continue
        target = node.targets[0]
        # Targets such as `x.y = ...` or `a[0] = ...` have no `.id` attribute.
        if not (isinstance(target, ast.Name) and target.id == "__all__"):
            continue
        # Expecting __all__ to be a list of strings
        if not isinstance(node.value, ast.List):
            return []
        # ast.Str / `.s` are deprecated aliases removed in Python 3.14;
        # ast.Constant carrying a str value is the supported spelling.
        return [
            elt.value
            for elt in node.value.elts
            if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
        ]
    return []


# Walk bottom-up so that the __init__.py of every subpackage has been rewritten
# before its parent aggregates it.
for root, dirs, files in os.walk(BASE_DIR, topdown=False):
    current_dir = Path(root)
    if current_dir.name == "__pycache__":
        # Byte-code caches are not packages; do not drop an __init__.py there.
        continue

    # Sorted so the generated files do not depend on filesystem ordering.
    py_files = sorted(f for f in files if f.endswith(".py") and f != "__init__.py")
    relative_path = current_dir.relative_to(BASE_DIR.parent)
    package_path = ".".join(relative_path.parts)

    # Find classes in Python files of the current directory
    import_lines = []
    all_classes = []
    for py_file in py_files:
        module_name = py_file[:-3]  # remove .py
        full_module_name = f"mteb.{package_path}.{module_name}"
        task_classes = find_task_classes_in_module(full_module_name)
        if task_classes:
            import_lines.append(f"from .{module_name} import {', '.join(task_classes)}")
            all_classes.extend(task_classes)

    # Also aggregate subdirectories that have their own __init__.py and __all__
    sub_import_lines = []
    for d in sorted(dirs):
        sub_init = current_dir / d / "__init__.py"
        if not sub_init.exists():
            continue
        sub_all = parse_all_from_init(sub_init)
        if sub_all:
            # Import all from the subpackage
            sub_import_lines.append(f"from .{d} import {', '.join(sub_all)}")
            all_classes.extend(sub_all)

    # Deduplicate classes (preserves order while removing duplicates)
    all_classes = list(dict.fromkeys(all_classes))

    init_path = current_dir / "__init__.py"
    with open(init_path, "w", encoding="utf-8") as init_file:
        # Module imports first, then subpackage imports, then __all__.
        init_file.writelines(line + "\n" for line in import_lines)
        init_file.writelines(line + "\n" for line in sub_import_lines)
        init_file.write(f"__all__ = {all_classes!r}\n")

    print(f"Updated {init_path} with imports and __all__ = {all_classes}")
diff --git a/tests/test_benchmark/mock_models.py b/tests/test_benchmark/mock_models.py index e0b9cf69df..7024d00113 100644 --- a/tests/test_benchmark/mock_models.py +++ b/tests/test_benchmark/mock_models.py @@ -33,7 +33,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): return torch.randn(len(sentences), 10).numpy() -class MockTorchbf16Encoder(mteb.Encoder): +class MockTorchbf16Encoder(SentenceTransformer): def __init__(self): pass @@ -61,6 +61,7 @@ def encode( convert_to_tensor: bool = False, device: str | None = None, normalize_embeddings: bool = False, + **kwargs: Any, ) -> list[Tensor] | ndarray | Tensor: return torch.randn(len(sentences), 10).numpy() diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index 329b110d75..7a096828e5 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -51,7 +51,7 @@ class MockClassificationTask(AbsTaskClassification): "test": { "num_samples": 2, "number_of_characters": 52, - "number_texts_intersect_with_train": 2, + "number_texts_intersect_with_train": 1, "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, @@ -61,11 +61,11 @@ class MockClassificationTask(AbsTaskClassification): }, "train": { "num_samples": 2, - "number_of_characters": 52, + "number_of_characters": 53, "number_texts_intersect_with_train": None, "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, + "average_text_length": 26.5, + "max_text_length": 30, "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, @@ -80,20 +80,22 @@ class MockClassificationTask(AbsTaskClassification): ) def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] + train_texts = ["This is a test sentence", "This is another train sentence"] + test_texts = ["This is a test sentence", "This is another test sentence"] + labels = [0, 1] self.dataset = DatasetDict( { "test": 
Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -107,7 +109,7 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "test": { "num_samples": 4, "number_of_characters": 104, - "number_texts_intersect_with_train": 2, + "number_texts_intersect_with_train": 1, "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, @@ -118,7 +120,7 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "eng": { "num_samples": 2, "number_of_characters": 52, - "number_texts_intersect_with_train": 2, + "number_texts_intersect_with_train": 1, "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, @@ -129,7 +131,7 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "fra": { "num_samples": 2, "number_of_characters": 52, - "number_texts_intersect_with_train": 2, + "number_texts_intersect_with_train": 1, "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, @@ -141,33 +143,33 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask }, "train": { "num_samples": 4, - "number_of_characters": 104, + "number_of_characters": 106, "number_texts_intersect_with_train": None, "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, + "average_text_length": 26.5, + "max_text_length": 30, "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 2}, "1": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, - "number_of_characters": 52, + "number_of_characters": 53, "number_texts_intersect_with_train": None, "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, + "average_text_length": 26.5, + "max_text_length": 30, "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, "fra": { "num_samples": 2, 
- "number_of_characters": 52, + "number_of_characters": 53, "number_texts_intersect_with_train": None, "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, + "average_text_length": 26.5, + "max_text_length": 30, "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, @@ -185,18 +187,19 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask metadata.eval_langs = multilingual_eval_langs def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] + train_texts = ["This is a test sentence", "This is another train sentence"] + test_texts = ["This is a test sentence", "This is another test sentence"] labels = [0, 1] data = { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -1337,13 +1340,42 @@ def load_data(self, **kwargs): class MockRetrievalTask(AbsTaskRetrieval): expected_stats = { + "val": { + "num_samples": 4, + "number_of_characters": 112, + "num_documents": 2, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, + "unique_documents": 2, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "none_queries": 0, + "num_relevant_docs": 4, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "num_top_ranked": None, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, + }, "test": { "num_samples": 4, - "number_of_characters": 154, + "number_of_characters": 112, "num_documents": 2, - 
"min_document_length": 51, - "average_document_length": 51.0, - "max_document_length": 51, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, "unique_documents": 2, "num_queries": 2, "min_query_length": 23, @@ -1365,14 +1397,14 @@ class MockRetrievalTask(AbsTaskRetrieval): "min_top_ranked_per_query": None, "average_top_ranked_per_query": None, "max_top_ranked_per_query": None, - } + }, } metadata = TaskMetadata( type="Retrieval", name="MockRetrievalTask", main_score="ndcg_at_10", - **general_args, # type: ignore + **dict(general_args | {"eval_splits": ["val", "test"]}), # type: ignore ) def load_data(self, **kwargs): @@ -1380,20 +1412,22 @@ def load_data(self, **kwargs): "test": { "q1": "This is a test sentence", "q2": "This is another test sentence", - } + }, + "val": { + "q1": "This is a test sentence", + "q2": "This is another test sentence", + }, } self.corpus = { "test": { - "d1": { - "title": "This is a positive title", - "text": "This is a positive sentence", - }, - "d2": { - "title": "This is a negative title", - "text": "This is a negative sentence", - }, - } + "d1": "This is a positive sentence", + "d2": "This is another positive sentence", + }, + "val": { + "d1": "This is a positive sentence", + "d2": "This is another positive sentence", + }, } self.relevant_docs = { @@ -1401,6 +1435,10 @@ def load_data(self, **kwargs): "q1": {"d1": 1, "d2": 0}, "q2": {"d1": 0, "d2": 1}, }, + "val": { + "q1": {"d1": 1, "d2": 0}, + "q2": {"d1": 0, "d2": 1}, + }, } self.top_ranked = None self.instructions = None @@ -1544,7 +1582,7 @@ class MockMultilabelClassification(AbsTaskMultilabelClassification): "test": { "num_samples": 6, "number_of_characters": 156, - "number_texts_intersect_with_train": 2, + "number_texts_intersect_with_train": 1, "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, @@ -1557,11 +1595,11 @@ class MockMultilabelClassification(AbsTaskMultilabelClassification): }, "train": { 
"num_samples": 6, - "number_of_characters": 156, + "number_of_characters": 159, "number_texts_intersect_with_train": None, "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, + "average_text_length": 26.5, + "max_text_length": 30, "unique_texts": 2, "min_labels_per_text": 2, "average_label_per_text": 2.0, @@ -1579,20 +1617,21 @@ class MockMultilabelClassification(AbsTaskMultilabelClassification): ) def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] * 3 + train_texts = ["This is a test sentence", "This is another train sentence"] * 3 + test_texts = ["This is a test sentence", "This is another test sentence"] * 3 labels = [[0, 1], [1, 0]] * 3 self.dataset = DatasetDict( { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -1608,7 +1647,7 @@ class MockMultilingualMultilabelClassification( "test": { "num_samples": 12, "number_of_characters": 312, - "number_texts_intersect_with_train": 2, + "number_texts_intersect_with_train": 1, "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, @@ -1622,7 +1661,7 @@ class MockMultilingualMultilabelClassification( "eng": { "num_samples": 6, "number_of_characters": 156, - "number_texts_intersect_with_train": 2, + "number_texts_intersect_with_train": 1, "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, @@ -1636,7 +1675,7 @@ class MockMultilingualMultilabelClassification( "fra": { "num_samples": 6, "number_of_characters": 156, - "number_texts_intersect_with_train": 2, + "number_texts_intersect_with_train": 1, "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, @@ -1651,11 +1690,11 @@ class MockMultilingualMultilabelClassification( }, "train": { "num_samples": 12, - "number_of_characters": 312, + "number_of_characters": 318, "number_texts_intersect_with_train": 
None, "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, + "average_text_length": 26.5, + "max_text_length": 30, "unique_texts": 2, "min_labels_per_text": 2, "average_label_per_text": 2.0, @@ -1665,11 +1704,11 @@ class MockMultilingualMultilabelClassification( "hf_subset_descriptive_stats": { "eng": { "num_samples": 6, - "number_of_characters": 156, + "number_of_characters": 159, "number_texts_intersect_with_train": None, "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, + "average_text_length": 26.5, + "max_text_length": 30, "unique_texts": 2, "min_labels_per_text": 2, "average_label_per_text": 2.0, @@ -1679,11 +1718,11 @@ class MockMultilingualMultilabelClassification( }, "fra": { "num_samples": 6, - "number_of_characters": 156, + "number_of_characters": 159, "number_texts_intersect_with_train": None, "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, + "average_text_length": 26.5, + "max_text_length": 30, "unique_texts": 2, "min_labels_per_text": 2, "average_label_per_text": 2.0, @@ -1704,19 +1743,20 @@ class MockMultilingualMultilabelClassification( metadata.eval_langs = multilingual_eval_langs def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] * 3 + train_texts = ["This is a test sentence", "This is another train sentence"] * 3 + test_texts = ["This is a test sentence", "This is another test sentence"] * 3 labels = [[0, 1], [1, 0]] * 3 data = { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index 660dd50c80..b654bd62ea 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -12,6 +12,7 @@ import mteb import mteb.overview +from mteb.abstasks import AbsTask from 
mteb.benchmarks.benchmarks import Benchmark from mteb.create_meta import generate_readme @@ -37,9 +38,7 @@ @pytest.mark.parametrize("tasks", [MOCK_TASK_TEST_GRID]) @pytest.mark.parametrize("model", [MockNumpyEncoder()]) -def test_mulitple_mteb_tasks( - tasks: list[mteb.AbsTask], model: mteb.Encoder, tmp_path: Path -): +def test_mulitple_mteb_tasks(tasks: list[AbsTask], model: mteb.Encoder, tmp_path: Path): """Test that multiple tasks can be run""" eval = mteb.MTEB(tasks=tasks) eval.run(model, output_folder=str(tmp_path), overwrite_results=True) @@ -57,7 +56,7 @@ def test_mulitple_mteb_tasks( MockTorchbf16Encoder(), ], ) -def test_benchmark_encoders_on_task(task: str | mteb.AbsTask, model: mteb.Encoder): +def test_benchmark_encoders_on_task(task: str | AbsTask, model: mteb.Encoder): """Test that a task can be fetched and run using a variety of encoders""" if isinstance(task, str): tasks = mteb.get_tasks(tasks=[task]) @@ -70,7 +69,7 @@ def test_benchmark_encoders_on_task(task: str | mteb.AbsTask, model: mteb.Encode @pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID[:1]) @pytest.mark.parametrize("model", [MockNumpyEncoder()]) -def test_reload_results(task: str | mteb.AbsTask, model: mteb.Encoder, tmp_path: Path): +def test_reload_results(task: str | AbsTask, model: mteb.Encoder, tmp_path: Path): """Test that when rerunning the results are reloaded correctly""" if isinstance(task, str): tasks = mteb.get_tasks(tasks=[task]) @@ -91,12 +90,12 @@ def test_reload_results(task: str | mteb.AbsTask, model: mteb.Encoder, tmp_path: @pytest.mark.parametrize("task_name", MOCK_TASK_TEST_GRID) -def test_prompt_name_passed_to_all_encodes(task_name: str | mteb.AbsTask): +def test_prompt_name_passed_to_all_encodes(task_name: str | AbsTask): """Test that all tasks correctly pass down the prompt_name to the encoder which supports it, and that the encoder which does not support it does not receive it. 
""" _task_name = ( - task_name.metadata.name if isinstance(task_name, mteb.AbsTask) else task_name + task_name.metadata.name if isinstance(task_name, AbsTask) else task_name ) class MockEncoderWithInstructions(mteb.Encoder): @@ -109,7 +108,7 @@ def encode(self, sentences, **kwargs): assert kwargs["prompt_name"] is None return super().encode(sentences, **kwargs) - if isinstance(task_name, mteb.AbsTask): + if isinstance(task_name, AbsTask): tasks = [task_name] else: tasks = mteb.get_tasks(tasks=[task_name]) @@ -134,7 +133,7 @@ def encode(self, sentences, **kwargs): @pytest.mark.parametrize("task_name", MOCK_TASK_TEST_GRID) -def test_encode_kwargs_passed_to_all_encodes(task_name: str | mteb.AbsTask): +def test_encode_kwargs_passed_to_all_encodes(task_name: str | AbsTask): """Test that all tasks correctly pass down the encode_kwargs to the encoder.""" my_encode_kwargs = {"no_one_uses_this_args": "but_its_here"} @@ -147,7 +146,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): ) return np.zeros((len(sentences), 10)) - if isinstance(task_name, mteb.AbsTask): + if isinstance(task_name, AbsTask): tasks = [task_name] else: tasks = mteb.get_tasks(tasks=[task_name]) @@ -212,12 +211,12 @@ def test_get_benchmark(name): @pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID) @pytest.mark.parametrize("is_task_name", [True, False]) def test_prompt_name_passed_to_all_encodes_with_prompts( - task: mteb.AbsTask | str, is_task_name: bool + task: AbsTask | str, is_task_name: bool ): """Test that all tasks and task_types correctly pass down the prompt_name to the encoder with prompts.""" - _task_name = task.metadata.name if isinstance(task, mteb.AbsTask) else task + _task_name = task.metadata.name if isinstance(task, AbsTask) else task - if isinstance(task, mteb.AbsTask): + if isinstance(task, AbsTask): tasks = [task] _task_type = task.metadata.type else: @@ -275,9 +274,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): ], ) 
@pytest.mark.parametrize("is_task_name", [True, False]) -def test_model_query_passage_prompts_task_type( - task: mteb.AbsTask | str, is_task_name: bool -): +def test_model_query_passage_prompts_task_type(task: AbsTask | str, is_task_name: bool): """Test that the model with prompts is correctly called.""" tasks = [task] diff --git a/tests/test_evaluation/test_split_evaluation.py b/tests/test_evaluation/test_split_evaluation.py new file mode 100644 index 0000000000..a2ca249747 --- /dev/null +++ b/tests/test_evaluation/test_split_evaluation.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import pytest + +from mteb import MTEB +from tests.test_benchmark.mock_models import ( + MockSentenceTransformer, +) +from tests.test_benchmark.mock_tasks import ( + MockRetrievalTask, +) + + +@pytest.fixture +def model(): + return MockSentenceTransformer() + + +@pytest.fixture +def tasks(): + return [MockRetrievalTask()] + + +def test_all_splits_evaluated(model, tasks, tmp_path): + evaluation = MTEB(tasks=tasks) + results = evaluation.run( + model, + eval_splits=["val", "test"], + output_folder=str(tmp_path / "all_splits_evaluated"), + verbosity=2, + ) + + assert "MockRetrievalTask" == results[0].task_name + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert set(last_evaluated_splits["MockRetrievalTask"]) == {"val", "test"} + assert len(last_evaluated_splits["MockRetrievalTask"]) == 2 + + +def test_one_missing_split(model, tasks, tmp_path): + evaluation = MTEB(tasks=tasks) + results = evaluation.run( + model, + eval_splits=["val"], + output_folder=str(tmp_path / "testcase2"), + verbosity=2, + ) + + assert "MockRetrievalTask" == results[0].task_name + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert set(last_evaluated_splits["MockRetrievalTask"]) == {"val"} + assert len(last_evaluated_splits["MockRetrievalTask"]) == 1 + + results2 = evaluation.run( + model, + eval_splits=["val", "test"], + output_folder=str(tmp_path / 
"testcase2"), + verbosity=2, + overwrite_results=True, + ) + + assert "MockRetrievalTask" == results2[0].task_name + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert set(last_evaluated_splits["MockRetrievalTask"]) == {"test"} + assert len(last_evaluated_splits["MockRetrievalTask"]) == 1 + + +def test_no_missing_splits(model, tasks, tmp_path): + evaluation = MTEB(tasks=tasks) + _ = evaluation.run( + model, + eval_splits=["val", "test"], + output_folder=str(tmp_path / "testcase3"), + verbosity=2, + ) + + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert "MockRetrievalTask" in last_evaluated_splits + assert len(last_evaluated_splits["MockRetrievalTask"]) == 2 + + evaluation = MTEB(tasks=tasks) + _ = evaluation.run( + model, + eval_splits=["val", "test"], + output_folder=str(tmp_path / "testcase3"), + verbosity=2, + overwrite_results=True, + ) + + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert "MockRetrievalTask" in last_evaluated_splits + assert len(last_evaluated_splits["MockRetrievalTask"]) == 0 diff --git a/tests/test_load_results/test_mteb_results.py b/tests/test_load_results/test_mteb_results.py index 84071b735f..a83eeb7979 100644 --- a/tests/test_load_results/test_mteb_results.py +++ b/tests/test_load_results/test_mteb_results.py @@ -5,8 +5,7 @@ import pytest -import mteb -from mteb import AbsTask +from mteb.abstasks import AbsTask, TaskMetadata from mteb.load_results.task_results import TaskResult tests_folder = Path(__file__).parent.parent @@ -14,7 +13,7 @@ class DummyTask(AbsTask): superseded_by = "newer_task" - metadata = mteb.TaskMetadata( + metadata = TaskMetadata( name="dummy_task", description="dummy task for testing", dataset={"revision": "1.0", "path": "dummy_dataset"}, diff --git a/tests/test_overview.py b/tests/test_overview.py index 73df5dc193..127e54f279 100644 --- a/tests/test_overview.py +++ b/tests/test_overview.py @@ -37,20 +37,20 @@ def test_get_task(task_name: str, 
eval_splits: list[str] | None): @pytest.mark.parametrize("script", [["Latn"], ["Cyrl"], None]) @pytest.mark.parametrize("domains", [["Legal"], ["Medical", "Non-fiction"], None]) @pytest.mark.parametrize("task_types", [["Classification"], ["Clustering"], None]) -@pytest.mark.parametrize("exclude_superseeded_datasets", [True, False]) +@pytest.mark.parametrize("exclude_superseded_datasets", [True, False]) def test_get_tasks( languages: list[str], script: list[str], domains: list[TASK_DOMAIN], task_types: list[TASK_TYPE] | None, - exclude_superseeded_datasets: bool, + exclude_superseded_datasets: bool, ): tasks = mteb.get_tasks( languages=languages, script=script, domains=domains, task_types=task_types, - exclude_superseeded=exclude_superseeded_datasets, + exclude_superseded=exclude_superseded_datasets, ) for task in tasks: @@ -65,7 +65,7 @@ def test_get_tasks( assert set(domains).intersection(set(task_domains)) if task_types: assert task.metadata.type in task_types - if exclude_superseeded_datasets: + if exclude_superseded_datasets: assert task.superseded_by is None diff --git a/tests/test_reproducible_workflow.py b/tests/test_reproducible_workflow.py index 308153d2a9..04ca6c0acc 100644 --- a/tests/test_reproducible_workflow.py +++ b/tests/test_reproducible_workflow.py @@ -6,6 +6,7 @@ import mteb from mteb import MTEB +from mteb.abstasks import AbsTask from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta from mteb.models.wrapper import Wrapper @@ -23,7 +24,7 @@ def test_reproducibility_workflow(task_name: str, model_name: str, model_revisio task = mteb.get_task(task_name) assert isinstance(model_meta, ModelMeta) - assert isinstance(task, mteb.AbsTask) + assert isinstance(task, AbsTask) model = mteb.get_model(model_name, revision=model_revision) assert isinstance(model, Encoder) @@ -50,8 +51,8 @@ def test_reproducibility_workflow(task_name: str, model_name: str, model_revisio "Speed", ], ) -def test_validate_task_to_prompt_name(task_name: 
str | mteb.AbsTask): - if isinstance(task_name, mteb.AbsTask): +def test_validate_task_to_prompt_name(task_name: str | AbsTask): + if isinstance(task_name, AbsTask): task_names = [task_name.metadata.name] else: task_names = [task_name] diff --git a/tests/test_tasks/test_all_abstasks.py b/tests/test_tasks/test_all_abstasks.py index 208e7221c3..ddece7dbc4 100644 --- a/tests/test_tasks/test_all_abstasks.py +++ b/tests/test_tasks/test_all_abstasks.py @@ -90,10 +90,10 @@ def test_dataset_availability(): asyncio.run(check_datasets_are_available_on_hf(tasks)) -def test_superseeded_dataset_exists(): - tasks = mteb.get_tasks(exclude_superseeded=False) +def test_superseded_dataset_exists(): + tasks = mteb.get_tasks(exclude_superseded=False) for task in tasks: if task.superseded_by: assert ( task.superseded_by in TASKS_REGISTRY - ), f"{task} is superseeded by {task.superseded_by} but {task.superseded_by} is not in the TASKS_REGISTRY" + ), f"{task} is superseded by {task.superseded_by} but {task.superseded_by} is not in the TASKS_REGISTRY" From 06fc13f5ebe93c527390df8cbe27a1b2f1be9de9 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sun, 15 Dec 2024 18:26:22 -0800 Subject: [PATCH 20/40] fix: Add documentation (#1567) * sort logos, add mkdocs outline, add index page * Added tons of documentation * Added some more docs to abstask * reduced docs to only include API docs for now * fixed import hell * Fixed more nasty import to get docs to work * API docs work! 
* fixed link * Apply suggestions from code review Co-authored-by: Isaac Chung * format --------- Co-authored-by: Isaac Chung --- .github/workflows/documentation.yml | 35 +++++++ CONTRIBUTING.md | 25 +++-- Makefile | 8 +- README.md | 28 ++++-- docs/adding_a_benchmark.md | 40 ++++++++ docs/api/benchmark.md | 26 +++++ docs/api/model.md | 26 +++++ docs/api/task.md | 36 +++++++ docs/cli.md | 13 +++ docs/getting_started.md | 21 +++++ docs/images/{ => logos}/hf_logo.png | Bin .../mteb_logo/mteb_logo_tight_hfhub.drawio | 0 .../mteb_logo/mteb_logo_tight_hfhub.png | Bin .../mteb_logo/mteb_logo_transparent.png | Bin .../mteb_logo/mteb_logo_wide_github.drawio | 0 .../mteb_logo/mteb_logo_wide_github.png | Bin .../{ => mmteb}/mmteb_overview_wide.png | Bin .../{ => mmteb}/mmteb_overview_wide.svg | 0 .../mmteb_overview_wide_centered.png | Bin .../mmteb_overview_wide_centered.svg | 0 docs/images/{ => mmteb}/mteb_overview.svg | 0 .../visualizations/benchmark_explainer.png | Bin 0 -> 162731 bytes .../visualizations/modelmeta_explainer.png | Bin 0 -> 143877 bytes docs/images/visualizations/task_explainer.png | Bin 0 -> 241373 bytes docs/index.md | 45 +++++++++ docs/references.bib | 17 ++++ docs/tasks.md | 5 +- mkdocs.yml | 89 ++++++++++++++++++ mteb/__init__.py | 19 +--- mteb/abstasks/AbsTask.py | 31 +++++- mteb/abstasks/TaskMetadata.py | 4 +- mteb/benchmarks/benchmarks.py | 4 +- mteb/cli.py | 17 +++- mteb/encoder_interface.py | 12 +-- mteb/models/__init__.py | 3 +- mteb/models/misc_models.py | 2 + mteb/overview.py | 6 +- pyproject.toml | 7 ++ tests/test_benchmark/mock_models.py | 2 +- .../test_InstructionRetrievalEvaluator.py | 2 +- .../test_RetrievalEvaluator.py | 2 +- 41 files changed, 463 insertions(+), 62 deletions(-) create mode 100644 .github/workflows/documentation.yml create mode 100644 docs/adding_a_benchmark.md create mode 100644 docs/api/benchmark.md create mode 100644 docs/api/model.md create mode 100644 docs/api/task.md create mode 100644 docs/cli.md create mode 100644 
docs/getting_started.md rename docs/images/{ => logos}/hf_logo.png (100%) rename docs/images/{ => logos}/mteb_logo/mteb_logo_tight_hfhub.drawio (100%) rename docs/images/{ => logos}/mteb_logo/mteb_logo_tight_hfhub.png (100%) rename docs/images/{ => logos}/mteb_logo/mteb_logo_transparent.png (100%) rename docs/images/{ => logos}/mteb_logo/mteb_logo_wide_github.drawio (100%) rename docs/images/{ => logos}/mteb_logo/mteb_logo_wide_github.png (100%) rename docs/images/{ => mmteb}/mmteb_overview_wide.png (100%) rename docs/images/{ => mmteb}/mmteb_overview_wide.svg (100%) rename docs/images/{ => mmteb}/mmteb_overview_wide_centered.png (100%) rename docs/images/{ => mmteb}/mmteb_overview_wide_centered.svg (100%) rename docs/images/{ => mmteb}/mteb_overview.svg (100%) create mode 100644 docs/images/visualizations/benchmark_explainer.png create mode 100644 docs/images/visualizations/modelmeta_explainer.png create mode 100644 docs/images/visualizations/task_explainer.png create mode 100644 docs/index.md create mode 100644 docs/references.bib create mode 100644 mkdocs.yml diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 0000000000..a9e50f4a5f --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,35 @@ +# creates the documentation on pushes it to the gh-pages branch +name: Documentation + +on: + pull_request: + branches: [main] + push: + branches: [main] + + +permissions: + contents: write + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[docs] + + + - name: Build and Deploy + if: github.event_name == 'push' + run: mkdocs gh-deploy --force + + - name: Build + if: github.event_name == 'pull_request' + run: make build-docs \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 
c715604500..0106b2c138 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,12 +1,16 @@ -## Contributing to MTEB -We welcome contributions such as new datasets to MTEB! Please see detailed see the related [issue](https://github.com/embeddings-benchmark/mteb/issues/360) for more information. +## Contributing to mteb + +We welcome contributions to `mteb` such as new tasks, code optimization or benchmarks. Once you have decided on your contribution, this document describes how to set up the repository for development. + ### Development Installation -If you want to submit a dataset or on other ways contribute to MTEB, you can install the package in development mode: + +If you want to submit a task or on other ways contribute to `mteb`, you will need to install the package in development mode: ```bash +# download the git repository git clone https://github.com/embeddings-benchmark/mteb cd mteb @@ -14,17 +18,22 @@ cd mteb make install ``` +This uses [make](https://www.gnu.org/software/make/) to define the install command. You can see what each command does in the [makefile](https://github.com/embeddings-benchmark/mteb/blob/main/Makefile). + ### Running Tests + To run the tests, you can use the following command: ```bash make test ``` -This is also run by the CI pipeline, so you can be sure that your changes do not break the package. We recommend running the tests in the lowest version of python supported by the package (see the pyproject.toml) to ensure compatibility. +This is also run by the CI pipeline, so if this passed locally, you can be almost sure that your changes will not cause a failed test once you create a pull request. We recommend running the tests in the lowest version of python supported by the package (see the [pyproject.toml](https://github.com/embeddings-benchmark/mteb/blob/main/pyproject.toml)) to ensure compatibility. 
+ ### Running linting -To run the linting before a PR you can use the following command: + +To run the linting before submitting a pull request, use: ```bash make lint @@ -32,8 +41,10 @@ make lint This command is equivalent to the command run during CI. It will check for code style and formatting issues. + ## Semantic Versioning and Releases -MTEB follows [semantic versioning](https://semver.org/). This means that the version number of the package is composed of three numbers: `MAJOR.MINOR.PATCH`. This allow us to use existing tools to automatically manage the versioning of the package. For maintainers (and contributors), this means that commits with the following prefixes will automatically trigger a version bump: + +`mteb` follows [semantic versioning](https://semver.org/). This means that the version number of the package is composed of three numbers: `MAJOR.MINOR.PATCH`. This allows us to use existing tools to automatically manage the versioning of the package. For maintainers (and contributors), this means that commits with the following prefixes will automatically trigger a version bump: - `fix:` for patches - `feat:` for minor versions @@ -41,4 +52,4 @@ MTEB follows [semantic versioning](https://semver.org/). This means that the ver Any commit with one of these prefixes will trigger a version bump upon merging to the main branch as long as tests pass. A version bump will then trigger a new release on PyPI as well as a new release on GitHub. -Other prefixes will not trigger a version bump. 
For example, `docs:`, `chore:`, `refactor:`, etc., however they will structure the commit history and the changelog. You can find more information about this in the [python-semantic-release documentation](https://python-semantic-release.readthedocs.io/en/latest/). If you do not intend to trigger a version bump you're not required to follow this convention when contributing to `mteb`. \ No newline at end of file diff --git a/Makefile b/Makefile index c1404270d9..c86ff62859 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ install: @echo "--- 🚀 Installing project dependencies ---" - pip install -e ".[dev]" + pip install -e ".[dev,docs]" install-for-tests: @echo "--- 🚀 Installing project dependencies for test ---" @@ -35,4 +35,8 @@ pr: build-docs: @echo "--- 📚 Building documentation ---" # since we do not have a documentation site, this just build tables for the .md files - python docs/create_tasks_table.py \ No newline at end of file + python docs/create_tasks_table.py + +serve-docs: + @echo "--- 📚 Serving documentation ---" + python -m mkdocs serve \ No newline at end of file diff --git a/README.md b/README.md index faf016bbc7..06b9d817a7 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@

- +

@@ -454,17 +454,25 @@ evaluation.run(model, ...) ## Citing -MTEB was introduced in "[MTEB: Massive Text Embedding Benchmark](https://arxiv.org/abs/2210.07316)", feel free to cite: +MTEB was introduced in "[MTEB: Massive Text Embedding Benchmark](https://aclanthology.org/2023.eacl-main.148/)", feel free to cite: ```bibtex -@article{muennighoff2022mteb, - doi = {10.48550/ARXIV.2210.07316}, - url = {https://arxiv.org/abs/2210.07316}, - author = {Muennighoff, Niklas and Tazi, Nouamane and Magne, Lo{\"\i}c and Reimers, Nils}, - title = {MTEB: Massive Text Embedding Benchmark}, - publisher = {arXiv}, - journal={arXiv preprint arXiv:2210.07316}, - year = {2022} +@inproceedings{muennighoff-etal-2023-mteb, + title = "{MTEB}: Massive Text Embedding Benchmark", + author = "Muennighoff, Niklas and + Tazi, Nouamane and + Magne, Loic and + Reimers, Nils", + editor = "Vlachos, Andreas and + Augenstein, Isabelle", + booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics", + month = may, + year = "2023", + address = "Dubrovnik, Croatia", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2023.eacl-main.148", + doi = "10.18653/v1/2023.eacl-main.148", + pages = "2014--2037", } ``` diff --git a/docs/adding_a_benchmark.md b/docs/adding_a_benchmark.md new file mode 100644 index 0000000000..78c9d018f0 --- /dev/null +++ b/docs/adding_a_benchmark.md @@ -0,0 +1,40 @@ +## Adding a new Benchmark + +MTEB covers a wide variety of benchmarks that are all presented in the public [leaderboard](https://huggingface.co/spaces/mteb/leaderboard). However, many languages or domains are still missing, and we welcome contributions. + +To add a new benchmark, you will need to: + +1) [Implement the tasks](adding_a_dataset.md) that you want to include in the benchmark, or find them in the existing list of tasks. 
+2) Implement the benchmark in the [`benchmarks.py`](https://github.com/embeddings-benchmark/mteb/blob/main/mteb/benchmarks/benchmarks.py) file and submit your changes as a single PR. + +This is easy to do: +```python +tasks = mteb.get_tasks(tasks=[] ...) # fetch the tasks you want to include in your benchmark + +MY_BENCHMARK = Benchmark( + name="Name of your benchmark", + tasks=tasks, + description="This benchmark tests y, which is important because of X", + reference="https://relevant_link_eg_to_paper.com", + citation="A bibtex citation if relevant", +) +``` + +3) Run a representative set of models on the benchmark. To submit the results: + +1. Open a PR on the result [repository](https://github.com/embeddings-benchmark/results) with: +- All results added in existing model folders or new folders +- Updated paths.json (see snippet results.py) + +- If any new models are added, add their names to `results.py` +- If you have access to all models you are adding, you can also [add results via the metadata](https://github.com/embeddings-benchmark/mteb/blob/main/docs/adding_a_model.md) for all of them / some of them +1. Open a PR at https://huggingface.co/spaces/mteb/leaderboard modifying app.py to add your tab: +- Add any new models & their specs to the global lists +- Add your tab, credits etc to where the other tabs are defined +- If you're adding new results to existing models, remove those models from `EXTERNAL_MODEL_RESULTS.json` such that they can be reloaded with the new results and are not cached. +- You may also have to uncomment `, download_mode='force_redownload', verification_mode="no_checks")` where the datasets are loaded to experiment locally without caching of results +- Test that it runs & works locally as you desire with python app.py, **please add screenshots to the PR** + +4) Wait for the automatic update + +Once the review from (3) is done the benchmark should appear on the leaderboard once it has automatically updated (might take a day). 
\ No newline at end of file diff --git a/docs/api/benchmark.md b/docs/api/benchmark.md new file mode 100644 index 0000000000..c68dc6c9e9 --- /dev/null +++ b/docs/api/benchmark.md @@ -0,0 +1,26 @@ +# Benchmark + +A benchmark within `mteb` is essentially just a list of tasks along with some metadata about the benchmark. + + +
+ ![](../images/visualizations/benchmark_explainer.png){ width="80%" } +
An overview of the benchmark within `mteb`
+
+ +This metadata includes a short description of the benchmark's intention, the reference, and the citation. If you use a benchmark from `mteb`, we recommend that you cite it along with `mteb`. + + +## Utilities + +:::mteb.get_benchmarks + +:::mteb.get_benchmark + + +## The Benchmark Object + + + + + diff --git a/docs/api/model.md b/docs/api/model.md new file mode 100644 index 0000000000..078c0f8eae --- /dev/null +++ b/docs/api/model.md @@ -0,0 +1,26 @@ +# Models + + + +A model in `mteb` covers two concepts: metadata and implementation. +- Metadata contains information about the model such as maximum input +length, valid frameworks, license, and degree of openness. +- Implementation is a reproducible workflow, which allows others to run the same model again, using the same prompts, hyperparameters, aggregation strategies, etc. + +
+ ![](../images/visualizations/modelmeta_explainer.png){ width="80%" } +
An overview of the model and its metadata within `mteb`
+
+ + + +## Metadata + +:::mteb.models.ModelMeta + +## The Encoder Interface + +:::mteb.Encoder + + + diff --git a/docs/api/task.md b/docs/api/task.md new file mode 100644 index 0000000000..55852ab4f7 --- /dev/null +++ b/docs/api/task.md @@ -0,0 +1,36 @@ +# Tasks + +A task is an implementation of a dataset for evaluation. It could, for instance, be the MIRACL dataset consisting of queries, a corpus of documents, +and the correct documents to retrieve for a given query. In addition to the dataset, a task includes the specifications for how a model should be run on the dataset and how its output should be evaluated. Each task also comes with extensive metadata including the license, who annotated the data, etc. + +
+ ![](../images/visualizations/task_explainer.png){ width="80%" } +
An overview of the tasks within `mteb`
+
+ +## Utilities + +:::mteb.get_tasks + +:::mteb.get_task + +## Metadata + +Each task also contains extensive metadata. We annotate this using the following object, which allows us to use [pydantic](https://docs.pydantic.dev/latest/) to validate the metadata. + +:::mteb.TaskMetadata + options: + members: true + + + +## The Task Object + +All tasks in `mteb` inherit from the following abstract class. + + +:::mteb.AbsTask + + diff --git a/docs/cli.md b/docs/cli.md new file mode 100644 index 0000000000..0b362a401f --- /dev/null +++ b/docs/cli.md @@ -0,0 +1,13 @@ +# CLI + + + + + +## Using multiple GPUs + +Using multiple GPUs in parallel can be done by just having a [custom encode function](missing) that distributes the inputs to multiple GPUs, e.g. [here](https://github.com/microsoft/unilm/blob/b60c741f746877293bb85eed6806736fc8fa0ffd/e5/mteb_eval.py#L60) or [here](https://github.com/ContextualAI/gritlm/blob/09d8630f0c95ac6a456354bcb6f964d7b9b6a609/gritlm/gritlm.py#L75). diff --git a/docs/getting_started.md b/docs/getting_started.md new file mode 100644 index 0000000000..861e937bcc --- /dev/null +++ b/docs/getting_started.md @@ -0,0 +1,21 @@ +# Getting Started + +## Installation + +You can install `mteb` using [pip](https://pip.pypa.io/en/stable/getting-started/) simply by running: + +```bash +pip install mteb +``` + +??? tip "Model Specific Installations" + + If you want to run certain models implemented within mteb you will often need some additional dependencies. These can be installed using: + + ```bash + pip install mteb[openai] + ``` + + If a specific model requires a dependency, it will raise an error with the recommended installation. To get an overview of the implemented models see [here](missing). 
+ + \ No newline at end of file diff --git a/docs/images/hf_logo.png b/docs/images/logos/hf_logo.png similarity index 100% rename from docs/images/hf_logo.png rename to docs/images/logos/hf_logo.png diff --git a/docs/images/mteb_logo/mteb_logo_tight_hfhub.drawio b/docs/images/logos/mteb_logo/mteb_logo_tight_hfhub.drawio similarity index 100% rename from docs/images/mteb_logo/mteb_logo_tight_hfhub.drawio rename to docs/images/logos/mteb_logo/mteb_logo_tight_hfhub.drawio diff --git a/docs/images/mteb_logo/mteb_logo_tight_hfhub.png b/docs/images/logos/mteb_logo/mteb_logo_tight_hfhub.png similarity index 100% rename from docs/images/mteb_logo/mteb_logo_tight_hfhub.png rename to docs/images/logos/mteb_logo/mteb_logo_tight_hfhub.png diff --git a/docs/images/mteb_logo/mteb_logo_transparent.png b/docs/images/logos/mteb_logo/mteb_logo_transparent.png similarity index 100% rename from docs/images/mteb_logo/mteb_logo_transparent.png rename to docs/images/logos/mteb_logo/mteb_logo_transparent.png diff --git a/docs/images/mteb_logo/mteb_logo_wide_github.drawio b/docs/images/logos/mteb_logo/mteb_logo_wide_github.drawio similarity index 100% rename from docs/images/mteb_logo/mteb_logo_wide_github.drawio rename to docs/images/logos/mteb_logo/mteb_logo_wide_github.drawio diff --git a/docs/images/mteb_logo/mteb_logo_wide_github.png b/docs/images/logos/mteb_logo/mteb_logo_wide_github.png similarity index 100% rename from docs/images/mteb_logo/mteb_logo_wide_github.png rename to docs/images/logos/mteb_logo/mteb_logo_wide_github.png diff --git a/docs/images/mmteb_overview_wide.png b/docs/images/mmteb/mmteb_overview_wide.png similarity index 100% rename from docs/images/mmteb_overview_wide.png rename to docs/images/mmteb/mmteb_overview_wide.png diff --git a/docs/images/mmteb_overview_wide.svg b/docs/images/mmteb/mmteb_overview_wide.svg similarity index 100% rename from docs/images/mmteb_overview_wide.svg rename to docs/images/mmteb/mmteb_overview_wide.svg diff --git 
a/docs/images/mmteb_overview_wide_centered.png b/docs/images/mmteb/mmteb_overview_wide_centered.png similarity index 100% rename from docs/images/mmteb_overview_wide_centered.png rename to docs/images/mmteb/mmteb_overview_wide_centered.png diff --git a/docs/images/mmteb_overview_wide_centered.svg b/docs/images/mmteb/mmteb_overview_wide_centered.svg similarity index 100% rename from docs/images/mmteb_overview_wide_centered.svg rename to docs/images/mmteb/mmteb_overview_wide_centered.svg diff --git a/docs/images/mteb_overview.svg b/docs/images/mmteb/mteb_overview.svg similarity index 100% rename from docs/images/mteb_overview.svg rename to docs/images/mmteb/mteb_overview.svg diff --git a/docs/images/visualizations/benchmark_explainer.png b/docs/images/visualizations/benchmark_explainer.png new file mode 100644 index 0000000000000000000000000000000000000000..456f9a39da65e2160dd1bf15aa55547b0c746d51 GIT binary patch literal 162731 zcmeFZcT`hr^9D*45GjIyfK=%SC{;i@3P_jUm0ki!??jL)y@`S}rH2|I^rCbSP%uC! 
zBE3uRA-OxA@0{az&hNYb-?eVmirMV?zBBXAJoC(KqqQ`YNUqXd#lpfOQBhWWf`x@| zjfI8NKzIe%b1;7BgoSlY!$CnoOGQC}S_7|z-##rkQ$K;8Y*n40)1@8&|& zWSSVu3;v6Nla-5=VK7?!U_{(*T&iL?nGv^XrXCKpbl?6*#xNDs?{4Uu76Lv>txS*dj zmLlyEpY6(^uCG*AllvNT7o6u^NzV|^yNVuzE1(4{KXMuD7iyG*OK-94kL9z?m_LU} zT)WBl{PiogCsCIy$3uge{Gq-qU9c}#hWoYFJx1F{`;nQp=T(KWiQ=*F>T5ULa}`+z zsXn7*#`fA(%`4pPN$Cw8jP}16>V3ZJs_9-B3AszMP(OOu#}(y;CF>S#-(enZWHj&v zuTIL5S9ka6@Q=s1o}sK_@iKkAukb?V$?4_Zu?)q<-mK*wNW_DHo}#|Et4PZ*S_VZy zx?kls$YoOQuU9J^eE*zy(C{)aT~_+X%g5*%LCz4!c&{h43n>@$x#TXdG?67wR|tM` zmvWQ{OT6wJKDF&0Yb0oCz)vDV_!kF(EZ<7UMDPl1a;(>C* zL>SZv5VE7=V_$H0zDa$Q#ad#fj0#y~77}K4$W^*p>6S~5Bu%=>714`-?eLWx_L5B8 zQ@wZAZ*(jeBvjYU!T8{ z_wu=2M!vH2r@`o4+zkS<70$~|f3*bUr>|d{=kL0F`aZg5lHfGtO#aL!oi3$w{}e>rUPArMtWF0fJSF5wU~;@qvNxg#`k(R~N)!L|5Pw6o68bRrW@;$>DQ+>=bcco(NgDxy9IpKp z&W}dViA=G@I?NgZBtCk4AvS+WMi`n%F7Zk68o5r_RX15va?*tuJeKRqcamfCS>8t7 z&}Q{W>Q?54QkTejGmG?*^<7_LbH{d2`pHa|B=ss!SeHiRQTf+uE`sIv^Y#xU z@P`!ba>LgsJlT!o_46*i_#d+Q6SanT=Z<`pzV?*vCb3ZGt$UBceK!!S=u+D-YKMH)9Jp|XQ_*Rh3BCeEUhX`%Jh4kzoW z;&W!->!)EVVQQb$Q?zyd)$f z9weG^H%X#<7#&+atbf^Zcwo$od*?={4h#L7RST_j%={Gb%`-ln{JjKY3s zqV$`@doNXpW@d?$hU8;O&4BP@^KIfOGYK{Zrz~cMq+yDzEQumdMxlF!_$j?N1ZjkP z?v31)^onj0Z3>(P%|4mk@tXDwIPyF+J)$_WUDw-SfNws1cE^c2CvDKUTcWS2-(P0H z_l8fZ@3(zzuMZXR{vTwF=PWGBEZ($55 zcP-%2X8bWD-z_u2^B-##v0AqSJ{W!welh62$Kc6}ur}1!ISsk+bm1TGJ0CdXUYj`8BF3v0Y=*i}lO@8_y`Zo1E_3|Mp zlcw^V?Q7FK)0^IklJREov((MJ$HlFgL9c@fE^P?DeVmnBsOruOy7nw~_2&AU(tGu{ z9O6xTae9q=BQ@?65f-r)T`8im;_EGpH4{3XnH(8*8EzbIezy}pMoS}FVQ)ItAY-;= z7RH~(KhZnZOORl3x6KWyYCvF6y{SJj?V?s6?V>tsJR{v0&@ec=Inyk4M{4weXu!l*~20tD~MXV#a4*VM3CR_CRF)7E9!KZcbli0 zPhj1;y>wlj@@w=n$_e!r8V4%wI8YRhHnP}D`{aZ3dpkw+4Tad~_>x#ZI!S(Sm#xP8 zwllR2t!|ZzxC;i|bLv&DcNvmLYr;LkA8QpH!fNwtTOFA2wL^L2EF@%t zJp3yYIcK;6`S)qcX}n^Bx}3U41Oo-fZ9Q$ytt$q%hW7_~1~-y+*{G7!5qXGd=-iUx z@>c))i2A^6j?%E|pd>3iX+>F6Q|zs@lYjGPIsH4EZ{|g;I%CcK8UpKawAHkQS2jN} zUFsK<@6#`L;>?8(gw_jD+A(GnNU2IdykM7G%QZPvS7?I82y$+}8xx!W{e0g@w(7RJ z{U+hFX3iY6!}4P?eipF;wZ6c6{sE&yk0g(;Q}5fUIr&_SEnBSTlY8gF0(DHvjP3}u 
z)qM+ChuNG8_CsOIi|MY$s76Iy_v&wrwPlBkSLa{F%*9{8kkhC%THa~>#>%Otj0WST z^A);w5x7&`1a(DFi_1dO`O(%9`zCtou5)L&-~`l}zJxxFUUJ~}0KJ%zi*f6^)kFR6 zxPcb~mzi;*0&_^)82dqZIm%=cM(fHudmP+seX-5JBDn!6gA906_)u>JHu1VmH16D= zgKvq>m^EFDF|KFFZ%l5Ci7<+Y1jHRzls`Jb?+R8!7zK{%CQf^K&Qrk@Dr%`S*yji1?nRp5xokl#DZ4GD*=(hhy2c zH7f%j{FMwCJ}{cNm!W;<_u43-lv12Rn$j+B$K6l9-=-0#q33V3D0TI6l0w!@k3>$-Om7GTn7Xtq_!X1{$zN1Mf(9I>bX*Bw4Qk@C$tIq%F&uD{|6nREM|6_9g? zLPWl0QehgDK?FIT-$I?4gU!M1YRoj(kXN6|de!!0{TPwL`jUeE^pnSJFX(yTeJ%2% zx87JG=rFilU&sQfECaGAVLb#aR*!YRiWtkOctjbWIfGQTg;TaSfvG~lt}(=;sFZvj z@igFb(P4|{uIsCFwez&O+a|eByzp;NgRx!KUgGrUA(?>$gM}68fQ9#;b2NZo%qJ1}Vy^k)7dJTqivakG0{Hso z;{16wzI86{pW8SMz%eX29R(E?;8(}W-Nwen!`{{NiFl>m6 zZ5O!zgoCc3r=hxOB;S)=jWLFU_J1a05+X%JfAW9I)hw1Bz&b8s4l%n#hHZ6%&4KKjq)z+Y0V_MV>4 zB?JU~e0=zQg!o$*cZcVm z4z4cDnEO7nboKI-Vr9iV=&#QoI^*7 zQ~Gax{paG}5B}%E2LhO>|0aq*Fn)KD~PTMzt9&b4)YU%>NfR$lB z*rvcgTz~um+t~UuY$P>+ead2~D9Y*jVsB*;yfD+NYd45^E&cW_-jh^3xm|&W4xre8z!Z?&w0xhwwU)L=qRQ{@DkWYZa=2y!}`06H8G;I`~awVG(`pD$s zosf?_A@<75+YTRPjaj=LNsD9i^oyaU3s1-6ZbN1)Y(iNq9P$@fc>mnY^~A{{hZYDG z{?9Y8aOAL9RF(dZYyWdcEp{A21W!mI?f-fP77n4&`M)3i=YT?ZuFLl66A2Ifk0Ao* z$@YyB{Bz`gUXD$u8;vCodz|ZV<^P=Fuk++zOaH&f;4vOKR1Zg8=>HPXACDo29{#@w zNRRbJVmXC6I#Cv6!%b5yt)BlVnk4`u|PfUy}bz;r|~nfk03=$it(i_40TcX5CNW zRNxAab0Q3wsYBCkse%|$p-w$n+w4Y{k za9L1)D080kk6rMeU=b0Kz3g1)wle4dPhq=6{YELIN3^|%`d>>@OXX<+4Rowfy@tlz z_akaA0X(_XpW?jnt>n0;w&Zv`(-ragtq^EpLa@($nn=SuESO6YjZY0Yl1({|xPZEt z(5gYYU_Et^_%ydP6whsaBh3NdF@m4AI<(QZ`USFx^X5&qMMchz=-MmVl43~B#@dE2 zjur~bLi<)mXG@v+>#ctjj+F}NJ-XN}LZA8WR)0EmB4YPDWk%`BpkQ5|-DnPU(VDgT zckv_F>MpdXh!`Td8#Qr4!*2|7y+C~mcbyK!j{$uT?MA77Iumsome0Hw(AK)QVtR36 za^U=A@ml8Tz^16zl!@r`ZOthS$sJJh&!$WgdeP5Kp6l;@XsG|WL{15>8#;t-<)D#v z&4;6^Tdy-+U=d9}VVhr$oFmI8YRW|!)_P}=Ic&7_(P}}m!zgRoGHA9+ljsG(TEmL1 zh_vD6@acJpv?1sE2b`>#;>Xb|Uh8daj+glYZX(u>_w{5HE<#Db=N8ho#yyma38xCk zI0x$Qit@6I#)e>yx~nGjL| z>z#&0(u;o0N7G?@nuzZfmn{q}Cp4hUp)28BX?MLK8}D^DdeKdaY1f|UJ6Mmc`)@z%=M$6= z3SqES3Z$TD)MAb^q`bbCQu6`gc+!fj^`PosHuhL8xAQ83GbeDM?PPXw%M~2(HU2x; 
zMDkFk9wvP>s{kA4YKG?(fz+f-3%LEB_*EMw8>&y}iPjWP70Q?ov~SSqQ07bSLsf51D7bNu}oBB1p!G|O!tCS=vI1$2edQnwKr>~7s>QKFJV@1!1^oWeiCfa^pOD|@1 z%Pr%}N$8+SBSw_IY!DcBOnbeB>)mv(l6O|&LOxaS2gMv7G3rpJhiNbJbaRJ>y%nwY z4jIAVcFrg2{3vDCcZ|<^(AISV^p9vtUdg4z#hK!8dzUS%>Kb1YEbLgL5PaP64xsiAI zL#OOV<@fiD)XHn%SeXT~z53SgU}^ye=+}1tFZ>M)&n^%n!7hb(;l}q1F?dKkDS~s!wuDqpj3#8VJV^s`AQg4 zHLUtbfyf}eYDk?QQ48N2b++!U-3d##z5V{IQB8Nn-6Z`-j@J+J22vi2HSky0o8a%9 zQ8Ha~WM`;@zA*6Qt`*pdGh6u6u*BxpgMq5>zvAZ~3Jf7t%bgA1@@ey^y*$}*sht*( z`6i0UZFiE2hP=L|SnogYTst6SST}d;$>2!0m(fR01B#md6WxJNla(rDxv!#0JJZ&K z1t8OPi^P}!(0e-g0UOW~Ze7_!>fv51*g~B(X%vYlzGTo5I6zO}MC3J`i z7K0RHz71{c_WFk{`u6pwUFBDGdkjhwj$cwCYuc!2zkTP!s454C9Ve_Z8jwo-K1#@8 zzy!7!++O9%6!b1D=oqn)^)F-oB1cr4nb^AUigzo2hr#s(wZb*Mgg(;roe6?BobJ0m zZ(7@pbZInHZ}jIkNTE?p{^DFR9O-^^z~XetL6xLKQd#Q!`e>uM1T<4{TPD7C@-=jw z?GN$jykJI=gALkFa#qpf_7>8+2HvP}8pL&_Ti0Ek%Kl-%vbl@b?P*vpmQpmQXd;8Z zqAs8>K?FHM`d37_$8Dl?9LDdKtJJJOMY23{DWFsfB1-QDjo}PLl}%sM$s1X$#)t@H zFXC}f;pZd%DxNf}@@Pt&{>E>rqtDe=7j6KqJLQUMcazNwIT&(@60$`O!dAadOnyj+ zt>deofi75toBc~!TzIAY)~lc2T4e?YQ%;}o(odEsZQl}9YU32v%>USQb8yL_VHBAs z77XQDEl81$p)c>R?BYXs;x<)sKom7QhEI;X4jRTP*WFVX3!fMXY}*A=7@P*U=AjEB z`Ss#czNtRp+!Of~RgOZwgnS1FBV!1*dSu$HM;=_*ZGQcrbWi{PSX^?s8}i3h3S`wZ zC}i=h&vv~tTu;BhkhJfx6kJM1v=%DT_?1n=%G{8~s=trt?JONBlPAg*qZ@vdKtoCq{bXCtR^(xIC9s3E$4Nd+&Y zln5dDvtf)>Y6KHJu6wXBvj9&l3G~HE6x&h2xp?|t6W|kbCB7A=09oM;O8@r zOgJhk?$X827SB9%GH-W`(y3)qHoj?R2=1$` z+RK=|k*p8ZKVabLEPtb^zr1vl2On6)GMo1|=#4D$(Cj$BqQFFNXt0~!-uG(3HuO6* zgkUSr&UX5Cg7oW1G1xz&WG7@RiKWW;6o!LrbI8j9ub;~Ci02EEJ2$~u2@h;fc~nzMe0NT zd+xCgGPiNT=S+*5w8vdc3$CwHcsDOb=*L#4&Lv}O(SG|5)7B~K?=@>e{}}*|*|n%x z0=lk8P9w!~>IapAIy2Z&3kS{_%XQ?13(O9s>;fq}7tyAS8|z>Fa~~(;nRmDnUC)eG zoTaF1_yRw>uhmmVJ(5unw2B4x%J?=U|IP05C5omqY%e|T`q!um43>EG<*+_6?cj1X zYRt8o|A-#3Y+(Af=f2ZFy^Fm8P606jpO|_wBglW+x&6T;3=x=kK|&iZU)1B440UIE zdlMo%ddsqoV>U^evM8WM4HC;fyEu{Z(mBbwyzSLb_?R?u9dP;K6nw7;x|r~XABb}B zpJkjvEVjy#J2H#ee%)=s-zv9{CW?$!hqDgA=Kt1_2Hs;DX#I?POjhu~-tmkSX93f_ 
ze20PhWWs2WfwNSM-dNj)sXfAQIa&6c&8C|g=b;@m)J;=@&Ys7+CmhV!EB0p7|{dRF3p@*lOgLq@~5I_hMe z(FB1FrjKIKm5C)Rs#Y!`hhAEj$Zi9xG?q-8stD(_+&E~Sg$@I4)oxibH2B>&rzYk+ z@)%e5vrIwP%a^c0(!G-5g? zeUbufG1}R5m@rYRh;CY!xg-aYij;Ge;ezwMqLkLN@mtfr69-$%=@zhvh$~)()P$*z zm{H(irwRQz^8s`PkgCHldAgfJoX)-9MX!&8nc_XGp2@b(6KJ%RMgrMdbR_u62wMp{ zr%8H$13j7YV;ATA>6Vh)W3e;%{rGDJ{R#I|h8-rdjGM$ZlquB3bZvY5qqibJk;k}1 zzt+IQh!Q($_-#!G>ZCI6xIP*U10qFXtEe-q6}4g-;IZ-`b9-WC*ur>&HZXUYGNjZStzgmI~+*ARagMP^Z89^hMByP7ZV=__zj! zu+20|V3aDoq;Men-Rl=naOw3qRy56-`sH1&Peg|6!Zlg*GVId2HT4PMhaQiU!nM*s z@QF3AA9JNU`JkS=RpuJ32JW!F26*ebe`-RhU>dJD?<&QrJrf4|=U7HAycD>4&8S#8 zGMF`nwks!|32c!mI_T1L;Lv>Vv3*Ji{MK43z_aFP!eT`FtUqMjZYX%X*=Hy5!ECcn zeT7+b1kxzz2(AuJD<}dD!A!^>1jh-hKY`4swjO?JTEK$Ku3@9>gna3`qL@tf6EXt+W}eagHIJpdnaGf zXOU+9vy0v%Z)iX@Bd(Sf-<{t$*9?h@KL61Xe{Mdw33z>J<`9(>4n%|7;@0Xgq`R&J#l0)DW z!y-dRbViTg=IJ#ezjGDsp9M7~<{TdQdFwQBrf3==TH+I1gUPmu-^j9s?i!%;(A)>_ z_EvFUo_@f-?k1R!5 zBtrUAIH%*pzT4N&dS^OUeY|o!mV^2Rn|5hVI8PWFA^4k6*uBoCRhoEtK5TT!*ukmY8Fo8VOej6(gYvy=g?A&k2Q__qmkm|u!X)k4$&z*V zB?46@94rohRB9-oDe-}f)6OQU-L*};SL3Evq+M5={hY)av*c2ug4J;U%{~BTgd{M8 zGA~@aBv&Te}FSzMc8} zZ6oUVq5iG^U?85tsy>|j1v{mfb~5R`WCs6%A=vz*oEo9w2ZQIw+Yp+596&@J<^kIx z7xQFbdx5so_hvq`UdIIjK}0;i4UgQAn=qBfTI_P>k@v#KOoVxJur!+^;a9cXLNZjv z6|Yem8;63~tuR;ljP){f$qoGV2sRo8#;&Y>F2&c*GanZ3)@j$%JsE23C^0r$T3Y0Hl0$sqlYF>J&Y0DUKf*ed%XI@`Y7e?kGOy-4wSj?vchqvDpM7u zCf&1(pUtkqLstQ3(lLrB9QQGt#@v4Ookx7!&3l&XgSQlZqvdluuLEAD0=@3Fg-r2* znMEMmw-lPCyXw_6$#eFF5OfEP8;%dwAw6;`32e`9>X5B*LCso8+rMo^FiY=Wh(}YZ zU^5hXCF3U(KBRtU@l_rO4n@7W=rgQm$O0E~+w9XZ?S;y=%jf%kK2vc%mk9pWt#nvF zN2Vgu78ARF%o090nep)ce6M#Bo8Z<{8Z*zNUhhi_mj4Q@5*!%4b6z>Dg9O*6lOWs~ z#PUS;WP1IdCVqCXUUHzrv5l^uqR9SI^5>fZ9t7L5P95 zUw8cd1;Xiooz}$=iyiHEgTL$B{#Rg)I5+pcs3e8*iJDJ?Wep1EAF^v4~agOf$6M_Z-U>; zBKyJ7@Ouk8nt>UiIi{Cy%zQ3gld3mzfjqn8f|Z#YatEQKA>OSaeYSMsB@m7o?4Jae z79Ym#xXdAe2-ZYrq%I^BnS~kW0&% zf{43Wzu@XgMw-Ti$%xyVRy+Z94DxaSIrR5lUDUi6emZ8pcM{TT0iB;~9It3wnNZUq zznwyb$Gj^?Xn!86Kq}t#6JQ`250B)q+{ylq%(ClvkEx)La#-o*$TsJc-MATqTzZ|; 
zJ%}eD!v@+TF7uj@NaTjM2&v~ychMY~@q|E9gxZ*Gyjb*PS=)lCQ)4D^kuEPLXAszM zG6uMa_BBFA?kJh7H4Y_~$RMY4)9Jtj;_ay03Cq~#w85f)fCxBq34+Yrx~Yzl2O}mq z(m~R`4`+z}mJn751!k2R7}-!=9588CYm#i41-7L>O+z{!ulhJeH{R-eaVi&hnowh0 zz)o4__H~5KVBzFK>KF6Yt@BK;$luHCVm({eOaf5c7}~PS{*EB($@8d&>$=dV^Yjav z)c#Zt4-AbpkWxRI#U>B&rM6bnQLIyzzYM;U;&j-j&8juv1ZLvWxZJ7VefeJ|rGryI zgkV>FJy{B=a1@vUHmTIsE45U~-EePGN!Dvtnd-5ba~*amyHU$xt?;=g3$Y#C?y@hFWj}ZC+~8;) zlOUhnXGVF3onEYkMIV!Z6DMAh#Z|k^Ha%Rj!{{=%obYp&184KWu%z?FcD*aa zv4r4v6vy;R)e(GN4+qpMAQ1!`Qzj}C^qj6&t zj&|w~RA<$%FKKHTX0!E5pHepPQdd>xlIa^POhJ)&Peo$bc_pNF)zRr2R_pn#%XgLz zCf#!-l_O9R@AJG_uZdaM@syL&&0jGfx12~V`vJmxF32ZS*dh67XRf^*QY8JCOT)Ng zSInv~sSdTUi;mpP_YAR0dCOg zf3%Sn1~NPGmAVHm>BnXf+nPn>*qToc|&V3R?ijJp)NLS zeKH4bkE0i>s1+@~`L}8TB>p&8m_^d6=Nbf3S;MXQ&dIB_DMB_)OYe~_1*C1ql51rA zTHXe4|F_d8pc@+QqG4q*2+t2c4c1#;*U}pq_jgg_7O8DSLYSMy$OYNi34q zu1@$$NobX@8FkP!J8`ZEldzp6!{t=;&_CM~TqGaH4?FmvxninfX?R83wK@V63 zf6?SNls5&fqh|d#%SAoczCbhI2mfwWchZ5i>3x2mx@lH zv}c0lsaNV92W7ba`)>t~w+slsAr0$o8l8U|m<}(ec+1xIx zQF?^@RZ7eS;n?K_d8;M4qs~^-XKx(+jt-wT;ytE>`bBJw>QL7bcF2!JU=1*RPir^M z3Ge8JFooL;Dau02g9TH{g890fymeTJh3Co&<+&IM3*Gs=*sxVDDAcN>)O&bj@G*dE zL{IS5^13AFy|Jr`NsByQca&lbQ^VxWBZC zjmT4*)4U|^Ix`GK-sWfn7I|rnO#M|=$m^8v;WPBV&M$l29~|L4z=)UD!z0is8ZM_ z1gf7|6DvgfVsa?F#Ju3RZpmgpNG548g+qYy$vuZjHd}=Oj-*_ZLl_4YQ769E~ijsM+Rd?FFZB8>C63OTaRI8mF)Bj^dGae&S-j_!c(={W~CjX}u zt>{pj>s*l#DH%_Mp4E*DG$4d*B>f6O*8PQHFwX><1Dc#d_P&R& zn0Ur`4Qi6k^v?7Gf%XYXEixj9O6esuJGT;A)hcDrK;^}$jyZC6ZScc(>ez^g)X_T( zZ7(ZA7|S3z{wRUbnUYhZ?2$(;_U}gR!ww{q&c&7WV7~}Zt6g8j;piZtL&m{gDpm(`hodw`wtPm(s%kC#)@6-9n-j}Td zE-~l7!}F>KIPAm-|I7t>SR2g5+G<5mWXX{*@6gMzwPOMD(Fo>rI-%_9Ri4rRfmK2D zB6`Wp#!?UVysA2YRRO54zQ^r$xxom(wDSwQVp{}cFGLDM89s06y1MsaF_DR%#HtkA zFK7?WTZP3;|1~U3qz&3EuP@7! 
z)1hW#;hj&SehX()u`yD}TYm!Co$9Q{GUpMEy)NCWhoEaSULc}$>9leMz?unisL zXR#I;qWZ6-bSmaQ(W6k@+9K=js3Sn^UWPqfd7J&CfF0xni;K%i{F9(VkIbkbK*^j< z>a0U(nDv8maRC}HNZ=jqs$$48h{M}K<{8DCYiSz!TPnW_Rb4sz3p<~}W6FPe6ivP) zleQ1KI99KY`i~Q60k9Jc0G!jG!ik-a;06MXBBhbPl_ffwDY=aL&QtLzpi&YhaU*&e z#ABy)pg4$3w5&xk*&(DYg%d($d6)6DK;DIv{n^pes>BK^>gr-63U0mR^=Y71Yngrw zEIFOy#)NBe*8vhoiA6ULj(*glmM0{s-*V){-aU5~Lv6A%lH+LA6I(4Ebl$c3^cN-u z;b1zYw(IAxXByco z^yyaYdmHJWSnB#x*x;ih8W#(=;3LNKoGT=Ynl}J+P6@UfF?t%*{yZWM@Rdcdk_1#A zhMJK@;2ok16(X)xV_+>n01vpt|Cg)St-u6d2U4RP=u>24Hp{d+-JL-VI80no+5g37 z$r?j5h9;pDL%3O6U!KFTflRy(6e3*>`nD$S(@jW9LzR83j5tAy|F)69ET z5kPy{>p5n=|H`+_2zBN9;%l4UKxOI1!wosKQcib0BuWl~S+{LD11U$9?JhM7xKza0 z8VkMjYjTG1RA+7Yd5&El zt2M3!0N`>TYH z5iyJ741%A=q%kUgx-*hGB!sX$$D!fMGo9s9xmAoxy)3SqO0sri-<;4(K2C87k+~Ge zVGP?_#ST49X_9+$w$VNJwuXB0Mfr$gUr#d?W+cWy!!4XuUGlFC3JYiFVXiIhzdX;5PAu6w9p3sZ!KA5Pi%TOub(q zFOi7{@;1>2=%_ymZVQ@SJyG4;G+?`7^QV_m3daEE_~F!fmPuyJNMUA8C#A1dwP$~G%5IAt{Jg4U+bx#f#Ie6c~ zQcnOi{6=K%+%-H=4{ZhX<&-J`y|+^1wRnF=`>H@7K{E>CE30VZA52V3Ok?!IPDOPE z$Jv#y#8^W-Ws`637w73*f|4Ko$vWvZaoS9j>du>gC`9^8n_+LS?ow*I0ko&U^`}5| zEu{t9ea^7UnLH=3?pwi*;=^Osq5|5sOP>~A{~N@IP%&c?FI>uvAW}rnD{*U&5;{w* zW8p#TxQX@N@KBQGj8^}-gvzP<{FaRNwR{Jm-V8;MzJ*&w-ONz9PN@hFP?fjSG4`#@ z#T3}?`{pyFPJOw=P{U6LLa*JW2SlHN(R{wT$#0qmU|&Vz&}w4&wsBw^S<1#f1QE!u zWN{VfzWSU3o5Z40UR|_=@p?fB0+(4u`}N!3P`%dDZf&Z#eB3U7@d*zDh6Pw7dgN~! 
z%CMqt+|vVDa&x0JazrMsfV?H#EBCdrIv zkryBnHPu9-q`=@2(_HHJY-l-a!`TeorP1>+d?OB!a*t=xtK}0%NaafJz3uwR^+Zja z(N9-V8~3TA+_h;^10SMwT%RGk1%eenSrPAXstX-RFYd&?J#g+ACqz7`#k4z>DgfGM z7_P<8LjM83yWjC1Q$tClBexp$S81Ut9yS78u_YRfm-~@-M8q1GzUueBTtTIzr zNkKzf*Ut`19Z}StF%Ygv4H+6P`eyVeMc|Nd%F8nSm2u-Ja}L>qq1Yu)f4l^GPYmjs zciN`2{Z?o*0UoNn^>_+~MqNyEWP+00V{S}i8uwxZ+U&uXn-+^ZGU&4rU;m3D=_BrG z*Y?Y;lpLhBw&>j1*$9|%$EPWHNMu4$GUsy3ZhLwT1sei5n~LV25}Ehi-f!2AX~0&q z1B~lkX7^2Ew%gH{uFdli^zXO*4ptE7o0YE9&h=SA>#7z!164q;c`e6_MCYNGxBn6a zR><*fEofqSy#Z$|bSgV5<$k#e*$=2KO~L%*v0B{ys)w+$SkOb#>?H>_o^zg&AK#)^ z5}TT2C>ym4b|T-ztr+(k>rF==Gh~7F7=D%M5^w>8>`E()1Bx{h2&0C;lSAN=WHS0j9 zdN%lC+j*+ifkhjQI%%hwkOqV`oyx7V1(bxihJzOA=c<6aH{TzRx5kuS==Wi8e3{ep ziFU?iIEMT9k^#J{AVL=Zn;A|e$TrH4Gi``;Gi~VY&_YMcvag!#eQcL+j&M8LWQn%c zZ>`01yVO$7E=8a}P_dGJ`om;PAvz8oB#cxc6YpqS!S2Fmo<%wcq62KB=J^w@m1B!|l z8wTx;ACwaGlPn!TpxPYNVbSs1k-(nRWn&W)Cx9;0RW_T2ECNl1ljDX~aSPd|eWJp` zlL=ttA~}TOqw$q%H=nL%`DAmC9hcP%Ls6F()0Zc67BiAg<9Fz#0~^!TgeP53xO`;PNEzt&JKf_P4G>f>O zW=k@bA?#!v@X(FlnYSA<^(A?5Dlxv4RYd7RgSp@S6q54=9r)bW;3<--{am|}JlW8A zA!;}(c^ttb$&6a6#pB@Pn-T8$j)@fJU!E1+xxbvMv(2HB{`Gjj`>S?(p zH})TT4733a;NHXHtCK*2l#|ZBE z3u-5WXN^zb))wln`!54?j-@t9{%F}P%39n}%Ta*M(MYV^f(}wZ2cD;41&OinV*Aep zoggew`{{4!y?bJ5_hKsD7JJsq%Z9?r!Zu%L3yGdu$IfBlBWxyMr!=_w2yrev{4$*yR-74BG9LZz4c(f)Po@}~TvnO}orngW+ zerluT)ma86`58M^S&Ri(>=J!s*1bW1cgh9UvW&d<_ZGk7IM={Vz`&?5Jz-lw2oV|* zf;e2M1py5oN##@e^7>@^Qvg_d|Fbf1nRjou`o3Y$E31$W6LpGf=t+#9`6d(bF0X zq1jl{Oj5DbA5i&|p(JDDixs-_i!^>(buql_0N*rQ3uA6d%Ai{_!L2)AET&1st;R>y(52y8Mp{1re)b<;p zKwTcr5AZMVrMv-CIRT<|kn5w=zm1!`m^5TGtSp0-)W$@Xk2DS(EjLmZO!Igb090 z?1+~^offYx#0bE$e7=hx)9@Kvb7uR#mpc5S;1<|JYc~l*ViVy8z~%OKw*PE781~hA z!w+hhk8lgxnylXYUUKr~<4c0xLxK?*bfDW#Cz%WgD02V>VQb0V4&237_1+AL`B%IZ zRlt#%DX1SEcsjBYGWyBw1PAHbuaCshb)!%ASN^m85=R)mt3$WsjLq{%Oo+T3-gAfZ8>99 z|DiAl%Z=XI51vW}jpG1DGs8_>sGeyH#L*8T4#92~!Zsg2BuG0-`5#6t78o!o{wm&~ z7G+SKg=%jpj2bfD=jP?HKy8j>1umaFnWJ6@0DzenTu;>gORoKKC!vwvZtG6a$j{c3 z@5N3XFR)=igZ?B2z5%e#64`=*ZDzq|%WfD_?u$GY%k-yEso$bA(6#x0Ft_ZePq>dS 
z`cnqVB1qImbsy{tp0(xlEk8?1Co7>#E5@0vfT*x-Q0rA5N@peD+I)7*3p!m58oYS4 zH43yBcjeq*5}~eLvSOPPU;%O_v$jO|Ad`q=>TKY_&{Umk(58v^s0wE{3aXzm{9^=+ zs^N1bMF8Dab3l0&reEIQfgPx9 z8#Cm#x0glsJ;*fjuJ|N1VjOt;tCkGxmHRF8o7EavgZc6a(5?FZktgs!yCA;`z^0@z z$;_OP^++vX)t?-loQS{MY_$OXD5|Xv}89=tImiqF#C3Teldr3p$ApT zX~Xz@*sM7`oJ3luZGakKr)W?R`WlP@}w&U%PsVwdGRWSt??JL?+o&#X$xtOZXUSrB*JP1 z-v4q$$%z~&T`?2~i^{G>6A@LqjV!3A_xLf(hZw;ZuB|F=luTH^?u?TEasYX%zNB1mYu)vt-m&n z3xaa^!y~5&kM*f)>qqBVAt%o@*2pm|aG~A*O(Pf2V=sqnYL(NKIm^_m&!~S^KQ1U~ zo*~}9{k-ec|A(~|@Z+^4etS=3&%r_ry^XaPX<`NB2RU$eL3^CK-L{`2!Y{)itzpDm zCQLZ;#T``?rNb`~I`^LjOv(Ij8ae)SJ$u}|=!aEK?ekHbpB8M*BqjH>9~Cg+)~Nf4 zNbXEt)KmQyLD7DQX^dLqxXX0O|DDb4gZy8rc-?AJ`%b)nqO&Lo zV;TG|b9=FYy__HwNAU)n!3kChtKD;tel@Ny8>~learoPG+!91wp`vFMjODnQHRu!Vxlf$pyl$^K&p z>y2os1n0s_$o!LOq+sV{fGYQM4Dh$T-*TFwU{ToOR+1dD?ob6@OSh!V zle^*2f~j7UT~=~dKr<(M?k#P^srERa<<_q7Z>L7S!?4iB3Spe6?E&>O-%H}aU@%4b) z{l_~gM0d1(cBkVuVwc)UFM#&)LzZ~pOe()qiEix+J8d54zc8D~f+xax#``8fpl5k+FhLxd^U~a!V#?ts*wDpSBmtMoKV^G zVB_u=7I&b|MBdK?pPi>2gx<>7NQAFGgc-g7LI}Q$>>~)Ea(cQ3P{LaF2^;M&Qquq7 zs?BaiVIZU$M>M$<-hrBVFL#7D)p>0s_iJYX`G27g5cnYAng|C#_&oOZl2nIyQ}nDf ze+b0wLZZia`~=0VhuyqlW!lF7Lg}XG&SdAYsf*&gw3xDU?mmo8!liWk$1y`8L9NB4 zl`zrWcy^uZfENv1O#sg3<{mum`Rl&>&L@;o`}t;^NQ8$L`$0;pO?lIEf$hdhR=tnI z8it;Su$1@zQmq@A0fMnj7N5vxA%d1!bi3F{7wjcavohf%QcxWZvS?^eJ!#`HsDq7i zua?ZyOeg!5S#ZiMo{gb2@7gr)Sz(BE1{4m+y?AcD#EU<=1?o5=_0ZDTVPOLp_J9{J9*aZd zm`&nLfi6bjir&IVGu7t|IG~1iJ5E$Rf{bG9f`s9+l|i}dls+RhyS3*54AXP3uRh(u zBrE9sO;;-{#AzB-%)Oo$^s`+H|2H=pFAjLx*i&GltOK~w)5*#;sE`a1 zw2;&h$yYp8#f{2OEDM)6%pq@_lI{y<-(9oq zjSlAxIa7;5G%1J-&tsX3t%eR_m*w%v8PMJn;iUc}c_4JiDdRvyJiYHl$iLKu4|>-! 
z3?WRFJ`7f$?iO8IJhGF`$jsiv#Hg8})yhu0OJXDZ-Y5ElbT_XB)?P22ZFFgGh`Mry z6mPTQ#-kt0w9i%Xy#|N6faUY}*MjQvoQU@zDtvAE%@l!#4ZCHl zvyAx}HlR-S1VR9xegi2Nmq@^+B%DHUhF&36%UVDU^RI{TK1NvE(u6$g$Ir=N;rLG7 zD=IwgnVVMeJtXWSk?6NcU3lErnkfyd?WJ80pHAgyEd_imgK2vf(Kh6(uq}UP98aEm zdj3wDOb%Qsp%Du-9f4AJ-V@25UJTT?0+U-o=u^rbu{D(7mf20$3i6lT4hTE8#E|J9 z2@P0cBlV$|hev?;i~ql-j9z@$DH*In{enpdYW)sd^@=%5iaJiZmyyGk$$>He7sr-Q2!3y2#a{CJ4cBl?yg44H9O2F1gTe zYBk$FZyab~CGG+rdef@#DTN%dR&=-@0j}Hv++!`m1ktWh@UM8dh-i0UOm9sh+LN$B|E8(t&I=W8joI|(A*Eb z*OKk1E_KdNWA}ag^3%E07YS;0H8Yq$e3gm_ieGopo`Hn}#S%J|+-N$|Ksbee(1Zas z;D)yfnx5NQZl3+i%cI&B)@pL5e~F;(I6wsDJt2dw>ZDn8(+|Wokp;7Av!k&?yH%WG zWGU<_UrkWkC^nCU(mKpUpXzk0t0(9ugvC85jQx1(X&}y2u!lJF*LQHf{?hi^`)yd~ zw6n_*|9cfZ1R@a0G($qzsuAS03&K6~6?gn!?wL~rOMZB??x(CVTj_+G&$D(@Z(`Gj zo2*b)ZN4}3{&bPCwXWl^n#g2zFBf0+eCnJQqrG@{0APhxQtAzX{c`5@E4VtsWJ&}T z!USR>F|_>Su0zAxH;}?FF4u)O+}A`jb5%IrZI}Z6n0yO21l9}Q&lE@&{T-if1$0zd z)M22T{BEGzxO+fJYEk}YjUY{a4sD3*yN}7zoWhfgcm*KtS{*A|43ke+(|zBtXwcK~ zJnoq8M+wZ%ocO{m^-w)k^Cs}B9E6X#BaFDH=TA4+{NqnP$ zO3+wT)<~XLYVemFEjo)B4SCJVCB8VoRi1|zYS}V(*r<^tG>K@?jbJ4Vv ziz#}wFS_`E2pV#4G0I1hfNjJfRYr3(`6R~pYS*AaHU8bYBu*Xj~hVsXG)%JcDxJ|FEwNb#@sBXGB zC`GcBY*Ws@$PYS0;J2W92|&m!5MRu}tOiHoaa9Izy8crRzY!wXwIRstPauvLZBb(+ zMi=Qt(cdhl?TEJ>z%FWzmQ$CuGnfCN&OZB`GYm2RScu4olDodqVtXQFM-+NhIzxfA}+Fy zg(KCk1N9u@0BqtG&_MFJYC**rJM42X-QRFY>Q}e>|EsO{yFOm68(TkBi%fp znKfRW+ZRJ~zw{Y)#!g96)6NV%TNp-%Ibr{4XGN1@sQ9fD}LHKsPSAffL|f0vAZ zr)=r2GpZJO9brfZdo^yZgW(Tb{M98l7x9sDFL#5%+9Dz045p)V&@`Zy-@7w|=d2vI``4`2bR$QIPX2E0vr7 zT^JEvgspd-u?Arz1YWt);w_SL%@_r!()aqiLXM6ykebaAp=7eI#FGezz58G5HtLy+^L0o|P% z42maH5rRiksLI!UNEN;}LSQ>OS&G!1EYnXCuPMz``6~#}8c6@2D>e4EYG>p@pqq2g zQ>)w?%mV8dGnsax38YQg|E)}>?U|ks^SO)37^SJ&UVa`A2s#6%$Y*Z5AucWfbR405`=a>G<%+} z7$Zbv;8T@Y0AfjKooRXFo{T$48f}8mSuJ4BJqsS!5&w<;?L@)NHv3boCV|te(pF<& zjfpW)XharmC0b25;txIcG@w`rp6>`Zi2+>oRSvDx+)`*ppMNo1JI{DN>>K?SgVBzn zrw=^8(IGswU0j^@=rXkoVOcfrny+&r)VeB#8UcAC0fUThSmDWT`-#kMZh}G4VE45R zUmTBbCDGJFCU(%}yFcMfj&zJ|q~Os*Pv2rt$0+MxF#SbszHJRebSwY1BkEGv#GjyH 
z#nR(G@GJDX+oQ_rn&csTUpZPz-he*GWaNXgX3&%z?OJ?6hkw~%m$wD96OrLLk3S9v z4o1&cWM_t9F{$q`O@3B&>t1_o7Pn0m6{_sLj%$y)-$zQ-j=FaO0^eC+ubajW=n>&g z*ZaXk3F*75f7Lut^H;VE{Ds^aYxGkZd|}b`T_XIn*m!G z2M&fBsnr7${aJpwZ(I}1_sq4)D~it#)ZOi>Pw21tC~GEU z;9GpGv@ZX}T#RSrD#QU_iAf_&_|hcsL{;*l>7tWibH&}TqPVQN?5Bg`lijENH12>< zn*w^WN~#6IvEOwh4NU!FGU>>nH^WEs^|X>rIz2WhwmCslRVk5qE4v z0}BAg)d(33_;YlU^VnrK^x*hs|D~T~QfP;(7#NjMx$%4Mb_%XhG1Dv)(9x{%=GZb@ z$)2lBUDHn^?5}`@IkF``vboP7Mb+LSO;81=MXRiSohc@&34XR=H=HTj;R+~iz%-y< zr;?85p;y?jKngd@V%v$h9_JOzDVUS7*q&tH|}qO)vIDI`-sG-`w21o!sry4*7Wdg zYnsbUkAmRg*z5ZfWaEguC+Uli-^nNW{^b|7#sz?FV)i~E?67(c)~`7v>6Ap`s2Z2{Aa`Ki(H zd56MmgwAed9Sb0@Q6V!K!VdV)Vau#X1`r(MaDwke&f)rW$79zcYp3`orb27TbNwip zdh0^$^2cFoXdoP+Z=m7G6C&N6=X5H_xTNuVEt$Pu>Tr#(I0FG+WT%(F2n0o`AkYV# zatZ7VFWCViSAEQ2C3_8>)=cs78ZE^^wVYV0-wouaB3L{13qI&#-E4yLqI6k6m}STl zcRiZ6U{#9p3E;)(Be}~{TWTPp=%A^b7OkQ#y(LpZ zFiL2)`nj)DB5fzFBmzST;Zm-NcYn7k&oYb!BU7ABD@kF^{%QF5d35bPpP(O5DS0m+j%(+}>}o`Z^W+Doe`bSRBRv4@o}-&XBIMi)qO)X*aljQ)wvT(z~zI*dwaYy}$_vY&lLyeC^12Pl57#+x|q3qf&d% zzSx_SrMk7tbDw!YtcXU==-1ae7htlSEr(v(%mOdP7T~IzuY@fw-Qn$u!k1MbRr#6L z@tTGJ7xeZ2Gn|e<{{JT3(n3{7=a#_PSTGn_rSp$43d4w<| z8_xkPtcwv}_ajih5<88y!0qNOzzFUSkNc<~S;L>l@Db1xh#JD7x%oWBuI97w3_5!w zHrC~cZ_~d2mPr=KGJuj@BcdIBOhQ?K-B>vU!&2&b%f{`WgUSU6cYG3J8saB~Q|5 zP@v;Hti?G)VEo1D+_qiQ;Y29Ox1n{AtO;|aqVzg3o_#ec4+K<~O1qlrPQde=cok)_ zAX9z7#ZCm~4-k48BodmXX^6U>umFF*O*-aa>Y@y~HZycH2$ zT!4%^$zV;My*|6|7qbj{wsPerusz|b-5X;c*WO*I5!n2w*65woyLeOg=8hK8C^aGP zuSgc%7GbAAh_;h!Y>xG3iSy`z9nI~mD=Q-oaD{J?LfsLjVE|nzWwM(=p>_escUl}v z&X-!KHdIO<5Ex zd>IZO87^MZ2pfSoL?b)WF8cOn1pO6VEqwiJG@b0?_F_k<-Ir{MT}{VVK!xVD7$i&K zv59X2&ept&Qd=!FwY1qcOW}51+rb{L!n%36C*E=& zF?nE#knYSs*vnz0ShGA4U86*G5Jt--vU4p&~8btRLJ@F_@ zr1-D@n9>4C9UDed%9h6xb;sF08EuTV`65+ey)oli!!+8wANQE{l`3QJ2!)NP zd$|uCg7qYMyutYP&v-_?aHn=OSF?aia10Av))6iC>;@)Hy!_J?9y_8Nv$ghTQh+cJ z)0lR1Ju*oDly5lP&Tdt7;bw`6L`jEq%3ifs`Xirngt>NWcfa{B1Go5{+Af<+c?7us 
z=UIagmv&axx4bY}G2S&L>9P1Vw9l~TFdokv8Ue*sh0VN0cDT<^uZCBX!ZnklRumvCeqje$ZO#f$+Kd@fBYCT&upXs(La#Lqv)Z$A%n;8EGxu6JIx9{UaQ>YFYe= z03~O*{2b2WFf7e`X7q2kb*&eC1io>QZaGvV@cB(xGGDf;l(A};^lI%FHy315C`$~FK`JF7z&X}O(VoK1*7MPOMZxx}(WbDnq@aOy z=E3TCn|d01kc?siPLkxFjvDy3G6nYP zID`tP0VGS4z{3FB__$T9#$TKP)rWgX%Xg&h;_mixQ zab+FOPb7qwa*ox~*xhSIfLpdQr29<%?BS{NR=)eIxdguJY&AC-5tK1L$lpYqLUmCK zq%=d4aQODM&b}{gA80$--5`cZJkm{a0Rp)l{NfamY;pdXAP9g5BKL|IbwPgSyth~+ z3mmNa->UBXhtr_i`GuHKq}rKb1F)tJXNjW#$sSFZb^S^?N5*{R1L_o=IzLvco>y@} zhf>bDR+tyb5#~zKLGs5;m9mBd?3|4uy>y0_hHresyUq8D34#{bpRl4|4uMGSo}ByQ z5K3eV*Ym;Jo4OfRpxJ1b*{wEi8oYvUU)2CrYzp)7&&>G59gbN5;a0B=F2`Y`3IYQN zOphsTwAdbeM0vGKIq?JQ7HF_dx4LN}H>1j92-Q82q7ML1Tx!KO4TSymmK!<%7yf72 zMI5!CM?cC>WiR-b&!U=D|>_gY8@ro-(*b!{AidI3F(-6I`Xl8FO z=|S&uHrw*?sR2_#jLu&%>?-k#xTvj`abo81D8vP0H&&pGFjmsH+q%)5o|JCaZ?Fj* zj;Su8Py(0arlX)nW;4i`?ULHKG zu6?-zk$JVC0E2^03`MK?0Nb;+clAMsrHw}KN}1Y2wPlw#!z_S<0>c2@x2tv>1vg83o!^ z?9m<&)6@}qn;pW!wPg=I_pIJOnzf$=P7J3MUb|$3K@MRrId&HI7p@0>PRco`t{3Nj zO7jO8&I3(^ODpe0ylGufTqAj0^$SzGjlWCWyK&s;Zq2j{zJ$VQK%z3c4&PVBetL6m z$+1Fv*%OVw*p^+Jpqy%O<*?ue(A>`?*9HBLz<`BU2qpkWu^0!MKNKCM@0!O#CC=*~ zWL1c+xLV@0t&T4I{XYBl{xYL24{OAw@uwkKgZ{Zx~)#qZ6`KAzU^A_lR+i zSM}n*t!JrX2_8fV0;^QsIU&1&ZN|k9hWB%V$6rT5W~k|awZp+{+w*wNB0M}C_*;vo zubt)A#ig5lhHyt^w)r|>8JELa?oBX_;P0$|z5N1uK0A--Ab7nl-smEbim{gAX|Lv< z3Bsd{uw{oXc}SJ3Vx73vN??hecL~FduTC?rh!3sd(_oSi{WmCLSTlkfrarIUg9GZM z4RrrSe237hV<@l7rWQynr67R41V{Lz0j$~T;*~sm?F0MQfECI+u8kk9SxL5;0o}Kr z!}I9|0E1z{Ul9KIevb8)_4tj{~z!DTR&BUxV(ngb+i`?n4VSJ?(1W;DczpSs8n$1N_5Bl&z^XJa($)Z@ zd@k1IUN8xoZWCM^S$0A9qbiVxYT6KQGH_d?J70O!LQHMH=1xaD^kiS|>PTu}BteWt z2F_-qZ5qzAIb|BDpC>oQKHhvjw><-l+bt!!Tl*PT9{qRJ16_do z*-zvGOz+l99gSGg6NH~Eg*yFXcJ6R!t^`$X7R2gfDVt51t%@?XfRXrKo?DUU@<_fS zWdrbLiR-Pd(s9voCe#KksVWMZFRIpAmQ5ojeeS7kmLvYu#iLS|H^5FlGi{MUPTjQE zctlqVJ!zxVNnq30(LV$Jd;418Hn{M>qnCSl=$)H@=!>^c_gOr#x|VT8y5sv2X8v=g zZXj!<{=cobe)pmTfqn@1n6cqc4^KN6~OaZ zABP;TrB3im5qhTTzSqMbeqaDbEWPtC$~Z44w0Q!)c`O>s z_r`$GYhXtUwA9Dclsdrnt~N**75YVEWjncmK5chkd&C?C`Kr3pjPQ 
zrPULh=Dl!mQY+rkNCjl}_pvvr6UXT*18Y5<7s@>%CAB!GFYZx!Xu}66vh0*DnA(@R zV2+zGDu2@x2>95e^z+MeM&CDa>Kcp&y@30+j{ejefzK6u_CD-nHt^kj|;6Uv%b$ihapUD@#P5 z8TTLzc&NW@D>oy05C!YxyFQxVl@iI=Hdi8)T zJUMad*?QAT{v25PMZvNMJ8S~IGiocFt=_kmicjLAkM&(tJo)@+F8-L=j4qNcgE^s4HJ@P1;MEt2nm)D$HG_ytYgBFX7N8gAz3cFpvC5wY zw8#9qvzo$WrY)#A-|9Z5qfeWodB~4v%2}}@@;?q(0$ae|KMTLQ(}?uXQUU$rDWXp( zci{cU&CM(5As&O#PHmB_^5O2gqJ5wdCA&BVlUOWz=#|I9k~}bOksizR7riiVx0dSF zJbRSHH$(VI1POYU@!Bt*Bi=cb$RbQQ%gPwfp)0FB{zO#{Eykq8O(CRQr`+;loD6m; zpcGI7<33dPG?xPZr>yMYYdlzQ!8rGAldMxQI_U%h?%cO94Isz#c7Glyw`m>&m(wGd zqq9O%PW1cen7(9mo|bKZg2Mv!*F-gw?|#2ztBZ;wgM&GCKlBAJ&r2-@?I1vY0o)dW z9=E*T%Dlc?VL;UQeN4l%k%a6u!>hB&z8_^)__Vu{VMNBsGZ4dg2D8JL>;q}Vw;?Hb z8$`N(wp}N$7u5gi!cQ2iuBei<>uWkVJkJgivFv+K-ocg1*``qr33?#PI^3HA$uzvp zLK#i>>c&=Wql5}FOWdV?-OxM^m6gt@#C*YNeE z<~om^ShFpxhaRY`J*~M3?4Y5o{{GfVwS zR+`lBtkbYG9jx?2WJ{D_2u&R{yHsXnvpcjW`#9$*Iw8DUr5yIjwK!t<-? z2F%u!-s3B??eg++c@3t9oNk#cx{LST)ORnVRV43|NOoHiW8j*r!|23%6fA8DZDGgV zC&C%4g4=bg-`sy)V$K)uG7Yo zu$mtR4V*L_w{p}#&gVI_cYvrn+{fO+;-aZprpLH-L_v)M1E~YjIgp>j#ZP(riQa~G zGE>y|p(-jb+q3MBP{L4oZCgoyjPeDe^c(HDlZg2;Rj&Kbr{AFKH1#;gNBQ5e0C~db z6ur%U`mTVIj2@)>cLz*t-;UQgS%3^%;OBJ*Hf}TM=`!J@0UIw9Gx)n&g4X+z`T{PS zs5Gr{Q5e4Kji}&j0F7_tbfhIjaFmem-@N|8b{E+s4`L%nK>MKwsRoiVn#l5YVx9P5 z?q^?OaU?e5yB|fb4BgD^eYwQlg`=We#vkPCA7rlB%6;>*BB{I*t#2nwYJB;L z?zG7rjmG#7fgS}HH*ar2H6&RGIAK3eVv?7(n|)U!qWtS_^y3h7nqluA3G}B6rhv&> zOtl)NIuhD2KBss*eq2asBeQxDt+1niGVKmC1EVo$qHtl(vo&^VO>WL#G7t0I!4wQD zV5>Cwc7xX_W&@7yw+XoawHcf8xFReP?x6MYV#0|qC(&*n>^mIyR;%-Em~WMuKQXk8 z22z4B(%OqOTo^&{{zi%4=ILQ$RZYlS>P*xJNY_8~x>4K1qN?f9Ysxld9j3X6?F z5h@?QlhmAX4g~rFFcs9XZ>Z`9x7co>SD{L-*%Gro#jr<5o)OiC$V_g)#&l)AEt^K; z8;9wSeJQH|?8wkaMdZ9%8^N)iCt%SUb{9~-s-ri%CWy2N2AiU4 zn-Y#I+0pyA|G3~YAhIf6_Ki}zP={AtCGCoLWjG1INyK<-=+Q+Q7o4D$6`#T4gg2io zqW7lqSLy+@@aToyiC+_6OeT+x zziraA%{C|RG?=21m@{(8RLT-NB7k5B?E_()VZ2B^6cp?4-+WPoUA_U`<<-uDD3f7x zW1`N792RaNQL^`?E~hW20pOF48?~AX`Gz~p@!YPFfK8s_u zz|WX@tjC`D05Bp#)%-nEzrkjbtsX@CmLHEm3jH=gpy+1@q2c|{ggP)Dn~7K!x!@y3mFH($ruR^LIAzgQm0iQ! 
z{PB1qPV0)kSf&rY8JBw>0uJi*;J++ICGYH2EWft|IZ3>s*D0O(PaySJNnp=fF&mjVV>a6vZj!zDwP4m` z_};Y`6b~lo%+;IJ)^hI0@Q3#XJM}RMI2AZ~iBy*-*#Z~|mbSq{0a?KC^Kq53+}_^49Dmga>zk5k8EiXfxJ*9CfYFw&LP zn}wKL<3mE{EI)Zs)vKDcDokGRafgB-JUS zMYf+iPomvtfDqzAW=rh#yl3rLN5E2}%a)Gyx5m1tAAn>?JB~lQGDnV$x=f&aOHlq# z@xFc$3)I-UV!RMSJMC|Qjw{{$&u?jokAE@1|Ew0I?X|l~sO^wW2-QPBy=!vg_*B>( zBeI>PxR_%jha-ns#UC5(qs!jp9zHZq_`P7+CDtvJ&bZc-x0LoQy9G;%n_cssD3MB^ zdMj!np%!({8yb+#z2&~ZK3aYcjmV)z1lBa0WxE~x5!~AqTghXbSv|qrRCb~o0WX*c zN=K;hvVzf(DxL=8X1kKG!t3fS(6q6q(;hqH+KmF1R{v!WNDRP=92Qz{&U`TTeIk{} z6qK`^S|aC3EzV51OeW-!hzR5(LCSx978606M&EF?w)u01U;p6CzzHITptENAAt$mt z^kYHATAEsl{*W2)#Fkl#Z{QdkAGw51>-+5rYi^aSZzi z@881?X}RfZF1f6Do zMmBSR{vJ!$t#_T!o8eM9JjZm+X8sqqTq}mqoP+dP=WiexkriN5oj4>emha_XDt2XP zg}1mLlPil>ZcGDuYEh`jS400dA^BFAp8pf8A6MkWJ*eVLeFLuxT!3k{a~~W-LLf zA3Uv2)G41*3ACZ?=^q7UGY8hSQ{m;K;;CyzeV8H&yP8(p#1THIryc8QAFia&7?s=n zT|(FBX0BdcoYsO(5E!AH)hlQ<4Kv?MY9{VFd;@`G`fd6jaI>e@7^&6+01{eiv7Psl zPyxM~?r(VU0BIE+iG$7zPfOfoKg1xGi!F2~U(}45hPFLwS;L#?yrSnGw_&)TRNF9# z)9nBsd?O6sjhC?Q8xgnY>g#x|$o3`~#rs}U;jJfMiW!jpL>B8XM7*j9;Z>uHIq1#< zd?PUz0FC_gH^=qgpuXCe2%jj`l|p3dZ9Sa7S1akh5~4Oj)As1dH61!VL9QPB`|0%f z5a0)&6jnEF_9>(pR+yly$MZNO`)(hRkEVPZ3uCvYWrbK5Pmo7-J9Afz;2XaRXJ=!& zCd+9@H_e84LAG6M!3VP4I;@mG8R6J0al%{B$M6f?C3e(lAN6!Fkx`Ob5S-E0I>hF6 z@~?`2|F4r9MTEpbV-}8~&8z((`;kre^V=AfsA~KfIMfrinRsO#ch$9vzcXRn2t-XO zm;2yj%Hv0EGvd>3K8oV8=!X1Ms_;yB{7gVNC^oE&ylK; z|K3mFh*2RSt-g)g65lm}-Iyutx8p-D`u>>jo;T|1jU{KK6WSbz*lA0`11>Z_22q*j zaP9zQJXT$4UmIgat#DP;M?{pSt`#0>Dp z{VBpL(b0$3kW{1ur*%>npobjbJytr}oHo@@UHNyt%S3(p6oD1Q#MovUzk<&MVBWSP zd8kOmQY>B*BB=TmpA^~Z~e zWrOSW3kYOAu$d^~U{^%{PfziuEKx(SRmBLB6?HWZj7echiD>d|>~Oi%y#Nj)+3@+F zX8O^c9Kx;D+$8P+M6LQ+`kzv|CAp}q!|{A3qZx7io}uz&WXTefz1S0f%4)c)dEILb zL5(j2T`p+p^e(wQcRL7U85Aij)FxxDQGA7j5%_}0D=@-j1Q16L5WE_zoRj8X*b343 zWK)61;`K(G9$-FK@xWwpOJc$|7_-bxdhG3p0rt3h=r&CJsnFfmpJ_>o+ZQ9~j2(vb zXqaZ-`7@OuKi(DTPiEnp`cZIwUwpPx3xx4x`yoCYIFTv0JDi_#k zm4Jpw?mG6Lr$aO{wJ-D^dtLCEcSk{L{H=cdeFnk#5V?O{Kim9H^mZcxSWvaG-^<-d zx>{6h(x}3s_$K 
z!ACG9tWGvqod4&!T3h@@`cbtv%~C?`x?k*0MOEh%Vfz{>lFMsYJuz&!U-F?ClXf#f zaqAXSxvls_mKAx#ICh1C*9)zObv9@WF~w#@V8J4!(c|wTN&_d~&a8e zS6QGbQ3ZP}?ejqnqAZBm;+$-hw-ku_-Vy8Wtf3vy|8K+kdjm#@*rI9Mi1V_Ao_n!p z$a0-J2TW1i6iBe`@NbK`Gj-$X!|#rxyFT?DcMRi$c+8mRUs3xii8g=4626+;yNQ+h zHtVGL!s<%=_3&<2SZ?d0USvyMnOlI+g(YVki?&M0r0?$?CAk3T_4nX2mcu~BH_?y% zzXwo)YQ=AJ1;2?AAU}wbzT^1&j*uB|J^#uhR#tE8w&lF1577kct872Ce(}SPp3LEX zTGE5(x-ja|(-+;K(=6V61GQtmH>sDh@78P5E$9jg88hLC^vl-P&p&^jEQOJVbxen~ z+-4G%dM_E71vBrf$1avatcJH1_0CeHunK6AEdd~|V3FL{_@AAQD?!!iXNxo&wI#n> z0n(pSixbr~F1e_?Qr7*LTG{yO{--y;w%f&^K4onT zsEV826sE~x8WDI}X4c~cXV7lwktZ6_&;A3E!9}bX*0TJm^B~ z`S51fgoN)U6~LrIE9O==|7RPEa3h<@cJJjGq`0b<>y<#z72?#tklATw>D;O+epu#V zn5g!!n2Z#;h)&l*mfMm)asMqEQp_y{xsTLB0>M~z>`~RJ-i)JYQplhp5l%lGSsDST z{U`ZX`JoM!EYUce5{nMw?A-ZKDh2IUR`{;L43Yj>y@Ic zR{5(5_9-Gv*zLMzQ+1+(QPe0^uhvwOg`r+yvLl<Y7S-TNnMdur4iEE}t1wVNg6 zbwLPrp;#x0bu2SgNoJOoXOQY}$>%DP(WHgqrPP1Qqgw??1^>(aE7KIyX8$-gxoT6uz|Wt0Ct{fn=ag&KyF+o#;%p?PxgB@{N%T?0^@5o9 zNOlIgV3~MPZKV>ox7y?It!KVKy3cCytMFRdmu*?$k$;L=Cg32(vL(>1L7<`}bmZcH z-*N$Jdwi5RDNI-6$UNqD(!DQtqkYJOl*#;~{69&eS;%WVf0(%ya)mpJM(C(9vh&Fd zqEnvV(GQ`{H^xOK3^ME1$HeEHu+vFq7zRc9)N#gtH69O;uILf?%pXJh+gPZ!dSK*jd?q`EHYtdagSnOp1m7n} zM;c{o;)XZ#aT;;}{NaycKi{V1O}mdhQH}nJ zx-t-EF+}lRg{)4io4HeTb>#ILqj#XKy5-0xyFA>gHT=2O~iz<&g z>*cPyrURP)+R0RF*N@6pQ#WPyX$9Z&jcR#6rt&)74ShEaRIRC?xS_7J`bvDH<`s18 zKrvWyRe-efGmYlsHg;XK%r`4smlb%$g2cgKR{IcWLnN@oO>G0CQT$uwS!x_Nw4-r}DgZy3JUzv2 zWIj#29)BCt_3e+=X)~#%Jg7XDa7;b>KK5pEoUGD@d$CGe{W7C?K$Xg)mObr7Wqd2j z5$U;)X?ZqNHq9Z*=hYWZfzT^UKik9}BvgMP=qRNcT=o4`LWE>@KoSd|@Q>}3j2e~0 z2E!~xq0^@>LQ8|dhe9v9^@N!Dq$LzK}+zm z-;V$8?Xspi^ul9~^8J7PCLJMcR4PjkZyWZIPsd0ONJ93TMlE6@OSVr;Fz2*Jq1#lv2Y%Z~hM3!epF}H2SdGk56 zMM|2?C-JFpCDi;1{36LOUP=Zad0{@RBE7Qy?BF-X!~GyUqu0;&c#O_W{?ke#c19e9 zopMb5WqHKK9=1+%NrGSOVY1{xmE40qb^=S>nC?ZBVK3HH$5+qj(KSBw`0V9si61?QZAp+~&6(^h%beYq69r#~<66o=w6!8Q&&-h##+PIO zJit5(2_7?5;@MFC1|S#=1G3Z#(68K3xJ8wDA8&7rFd;spcH-Xsp!|~yenvJ3>2gq? 
zj5f?7tACnPIj^6X(KPQ~YONJo!+|llX&Ke3imM>{(5oSo}QB-4cn9xt`Hiq zn-pVVmXW8*aN36s7dRbaJPU+g2)HiNk_&eV-=PpxkisNirMw$C{K*r=>T1xp>h!1titH_4 z|M}p-a(9FtN|g*p)NKLO3A#oq99C^w$31Lz*>4|-<+_E)1;i#M@ zpP)@lNjxo!m3vkbr&Yq2!73}nVcAm5QL@}cb_H*n4%qyFn&MW`gX9Tkx~kZ`ddGVR zqemZ_n>8>SE~@^`Vvt=^KJ{oQU#=KA;=aUfCi}uV_NO= zp~)cl03Hwbs}rTOpKVx-ui?l=dihBDy2WI1Wv^~`bXdd?n!3(?YPU7e^UKE}-FLk8 z{pFqs)rQn@^1Qo9JN0l*=X9fB(!-;DjHuFIx_E}Ze9MpM+h&vfq62mBz?S_eNHqIU zd~V>p30^|)$ly>u%t8=yq4pPoAC9RmtIi(jRGy&daBS<&G?_k{1Jj}oPE5&v zCfk%%+{#N=Me|O?g`wGsyWrV&!vad8$_ls%F#2_jeu*dY7)J#J%W6=?J=8YpzIiJv zjq#Rv_5k{iu8&rQ_An~k8qEa8r(BXka>L2~;-{QyzHfL42S0>A&t7>F^uB4LA1OQE z@x0nY@|5iRygl-4)Zy&d2AqUD$AadGz9*de6fE!0D;&FSg|(-eacr*uHu@yqRq%3r z)e_v_*%caq@^J&M=v?wYShyPd4fq@aLSy89xgdhL&A+3UZ34wj<0yGZ!so_BG9~na z1H;Spq38L)Behxw`J%}SNhZ}bN0zOQo>`0*tk(9VsP}Sj7Xk3=dRWfu5qm?=xVqJ0 z2lG=BD#r81*gHCfVwihLZ6iCLO5$%$qhgiX#f748Wt1p_U5`-4c0b~7JsmrM0p+>Mwqh`K!_)penKGoyDNCrq{?}a&GME?mJ7{Kv1Y73F|#93v6UkZDA=k# zOx!aa^5M_CLvtp64I?$MzMDQq?6f)#EpI5%&F#Cqn) zC^}nUH(SH)eq`I5AuRmb`Zv>%Du@Kgk@shZ8>1jP)$U>ocC-Xd3lN7bGU`@dexdpR z^aZ!Y9?XNT0O6m%0`2n!k|*=XO3;`wZ3`PMP{o@B+xlTu&4+=I8QE@6<%{qrKA(V?-i#yR_*>-Ahmt!P z#4o1X6uJyM+ion1wNc3N45wb2eHBZ-SRtz~c`EI;ygXT;Q(biDkzgLt@4l7`G`CYO z(Uu&rs!(}q*ZK=Jawp`1CY$890^SJG=W!67y=XrPY@&)VoAbpC)4Y6BuYvMgje(zw z(Jj)9(AD&|g9*P}ZpRVFnd;RjM%o{V>2`viT&q@08hi<(ul?Bwgp+Subertb7JG0E zMD-tWIsW3yUPiy*XcNv3uyvY<0$@D^YP);9aYYz&AvK18@R}7E)BE!9Z|NWWgc57L zzY$+HqbdM)Y;CH-lpB1Ggyxx>8iFoZV8OARZx%G~jSU8FR%-rgtI+_RDoasNWd!B2tY?`-~K28n?>FyGc?izAnn7N1d=YH3^_YYw41BdfGXYbeETka?`j~W$O1sUEbLmW0@ zSre03PRmd^ofP@&A-Caq4){F$%O-PfhN#VG!Ul%7Htu2!gi34+x4Pcq5^|?OV+F|h zR^L{vq%o95&r@pXP{V77&$3#*gP6rqV)X41LqaGD?Su2@LTD;6rEX(N9QSe$^A z&*aHx238z9qC8Hs`>zz0J{i6EtE^N(?lX(q61qLh$N{I-lK0}VAzdbz)SbU$WgZy~ zdOfYKhml^{M3RG-Rsu_iswR_Q-{Gq75xo%^IcV!!eh$OhWwL2=0=Dsjhr)8E;CyubOL}uM(8b6+~3kx*vxZ(aW>K%aI_NnPJII&&|(!3pe!D0C$lpj_n$WM zVSiymM2oK>rq@QY`(%+uzUz_krop4ALoe3~Z=xpbhK>lX&AZ<4U;kZ7FGKYfdsEAp 
z3>0p{7mcn5ZqnrMjg0YAfs3sS>J;aROcvRRm6XegoZy&(FCVAlib=!U!mssH2}3KglBESR9B04`018r^o%p8c)L=PoIi;h#~v+Z1@{3Bm-|{iY*ov zrwn5fQDBAkmSKLVJ!Pvgot0476otrV#9w;yw9mE04bbVul*0y2gv?*E)204}$1Hdw z*UZEx;HUbg!BDio97F<(vdaIn6o1d=ZGW2>I#dqDkj!o@S%+cMmu>;SGM){N&EL5= z2U?RGyVwkUKFeJK_G~U7XEv2=2u0r<=lRKo7or>tUQ&e z{TRPP)rCi9%eQTSwP?R;&{5pc!)4)lXl9OyP$1@qzDgq0ZCxA1opZrpv)H=VXL$w+Vr2`s>9@l}V3ilp=9K~2klKE92hpR_5>QbV}&z;NF)o4&6G z^FeEwbo*2I0YyYg%Gx0@aoD%1xY}d?z&F8(Z`-lU6*yPNrTVmW0_@pg)$9+l=_r4| zY60j_iAI3~+GDYzUj5WGEg2`n8-dh$J>>i)ZBG$1#}=1}<;C~KXSga$zy}n@sbB}V zvENTtw(cDGZ0apQ%W>;e_^FV7VEwq$9MdMO8<|7-^{b>xGVeD?PbvTw8!54 z^e(@^bDC}V7Kdw%3$K>UnB;`q+Hh-V(pJm#>P<1o$j;r2D$ z@u|GdJlB}Yi{Zcntw~{}S>fTjFPa*tBd?%cjb-e=qr>E9xYI=rza81cyA>B3h@X0S ze%fToz01SU6=S9(C1cQg+u4?3+2>8c{Pb`x@ZcX!xQU^dkX!+B{|pC%EC`uUv^eXc z$wgGPnFI^3MdsHbKUyOxM8|IM8V~ChJ8!0Z9%5Po`BGko4f)rfO0?&^2>#RE+6f=R zD(7e`07d>B!0Ll!l7JihYHxb2^=#nM-;me|`wG0M7UDbRd+(h3@#HoVPk_nQRA^LcP=G7w>4f%a5}EY> z`G_wCzawjrlWn4Cn`<+Y@3!z~dEbRQCY%ZhW~66K8Qg1j*Q@Xl@AXdv)NEj&1Rx$~x48tRaJ*dJ zrup(+>xD&>Pslg3>$WLMEe(qxPyS-|dV>B}py2FUBW{^fUGJgONK2_txPXvWJZiKF z@_*uP`s4;GO<9#qH^>he?=)`df0WAZD&2bZUN{UUZ7*00+?({=EB96F9$w< zXrDYxeC2}|3*0L-0f6o=qL1?1>00;w%}%#%85;S(PXEG4A6I1Zj+{c&{5ip04%sG| zM`gmB%DrZ&vG@0X#^q8&Le}1&ZgBs0e;DGYI3+Lm6~)}&-Y!`wtxt>NDLZ`gii)fr zap+v|sqTv}e6k{T5{TTWgHUHa;P}IBEaca+ZrsS2d_!h6I zv>dH@K*=0zco_JUJC9Rospx44AY+6pMO6Q5uVNq|rA6X!d?JdI*kGW4$52sXX5T?0 zWtqGp->V9o@JkGHu(a{+5{GvwvGGv7K><&JEB8hN1NXWv+bylHWo`YS&37>@MV!^R z9&=2eP|KJAIadU>8jSF&G2NC}7~!{x%A-w@X{Zr@|2h@S62}@H6mq(Sv3!Y1#>pzP z3ECVUd;lXu*EA!vd!AUCePhKTMD5R@RiHKtGL-cPxdeabGbK_*tz{MalHwkd{miGB zjmvzNz?><>TzJ8lb@$2*m9OpLe&Is7n*es1xNIZDHOF~M{R{cKT64Vfcfq^aT+1x1 z*BWoqC-RjBFNB2ewYNF_uPPNJ1oKBFbfL~3>EQGg)wZv{+W3#NzzDnb8PS^K_O~H} z@lk6CxyMtdfdfpxBYtbJqnEd#@Tpfqr=$I}$kRf-E#=*=jpOI&oyx7=;q%!%QT?_h zjb1?L32ino6d^QF7vVC{tgEO@=^2(0oN$>ZQ9(WBMh(uQUL~id)F1vjWFKs2q7$Q> zZZ3QGvF(z3AX(7=0q(fyBI?+-jSQXU@$?(@WpK&Yg*vSf99OwMefDBEk}7pEIoTou zO+P^QiFthu67{CvLHsf@lTV14_=fVng9w*Y=MJHJvVfbfnvPZn^_+UHJCJi 
zjuLO-i}wMZzl^{QyhF|(IQ-gaeF&N#5z?>+*c2pTaLL}QIr-`CA$4{uM+BeHG7;^7 zMp+;?dUSp>v|XtEY9?$GF_nh(<5=A1*VSvsQjjro1Y@Aq%=Wt!e2-vzEZ?6m+QM0h zQ?(l`x@tY$M(^#QjXVF_M>A8aPt$tnSkhXV?zC?~>n86=giCg=f34eNvFR~Y62|y*Ck^mJ#L0M%}Wh`Si0~-cxOU~u6DCGXzY8-P+E~(B1T-p~zgUxe; zKN$UXG2T3NFp@QaQ2`%}k4UOiZ%L|*RB-6_bJq3K%p#sBDR8viER~PxGM7mjjP`!b zwV-Ly!DTyJf2WEe(wXJ0Avy^w*aQi=r;4+xq7OWDtM&+Qwl@mLOghVoKfZoXC{Qfx zvh2VY6Q)mdRS+ACK4n=M!%XY_u?f98)T&nBBIQH{J$0mZTd%2SX3x!-J;n_MQ9OX3 zjg#^fexEFgWg;Q5vj}e6AXF9|g?2<6?R3psTa9FYuL%vV|D#XQQ!p^yja4sOZz!`n zS$%{7&e#|*I#+2yMY36?qtm_QEDvHo@_w1%zHyv9`covw6uOzkLQ^6$eBAuL?_{H| z%?kxRo>w*7zszUoMd7{Xq9oZ8ZauPSB0F4!1}xuMVz-EXOXB*yt>P&-iw|);>zP&m zB%;)#{AXbjt%eFF$+uiy%h>Txvfhp1^3MK6_x)rG&ji_698!V&lo@nf%pAaU(^#6h z%dY_BNAKQn26~^o8vHX>X*7`?*X9u#zJv4DW7bw{yoI1FYkz?3?V(c7wdo+V2>pTL z6a{QgKltvwC%UjQ|OSNyc)Q}Xj@tJPNrv7X|uxe$h*oO_OKn|tx zIAVzRkCWJ3#6OSwK|*87{tN`Qz3pz$O-^{rzq^{p2kZ0WL>u`JMs^=I?z<5BQR+4YY`zFk%PEiQQ{TT&5n zLr<-HAcSGThjxor>=JvqdEfCR2Jy(lo_(wn1$iEe2fzW!?7M2;qXN)@oq2{vD*FMc z1eLH(_?gzO!Yv_H9DPfW3(k_N%AQs%GA+Jx#gcHKmVIQ1>$|Kb`xt66e`G92$9e~$ zWj+~yc-u11Jf9I%i_~5Em?TtRC^cVg5$n!a>q(?_>{$^ebbj-$M1-iy)aYwOiRHu) z-?{X0Bwmh_=&574tfFiY8?QSHfvwDD8P@N3UPO?mR26?E|ChoAudb>` zbhiQ|RhB!kZY$#qOW;f?x#%mA&`Rdx^C;V0xQUw}W!XzAFk!PW@_O;W*kqzB`O$XJ!zZCZ4~VK9f50~JGqwSlRDwL0!&yh#VOH*52$;VaGCmj8jfm*9I5sO8O8241;pzUqpeGQdBIc6WF8 zrxpa+|A%5TZe$M+63|)i$g_hnw(vGeQ)lV4172q1Y`h|6)~@|GR0#YM%Wt1#f6~{* zzw5cx)WYlUSfcfB2XK%4My(d^^`vV{?(gUtBHZ|0m@{SE)!XS)r6M*6tL#?f>Fj8cWidHwLnV zGo1Ok#LAVQQ|hMX{L;tV$*v{DH99(u1GJqZ22Et%fv@q-?CHDnPva@-)%V2lI?LlD zltl{Rvv$`adz2jzi51R^!mhIznUr@0nGnN&jwrnH+Uyi-!~Rz!3D7>W4ZHO^QRTkY zm!Fa?`^wy-+*`zg9<|ht?OJ>ue@4)rj{>!w5`AoKEJ)<~xAJ_acHBrUBP(O#l;n+9 z6gOFK#;&SI6T!#UB326+64e&O=Y0${v5Dq0At^TdJ|muSvfv-f@(^M)acZ|@EBQgv z`EnaPftj`(sW^qh3%ToU)A*ikyBAl30)JU^Kty#D1_fM~)EFa8ZOZ5~YD5JO<NU*Rz=WH`}*I z`eF4iL1{{ufw`J8-$F-Hhl+^zF1vGj-N;<)XN+9<*(_$5^ z4~fmWtYbFaZTeds;{$5b9UfD>bm(s5o#(JJdxA=fwiJKVsYMU+_(!uC!5N6|sTXSU zZKPUvm5nfAEYZaXp(LiBa 
zp-Fd$6Oh-^1mX@cGDpni(&425QiUePpjA)cGI|d{adb#l$4M0d^|P(vT_EGGn@_E)@Uo?Qg#+{~mW6C;=0+I62!EbsN1WB)Q%ip|C*&bR-by8kd$?)JTF4~Zf0 zq8mEv)3nfd=Z}oRrsOn%3o7w@h!F!SR_A7B6fzn`KKN8;vK``U@KZZ-ntt;|m`)-s zMmCrA?)FS-^bA*ry9xX9tG*UN8Vb*JX) zS!F|;2%X!@SCdbO+%z5~nC|15{Ne53fU?=me7rl-vcj?3AQ|phw(8IRjdw9RMGn}lTf>W^TxB@Md7E*rxR>y~x6&y{<#*g%izgT5pc^BETAvZ%u zxS#I1BiooYVCFQ-@JnXI#&w4r3#3eau)#ErY zL+r@ALrt-TKOJAua-cq)EH)193Lp6PDCn_FbUDysYnLqO$gB#--@QE^-hT7O_d)3* z@X%{WO8R-yW1c}<>488toZPN6C13%NC`QP%jRWaZ? zN(b+Y`ivpJxLa^ev&Tg1d3!9MGe^hou%ey-9du2f_Ik=sMJz&cHOs0?eG5x%Clx-; zmd(Odbm12ue#yDtQ)Y-i-exbD*Qy}Opc&MX$J4WI_}NYxVS?OLO;fhCp>98;(<5dw zf`z9N^e-Q+gF|fl#OAFPwMCH_@Tx77$jf@Q;eiBVTuxYm>+AgAOL{I;IS3{1JPV_A zj*S!}_kb4k0Or(n-DCJk3>+D?m410V!GbJwVRW(bO`U4FE1jzNDi~r)j^B;G2D9uk zEF<@8UC3mnvpw^chNr5#O6l3QdLKd&q0)Uf)5jASr&DLAdI7AQ>b$;q2Q^gTwG7vK z=y2u7*#<3h@fN(d&5k;HRXyLC7;oj_cAJE5@ns}mtJ=%I0$k39+p>d{m0$cDZ!Dh@ z8e!+?R)iIg=rmNQ6!AI6Xrkb=E6pg_P553Pc-`rZOSk8Ba`INUZi{R0ugl}R%C~T4 zi@~IqT!bD~`idrZ(6?8)db&>{wKGzz>Sem(;4E@C z1;?h)dvu|locrJJBZ(!@hr$lKj@OJ{sBf_Tji^1J|q6c<9i}Ua#ua;mr*lnHVU9y6|piGV5+M0{Sj! 
z4SIf!H=tSBG#4-&!$MdkW{r~)4Zb@k0m#X8e!FG%fRUr6COm$OZ-fDaKS3~YT7Pk( zLQUWv48?hfy!Hjjh=;5NJwqD%f!;f84ZW%}KJ`~z?fkI|v|#=M0#rH4!#iohE)zi7 zjdfjOYe+k#sOcqfqKF%Jtw#xUOPeRIS&!Hg$wv5Jyk}2Fl2i#!Kd&$lg^`D^7*Lsz zjt^1##9-kO1qJikEHfeA>Co$~^I1Fo4usvw7fk~X#@&Y!hH6}W$q=mM0R5UY7^my5UBW6xpj6d3Uf&xEa1#)_w-3V zI*a;kpY1>aa|ilKaq5qgR_+&xeyB!Y11 zkAo+PnK@-c7Tj(zGJK?wAhCmTfa7YoeA?inrSyu&P$I2Y+Al$`S2bt>)27Nv@a?id zhFIDWOzedUijMaU=E+|+N6WsxAFT*u|8nQ6S@~zEC|-yZPv9f>Me~m`V(^WDuHA4$Qtv&zaeoE3QU}a zDZs9X$BzR$aUT)wFwER1ErZM?dROpGP)WQtLS8&*z#p2*z^Q0KnTYfPd}RzapkPzD zy)^0W*Tupr9Do0;>Jfy5YKtLwrzifsWsfl!+Zl28hT%cB<8lIcQfuX_J$+zCNR`T{49JPrGHd~^E+NHFE4F8g zOHhMOO7nJdhhYGvL4n< z0y{JlU`T5KUbT&P0h$*ThS;K1OnSmdmZJ3lYImDu|0#DI_H&_tGX6YU#3tx@yR-+2 zvi|7T=4*MAo>_;~~=_fB3RF3XL0x1K-i z0R35v6H9S-JEIyarTcEF!v@&+R3v;hxBS*%^xAOeEV`otr|@A+FS9`fGN=yZ{DF<{ zrF0t#;i7H$rm51_4Ac_=J*XJ*2m3t$@%OOlu45aioHAdqP3-tyv5-R(|FVu=-x9}v zFEZ-CJ6mP!hc4nWH0zS&M5$HG#dj1VN-L` z#&hle2N&DN;{vEj&=;1O&Qq_ z0AI2T$h75YFYSA%{1pBuQznW@Qs>2>7m(G^mCudaMb0%gyWEN+<|SFSO+@9U@B6+Q zj;A9XS z&@5TP$R~>CT+9U4G6Y`|9BDXd^8J7L$Js)g(m@k*ky;B72@9gia*;*tP9GPO>!3Qc zdjD;4rpUJ_ucs^*D&OOFBsR6Vt` zT%CLU?Is57_~$kxEG!eR)+=#F;$yTC;|m$7+mV-DVotuVMengDc(8-o=DFnmi!A$; z*MFHRka$!40!Y09yz(NN8>5uQMy3fpFKJ+CwO_*5P_@rD+X`c%rC7 zVFMXaWDhWXsKSW)o?4sShb4BdZ `k3xvsM1rkWKn#Ffcvl0QRYJas`qN-J8@G0?u`+% z=%>ytdvw*l!Ey1AKP-8tt&r?U%hVBjNu?5A_#cIBTza$+NUj!Qba(LcKix#XU4IdAXOc|KV6-k zt-+M|Am)WQR)2!e@(6{R*Bc?mH;ymk?Zy&}N)sSiICG+g+z5qEi)-^$5_WXd z!YEA9pt9prfyb}omZ@kygIa;`s;?8` zTiIfL0`4x_QvrdxoI~zZK8uVsa6h!zm9!8ql{Ds=dCg4w z3D$)nN7dW>XNQGOg*MU4c&6bAomJYL#1B9$1qT4BW%r4<|~eJG88=5-Xovrg|GYjy--{&h$I{57x9Uh zhVG4Yq7kY`F+bZ1y&sspUnVk-TqsHsMWoWb4_9iu?4%_x`)33uLeutfM4=2h!B^n9hH%K) z#Rb_>Hor=dOf1^+uzYl3DT52mSJJZNcGf@>`PrZE9TY*yXGK1&Q^U!AXxpryLs>VP ztdFd$)Lpjr2-pnru>*T0E0S)iSN%rrOA~n zq6?MFzITU4=r9!f(xocU{xT}5@1oM($|%DP_!C(;(vK@>ew?|!P03yLC}CUsc1=I% zY0IntK{-vhn|vm_=AnAH{}tgEdYq)qtjvGIU6cWkJoj1i(J&ezRD)Dy1R#SVC*e0E zQ*EP4Qzbe)qX%x^JWT(;5i+)Y3T9KxKWvSuI(ZGcxeb5*vB&Uc1cbd|qyHr_ST9KE 
z>Bm*fA{G%^QK-dE!&WmUTRrbrl|W`cDCaer*?N%EI(CUY*ujA^iX_~*dsV!Ve)WK0 zlwi;>)5)qLm}~(uflt+M1O2UcGYLv@_NXR9Pu1&Aaww}kbS+GH{=6Lu0n!k2v0o

Oc3}nP$M+YVQ*+@W={bJ-W#q-WG=JKe4K4I8aP(}+lHKU&pawO#joTy} zTKDS=$q*Om@NIO_+s2vipoL4SSbCV^Q?@#0G9LNQqJV)mTX2mE@Ol^lUA_ER`XQAB zCmm{u(5O&FQ=ZRYxTB^|EP}70_9*+r#?1jwF(DH^^zD~krh9Ez6Ezrxh>V0IQECcI?q=B{e2>6eedXi zt_Yoo|99|N=RLOQX@G5K2&9ALk|H*;-Xgco7;K z@&UT+Qk~aP!n9EKyLw%e>`O+Q5rP!xs-1CLjI8Qr#8|2OMoj2!GxS0Mbz`c@1p=N{ zB%ZY6&ROIKv4O|7=#wi_%~{msH@3d%rb2cVn$t{}Zv$OUg6gfn*UvH7>~`K?C%Nm^ zhw387B}3&c5z!HpLGC zWhn#tRu~^qJ<5Ck6qUR7ir-vBwn-IB;j@U0@Oy?bhd}qsP|WzoC{ye!mU3hkYz&@j z3hrYH%!`mS({V^=YA~ybeb;$)dXsR3juu+uGQ#dh3M$*nXU$MNI6d)GC?#07k@CBIgIX_CA8 z{Z+g`v_jZGufD_06uF^N##)v3WJ38S&c{NgQMBi1TBYb={Xz>T52LlE>g_oFR}sq( zBN(;YS0MJ;zkiARv|YXaaSz3LL8Wwcr2fU>(+1WU4O&oT#hOCN)sW=CcdQHYGw=O3 z0?Ga<*?PrK3bv#qx}VU*0B$NaV#7|06UMUN)K9cT+F4vsdX>fm1p7h}{I(11X1fm! z^p=M-L67IQ2f70e;$L{Nrq^~sB34Zx{eJU0=~W*d_>Oe_nOB~0?Xh>@kbqB_6_KgG z9jAW=(55%jH6zgi5H?)|`Xt-z7-f1s2ol~|{H|v?v=MhcX}73??HQr1cE0-|Qm9Jh z!-rWV=f=xFKxJ?Y7s`yn2%IYp$jrCXt6l zDqhGSbmqs@rq)%5O5yY3Yr&6ep@+_$U9o|%A8iCy%aNo$N()E&N(VUG7Bqpht4WJz znd*_R9NY4@R3k3T>AWrZM#v{efCKmh=k&O~P^+=V$`b9u^>%Xa-!QIh=gNjHO-ZoI zIU>Z!&9R`qbMIr7HqDLmdx0nwFhaHD?iW~BArg#M*KQ5}S}MM7XuG>h;Oa^>9EFxY zBpha~gad)I5F4q>1mL@~It!b`RkOA@lu6f{vW{-QzHk=PDYD#^gTHbGYZr6BxaTkxXIzl09_tx(wyo0hl}-^z8Kh+J>|b%$$5TSZQ)et zs{Tt}uzmWspM(ncZ90>`cofsm0Bw-DWbw#mz$t~y#3|EE%BA(I0K5#g(6M$TFcdoG z@lyjO<{f!`wyjdLSx$X^wPjGVecVz5y`mGUxFVt!bP9-|5|d?#pvhmCLuWzXr)S6X zsuEo?`plHs`VGxHh+~!njjmx!d|6<}V^n^c-XKr5XG5j>ZXxnQMA)k6G}+|u&PHk_?=vIGs$!DCL6E6mfE)BWbQjHSaznoLMPRY)+l54CGEsUbMN*NK{<|E=sRgjz^ z9d32=f=;hL2iGOV0r$NimE?)EEmUtGJT0t5w(T0Lnb zT3uU!W(Mo&#|Zv%%Gcb*=mHOO}>k%)-yuEu~l2nf7y`eTY>|Tfi-wn_BwX?4uV%qM5@7S zN=?!@dk0!9xJgJIFY#nGxn($Hzvr1IJ-7fu{$MQP&xu^~*k!r)8QJu2=*zFzf<#rt z7Gf_>5s{{6Pv4vo?Nua^@s`S1Qdxa|_3hV>_(=Cfi}dL2zTdqs1yx2M6jibkpL(FI8>`2uQ~SeaD(*X?ont%9>c93T^sd0hxkTVLdr zMtB4=UpQE5R4d`3j=gLA7Qox8eG$-$hy4P%aF>ve0^T>al%*>bDX3*fnDeoqGW1Mo z=>6rf3HTr(8RL5ACvlBwWX3zIPSM^UB%di8EKlAs_4{p>+uq-OU<8!a9l2OgJ2cx1 zU)YcE)pUOg#7r9ya2bblXuLs{+*_yPWw9dYJLHp!!UW(|4J!bW5YW;Fw~+l^@7+np}Ez{U?aTYN73y~zn& z8$t3c 
yj-p!&Y7I#5PHS((xX}!Br~*D;mW9P`VPF}ZG9jX)u-p(R?7e|in%EZj ziBn&xxDDy+{01+SY(X;NMUE#@BL&8l?!df}X=8KA?h#QE`^uVmAdv~v?rEy;ME-VL znAMz^r;uVVg|gLTH9v_KBS%egLlrzlm;&H`n5Ht}f0 zFNJk*Cg8X=NiB9C1asUYz4aA$e^S3Cr70x!KJ`w7!^pqWCB1Q^PUfj{OS^;U_TP~ zn=F|~bO-}UbhZ`einLx!WQ5y+D(?WDnS(Cn12R{X?$D_9X7p#k0R=N;IEE)t$5ur) zGB8Y6e)MS)^`j`Sc<<4ba!;jygPpPzoo1)tSPyTm z6N-I#thnFwIIwn!3Bu6VW{ciBy!GURNc0_D-*i8(hmH@d0B7A`o)hoICUf8b6p%m? z_#mLrOrVXA|2N#_;9B8kC#JUQ?LzcNC=qVklszY13NFevBYmfsZZU;#7qAu5O#Zu@ zP*z1P;OR}Dxjktoqzmo`-GJnt{oTYC7sAh%)Q&yX8hGDv7Sv+Mr$glG09;0L?;C^$ z<2eegXUlvk_wHSs5BKlx4?oVD2M!MFSMutYDc^Ic9{TP^K$T;yMr%5F(4W^xRCMGz z{46A-9E?Bvc~FtS=V_qNy%LMau|x^ z49WquqJGT4Bf|&5dQwokB`Mb{@|zpABDE|(r>;;0(CtPFfYSd}6AbzHEQ$)VE8AmP z`6qaZcsBj>sf+8w%zn38mDj$XtuJyaoCpKQ+&v5wvf@EXjuMN@8fP*$GK=n38;fB6Te%1ff)IAy zVZDc2!tY#jkMs07zrH$Whs0oiG_GoI#J#RR5?Y>h!nFZ`7qVqTPOk=xfL~nqC2MCzm>#t;%t zpb|oz4>AuRCu6B9hzb_H+;JkK+b}7kh7?_a!o8ZveZ0$v#S>I-@lI6wYQ~BZ%?HcibKl~Het)&>!L`{R69mkEDFE$c zSwQ8E24GHiKf&~-j@)>Y$8Ol1sCEooZu|4hnG|C)fkvZoYL~yn-(sqP!3cTL@lTf2 z3G6!2ueHjQ2`Kr)KEwtZNdtw49>k&tu)I2}$>a#gGSJQ70=m{b(-Dn-GjfabUZkbJ z*0N*afrSCt&!CU&TM{})gB`M~w2?fgPnam>tG^z)yPy5Ovg|t6+bu&cy(iF}dZCF` z_YOie2v;9zoA>^&wZZsMp|1|ZEt{nI8^S&>I+s6xW+O$ z-eeYNZa@0@`Z)Xt$y}gKw@L6?q)H1C_&9R0UCGXaBt1hCAn+1LaaWJz`YsRkGYnlX zNQB%TmwR*z<(Pkk;^l*{uzkF|yrRA@>^X~mhu&H0ijhhNVm_3Hn8W8Fn>Dn9takcTUs z-{MVvG~u%jF5-8rQ_w(xRUySLkBHn+F+<7l#i{jh{o@=P8+_8-)KGXXpSG0A;WZZ^ zOb8eqzWR^FAGy0OULS>eAO$^7aKkCNf8>Rwvm9%dw_OUE4S%`J>^yMKY6AWz?hB&y zft1Nvh?pdRK@YHFRV1E{Nma#Hrp#)$e-22u#cL{N(0P!g%vcI5NgZw|i~`UCKLQ=P z@^@S`Xj__Buh52d$ATWKk#+7zh;(7L46;SCM(DATTc1EpCRXQ!v0ue`r&RtgM_K5TlKGjoPhXCeh8qUsH2 zfF5n&U&sv=yb4-C>9Fgm+x9siCT4IC1p*F2>lBX!rvM31J_0Gn00Bz)AbT6J@l=S= zy26A+_Mh}n$2Mf}k=MC_5yk|6^M~N(59Q7}jdwlVPF~&F?2nNeo5nk{C`E!th-4<6 zc4quCix_Nv4vz-hZMmnrTCyRF(sNUTL{$%Cu=~4LDpJjDf@=!N(z{+CIZ_+qqJ6Pl zAtSNrVxMC`Z(8j6H|1i$u`#LJgfZA?K1ost4GIX~vFK(3t-@JARl6oozHC#2Z981v 
zdJ_T^X}zfFC+!2n;>KX58o7^se@Z6<@BLI@I-XC+4bFIRw3GrY2ynJ;+rZf`?>&jWcx25>YA<74D@{OO%=w#^4jrV1~t|%yN4P=@4n+b8utR`fN)m< z@^)nch}fIiKD6C9-^B!%WYPd{goN-h;D}*5Y!9@wWRthBB(ArfeZTiYH{H-k|MhJgXc*G5FmI6i-g^`}LbktC=6syVCsC$`dUtbi zw(WVZHxq^ZSvc16Ho3k6iACPX(nGqGNi*@HP6;`nGmA2N@)6R^hfn7 z%S&waBHrm^p`V`)BZANtn)?Vd2}Z!u8y09YE`1uAM|aB#&`UQ)mWZg6ZNDV|TuN(g zJ$yu`w>scA| zhN<3SJpAZq0o=F?zz^aO$7>LQ(mmk{h4^J8&z=Dxd$DX!1RT!_0c-#T*IBRkgxfj^ zP?#ps0Nr5)Fc5>mY%FUbT4ktwAbjzj@4!A4ZG5N`QkOcE08(3w)pA z-<-3kG*8eFJ3Bf2rc*PR)*_5WH=P$0Oh>dKZEbbSS8tL2VORC)&XA=_g&xrvtvSC zR&s{G7VqD-T@5^^06Pg0c-gYn+5(GbTsCC?zm}&nlRpdo&IbNuZaVF_>Z|OA|AMO7 zDEGG?RJ&rR?-%F`hX$W&0$&oN`GaQVeOv$!WBN?%uTCr&*&;Jli+&Olx&3mkU_gz% zfvMD3QgG$F=gG9bz|+L{z8L6nkiC66E-gP*kJm{t+$a-Nq)-j zE1g~}3(#`5$Ew$`&GP~Dx^n?wD(=c^e8RLN_$h$|leLRGfJc_>4j({BIL+VjF7t9H zPkU5HA1jUdq$+;YS%6c%>~&mI`UNdF&49W2qGMfD&j|VLR`f$UZ$FU0b*oju2Eu?+ zHrbIVEnVwe@nN#V5dKdhqxT2qJJ6b{?Myl}fT$c&U-ix9opgf9Mrl<1fKIE|Y6N<> zSY@CaBbl1vWHi^q$y&Bn6R;pWCgBE&Bx30|XaB0#pWMtTXVoBB<{J$`4wVmC71))n&h}L-{m)|{Bgiov z`NcaM8352(Tz-he2Wx*C@6~WN<=ozV%h?de0e=rgS(E8A-7EJ$-k=|9BdLwV{jJ^rmxzY{>OAj-P zUk`hSI`Kye%iEfDxTba`_d8CGwU{;n@=YEW%U7S?pVmGhN|2NC*%MF_h~ZHZXaVbs zvF9BrJc69Aa%@MCMd@woSUpAefT4`p0)Q=6(yZz&&~Sms8c^=nfB8(Ptz&_c{iFTU zmj|S|L24iAYgua)2gdHO!X{$UyFctrtTgZSW2KZpcmj{oM z0IFi>1LEiB>e0FyRA%^glubiKes{(0MIy7#yf?%CJITm6vuEfoN%-BcLhL7JIJbP$ z&qJVeN6%B=CZ7X5Qrsr~iSVTb91pI8_&uqq96g3} z;-~a0^ZsZ)dc0Zk`FPbMPioBXqQmq^Xe#iqd|A1}Thgw)Pwq8u?)85*Pt<9A8_rllE}g~QUIx`q)RJdv)s3@+wkCIPk1^R!7SYOU$Ki^&F-y|5fq#K*dUe^Z zz+S5`qUgAgky$G*f0Zd)tBDM`^CkrXzR-44iCFsmZuXfPHHhB$>xNBT%O*^xv*b+Z z_E_UP<*8$cRG;;x+J>r>xS%;4Rr`)8(TMcYlaF(t!w;EcVtk0dhxjE)5IPQ2*Uu}o z%UmenHSCwE*xTY?LWD(}B#678wJ^hxFiD~slBjKgl<02DP~T^@0b=)a6CKP}ERYK%rKu z{s57T1k6U-{gw>pLLJP*sC;+Q8Qi?}6$F=RMfKhm@V$}w%^;<1V?i6MGU{ve4r6m@!+usM_+Dv1JC-^k6MJB|6tbT3s$Q za^f8>5Ma1>*G7@C2pjM0qfrnHe35@5e|Asy5}u2kwh;Qe{c5zzne~_gp4=U^Cw;<2 zNohuEH6Q^e2xNH{!H0bLjpblDzpk%1ESJoWKI@yxZKLm|j5BeIi<^_tHN-Us3O-qa zPbr@3%fBw-!TuFk;;a0c?({Z~EpV?f$sF1H)cyICO94Y{UC_Isky~YoQ}~J| 
zYEjBJ!8zmYw>?|`j`v6^VbLbNkh8uMoG$+7i9rUl6W3Lcrx~;y-+!i?rKB!3Twly6 zY{xU|vVgbYz=H^C-r4<0CakH0l0tUI@sS`YrP7GKN}5GA_DSvbFLrYHIov)?1mEuF zl2HpxzVf0x!}CH68m`HA>v(Y#l#bc1!uHm99*sXg9k(Y0kGaU(mOoBr`f9&lIQ183 zJuWx22E%`-G*n=8Js>(9HM%$r7}=*`rsD)|8joYn(7(k8(~4co!4r8Q7C#eh0sNTPrcCyU6}?YfiWwJo>^1ZTm*h68E~JY!f4KT6(aa z!Hz}N8I0=L-W~a_EwWAz6%oltgb~h8WwhQS6H_sC`?0B9aI~E7w~~2^GDRWQbnq>lR_y7B6E0FEFFmudUi*>bKwuM{Z3<2=nrf413~FikiI0_<*oI? zxF*^5U`+El^5^kifurXuUIdV+y>>p2QtN#PU26~i7-S=)=zhPOdu+))iW)fvZXo)- zx6g4Sg|u1f^gE3ikvyjKEB>=~#97~&2~zdU=7cL(`E_2LL^=Ee!Q)! zlZ1V@bXV@l4%#u71I@wlWaGGVYv=Z1JvxBz9##jRWa&2@I^S4Rj~`qpse(rI`nW;f zp48UE(gTowW&|8H?-{64v7iQgdU0(8tj;mAso2Eo3SuNm*+&SCIq`?oJL45S&Bh8T zIBMoDbY*|BGfZkGiz3|kh10JFH&`ldUSW^{xKs>&uk^rvm$&+PzKv;diR|7PJi+;Y#Y`xO(Hh?qLPPTQ&)A zO~S!&I=yv%J);O9=xd-7;%+0)>NoppCqdFAxJBaGd2U)(WkHCrG)nGbIlO+0D~k2I zlIg^*2FS6)jiLND6#M%ixKPP;0%4a00qV_xg4c_tz`Btp+g<|c#hHdLOq9=xa=(tp zGzTB2FHM>reSi9oND}i_8fSWG&^RT!iwaikGi>Ku>A@@tHx`c|MKeJHfMf|7xG>}(!hs#@Z6w<_&QVg zIGsw_Bi)m7kv2`bmDqo8*V;H1wc1!v3P==kERILI;?PTEHiL6fG|*i#8=C=%2x*c=vxrC(=gps9n3v_kU;Y1b+ZV)>UW&zQ}@!fcUyjAWUX=w%!}=zi)b zJ_-pMJd3jAfq}7LwS8`$DD302-}M$cxXn7KB0EFP#UsUIU>n0q@v-mGAk>1yLAkH4 z3s>M0vz-J^@Ej6NpGxHl*Y-g}dkd`J?&FP!YfdPO$DF5eHb&R~9YVXk<`mCyi1t02 zW^}O-XFlzw#M<$=nfB^U9gp_mOAp?gT`pZ%IwS>XbJVoH>3mg>0U>_~3flqotQA_9 zT@9Twp~9eK*Zj%M0Ux+Mth@ekB68)}PjEGg_Ei3QqXX4B>wM-65% zOQEb-Y<#OGn?FQ&cVqKt-NHzeT+pZUwU#!aSqp78-W<2#t)W-FF=$dxXg4R?5W7F* ztD)Dt@%iCfQoCj}gY?SxQj*KSa_8(NixwTYJ+tLRfP&4$MhJ0@_7&QAs-=F~S~D`X zLrrRHnt4vf_H~%Oo|SyYF&*ANzff}5=JoHr}Kg-S+H&8@zNDXA5niKDckRryPFQpxib%+Le?%` znLRO-#(^=-m{%OOOTTEZXt*tLGY|~cXDG^h;3ym!Gqd#U?EGVy^%Ufo5}uqinCL~E z-{h_tBt=6x^-{5<$0*c{Aa>H1>HZ$k;h?6G_f0KvHps(I_Vh%rA11EwgPMcj9mI#C z(>5lT5)LCNYyp-+OQV9D?nXD;mc;rjMcJytWe;Md^U7cB$CwPSd-T{OXjWbMHyk!1 zzCsORe-Fp+j0LyoZT;GmykuNhpdb}_8HToh+bA`@?Bk!zw=xlRwQqziI?0E(1xY8M4IvBO*bDoh^ z`!5j4Vu9w<%*_TbzOKgzfL1S96yNCUq>wI73XP9W;eG1yZ<>UQMpUE{@ImX%!c*$E zT`u z|9IIJ21Ts&L?M9e+D|Dmt~UvP3~3kF{3@sBM4;+|ut0Ns1VfF9rPtF2!P1!02b_q{ 
zPO{HuNUsyBErDq0sp-tr9H|gt#5fw@^0#B|85IW4Zx!(078?s$X{5% z&V#x4wn5gG;mS+RKq}#s_V8aox4M^jC|t@z#tOA!gSFJB9#4g@c_iwLJx%W+EUV!A z_M~1ni7Z3Uaa|tL-e9vMZK-9!dPm=8u{TJ|y6R^ui?Ktv&yxQ$mhp-GI-LA~zau5KF--XBlw{QF+|d+9i+ zv~qH?puN4X*~^eJL50CbP9%i9JcBv3VJ`B0zmU&zN4oC2+%*KhfBsUL5&!t=mmRN? zva(bm&D>qi(xj^AS(8xQ_g=*M&tneg3o}etO~LP%vgdSC*y{AU0RPzM+@>4eYeg~V z%=al7L|?^Ggesq6XUkkbL6ICuEolvq={-{Kv4aC>EWh!eM{eOde5fNt3R7o%gW9+s ztXbGDmu4;v1$v=>$~-Ue9T5g5^tQi2e0@QA>(0UOfwNGj(Oi!q;fk53?&k0%4|Mxp zy#&Mdd)(Ys;W3CP>`UuDPCEa9Ue~ewyr%Y6eDrcnwjj>NU*3{o{g5kA1PqT$W@vi5b9=h;&a;hYt4taW-TiOK{Q<& zJ~^K>ho3;hbnt)s`QaR55aC6G9f3>e@Dx7yEk;EA?6;Uc>5nW2s_dkoydZS;)Gn|QiF!J zYY0O2q|$p<1=R7oWQ=UmOIK>yHb+sSBGwV~hsNo&W4-1PZ~|SLXwk} zGN|n0q-zQWD7vg~a*wwpLpAP;bb4Cz0ZWPpNq%Ny&I zjDOaw)BLvmXDotf``40dfqW!z004&1u#pN#s%xLtKrGHC1N`}bDvcs92y+-d*6$JU z?0i$xGx>!4r-W9foRP@4ZR@bMv#QJgu<_h+X_(~E(|I0z+f%tWW8Gy*|13T1nt+W| zPdfU(v1J`M{t(mtD8b$k9OYl<1b3@1gSw8k0|^jrC5wlX$eJ7Wc@C31li+Y8^)zE}@xnud?fMBixpa>)O@|*YAa)XeACqQL)D#g4Ga|JCffN?^3<2 zc7R9V>za%x=L8AnOc@9Mnf#z+MZI`+vEbcXE(WhV*f8rB?q@szsroH>@x&`vT}@og zxVFD=*Tl1IGoJFr3@cQz)y1<-p6fYJy>_1rn?E!$x^f&bD>9De2$qwqm22-f8dV>B z(2p`$H?;DLEk2ug{%E1V+J%b}+9Y_njiKR9Ohz%>UV96~)r7dsPL`%^>cTQ;2;D{* zrmbD6o6q8Vka~f5sW3jEp|b8x{jI-By|qI) zlX^s0Pq{~RTjb=ifS5t`O0SCARcRfxora-C_7c)$^)@=r)u7U2r6ybM@}a83)zY^^ zGe62X5jliKER3a<$vh(+KQ|8o#*eV z$g0AeZ|^lAUZIvKurgD(_pKjGwouC8(MCk<`20f51Fv z!GrD@n(MO>O(9)dsdk=D<&f2sE1$WQ_gH}p>xPoH zBinR2$2h7+B3YoB4{akaK}MZ_np9ha0Y8FtUQy9~L;?er7Xr8iT)-{BXk9ap_EWpx zZ)c1S<2YYdC|6;IBu4KN5=aRC3U-+KreV$j_47_rCzVEJ z&uF?7@bXM8 z-AsZm9-Ep$ZbEwA=9TIDgWvjLm1svtjY+11YF;nTZR16PliJ1jKVko3`s1TbR21Df zQf$UW`xP$r64{8lQXEaGw(XNjRpdfZ!GG8j63%^RxVXEhMDyB2ue5nXCbuBuWdM2d zl?;`Ou^z zOI))fLqW6S$3e8MQW7348{U{K!TJ{7;C=8`;KRwyr>jz`JnsvqD;#R5D5n#ylz3x7 z=LZ?5Z+GV(<~ z?J$=>gH8_C*7sONDgUszPt;zg+A9MwR8iU{zJ~^p?|2}O zjA;ZRrnO=gg4Vva$o_g9=ze3Ylis6|2+t^NEqJbo`#Pq+_0CfWBvNlj5enNK6dN{PLfg+zGO$Y&Wzfo%LYb6^y_0u0 zjDXtnq`uRE8Di8dyj+NiQxdqEuj>@#sG(UThz3nZb`BkZHZnE5mk-`l{uEHd_+shU 
zRO?VW9FT1c>;g^IymQ9*kYQ<*V#Lu{OWAK`zttFl2Mh$zVU;s;w==)_qt=;J{PzBm z8~!m$Qpy8CMPky9f1&2_-Iut5RLQ%izi);N;lovAMY8P@Q# zZfv&Jnj;D`TB-V)^8%q=5^(-k#HiV!NQ4t{d2NuW*Wp`cA}$Y!l_9F|e92|5uTPhI z`mQU4kX%fVcX<_%SS!vZVCwAtPp*AGIY<{OjOE9QdrfFFXJ_Xx(@V7@a+=ouNc#FB zzK{%dr{cvzT@DcmhoNsLdFvTnUU(cf>6`@2d?6f+JBA5ywgVAe0{#=aS^gW$%}0GK z^x0pJ>ONmPHj?-s`_Yt&U4uGBm}2>^MZF83ZjfhGqMz`9DTGPa_ZGFpOH$G8Jf~uM z)Jv;zs{10R9AVWKdt=%JQj5L0~vct%4*x`iAJFBgVppIA~Xh@3_{hNk2!qQFr|9OqgGgs zWHG#=RbPj!=;BrhHo-)>fr_Cfq5nK()vx>{)R@2`A*w&2>xqK%8`Kko36@0SK4TEP zx@_YMmb4Q9V5%|x=sB-lYP-opgZv^%;m_}F?|DjGM8m$$E0P^Dnk9>3U);j?&@wCH zAVz=gl!M;QUKWm5t6Dfm0pvMR)3?igODxRYr|M+TX*<^8>xI;Phgj{GM}wyvh$E{jecNm-ljaljTR#H{sr0YEBJey&!$Ks7k#tDfBG7~nr=fibo1a|l|<8dAG zD)5!=PYZ*xXVmD?v)HV|&OM+ie}WTf5{92)G@4t^HqW@OeEbO*=8aB4Ej`}|#y}>s z_gF05!|^P*I;skl}&rzoe`tQ=9p4Xv#R3;tGZOX#zLqY zUp8gL)K@n7jrLC@8J(9qLhdZiE;a)^M&<*-Ksu%iS~Gg~?X+2;+T_MiDo|nX>T3p=ziWD3WQY_Eu*%MnQU(>8p>!_EKr_H@p1hbkj~+ygQ--yfoo_|~XD<&j)DUZs1we+bM@ zxfa*~a*k$PyPax5TvL!pkxF6QJBfn2yr@Ypb~$|GqZ}owSU8{J!<`LVXMWJd-5|I% z=H>{+9lu2fVrJPB!}`mqg$d#Vmd411=baEM0&%v`ZJ<_3w#oENZ^L`}2}vT%8rK5$ zqY3(OFM=B$r~W1@8OU3rYU4S7F9DSA^99!ZLLR5e#w;O01&ejo*`$N_K!WP{|6& zGr*>MH7a*(^OWt)=M(7#d}XtUopeNm8};Q9L@Aa3sz<*es5^eBVCQXR?z^P)D-JV0 z`~(u9CLvrwa0mBnbvd#O;!??fZ#)h>27R0lgms{X`c?C+oGp6Q$bWj*)J~BI=vgl0 zMcSY=cz&c*e7WftH8hw>*V6>#VG0qJdMVzNtg*l zz)(|8{NbJ*u^Mrx%z|>C^6acg_>n({Btxmdk^eTacdk zEz>tc>~Sn+bPsQYhJtZQZaYTQ_|8=8q=+&!6ls8X3gVqTrdC)vu5>XFGlx$%;MFhB zUm8>@K`PpcQwVDWj2G~@qxt?F#`+FhC!V&hY*O!0gTL{*psNb~%AANjQBO1AUJN`h z9aNbPf=@@_15{t$+cs%0W(*3QuEHzg`yj|7iN7PV$v0m6)PYXt3rE&E1<{N!kX1#Q z4M7TN5_^%l%zskY<;4#Q2kGZ!ip>3<<%4nU#ZsExpZD=gCKBuM zuIjIIGh=t&J9006*`#t~`WYzXGC_AbJDHnUk^|cN1pvlnNCMD^nwNG6+aGbmH=TEc z(9E%h?A0D}Q8r8>(fG}T;+ETHMzB(o{vn(F=2XkM#wfR6F|f1M5%djhcid|Mcd()nBqS}HWLcaCxX?y(-ZX)xkC+3Q9@>oSGapZF4)9OQp5~gRmQld76}(>p;U@d{^yd9B zTTkN|CKs+&_n8coKm(wX*dKFgrjUYPa#@(*j(s$6)t!d>HQ5s8rarg{#?xkE*wFH4 zt9a>i>vwIeq-IbZn}xJo{KImolg%VCePur&9ZuiUGUHT9VO(~Z)$`V)M`YZbnz@82 
ziqylu`z$zMff~-_!gf*JDh8OR;Ayt;=3%v216wC#e>Fw#wE!k0lr*qL1+BlOdz9j>n|+`7QZ$K{ z*gUQTM(H*EvQX`THjN68liJYtjx6Zf0OOyOwbABEYvDo~{Y!RtlS%#GLY}on`we@- z@@}UFbL2RlnGg8KH`>(~pUYuCvqT8^`Bi|rOY>#6Ey2(3JDOcKg0 zxiw5!HbV2j)w=w*M2qkW^(&v>i{($jh-_azJQfu0S(FB zpwi}VTeSj>>(2}SAge6U)vx2KudGKqQ7Ft-Z0TZ|P(w8C{rQru!S->hB64#UQ&e%{ z!K7p^%B~=8oQ|;$KI``GcD~w9sC|^=ZwR5)!j@QTC3eIHHk7h!k6JOS7sq=8Vg~wM z!fV>GuXT2)qZvixcp5IlEl&@h)|~33i_Z(@{vu3%m?6&msvZ@VD79?Ypc z!T=>7&gsh5eAy$q!|PWuPNcDN{Lb^C;6EE431+FxM=ie?L(M5E{>fbS5*3({tzmBlyVIF#a1?R;C>b5&A)9FY)!yQOF!}Y3F97vP!waTM1gz zuR=t&=AHRlnl4Pnk4V6@nneGC$^!^t01BWa`jH1N5_nd8v=6ey700j60rnLasBjVR zC$&Qmz;&|zdG`FJ`_ujKmqJzyPoDB-JrR=ou32F7505^5YlLUS%X*C)N0V+)1_3q) zaY|9O$RHAja*Cg3F~VCX-M0W*(QjTr_GLT1H-$+Fr)A!R8EC|0+H{ys6LZQt3^A=l z%U}QM5T9&X*6f2>#q&jjErYtEn_U@EQ(?p{m82c>ET`L^&cu{v5e$S)Bv?&Ne@-8lHm0s}e<$8*EJm%}77fwi5(3k%D zd|Tok{1im7j)HP*+tmod_xFO*N>eH8EP~XX!3vq-dGWUAGgfM#vow^+R-XR zaGiVnPphDOhb|mVxtXsNRiKh)Pk4u;#Gfgqg5ZwE#oV=>Frg?9uM&mh0a`b@X{mub z(GM69-euxpwOYK?6>2*gFe5W7zN;um(Y!YRwOVBB4fM;B9% zkyXQih<8ii#=(jQfMTh}QL}#`B}X#F=2ZXrz?XJ1#q9}coLc+Ga6WPcduDl(%pQW8 zZ!=xS8`VQ)Ur)s|H-)n%ix_p5fRC!HPIco`Sb5u+S8la}>ERLM6B<(WL_Sf-tm6(3 zBn9C46Wx4r+E9Z+_d6=@&gxfmuB8H1dl3zwxQH|2rqAm3JE2|B}8i8p5}eHRTC1xrFfAy3VHbX zI9pT@`emp<@=<~#$NQ(&0d8L~`>BZA{UkyerUfoD%g`&c4aGC>JYRCyk+Jzt z?_=wHb>A~1wL1AfGlfYMdULS{CkWP2(gd0(l4OS~=w|$H*dgpF2ligLIkyd8vH)rD-r@lNl5X#|49=%tp^hAu7{*9OC2tNZN% z<-qvQRrh~OolcySh?1xX%?#@!_U^V%kV{h7be}LoVF&Bfy_D$q zN8g@V*BN`S4@(I1XiwMn@QczeBearrwft3pu@`oT4jxe+m-t> z+ZG%p@TAWsBlx^l2j9xsx$En_=Ua|ewAJ@j+8;>>n988dsxOe&8u`d3klHLTv)(R~ zVfR^dva1F{N+h2i*VxS{(z_p>SEl-@Y@u08&VBMli{Gwr{w{;bd+&EA5WUaI6+-WR zOXjIyO1$H2lpD$n`E3m<^-bYBc~t-WL)zw&Vxq)nTrMYbUpOw^mXzi}K`N<8|CsmN z$JxxLX`gA1d7OgH1HIH#NYx~MN`wQVgX>b&A62AI>r*``ZJ9v)wE|H{i+YE(!I(wf z>Qrx7^wwmH3ckLHGl^&zC6BO_i?KoGt)I&$KG%q9FgGuf%TRL2PecZv(3>vl?dk)&?lIn`T74|THZ2)*LVS6Z(qgGD*gN!M5 zs02gHkayFU%ZC`zE5gkt>&W3ONEePf8d%}z?)(}8F>=OX8{Xs1WW-+TvE9bW9BP04 zzZCzQQvR=oy8nBn+;i<1BjtC8;A*AB{sRelG6kmWrkZPg*r_4 
zd%tZxn7@CtrjT0WQcrF;7b%=Ed0PGm_D$!z6naNt=pHR&Z{GALaf<2h&=;Kar4u1$ z<#WKrFc;W{pYsfHTkq0{p^rJcUic8yyB?cciLRBMW`J8RYD@^*|KAZ!AwKB1syNwS zFe%W5W!b1$2j;FS` zjn5X{xsa`Hm z>kyR<32cz&7`Kj|K7C8$#CMl%@<1gPi9y6h2uV-y9J6g?1m9D z_v^1riy?w{2hzv1mZ38xwnfvgTF6|*+f^YGp(cFa2dY_}InyUhre*LsH39gZ>?*>Q zHJTrUWu9nTF`=S=;<+AMt_lvm=!0qsdhgSU0xsD`xkH!c!&Sl>U_5q-8!FtV3;2d` zX)<7fKF^G$PfXC+oFkeac`D6)i-|K@yPf}5f>f`2Hqom$xxdwYhP;KsQUx|1wXpp2 zQsV8gln_cTOESUWtX27|{FUci>*mNu^~aK&)#&(L>fbCpKObb3Mne;EW;T}r#{*y7 zPWlz(AG5b&&CnOvQ9FFkyOh5Idjb~wv%`n$L}Ml$ail`USRDxR47AyKY?DHzgfrRQ zev|pXvj9G0Y%=?o)bSa-xqkyStb<>d?KK^Xtz&#fQ+U40a7%uwByt2kp#X5{j929r zUy4N#b(1O_Wh1-W37#+!kur}8YukD&e|d#Z{WxK&SZ->fXsgNrG7jXO&o z99a1GU!XM#px)$8c@D2!%q{uX>_La!f~DC1!F z8bM=gZ<=E2HT~hl)bRpYA89@&eUo{YyUbrH*fD<5dk!qVpYDN#8{CS^B6z)!%n%{> zLLRlApj7QnG}rQJ{JB%-|4Fls$RSXS25ux%)&_UfRf@W>8r5usy-i^8XwvZelPado z0)dNhQgYN7EUPv&S!o(QHl!z19<;eZYW68hB`~L^E8JS2ZG5O;b}QXMPHa3(cyzIv zLEheq@RRl*EmLrE?A^NOg)q+%bga_Ke+WFOh+phMBqa}|^_*QW@Gqq9O_l1=!T&v+ zu}YUkU)bc0t%K3Xpaj$<%WX2i@^d>Ok`NjZA^dk(>+TQjNWKphceC#lyN z6yfy4JD%+w6}jsqApXi|26yZ-wD_&es5MAwOgTnq3>XWyc=m?w#$Ap(qDhY#2DKTN zcyiUhBrIikSFyeuS!p5vOlNna9gX>=E5hFumNXVzSg9M3e|o*8K!f)<@DV9^C7gF` zC+Tc9Du*R2u0Kjapi^<9(fe3iTJyz=_ZrNfB^zLX&*qRz-u?$?ldw}xKxkOOa>(O6 zz2`+IW3Pm}4S&vm&1-VFD@(s>RJbe|zRlor+QK@Y7<@*&i4r>bA#UnE`SknSvIj;w zOW%O${-C3O*{adO#7==zu<5)2F!4`5sJbR`{=T&$(U2OsN=8o`wqW1L5n1A;PsEk zu#Ep$yX%57g^K-w!A26?=8{IddtDtUpu1$Dj<-$@mTaVEUlKDby!Xs{;C~wPiFiB2 zEq!(RXQi75c}b7+M0Uj;l{BK5FE@8T8+Oopd(PZJCchxIUn_6fO5<@To^?=Zn=N9B zv+O<0$h+uNI)UB8g)Ii7v8dMLuZe3Tm-UY#Ap!QN0UB3KgN1NJDVO;D;Wo5*&0z^7-?XV+WB&Nv8tC*`py#6C$_Ibikst2pIMqG(?^x(;ndY!!_>%U>a(*lA=QLTv&hn3`u#q+Lgrf7g z#z47Ii5HziuKZVia>jpy!BuW!`vM>H{NPYKWy1~G-I~fX!*_ZKA&Gtii&U7H`Xk{4 z+*qd)x9$@5J5|eh(A=AIk?g{P6nI{-G|S)oS6lsOm0h3i^~+%E1U4`<#JnC@z;A0u z!f<$9FE?TMB8`sG2knhY@gUgy_Keg1TD!6APVW|X!|UqYFql7<<7a1Dx!Z69cL7ZnZEJ${hc^!M0GW>dmx z$Qaf>%^j(R=h8pPeq>k1g>@a};9j%2Olf&O>v>EaANb%d_T;P(CXfB0fcMP;{=jGA 
zBcPyduqbx%3U=OUl#d4=9nFj%%$BZKu}2}S-u*H13bPq7xqj{mpduR!PEYMV#ofE_ z0xh9-Ce|1r&s%{7uM$6MkbPlB()W;B+LcoXa#R>Vy*r_GpK-?_tYD=mL`0Iauu|(< zF|UCC?xqdkJ@vf&;5cx;XlHr&1<=|vLilA zF_5Bj*=3JKVLgVt)VcQ)f~X#fEU<3zzPY&ETz-$~RRF;Zho=h(GMm{XeTXAC8Q)}i zW`Q^LzxZvDx28KYRR>MnqWw78l}0}JBa70fWXi3>+EpDgn?!>+j_nX_{KCq)+wD>I zx+wsmCcAVpu`n7b3-5FY9ALRXzQ%GZHp`C7g>WqyTc< zo&nxMj$f^Vy7U{4nuZW8oUWpjgUdApN_J?eG(zLj7nLi(1aW1GL=;u%CRMd#ug{Zj zC=wejcy2cL&yE{|_b2H?@Af%U5YYGSl8QJmUwbXiM6OGGPP%wTsYDu42s|QeaH&1E z-RRbyR|e*+u~TyXe;JL{E}mEgZn5^0_%AG*#%TZ7Ub*E`!S&K~A#N{PtANSi)5)Bc zs*+8N$lz>cf!#j@z|oRZ4rRnuFoNYi&OkFx#0Me7NNnz_UZH@-ij@jkpE0Jr5> zJ+We_@LWN?WGzl+Z}-o<$j=Q1K1THSL_x7)LW1D;g{ zii@MO0hvUH1Kj<;G~h>TYABf~ge4f5PK-ImHO1y1{4yEb&9zCf@|)vR8b~@z2Rt%m zb%q@|G-jr7V-oE-_u{HN)7mh@S)Zs1dx;!H zeX)LCcA1Mh?`Jyr|ME{y8QFnL=PL&5*B3Nrw-hSwGFjVlEX&tI_U{umHpY5!xm%eJ zj3N241SEG$`kq?b@N6nmT=#U4*d=)UZ8q;x884F^%BFXvthk-l^W&2mSKW!8tw6<# zLyuoS_`IJ1FWM)XgZS~c3*3d}58yCMNfvMm-_jIhwdslPatgTI#|Bnd)ZoEVZukgMn0fpjOLoOY(i@7g&i`w`DvV@!S z11kn9D4p9*?}4A8V$SP3N@U_OWPQH}B-fRAb52e-{hJbzG05;50OXE5Uny};udc5mtHF1))QsFbzZlfsaqk!1@hl3>!!x|h$>HcA3ePz%&qZ*Jaq0U zGI|{v6D+0+QF!thQT5R_&%x-!0nFcf??M+Qq113b3@C~qSXcEY(AEyawR zsZHL?4@R2#$+-%)#8N1@QW1Oi5oMT6m=(tdf8`EWV@OVt)=E*)f zK{^gJ03NO%vXOn8UUb*YnefS(Bf!#e=KM%|j%e4u#^84fO zxZfbO2K|@vDfq&2zt`R32iqNC!|>CR&qAaQ)c2<%ZBL#qC(XzWyyB&MW~4~AZK!0L z?7X6a_|_|;`H=cC%o>jHAt}73=A!aR`cv6uB7-o+K`#giJGO=@S5@q_g5p$j=n?=n zz6{eIyj2Q-b0dZy0!!?Hdt8oL{soEzplj#KNa&5#E^X=Gyg*+@5-`uGnP%?B>UN2- zHi@0O&Id$TyzvDFW-~l@86p^$>75fQ(@{vRftf;s25MCXH$~$AvcPzG{uQY{C(>(u zm8#NsQpS@^oof8jAbw{He*^^~q*~wb7h0nrNn3@GBgTW;obCRA+WMvK!j7gdLEboN zH+*c+DbB)KDGE_Qu%>~SnSud#6OqOouHq+R@$Ke3(Ta2ntLwQ9W3 zBhYS>Bvv1r*Zy71(6 zfE6Eb^Jwq6zOg^wO#$%>J$Nt>8#J&4{t7&7JN|9HyVKe_vdFvsKwX~?d=%cl|FhfD zi6;uQ4VJ2b={ZTdAgrNvz&0PV^c&0e-BSuIJ-yo&oGx1K)2jG|AH++D{FwJs*A=SNCs8)S5{^#07f zIp=`Ze|bMW-zdH6fw%|v*bK}R^qnQGBfjK2RtGjJu0V6$*K&)=^YQH%88ut{m<=nf z&BW2@d`Ub=33tu-uW+tBTY=Zwg8P=jP!ZmXBb#vOLe!3CV0lf~wV}TNlQ=L*PMpss 
zVyD`3p=K^JO7P7^Zy>A|8cK!akWUX*8=`D0e{*C1>5@Y&Q>Y?Uvmc4HmROi zLg{!9c=`bR+W$2cM1S)xm7sx^QWHjFU?>Q&gd)| zMt>qj%7wjFMJe#Tb^1ZL*Gl&~WHy689ifQB`%RssumBp-S5ycYpdlheu=(a0-e921 zQV1JppWMtCtU-EC8Pl+ha|b%{Xgg_t@b&z@+B2=R@k=aU$zExQrK<}2-ruQ)*7$F} z1V!nID&Cz+YV~3aq~|vP$_o?he$&%{()a2oXnf<-;ubgEpP|`aj+c{g{H8gO1@Ptu~97^=UVYEMsRC+$V~8e3}enL&8KJk ztIq2FVOLe6D72ayC1tamHkk3esqnd*e)y`~`Cw^t1Jqbq#25=t@cHGRsV|ZP({uP^ z$8KMD6_a<*2ktJ>{+9-b_WGkgQ0?w_LrZ38o9egEc|#@4o?pHh+g?X~{a*RxU@hv{t%9 zW}k1It8<6Pcd19Zn{tfs|V3j)dN&2hN0U<93YQ zy2^rW0JhV0^-ZqJ2Y*SAN9wy}s}}@=(1;v|<*5T(SvK2wJ3*>jJ zZ$m#GdeI_*KYt=yW+XI1u8jJ#(2ipm8i^)J@@5kpsu(GjJ$)THOqgs+D=t(#5eDZS zunozBK57|~1rE;R%Czen(l_vP%t<|csW^>u2h6Lf8|m*|g;gOorG;z>M&2Ha?pIFp z!4))x1@PMx3Fw(A2?-1_u2X%}H@qt32}R7TRf^7Z(uh)12RhgAsMG1#BJR!{PB zQ|_9me26GdEa(zG!%7-QR>T`gg+o_%b(q;p|1=eL?(~TVUX8!jFL5XZRWL>+&U=yP z0aXI|Y0tl*y#mz@5F564vy%j1ME!a*l5yxBH%OC=6$qNCBRC^d%|*tfRmL`K=Llhw zqs@rbFi_3X8XQ9F(<)zOWAJ}!5mBqXKVuBc7}PQ}Yp`MV+I^Rdd$PJ{gG@T{OyD>3 z<~TwUiq1h*IKd}ctDYmSB8QM4VNWyZYwfN}-KJ{0?;mjjxAo848)S47nA{is@mQmA zP-f4G47(rf_UF3#URY^;hFb-Qz1Fy5=5%%V-N5kPBg#*^K(?TZn+bQ{c)c-55lZk) z>JylR9o?tdeQ96owrEB1y@tmB zai(HtJF!7D1JV%nUe%oz-oc5VjNPiLuKS>G79bymLqQMZt<`4z)c@C5qoZRCdxrM{ znt62~*iE06EJ$Z2n@blcfuBa56)I&Me7#^D@U1M`?Z0f zxVrNzK}O#?yt=uu5JvIRp7AP z(%<|O=rK#`G~wiERn=uP({Sj`(vMN~pKn4kxT^@A*ms`N&tVrcc*1sg!$iZb>2v7< z{ox9rhV08D9d%_L?TOnhSJ9|6mFOi&^@9dIJL>o!7+kKdwXFMyw^g-nV!YoHl<|)QmFqo||cDg~)SuT$ved z*Op5xCuiU-5&MhUng3L=rWrg_n8tbC%iE4)H`fnD@A5eTyDb5u-*2fus3>>JaDh+%Ru6KuqIxgtyoA=ZA9AZwS+Ls5ka%i3MG?LSh!0 z%c&HadzlO;Md$)U_oOmnQ~W_Y3MtqNDE05v5q|+zB zmL^C4Lg^PV*=g#)*pqfiLE=wg65uEwqL~_J*;zYP0S8Hkwe|!%wVwXe+7SpmCI)h$ z-vuq3^teTKknRiLyi}}EwpKtM-MHZHV*X<0-4h5|vZ-#88tFs=mz9alGAD>NFPZ>m5*;4drv?yGwG%Kc2I?u3C zgN9G^e-_`TanSY4km|kuICKm@@p?A*()u{A_E(?R9NF8h=9O)MUa~ z17j~PAmKX=UlOWO2M9Ey>o3s=XThB8k>18*QT9ErY19-{QM7@6!6u84cmq6AIVsidV%jC3XmzWF- za%&t&>TTuw!BqPpCGT{vo?*URD`tWa4d*BF5lT4oCpm`22!!cNqsd9V91e7PgvPM{ zbACFVRj+Wu{B6fJ>fYfc|7T=~-jUCi*zEe0-#$71nYfYK%L}<;RFh)rI(?6K1-o$7 
zVAf*`2KSHy-yXv|fWkEK9D6Rq5OUNHSpC=mD-#J)+cP~Hyvj%pK-h)a+XS#3{NK%{ znFU@XwUzrsrTsg3Wj zM{HlVtz?Gt@&^5j@mrFs>tGFiuR0&+GHtiJWr-imqma$3(^cC5*fgka;=k5bedc&3 zp$DoKU7CQat)2;qzb6BjjkEx-%CVu4nU;Y9|K|nTXPum?&*VauJTL@nZbjOB+>i~k z*B2vDs^r_-SQ_|SiK4rkOuSgRs;>=y8B{E7T`@osN_R_vhr7^&HjxC*tg^6T(}BcJ zz1wi8gOi-w_A)%k_5M!G#J6pc3H=h;e$TL&(1%9ptl=Uu^QipA#R~+!%AK;z`3#15oFK{1K!Viv-3!CXlvP&dM%(S;gTrnYY5v5hYRId#RasiI( zp1{ohlp&+IAV8KN_xk}@w z0WbTkvSIK73b8=ALQKM-+#%02qDCW>{F6udi_P&mS>3mNcUg)%Y+)U*imUpxFp3Ev zPV(PMTJcgp zz+Q9r+zfF$2K@n4DDw#@WZY&Z$6zyyr9A)&tA$MDn|KVip$HQpkC>jmO(zzANqj`4em!1cz zJMro`Xbk058|Njb*Ifegmc3Ncn98kC?yck2z0Z`D{v zv?5Y9%6ls_WMgX)WJ5#l1;dtfOp>F>$2sN#VYsvv<I*KGS^jm2UJ& zrX(&MDDLwKd#QrBF*iZFM6g046`(}As{#%ZeZJ0A4AmcVvaaJ-yOy3_xretKu&ayR zVXh>y2JixcmyMjpfJ(^fRMQv*I|!Vk>YlunjIw`DRsSgHsuV&05P&M5VU3Rfv-_ST z39Ed_d%SKxUf#m~o-KjMYArE~cwGUjd&vg^;fh+57jtW(U*$RGhSQB2g}o zv;$lWskXa}H}70lj8=rIm~=R5FBbWC?xOjLBlcqI)F(9}Z0p4j)Etc06kz8EJ6bKf zMQ#)Z=5_L`gQV9)NIyXhtuZtYA)JE%OLhQRM?uo=&LwXHLO7*?)SWOK#j6thvE-x>sA^B)I z6e}112G`)6v#U=pS^s`8Kaivddq#4X{YUZ~d3_Kr!XzOHNb8vZnb64_U#F9q2WpD? z{i@k%=tjRwgPwsNs&lVZ3SJr_=4bQSjel?RigZ8;#DEf@(38uBjKZg%4W7cinKXf{ z;NNRb$8k@(Uu?ytZf{#%qfU^qN#2gB@7hWJ&30}aBE{Y3dpn-S3gii(v(VvQa3=dF;sRe38?KX5G0n4h%feKe>2`BQ*+f?0t%of^f^%3 zNG#WXV!O_GDQcc)gs6#hR2?4}XF}>m!mz3cGv?`puNXtT!1lkhbT9R0nR)qG2YEB{Z@Ae~I#F zL79LhGxkfH6x`B!__eWgY!8VBb9CN_+vrRlyIVMVa06*0Bh?F!9}{nototH!lbLgV z0i>@WTcMiE1gLX+I1IsZeVgeX_-ak(RFwcnPc3*I2CJrZ=uBUHs=nZTZ;38$^{F^? 
zb-&+MdPemvEA2N0Q_=`I*kC-z(&S0_e6^lVMx_hdeG}f_I4bWeZaDK?n;{W2%!ZMs zAShDHaF3O+yA~qk5{>mb-uDq{ib{R3%*?yF!LC67Inq6_VNk%sp!6@HkQ3SZu~ufu zG#Oh8_2cTPAARK!^YL#x?8orugFAwG z-(J}t0tU$W-~W@n0G?nv_%m-b`Neq**G{|!k7_m{N?FN#SCbdP38hMb z^#3pJ%N6oNL;%cb7N!&-&U|Ucp|P#MX1Q-P!gVQ^nHjCh&0oDD6!+-~2Ii8{V?V_G zmY(R&t^8fQ*7h723tCPOc3C^S>T^8+fn8sT(!6t}{@e4XCuf3&1{hG8zz8HKH=uup zM?EfgIqOGz92%`>W3Z5+J{Ce6u1Py1=L z?Zb62Fh7)wbncwI0@a6D2(0Q+yq9A+C^yUPm-)TWu?40*N$Eo>t}1$C)MX^fxW*V4 z@0+jo&}ur9(e&{4!%6eTXU1X?&pGC1Y};qr0w$lg@>+(f0t$VK&2#@V2m6R`|2mU9G=a&Z8=`JrM=vCKXkf!R`bpQTNzegktpnS8F6t+sfgVMq0ZA7b z`kh~(`vPu+JP-iWHbBSLZB;Y*ygrxBvt*EmR6i--@taVs`OmhOtjS29u%a|#o9c_3 zwieSUU^!CYMfX}ww(D$NuL$S2DR`ZZWjW}PHEgFdj@#;QZ|p1@G~Mqko)~OFg&LOs z7UDjoq&^|@pjrheSn`Oi{t@$Cy)=o#z|!~upV95hk=?V-%jaO2TIMK_d+tM2NPGaD z{5K{{f*;pcYy2(dlhd`92^zZkHcgLSgjL>F3UVfw*l)c*kcs$CC30jZ42vaDRLF*H zRK1d}df9XPPn{(YR-btrBDjBf^=DM7>S;18$V(UVRjnlN;R0&E0I0`6 zaNT#u5wOCsRKG=1?1+LReEy9ijh= z*4SLUvV$g*49t;1n;b0sG>(iNWBaJR*7raGBF6Y{Jd<8IG+!SNd)3_JD5Pdu>rt_R zHH3)N2u}+kC}QX0;vzPzX!;-mh*MvftXZjA9pjk>xKlfaI^mfNl0o8aa`si#HdEIa?sK$ zsx^s^0BWRBEFcM+dz6Ijw-#qsu^k?%=+Mh-@guAp@i~6nyeh^L1=vc|vF#l|#F(Q5l$Uk!gW6)nOWtu#q zB`sohRns^rr(HBtO>-dXI-$SZ%*Eb43|wy9v`padh)t2uaE)GqQFPQ3d-dmG%!Bu& zrEk9M+np%0zPve)*|OtAE?V)7V5R7wQZ=CB(a(T$sE1!&-(Q?P{&4g0rD2|jKmm`~ zs~-n4hV3?=5-yOa0KY_k^hp|KIPfAys$KI z#YaMP~sM^V?c|VU8?x8ZhNTw6i;?7sV%tz|7T%v-ktT-x7%yMysKh zmKc&R*X`D=cSA9Sln!vki~>on+R_@Y)koJ59x=@0T+K4xW3;$ppsiqMW5j$4J`u7pH1jPs;96_U2+T0k1}OY zGIFqI=gZ%)TJWzs+t)5NLX<&#N`}v_28BN zN&a$0J(Et9cSa-wJOOa&T%c8B^7(fQ<9VPrD*4Y9UEYcA@0Y=Qs@17I_(2J>Ndm^3 z!E!TBWQ`o8w)nYXpV{Zz`=*Uu=Q0SJB>G#29}x)RG_Vcwa;&V&{wL=eEaZR`S0R*~ zuN-QlL~R0PifmZvKBtdR^@tG?3tq-sjIwq2%7_F8mq?3*Tw5fr6jL%q=Xlny} zX0|5nHZfG^3maoZG~Zcmk1b2v9DhRUw!6;r^skG}zx}3^---MHaHS7#uK>D~e2QvT zAPpEpngEZ-%o&u=YmG|8Gk3uT*d;X&wpGsEzYLrdqZLC9apu`jKzGu+^M3w*{10N_ zh)l+%5sy|vW7?h+at;xRjD_p|z*LUbfH_o?$4a^!48v&Um0oby=@n!%d+&WWOay;D zC`FcOfU)-rO~ZkLtxM*fOTXWpL+5nIT-aZaALUz-J3ww`YT+E?R~rhQ%o9%m 
zVt=lT@b&^0^&s!eG;e#dqfynuGOesy%{u&`4&`Nw;`ZNsnG5JEX|~g#-!x|+!C2>$ zk4a@mBRxqyYh&h**{meghH_4bMb#M%okc?&aQDYZ_a6Mp$jMyxM_mH1 zOPdD4t(aXmGI-eeacmg53qOrr-<@KTo;6BLsn3hxd^~VVW1Q>tdWi{mMS$khC9vD zFk{C~3ToS%G!++$wawlt16S7QW_+?SJ_qiEIvibnFX9+1V3;hJCj}SV zwbO5vc&8=>0AY#b2&;rBer=z~kXLAg36^zm1Sd1~Jc^2ck<``^_q_oZQAs!miM}p? z*;F>OqtY7V26d4w_8>)QG)2ChT~YGyd0GVwVn)XQtHdf~5j2IBa4)*RoZm>P$@S2`BHxM$q zghcw`&6K4D8_(q|EadtO zXEncRb53|qq^p6pppl-V!^~rR2sUxbv1xj`R{@eW(O=517gHt9+64bILxER=R;WuA z@EZ;HE{|2;O`M1{l>sxdyk^L=OJ{hV0`fXx%)mT1g@Ca7;-PvD?JRYrT@OR37LzoEaVw%1 zh?CLEM4h`wWDuyP|5(hW(7*`FdNxl_$JE!KXhUiNNbC`86#L(Pc~$FYh=%#uKjwLJ zdj#=u0$ofEWl)#TkEDVKrF^(Lq7X02#(tBL!qMa8LMZ0yqC!TFLeJ|KNQ4W*0Om-Z z{DuZ&keSX^NEtKY>S|M2D>-uau7TpHylcr;^_OPv++vYdMwII`Vn$WBq&BbYw{2H*~=Mf!YSx zW==Rs;uImRStA7mXy`g2EoMueu5u`R<@StY}nVP3R$U{ShL%O42V z%1gW0M`~}UXx4*JAKfF+zO$QgZYzxLU`O2?rwuw52h9&x=NCH#z6_VepDHOt8BX93(~VDi7V|tu||%7rvi>B?aWQ? zZnPaL=s(t70+fq?B_v^DW0?10ve4S=vc|CKQ5pk^`Lh7`>fM95>so@u*Kz%v8FGt3Arz}q7ND8QyMDH&} z;xsm1x4$+p4%8ggAPIU-c&unvaA}}_*Wv;Uo|{l6#xPJ0dOZ66ny&++=l{NimzhvGHPRo`B9U^3D+d`8RVm+ z#BDlTU5w^%m`*qi(2f)`<^II`WQU$!xBaACJ ziKQV0helFwgy^*}X|Xd=QN&YGsZ%r!YO0hVqEm}yEk6z>^|-rqvb18jsA(T`10<5H$y;;jC3vO?pjU5M2YUXBQM`F3`7 zYr)naLaEFj4(Yjro!#SiG1f|6EROUfaA$(j4+@?8hxb5{bZHm|?c?U%-h28lRQqKB zSg4ILO4^dV@4rxE!SVJ(Fw;8KtU7@dQ($szs(nosf^7R1nSb>ffwX=gQ}WfYoDH4C zV3ZP0L79dVT6NFfV^hH$|AYoazz=9{tDyPX-3Fp(-k&j*YkQ^Nti0Hn^O(B zg6NsZfAu-)fGB8XKYN(}K~6|U6%B}*;T$M8yDw{{CdN_uNz$vQ2E_stKq`0sPum;G zILxOCYe=lR4fMRKJPF2m=oB1%J3jLwLR%{G7AbihG5$w4;|T>gmh0#}(o8EqyZ_|^p3Us0u z1EECdhvjBflf8_38@p(c(fL&(VzjV@*mNle)N_H~qPwL6wHbQ{vYIKF7nQwf&vkRO zHfvRhM|`h4BI1?1%lYiyrwt z%dfXF#)(Nt90H#lG6S*{sni4D6;Oo{+PPRDjm~4Gb72=@5n^mAW#u$-U0gBe@o^5b z|LDgsu9(E0INND?PqBNuI3cWv@6trj9B$oxq=PukEsPKUdx%mo%qJIIvc?iGcDzC z{O~WoAa@(uMC<(hbysm~t8g|wBp|Hb3*2*1#XVOERiTj7P+GJ_L?uJ$LnvD7r}23;^wB5*y9#J zif7+%anHb;kz{7clJ|$|fenthe`i22ZLZF9ncERbSh0ur+pAluOv)e_IS3onQNGWdUSYRljc6+X8=-@s5QBwsZW9x@*`X=d`IVo%y@;*P0|< 
z=Jaso^FQGi{$<~Y`SU|-&z7(vz8bX7@#nw|YhcKhrURf=&(ZA{0?15Q8+O^>a@A1) zMgWfbBtEHSv^?LoLGjS+CVI8>g0FP96Md8BY5hq^Rh?Klgyt$$^|*s`qW*-gG`I57l2>^4&Of4z61n1sPF!M654`8a*Hx} z*mwFkEM4K{)0o++78ZlisuYGrsX8)Y2xL?Mr9NOYGl11%c~U%UaE9Vg)_1T1MPm!r z3+5YRLoSL7@R$zE(RI(4QCXfgDtS>Qha=6BQ(@{1bJBOW=voBBP);|v-ZLD%! z4z5UIJ_Inx!A??1FMvIyD60DQ+)}eZt#fbwxFXmz)^V`1#;6O`_nB=uxG#FM+AFdYre`ha2q!xe{WYw+c zne6JD|AdYO?U1O!rXwf9)NoJ0A=UvG+e4Q?K*l&A$sAV3U8qoB*#IIrqsUX#C2r*e zzq=1$1>2^|J!&;`H~@I&0#5oo9tEtsR#a+ic*;W zRTqB0YH|16spU5?Za8V60DHurB zc>OJd%0=|YPcSVT{RN{@;}cWezCj#-{bfaCCeF1uAo`1n~C7?hA9#N7d?R#KC!uE2ep0DugK?i0ceVG-v4AIvBA*_iC>Ouj?w6L zb(dy?V;MaU%$e$D64}nRlCCZ_;r1ZN}G8t~A%1X)+5;k!p$+i_`R;8e~ zKl17mWnVOs3Ox<;Bn;6R=7UPU(7y9Ct??ZM3Az=nR`bYm1nh0J(gEyh% z#SUWFAA4D>7?ZlBUulveGw_ZvlIs~LeUx+lfXEyxPiwg(EVIRoM1lQ2!l03yd&pu7BUO8>tXH$da z5pP$BU!bDNAg+}ItJDvqB;_eb6DY=>vuc!aND&_iUp5bC`NBTpw|Mu1BQTSx-L_m4 zj!1Ts@7YeT19&x*u+vd$djK7s)fIv=rG_wBVm7{VCV>BDJ z5P>ySy=8uUPYKdky<+yudC1{)yqy4B-do=*FNHij(2Nv5-}Tw>IsU=_O02O3W7P`E zBIEnPt<)>!S2@;*pD4@to?&O0eW@wq-Y^a6Gf3W7d*u}hYQ{1*VdGV1Q6j1>aQ@$y zrWUL6=#NLsf|t=V`}`xSkL(<)=>G5>vv)bM-CWuI@rJgtU9#-&nbYK2@+qE@TOm9DY(E&!`(~hv2-Ef7>l#bMRq00^)zUq`%lRq`t@S(vTE)z@N{+q z^S|rlVjipuPN5s+*utdIKjrm)ci23$xEzu1$3y-hys5iqe6A>FN${$7WCnH5^8)hy zg!xybC6p`(Ce6680kykrf;GbSg!vqIl1SG&aq$c}jsye*mc+O|doQ-j{vPc3yMTie z&(^QP0D39@RG7nYz-^4%*oQUI=-MjU|0KacA+R--!O}ta{qtA`!LQ*Bw+`JG;m_n9 zFY6E8V5RnkFd2P=2dQULSX#1dCGz**4rygbMQk>L|DJ}{TD&&%<3Uvkb;?EZo&;PP zrZI`DV`F1Zx#TOe>s~FptE;QyF-ET9!=h9?;rd=sc3moTkv|rc!CxgkM}N)Ho8%JE zRf}MX=ak_G8`2v@grw*oyx6_cQSPGaoPI60^BoPnex7v^&&+gQ{(NP)Q{#7nx$OPZ zUmG{Ki`#Tw-_9UH_tJ6@^Pl(^+U}WqG~Nxc@UYNsR%`5=fx9n3_Z<VsTX9zYF~IHnLs2#^aNyufv~2F_2WRFHq3;$?B4{WI8-c9JopAtVq~Y z0-d;Pa7;fJOo!x5PTci>|KDv3U+Ga+KA5lnak1dNJDL*uTqX0@pJAX1)Hz_y@_YgO zK#Pao^Miwf0;+LNhx-%y?{tk2CM9@ z*uEs76&jY0y;f(!`stN@XU=T!FkZ-PgwuB;8_l6jI%`Hn7=5C0x7$@Qzq@}d{}b1` z5kdbthe9sXE#ou3pGFnSO5h+~py2m3jL%l1m1?Iby^>ey#UBctqS&*(3k4_RpaDWc zc$uLUO5=uC0ov=cY_A@bp39D;3Sdt+yUQwAe(eXPb}#Vz*UEbVu~LffukYFI>#l3f 
zpDqIYzrDwOsL~1{oXa!d!Sbo{u`5)X93iG;sP!2xmEw&u~+~0I<-a4{>dT3G0piX@Ky#M3O3%494_gbs{)^f7r{5< z$2($F-rL_V(++GYSpfbVPS${f?cGMoLI5w_*MxZm+B@X)FEDt1S>u#Kv2GDY$oI2c zL$;nx^_OO)4w~@|^+qg@N>wAhnw^&HA2UBD%z(D#L@Yv*m|)Xm^_$7-rKlr6lTC3! zCaT#V_N$MPAB@g6ZpLuSLFb{j>fQ~|$R9@0uqK${w;SvGZ{09XYv}g&_D{Ldg5DA{ zzSNWoX{A+J%>ZC`-r(E_VkC3I&bsiLh-M8w8e!S-BM z4!8z=UJiWwjCn2%=R)ct-csKxQKB01U5<=TX-fp^dtW}x_3=w2q@n(lQmiBO=yS^+ zI1dc4o1;_1cgEESTelrYQW)j!;q;uZiz?Kx!opwSwEPoOxKfHFoayxs8ˈOSB+ zfN=k6o&awua1);cqKHielaP$dkeIo`pyvy_lvG{9*wrqqYN=>`#jltdq4CI~t{1F6 zd83freVn_o_oK`QSEvzFuFoZFFLGg4tPu0}DSI<_=%V02PN{^acON9f=X&^h?)~q8 zRl%%4wtK=?Q~j#aGD&q##{KaXlRj6(!3+ZGgK&E=1FWN=vAa!{cSv7=3YG&np)2q{V9B+t}_=}1rmUb(sc+; zJs_<_@ChJ+WD(d@r>e?p@Ca)9Du!3@Q{vp;PHR0}%Q`54*qDCu?P{8joErak=zKr)x z7w=^szZTcey3N%^I(x>U$RZH@FvEA6L=qhON!5nERq&s1iPDm1k3}1 zu%GPY@ndo3iD|MKA~ubcWO=1JW*kF50da$yrOTZokLs6=;EsYgp?_b!YUrO0kfiwC z&(g!D83`xK^eucs)EvB3kB_J!4?3$T!seq3G46G{*7DMhcOkJa>sjta4MGF{)V{OU zvkFDN!A2q$RdUwP_$rjNOvu|3>4-FPQ@zpMVIf?CWs zIgsn$(Wr$oC?{FY~n7ww%3^(D_4g7-{s~D6K|9Mgq-$eu*CVLQ!0=m37dJO z*@~kSQkTjQHhje$V4Bt{{NR@HiJ&SFd@`c##9Vrzjk+(hW@D7j_6XU-`3)X4boJUH zN=F-KKOTe*qXBiT_eRhkl9nCwe{V!tFMECG2Rv%gGznl?t=p^@Id z^Fqsjh;!l`En$wGyreu>C5NLls^=#4#ghV2LWN!{Sqw*!VZxcdqziOa6%fSymD+ju zUXoG-aU*PwIRkmN$LrmL#A)jReoHUjr!%WZ81aqRA6UGyFjuO$0PKtWA;>mBLyzUI zRzd}iP2MXp%MsaS-wmF8YC!0Va6t2_3T?+eJg<}|decE!#5VMDPE9_7V}>csO4dR@ z(#Lz8z5oZ3WWN+%jSU%B!H6nz8#hskT=SebD+5+BF`mEuFOKrx&$hdwScA++ZtyaN(=cJB(Gu=C?DZSA4H8H2;k2vp zY3%8Y5OKaBMEHx9RW<8f{zVuSyIx3TI9XDY}%8U(p9;oK;Ijg5Y_F^2d z1A#@G_Cy8?MgF^IU!(51B9akO-&i}#<{bC2H}ZGs^}Vg6cU{oGLNFD-$B(y^Xc)LC zUvjwXe4v!W7tHfd65h*xCYLPi{G*eT*D4W@JnHN_Fz#znR(=P5uVn1a`He(GZ-9yo zO>ytpW&jlDeGkVCW><1!QTKs%Ab_cDYvxB2tzZ+E+G7e#DzHZA#ZcmP5@VE1>aAQy z(9uTWGvPc^(S<|qLjS7F!$6zI&q0~kA6SGn&-vy12Rtg_*(Ye>)CyQ>B4d`qnj>C1 zP9_y-E$%FpNF!N(7MCHV4I~2sln|_CpG~QIWMP|w33`*4Nf3PJpUaEc0i8OsIC(i0 zjtSV$ASfjJ$>OQLL090jyDLY zK^da{uVZ<#eFt9cChr|cKTJ`9-zT5?i3))$TB)~B&u;crTd#u?{E3>mw7d5bxIWvshs<4V_dMvo{oMvCp@%ow 
zh}uOySSR1UaEk*AvAcpb&2ffyZxLdRADWzw9LFuS#q-q&txN5 z&6W7(W&6r2cc~btD&I9Br<)@T=Y{!Ii(lEr3IIX$ij5VbnWuBrF%LMT*LSem6j@)s z62F*Zt(ySxoP*7IlnQld=YGrg9su*~OO93oh$njI{dN6tHTkMkl0YJ801m6RmFK%0 zE+&Tf#l@$Low}MkE6KD;Pt+F-7OTfg57;0+XRTJAK^+PF?DbmAFXB z(X!;qRXQ7ujXb?27ZIY%2h>G!V}^P08&~x=s?L_M>Yvh~6TEK#io?P1;o0p>(gHqg zS3~sjx0ufC|K5;%F~9Omv$w1lD|>6jQXZq-f$w^e3pk-m`0IK(&jRChUx;;j18pDA|DE0>^@FP9g_#RQtvUrANldU6w~6apz% zhlc5|+l0o%X1z>jUl!1>jPEIzl25x92QAQiz41BFCB^BZm=H@`7ITLvo9kA`U z^_+E~KJmS*FH=5H9i!X=Elr5+99hg75X`tQyeu+~7VLkQF7UGr*~A9R%ggob3pPh< zeK^+pE2?;HXo@KxWd|*nWN9d=#Ndtj?+4-b<*#o0?VktwshOU<&#voA3I8dJPQ=2& zz(TaHcVFU9Vq#*J&&kivHH(C8@SQZd_mlVH>1IftPOF;*ILvXY%Ad^Ie7t>?Nl6Wk zFUZ!v3#YU(KIRAu_mYxC8Pu8{X-nUb!VRnIDof3Ru74Iv_r@xd@7W84l0qY99-O%$ z^wgg`Pj9Lz7ne}JBbk@;5(of|1yIIl_IpI7I3ngR_5Ed!P|)WKXX!S|A8J70uof5z zs&F02vv#8$^?=u+ysX%7-q_9ei>a4?nxae*L?EmZd8>P*l*l7I%JPY$DA=R(QbXyP zy3A>|{tn$7U0j#&tO%h8rIZ16ziP4oyjVu93`bgpvuqfSx@U*g8%MZtf154lh>Gk-cDP2cG--;XMN<4H_3sk6tZ=T^?TWIwit*x|KXdA`c`2G{*YQAb1?oHdmIQjf@fpb1o z8M*JzRETm@kSs}{Cq;4?wieSY%WgJCq-p+Q>nBb39Nrfd zyi8E`?#G+ii}^lPXemV9{WIptz17N%{MK-h6`Yl!%aDB1cUR$a|93_5A&LKCnfY*2 z^4SNa-*7%kCG;~(gWXJlHxMIVG4kwP4JUDw*hgY}Q5)uRdUbY4C)k~WYnmj7(8Sqz z4Evq#7r73EiWhI+)TLO_7R~xb$mjokv5yiR5FCblQeV`#h^;@oA%0D=c{)dLIY~F+ z$qGd>-n1i@Yc_*Ab%Aycns#NkQ)3mKAA7;$1psF|Q34Fax0N2#j~rPYJ~UL|@CmxV zAOr-2hX-4;V&ng2(4KT*=9$W;O9r>Erv6gn#ss@P*>0s}t9xJ68vg-Bo*Yea`9%oU zHd-R(I6X+d57$SImY(H3p3)OdX%u~L6i}hG$%ocZ6peYqx39m>--3D$bk_Yk77kDM zd_?Pe(yu!H=q-sK{5TW$sX79TCIU5z;HMwc+4YUyy^9vVsm69Tn!sC5_OJPJy*xW> zas2m*F{!(duKgw+3{tgRe>T8{_cW#D*+E0eihSMwN7!42McHfFda!LnGx-5(*3`3P{P&-7tVO(%s$7FtabX@8@~n_c->k_cy=cbYym4#OMroDR`EmhP2tfpe^e&k&kyG1 zTTQ%zTRg?U$Q!7F{6)Ctj1ofe%ImRR2znvnB8>?_t4AM>C$@am-Sm4h+Jd))Z}lxw z*qRXsJ$Q$QUGz1@-_czM){}C}ORW&GqHW0h7C}B_iXyboCc_NOG@Knaoh{z?3&s${ zh>LQ@LOVekyq(nUf6?Dqa~@7}#5G0Wd~ zkbh5x*e;JE7+={gD9)rhb5afrAS)<{9#iuVJ&?WcW@_!4I{mxLm%%=T{yRI9QG6`h zUt%9&t5<4Ibsw&Y>VimX7Qm>I1Z8x;@Gv?Hn!^?=r*1EO<^3l`$KX<+uiRzk(!@#j zf)+3ZzkP{va}3 
z&^Z@+_>Dj}JZ~9Z6ge4K{_q840Te$J{2?Ir=66-WyvoJ=fPROkC|{|&Z^Uo5vrS@C zj1&5KrkA~F!NR@c1+n0J;wP5w%;O3$DP$s3Cua8+KQD5Pbgz0n2eD)wPU&|WT6!H* zZj19ULKpATi%gAmKWv4x`?kiboT;I&520NTgFe!MmVfW7oUp$fxFhQN)`K}jZm{V3 z!en^id3k$p|85}<;z7$jS&EVW#dD9?fCXoqt%isd@jNyDjOz=SgZa#FBAb0v2=1R0 zF>COLYpy>a&irxQPBqEQLzj7}Mb?7%&&4IGdVe}IZ87b`-i*(+kgWyka2?F~?d|H_ zA(+8eTW#@98cpnDN`)@l4If7G?ce(KeE~9YNsmDt1vf0gy5zUP@Ax+g2R0zkwvXH02@vl|_5b@&MnLNW{&N!OAl%@KYBVa6j(L$3aRz+Hk&w4nnA9 zF{~esthew9ukzA7S0;@_k2&#z{VB&@5&4{yz{`LRph>^0O%n&+xNjvP>>3P~>mgk+ z_zc8@l#QKoU!J#Se2gc9zuO}is-HB_l~}nyecw{nqcqWkLosau^}&n5oJ63hq_tto zX);yHQspftO07j#oA*cYKDh8tk^9*tJyephxm5NxT7sX@iYs#0@c?|T_Ws2-nAz1Z zgbqhU2BQkRnqhrRbr`=q-G>n^;QP}lDD-?-Uybzi4~pO&V-MH`_t4tNTdDnFEtE7* z>8O30nRFT7IBCt~B=jvJNz0}feXe|an2m-|lEN~st3?|X(y*kyt$tzsZEe;BwNfN{ zn!sl_;O4HVvH(}R6R09 z&wWgT4EDouR%p&vwWwar)dsM?xfC2Vy@hcv(hrKk=V1@s`YgC3Y9<}@5OJCZ_m7|Y zNSPq~U{xvm_OMl1YPD8v_1?^K8Ceg!cFnXo(yZt0`ReA!@WH)PFuK{~Ap3;piu1~# z=t>=Yu{17-be`&;n9%%kg^8K5H89w zWlZa8+^f{ZOz14C@ZJ06i8nP{hbB>qxhCvvc)>ECiBg@~8AW|BKGe*IGXKiT8mh3w zJJ9ha!F&uUp_Cutp_6t1HdKGIwOqVoZul`5_Y(SoQ(W=L4let#(flKH5? 
zHR_Ryuj$gVt$2c>qL0v4u_#;f(ZS&-Y}J^u18=LHc>8($HPx3W+uyzO?7U z>zx{;y624)<)P4X7goT)Ft(wMDa1;h$}IGzn)4B6iRTQ09Q!Vc?J-8blL=l0Lq2X8 zukf_cMLME7Oq zb=vo30S%A~@&jOX?Jq-6N9Q;m_t()afGwax9BUbNbVv+{{@ zrvujFF7z!xV4ES@xUL>L9qm#;%UeH59KNl2i`KCNTgfG(P^N6MgdGZ_z_MV5*_j_WSqcSc-jbOg{42#~A-UgL8vKszb498IkJL z1jV6{s!GS(MV~M>yC$gpry&huuo&r1lSkVo+Y~Po(##FjURoTeU%)6yBchMgpC7BE z-~Oo=e% zQObUc)Zgaw@~tXXvJzPwPuWIXYRk!O~l;+Ya!726pc!L4^G^S0* zJ96%?DtOp&@$mlBi>^;%CeK)YM-E`|kdd-Q`l7tPWj+F4kUx`U7+1 zv*;mtkm)9zB@E9Mq|FRM28D_9)YjvyUC{Y2RQ*)%C~N{&@jOY#S^dA7^8PCZ_G5o4DJS|Q`>wi z_j6>`fXz>WOV0ZO+40H;C`q)y;qFcZ1A-*6w<0m|e!eIdG-^hZTs1@8{_jB%Lz^sgP^J!|DeEg1wr>Rwj19Dba!3nD9|=NO^yu!5Q@A9E_@HrVkdqiAv?$g8AIaS`13I3cob_vPpWU+`V*6AAiHsvpMMSsqBp1rS?)ydy1^qg0er z$YF_XD`YAA%5$YB;KQ*`SnqLd1)`GSe-_EGUTTXX`X@44FoTBH@S z@&z{?>d$p!_pa~8mi0yfWv*{S#GBm^pIZ|3xfJA%DiRHqkIALSgnrJGkrnIy;xqSmo}1$cc=@V()wx^!@Jbi; z{Vx%;L-s&PHm8)f48r^AB{gFX(+{XHLqoH$mCxd@e7OzA79@Yyq*^ju9dvi~PI z^EjIi+ioTSm(F6)r*1uuMs2Xc;jO@ll`Vbx^r`vd7rt}HFA{MC2kC{8a|S zXFZ5HGJPr1@p(vCexHiXz!G$%De-=)f--|Fz6|H8x7h`IUE$}w_7r^FqA;x-29K*a zf_W2B?3mYODBqli5kRCSUFLzj2~3g_SeaUM0iQ!L9DY=kH{73{X(VQi8I7*oaiKzF zP%nmx@)JB}&21+bCvT0Bnw^+q|M~>nkSkId@R>%ANS;^wNwyF&`X`dN_T(j;KJ+2! 
zdxFGR-QE`R&*q}7h8ybLVpAPD90_x_Hw{&y%*F zFCKW_t}-8m0RdY)e!$F(am~iByseGAqH=%>m|TTmJiiKAv|CpBPOsjhLcZWo(8YRo zZ%X;g`YSuTWF1HICLGMZ$&!b8@7rMhA&IN65XDM)3gJ)~ zcmtn8&%}}RYC;gj_;4gz=87RZi1<6z;pxL}`uk{kY3MxoTwP-b zUPSIEKFW2FoScym_R(&dJLYe%qTTopsBh~h$%E*V-H!Us>#+vXkM9C*h#MORsGKzz zL9dS8fX!hOK;uhPvAM~`VA6}gC!-IvgZ#lFr0VounW7c!fT0m_*IV{Tz7^(~WZ|z^ zS<%kZC)gM;qN$r4 zP>V0BRHjv)Q{@En=kTY}au2QTPAlsZpLMMhE!H*Tb?Pe2wrZ zm5$#(R~uGq`Sam+--{63@{|T#@EwXLyADBrjS8mpzkM?s()sEgn zxBcf8WI;E}FlHc#cY{`biA$A}RQ|s5BXT_%igA+svJirChJ>?5Z1)vvUzR1ah*|z$ zBF!Y-v#LY!@wlH}egDnK4l7OG6&VK$Phr0xCa3*kmQ2Rrl6%G@NtCmvK1iFa;m-xc zdV>%f3WJYm-!+ht+ts((u|N=d#?NKfcjR%w_{p?8$fIY+YRG!a5}<}FY=?e&dIa6q z`Z;zo>ELiZChUDlk7zm2?J}ut@O~M(#mBUdkB@QYJq1aq8(c>~p)!6cEfc=JzUYqH z>(!gH@#@mH>XQ4x!2VT>6JYMz#Ft0!^0Tu$s;jH*&JUJPkeskFTnb6kNP6%|E%1;2 znY~c4v9GhJHD2jvxISw0f?JkCJ;O+ z&3k)BzvV+H8A-Yi+Bzq}#BI;CDO^W-+9}0r4RX`U7_Nfe`T&&+cb9b&3O&J+UO77c z%VqqL5rq8y{TMMlF@l$o@>6kUf(eXa*h0$TgCDl^3Xl2Ze~2WS9XE0JzTo+5wyM$r zStTSkBc+3O$e-2p#-nJ9q}k|-)z5QEIm;m5sP2gi3Wxz|x)SE(Ncs(C_mpA`ggK(P z+{8^gVT&rm7!JJS(Zc11asVP&bj?w!p{QK=XWr@P1=FwfT-#TE{(ZnkNVj!m#nn zG83r;?PrQxbS)YS-`PkJHWo_;Hb13VmVEbN*ui_*q^dUx(8c;|-WQ{Ng^!J3BPD3x zgHCCKFzK~U*S(pb?m;hmBZb?98(|llP;Ga}TXpDP1=bwtbcA1s%zMfVd$WjrMUM; z53QXou%fXT>MCUxIw&Mq%W48d3-jrS-y)9B8qW#n*SZ#e)aHzGFlhKv38XT+Fu!~rUgPJy)OJR# zCz5f|K#cha2w8Oe?PcuT+m5K|b~Vv9)AGk?zqZOhl4vSSCcI%!eOvKh^?6uV)|Zo_ z6GFV}|52uThUb3C`LV-EjL|?<^Tcn4H4m<}7zTU0w{JZ6O15<9IV_t_*lkJ+IYP)g zGJ-Q&##HxGnEB!slJ)YF4=#<&VjPQfPE8l`82Jy%W++fx5c;aQlsXFwjb%msl@cbu z8|yme2Y6D7EGUOe-I2r92_&z2WJ~}oN$<<~oa*`&5e0MxV6Af9(M`3$uG5$VIsrr& z9kIlzvT}#1d2ZDkY@HQ@=CdnEge*_3NXKc={`jmHY;{xQyy42imx1Ztaj}B#$%CQs z4OJi6YfSGKMw`$GLy6EVh&F%%D2RiAtwjEjI>JPQ+}57I-Y(+3x>kRl3|xRP#00+1 zs}a@0Na0YRAQ|X1ye)0x>Dk|+Zp>ics5Nn>i%&};p8`)}eck0Q|MoGM0vag2G2qp` zPYxw+js>F`V;dF!jiZPkb>7){u|8Y94P{W*VMPp3Z0KBnT#(E-+hp)>tlx02gokk6 zgivg0b!97iF)PI{I!o)S^xCHBHM6z4!z883oI2~Ud)7~U20D3*)wNN4hr7n(L4BOE zFao_n=1^$zCzSy`ou1iSKQ@^ms`cAvNgn?9O*Wf+!#+tmfbl{tGX 
ztYwfBzCldcCI-dF0MIiUN(QxIcCGKe*mqOPm-htCXm!vO+-I~d$sDdJ@ifJ8ILDuDN<5s2^NXOr+EQR_@Ig0}m5_Viv8#8aHi$6BWMt ziNo_-Ywez7A5}<#__aS6ug%l&?!VWQW&l>-_`CfH>e7*nSw)`3lk{2gY5@-sd_MX6 zD(_0u`dZ{141KjDw2_U;4BKL1u#~X3dH+7Tx46CG5ry7fBNz-ogio@7yM*dg|Y1PinWoUlhOCBTOSlrJi`~@#{M02TjV7mTJAz zx{ODwR12%%eQ#98jF@3ca6*Hpjk99#^?J*Il7Oi*8zm61Q#|ZXT`0Y1^^Yg5ZeD;^ zTA6Kju<#tK=QjhTxsf*(h;qRP&C84TdE%q;b2-F_atK849Z4r1QV#wJ9n2XU`fE{g z`oMdNW0t2n-#{fi-#XzmZy87`SMfhWk#drEBo}$p8}_nmwQ`6!%3Dey<^t#Os4Ctc zf*Kvat}@4?3II1Su7NqqUs4-=ydTr!@I9%CZL+Ikp!4f5Ax4FB>BL>#oR`hYNX53) zd-K0Wqf3d|gkV;rU$%po?DuDxIoCaZ2$og_B$CB@X3;0=?$e_MXn?y%eI46mO31UC z$01qPO;i=@fOlRqIw1h=|7I*L*?*Hm5McMJNQUNnBLPepE|GP)3Xm_!R99@PAfk=n z-r}a{@3w;h&)tdj=Jst2TI3V0<5hR$i#NkPMY@{dq(e#fj`3S+-G*~Lkcy<^^Jjk(TCC=;L}4VHopOZ@FVOoMb1^$p!qT^nA=2)E?6_WkvWJbdqVgm!VBJARm+>f- zYln<6x}`-iDuYqf{q|T0xzf}Z)KK03^N|Bco+64dj!|^tw$l<@htkt3{$DF5g#^=B z>U!jnsH~?gaO^%{@W_;~8Fk-XCt1G+4I&`O55U5|s}gl1=qp_$rNK6Fc9m2i$0L~h z9j{?G-+4lz**>P8cxMG>yf#BJAnHVujwJDLB<;iIcSms$ zD7i<4;iOd%QS^)8r1e`gZBQg&@&@Ss38)7fswQ(v(AhO4V3h$LmnjW;A`kfqf2U{U;yu&&< zi8~1t4S-7xm^PqNv~F^QzZJ%<`(`YHq?8^!qrRC+Kl4}#$OyG^S82nWOX1e1A_b1Z)^NrlTpQt&>YefV;31!WRw-M3{d3D0c zmIp$U%-G_q<;5xwSQ5oVMFZ&w)tyq^8(&_RYhXh-z}Ld%W~=oR&=2-mdsCN8MM{>gY=;c_|=p|e$h^;ETzYm3qC8Gr>rH7!dPZ;LZp``62}WE zjKY3c_sc+;`oWvMXZE`3UBaAnchOSGC?R8`21jD~J$LOF;#aRaXV+=PQfxUmxz*1j zuH1;V7ffU`u&nI*OwMpsJ7+&0!zQ3V*3Z)?OkQ8Xj`~6`!kiXX=kDHbDH;ivEuUuf zmn~0o@DZv+whwF6QexK{n~CH6To}?A`95$@*a$gwNy|8YrSC_*^_f^|XO)dFs;pj% z!Q@8r=y#jq3wkdPl&F+0+!4nRzCFfXd+^{lV-|^f*Ivq#5((5<>vQNWkcs>!mNL$$%`TYCL71NPlJwT=1)ZZcz79!T!p*C2tcK38J(JIW@a zwe9|FjBn1l23r~lwp$gv!VK~Ezt;ji_F`Djx8LV%z874TE@)Azo8`!&!J&-Oj>H-C1jY})!3Ay~(Q zIiZ(3n25(jdNJCj0^X$v@yo^wPX%9=WnfHzFK6>o!r4PSIghE{iWRzG8HP{@-x8R7 zu{x+t_ndyTBaP~x>uloVlykB=(fj)2k^0*Fzy@^PgiSO8?Jju%3gtJ28`85PYQsN~ zhw>8z<&zjD4q9?8^Uprd@hpcq3y3xgk(At<@KKy{rO>gg77|@9j&Vr+e%sAMKz+&n zg7guyWg5{h#Y~9jRz$;I7{A7wIAvwPDzcFcLhxiS#H#7oG{ILeW=j(fft@gU>ubd( zxN%7)!!G8%eYAK%)L~b!a#7)8H)}q>0i5oHm(sl9qt=?#!x|cMwj9T;U@F&NE$%bo 
zQN?#DemAg&NX&WympgAHa7LwGnZW`nOx8Br`Q{=VpE3ILP2J?2|2*a_Yk}(K8PARZ zs$SpP#Xm|6DbVuqN_nepe&lNpT*#fv5dUkb_y~-p;;Sfi z0^u`N9@}mcL2{qFv)?#jxr}<3HK^vL&uB(otEBrTJ9;LzXilm$$s zj0BN8laLj@h;FXiKaeD0IwM3l#$pe#F*^dm{QhF(b?(!;QD5>~L>QpprSl)J{=qYF zJ?2f~>SIQ+V>~1dL*sAwjDQZKG7Keny@3qSpG=U7xpf*ZMO`sT@^r%?VRo!2$vIZU zq{R(A6JJ!EgSuBgHt~ zfGbLpPxXBAGaZVpAaUfrG{U?`5~B<%%}!DMa+)Ejb-Ye$%i$DHf6JiNkvKyqx06X3 z_pHW+I5$88H@|gh?(ZPVx)gQj>SWOG7O}2`@-<(Z)iq zaewfsR;gg3y(;aquf&Fkf(ijIK>X|@`Q49~|N5m_uWhI{+exORrP1W35|Cu51j~pb z+rJr)uiZN2jC$(1Jm zE30UQ=|}oN8YTZ{PplcObWw3~Atq_Ls%I?~%%34!UQpM=de(+N(kq6H$shn&iYxQ~ zAC`hGW=%drmN`r_(f!`qzZZI#eGtOfmcK_;= z3(KGK8nf8s-YWyS%t0G#2*=}SuXX5Sos$VxLiSL*PxGwqIsd=}Mw~HZ2$Ce@Ti{`C zIE9^Ki!`|m1_o8R^p*-u-fl4rx{I2PrZ;f;O+;_|C_tOjpYc-`8p(UTH(I^3bcFjIz3?6%wWufy)nj<2-?2KA*9!`1|2PuD2)*zRr zjH&c0|w1PqpIe;D7@J$Ts-LXgo*Wy)XqY5h+59t?EPU1{HXLB+kPE6oqY5o1K-KHiq-~1HjOtnmRUsDWb2ro z8O(8^u|5)%rv!StWY^WJMoZa@E%wP&Nz3WZSb1!J|H6J~O95`p7VG|9**3v0P5I%4)u<>y zB@rUaA%ar=Rbl&RCKtUd7}vy|`gFn2Ibvxwi<+^>zPuwVq~rZNA5C!@=*y3uTH)GT zpTht1(cp^R{yPfYE4}w78zxLlk+ZRDCP1tV*WUyQluCGWlOjA~Yudv2b%8xm&r^ik z&>uRg$`3#F#H1^dvI%lE1sYwm5~TJ_Oj>8yr3jBjn$U5Eyx+nHzL{XAx13c0^t7l< z4>~>}u}7=o4+bn0{wE%v?VQ9&p6&5Yp7tLOfE8!IUTVDuf)+}g<0u(^k*PN%iH0e3 z3lTLx==ssvorKYaW-?F~RbTZBRECmkx%9#mZ!PP%5> zQqq$O^*21U3^+X$klWhhGtMt94D~7I_*J!y*L4f`t9gt@6je@X%-XhM9=|&BxDn}D zJk?#viEaihumM#tlCs6e0bkg?2h=dnSg$4t`?7gP3rbu!FMPV|L+hT2d=j}+*n(aK z7|!i;7lHP!Oul;;s&W}CM6G90@**1kc7-58-PsX+^hKG@flq!6g$5m-Zpu1wc731y^$$ZL0jB*oueyhm^%ls^9d=9x zW+hV3#{XP_F3+kSsw0-u(ne-xcKRwuAde#1CkbIA1?4kcXva-LJD5?G9HS<|*#7J( z^ZAB2Fcpl{nzH8!#e&7#KPB>O>*MhuV!gL^^J|33c|ovGwyCt?26%@y)?x(+t+9VTrLlRgQS+Fy!S z49dSYCtpF>Og0tcZ_V4d15dhSZyj|O=%Z=B zA0l^5{)vIEvzZFxOZZNqajR=ldzxsPoDgpQJwK>$c-;-s0;i7$y^%FE5c7DtA_UBvSP=aa? 
zbTcGa{-9$d_e5tzQ}4zj+jm~6i!D_5jXR|ORY2m1c5RnWj`(>lK6yW)1#cFRirWU7GR@qZ3CPBxBlVYc0kL`N{&Fduy3rMA--U4L%aI;TpFqtDefbg?^INC>K+ya6 zo$))cM-$1^p1$OhZ2*nZ5F_f>xNN{&Hn?>^s2BqVkOOs?i=k~`7Y@SWlSI1-Vz?)I zL-B1O7LWFwvm?jNws9Llfc&!ts3VM3tAG+(t`|^7Ng{J-oXQEAI7Q z1?>_2N7n$?M0;&QeKLFW1d$xI1^}(SL-nq@Ko5yy?5Z_Hb2(p@Jy`-TH(>BR&{CV+ zicKJslc>)I7!bc+Fy-7_&>ah=nSM&FhMz<(jR8TCPeofpJz@wTJ?{DopaR9SC**Mu zV!0n_xmVZ}Ib`c_k;b{neCID%K&Nj73SNhy7HiS6GPpdon6(4i*0P`rhV{?3Nq|$E zCHmm*J{c`GV{<)3&AS?U5D))gq%rGWP?JZ1U6?pEFrdkx`S1aVyd=*8FV7hRFBzji z>{>92=7WSR)d4l{XS4jsDp#q^%HuKNwr!Qm=LyZc2f91-$d3k^Jy=;J0;GWJ>yEohO{rE2wvT#1Yu%xJE+4j|D zC&TaY$xo@@QA^IOZaFmFic^A>nR48%7taiR*@A^KgS?x z88+XSTx^fmmEScfwAYu^eV)Lx4>~yiM-<~jW(lf(&6m%4q?Cg}Z6V?k(6MNzePTww zNP^Q2dCVYNcMQHytO1aP8M%8#_%1Ihjm!4Uc98j#eH^NoNh%@8`Xrd}xP0cMGU$~@ z)lW`sQwb=75OgXLb07&;!e^*_`j-t+e-;91LoRbY4ULPh(FgB2 zCp}^S8v&DaZ>sww!=UxMx@5<@-zNR6v+FSg-s@@fD~kEXoG*_hm#syjeZR`q}s5#p3UD>Sv(FMiv+t$8sIGXXnIo+r_c#9AY>v2{**s%g5n6XiYU-L5sBR`zB$`xa;Wv> zbZ~b(toQ2o9j8_*fu9#t*Hux{(CfY3mh^^>0lr%nppuG)tL>oInloGq8MhG`2uJPY zXWtu6)fLsSbtG8KYju)Med`1D6*m4a4-$8@uf=5GP4Med1*P);4ulB8{tJYp{eJ=> zWt=A{kE}1D{2n_lCl&7SGnOX@kNOm$prAPO)aG$1@&my(2RglI!P*N0+h-`F`D~_; zo+w3q)OVo)+qh>JTaU$G*@T*jn$lool+JiOV^E&*nwM%W+z}y1eZhkb^oDMEzWWm< zW2*v}fl|AkeUYJgHwj%MCD9bHSelUP#9DUws zavYCTs3Z?NedaN#_%T3|FPsrMhjeBgL`p<4OAkfiXVoyukI)}i-v*|5c_#%?sRNOk znw*5{BH`lr4VMx2stw-%xH3o*I&XrQuBp6M(3*K4(p~A~Ft5Y=CGgn>F+s--0L?CC z+4Sczv7RHc+t?g~-k9EQ6|2uU1ERlt<`$oO*!ctd7GepE75@Snf6N}Bf5Ja#h}I%L zcCO>yhou-ZeFwjkrbk$#o-Y64wS^0x*b-j5}3^)OI)e^_TdD3k}kX>4CL>b(gnhd~fgwfj! 
zQU+i99ljs_oK@oV$~#`qd-B1GR7}Pl=DeVJq!H&21zsY>U|#GD<#G(KqAT#ph$UxUV)~Y1`Nc-BiP+`*q0B7Hn>vvN|W|-Effl zjg?s9s593wWyPN`h<}3lobF351)--2$^wi1UsRI%HD<==a(C;@9XehA;XG-X@#_(qyV7Uw8IYP)o3>teG==h?U>yAJ#_A3I4QBGFcB$fwzw_47F1DogU?Gm^H1?v&s@ zsfarsr9Wwt>G@L}p1%jGVZ~*A+=ZON}<>@A8vhJ1A>D`fx01)fb);18Wu?bXi|+yIZg=GYVm3=*V)=Ma=-} z*;|4i>Gb}i$`cl7#eUq|ozT3cU3wxWX%_AQGaw-Mt>g!OwwVt(&wklr-a=($fY?0A zs1I))iU`|^=%uY^un{?$K+A=-I)10KlRnC%7>7eIf;2vTPRq#w**ZQ7}c~zLpgkBQ)0wAtHMPB}^>=R4slNCdS zOC6vB5sSX#iFv|$1%u)TNmscj01#(UVkYT-}{!yLwoBwCiqP)zK@t z%YC`iGYg>ViImz`#Mpr5|4Sw#FRz;%f^w`u9dvI;JNQ`8CVbdPjQTIC(KBP#;vgyWdH#ZOPRW?>S8=!->)i}#kCyaus+Mxr^Ej#eQy zhG)lVs(QUJyQS5!k~ShF=+>>IP4;yNw6z(R^G3 z^dP?nCYzQ6iH-g3FB}Lf_856O{%e+lej(yK02U-z9j+&L^oC{g=|Uw>ecJwQ?LgNu z>tm*`(F&D;+SI~_jJbi#ngl!X+CF8gzSa>qYSq(uW-)CMy(Chpa(C+=tdq9`YZVQZ zk6MkM~$N-rM)Q-94#ic3uPZ_38)kgG<()>Cp>u&i7@!+PE!s(IF!ZZ>gfkZ_Dv}ig1KE=;x*Xt{&khWlM5Z2XnqDL4 zhLrm*)3|gsLchaHYRRL^qE3+-L}Q`*!N)RPAv{sH7W%kj;(%oAM|qrr+;vx*Ro-~j~y{JD7~ z2dvO!|K%Y_wd|) d%v`3sX6U4!d}R7oZ0_q1ESH`5R9UZsA)&FHD6U{9$*W~Z@c z!QU&d{7_y0ySX~h7dun23*y^BcKoGD5K6gfCO8=4)MfwC8s2PCHOgBO_` zAIHdt&>yRo)DKpFB{a+2$vX*q^ROJy=&j|ihDzeVMw>|a#2(A*1ck&+QAkgp*cwSPKL8UPWg|86hY64ELp|bZglKX?MnvzX_@ZO&f2Gw z;fE}i5zx5@*R-^=9P2PTuHIiX(^44|RfP6kZ}9CkKCZUd3Z{PH*i!(yskHrkItSq6 zC*la8y=(L*7oWZX5e|?qpI*;Gk#AOmPB*?kKo(g5lVIoi>vuBBxz*FCR!A>H{ z`u?2<^1$w8rwhZDmpN-RV$Zq_#i{DsBv^FEm6sI(G)#!2CG*Tnh@f3h zX4Rpfx%$(iGnJbCDBkuG>0zhuZwvjsZkUj@vs}S7;n}64b%m6?j9{niDG^CAFKk?wBaaL<{K7QVO=|#pY;k*l z45sbZi&Uudp(>Od-OCN7CfZ1Kuy`&N z0=_wW?3I@7DDFp7q1nfq1wc z=6BnZ-f5PH^$B(*8>nnf|7L2*qJ6P+Q)wwqL zd*P8HNDep*W25KI4uY);M2tzB!^4z#iS%Rps2SzUM_1y~!({wBwgpjF`Z92noIqzT z7TD(o>iJ_YCNu!cKU4xNoWFt8@tm zr<;QPc4njr!Yhi$+CJV5qqMQTYWnXifMByAGn156+$fPQ<)5!LevyVFuoK&L`c@^J zVnUx=F8ZWHbxF~9blUj=Y~oVdqrMKc49=GO2M!&^ z3z&3hn3hKG!zQmzXHBRtI37DuYp>bezs(H;(?7 zLjpufT{$dizcK7-N27|Ax) zZB*LTse)37kjM>y;*+MuVnKxS0RAjjn&Z}@EmNn)iNsVR7LRL~y<=Oz=zBXmya(jz z3*x(uscmM76p@ZGIx6(gFa_XN)*5+Pyu5UoUhoror8G?cisq~j5Fl-jcTHH=O064C 
zEZkPp8cK7u_~5Yw6aTi`KfdWSZv*=kKhQ(80tzhr`Z;k+UbmV%-fto8C!tLu-Q@Fq zEDcqAf&F5QyAajnl<1;)Pv&;KeL}xxcr!b2V_mE`uBhKP z9dmsM+8h+oD)@n}TOaY>Zh>DG`>y`nwtH`(KQUKk@qS+hT20Y%5ky` z7*HXH%;^buI)(CivUq!XF^rv6*#}1Beu&FiWc?)R;Lje=;Sp|1JY}lccS9^rk${mM>s~L%r!BXErQ;}#KRNTwW7E^y*k~dCITSU92BUTeD z^!qH>XT_E7ZtWSSBd3oTy@0y^t(?se*`Q~wAYCF%47+j|!0W+y6@anNcOTu|&-sk@ z-6U)mt5B`X35Qv^a7hj;j()RYZ66>0#;lrUhc0M<04YalODxB)@e>fOAer{Tr&dUgA z$n34`8R3Cu?&WU-XH}VU%F+bbup}s2A@Tu3g>tPI3h(`KeRV}>J4gZd}2;MWRRSwp( zFSJ`236U29j{h5yN&N(e!5uxUqS@wLA5v59?8qT;JQn-CN2A+vIv+m^+CP;`H&Ok& z7gl565ZRZcc(4GISo8-V#`_F+=UWPw9B`Fc*qTKQZ*rph6+G*+J+qkQ7<|`tcD0_P zO%u$7vD=>7c45eFdEII9rx(4)pSv@^YB{B*JtFyk=z8yPs^kCvKaxF@ot2_tE1P4K zRgzS)H`yY49GgN&NHRiE_TG*?vO-x0$KHD#jx)Z`Q@y{R_vd$ge^=MlAL$S0d0wx_ z^D*xC+oSCTwd!m(_o#+S*{f$?X?J)tX>EHh#0wAZe->}vD5bfT*14i?+$#P2sWDp2 zr=4us348%rQ>8Q~8 z)T<)g;jT|G{%k+?e1?}ica}pkOIx2nUqU@ zdsT#IuIA=Dnp`#d*QpI)JYdYW`qZwBc=#N4`;(F#*RON8URM6o({$qXG-%cT($lDy z+nkNOI0Cc6Lov*&4eM>FEA!{NJKYx%I?HJe(1sP8V@$qQnC7M3F8b@~#7w7IEB5Wy zE!P8Q3Q8_nD;Me>wh$b@Q6zMGIs03=B1+3)`Ui7I3NeXlfXcT%F)yiqLX#qX!_NSO z@d&q#tJ4~4!(HECZlJO#&C?b48KxuP z+_cz?o#8mvl(usb`pS!TcjN1oy1aXl6@O1FGycr3EF=rVWT^%-nzzq$n#?lV44n^6 zdakwDf|*5>O9L%?$t(E=PjAQ5f|gU=0TnsU{P!nBzqR)`w?L{E-~$4i>Qd%$GKfTD zFF>=2Mx+fv`p?j}8<%F2cEnm1NL2}(UFip#HWMPnz8@QR3X5N5^D{^{KF0{Sdhcg^ zpK$8yf*^M`@m9O-e1=@K5q0D@kp7pldn=pY$Ezy88RNG1gWb0FZ!N>c zGEV#SJ*2V(@b#e!5gukCbM#Bc@%Q(qN|qP#kI;V!b+~gD;xxhwj?f zmg`ZLHGJ+Bf=vws49aXK6*qOH-?3+U<>VdsF8l`Ip1f~kL<_NLu+!e2VqR>Y<$n8D zp|kh(rmWS(%dBUjv&3>YwR3r~t%xO_a6}wgci5tkd@??<{+5V!^gTY?40jIb1ujR# zqxQd9+b`OnFZYutMY=9q#LA;yP}N2r-c=4Pot6ITIo=8p?K{_~@2@hOoMKI4n;yjT zfL#*1a68y7^n-m)58>O@A)odFZ$N38slw22ykyDv04?boR6RxoOcXG^uV*PU}&sHlBFoKvnVqCFzFPZBd;!ft|vb9j`;k zU-n5tv$D9vRIJ}(+wm76zX7K+lz~IB5q#u)PfUtI^V=AZ${}-np8aXN{JInb`0In#gXxYNqlNkZ?q@m6)P#rv8L2X@wlAHOK{}1A z@DDvgZ8yX9mQ@8nMF@}#j3a?>c1a{NZp*$Tq1WJ7ojV{&BKjRrIsfT>hnIV~9It?^ zlR;|mC@s1t;((1>{R2vy<2*F|VvdRjZt(^_h3ZPm1kE93fsA*DJ9%fY*W);s&;1i( 
zpj8t!RY$ECx0D1In>l8z-|J-Cgqz6r9787uI?nOVH$Lv(%iD!8qOv~Uz8FD;-U%Tb zG{3g|b?(Whq}5V$o^^B6&E+O3_F_=zP~xomvid-pbFkAZ+W?)xG9^!Lwpg$BAr3LF2I@W()Iq`aSr z7G(M&wy9=uF%w;#r)NDKVF_M;RgbYYJm^O6&_i|XpLTBC6^SO=j=7^^Aj;P&JaWd` zSv?Nr@7DMPr-7aro*jNk9(T`>n=!`Dr#Pe(t*%vV#?YTn&5CEVxFoLs?91oCbxP`rC#7YAs*CwKp+_S4r=t=*iMpHu-1pDhwZ|=`6wH~dzPuRUeIjD(6;5;6Z)~le zO@LZtaP5o4KtL`RQ7glZPlVpif7vIBaPKEQf6S(rnaKan@@Xyw9Xdb7AntDd5BQ|w z*dw6DTCPd;dfoa>|~D4-rH{XHp%aloiY8giyhzX z6BqlV=T-ppOi=1<0>V_~9sAL7YMISPI>uMzU(#MzVr-_UBzvi!t)&W639&VMb?+bh zq*^xC^#8>^k&I1}zFbVwPrTj5eZz-*{kBEPIq2-zG=qpG6MvesfPhZcm8kgE-LVXa zwU8vk`$ye&zxVOC_J1B~a1+_%My7v9)ods5TL#5S4iDmQ%`WWOP?g2T=<%1Rw^jbf ztFtwoG1*c9{3ODcg-hxw&NuGgy5#ih}rb3iyJcB>FM`%IM#%{ zpHhZDA3FHk@t(Nk7(;b|#6jy`IvB_d{zO&hBDLXr-kc>F=taKKAp)b~zrBAE!n9p4 zuZdn`>*?v~(O-A9)W7ru0vWB*o45Vl#X9ztAT{(rE`^N)@$rMPBXouBb4b8;$ zKLaz!BS!4|hdm~>G2by?s<<&?=J;luJhji&M$1p67!x3$DUZ!~#Z6I0xKIT$nVFZP z@GW~2TwUW5gu2XaRzRlE*7tooE!Pt$vs&BhJ2RITUS8Rgpae$zWszs=f(X zGVa>aq~PXI&95vhCsgmWKLps;eF+jD`{Ml&{rbb7NaGE=f)OjgZf$%2^6)K9bYu zWzf5*^y3PYuc%$7Z_Z`(CB1o$RSH2dLbK*x7%DNq&+G-J0v*j8$+t;yHE&E?5q76% zPi!s$)0>}NYsMA+KOH-D4bB)dev}D{>5XMDuy99i$TM+MNQ*eVXfw}Ti03U(6i1-( zI)aS4L;B-h6Kpa2QQgXTk1DL$`0!Jsxooq>i*rkHL{?{_2X3Bp2I+zCiFL7cY%1eN z{>-;u_CAYWJLMvE9})4CrC4F?G#dVKRI2>tDyS^4fiQhQ9PB{{R}!9&qte)N9$wG* zZDjAT!W5uJJW(Kw_k04;C9q9-1-yB7ka{u1jBb^wEYOTV80*4gD_<`Apl1OzLEB$} zxs{&d9VN!Tj+N-BcLaA5b{&hl-Pl?oRSzZ$%RW^mpCC`NjCbRXE_BUu%TPi)quqJb zAf0Qbcv;8(y*E(V!x%8B|9=ru*gOb=MF>!wtN?=%DCBAtr+Fn!0&MAv1yVYjt?&(4 z(BRvTo0SoLWy!GBZ^uf^Ht^hz6;&tNqaY2RqDPby&o{zHI-j58H#Tcm_JlipS2Yex zP8B;QOs2wG(&%ryajke?J70Lx#wCFECtdM>hi^Qf2yiDbMHId}AMDEQk7ac)aFwRaU?OZ{&)im#~9dZl77 z$JLNq3jxS_MCA??=L>|4)sq;cKsmdoLuHEZ7NDYv_Io%WTC88))`M5D7kICGjE2H) zz@6=Z)HlYlUNhUVdP}^Ds>;BzGSRX8PPi_q%}-JxIjnZ|aNZmKD3`*okWt<6!l-NY&mt@2P>I z1}%aQE6Tou>LeJF)Bh%_#KD=NZ;5+y2o7R*N3V{UiZEgW(r#aj{r_jG{FTsx3+VeJCmU8!`#gnj07)MS@-K301deu_Jsa;d?6NI z=e{xJqW^Qi;;Q9)kEnSnk4HBqOc$bmv=Bcf>2*=&SJjkC{E+0~wib@JaK>fOehz3Y 
z`CLE=U<~;U=U;kh#1vJ?kh7NZmxPO`D(jj=?F0SDA%WD3|#PddjilqH-ut}vMzi3fAJAE zJT|8*YMYazpQ)lV=b94OKMgS{^zAo4*GzNo2l{_BHy!#cirg_m#JekBn5uMO0gu1d zca5JgRU3TLl72A1GpK%|J4v?q7g0&Cc?CtYh5!OdSztv7%<@M0~5SfPxpS^=l4&Nc}}x+izX4K zkDtU~=lF8YHp>S?58~1w&?bMNY^*p>E!o_3Tv<%z0Z+Z7MLf|@4*TFdhyUPVkY*9% zfd3NHb>l=#Ox&Q+JM{Qt`vJ<#TvE%}FRz!mZ-qFdk$-Z!PkN4k#j;D4vZ(P`zy5SN zb!*CPurgO}%XXpd6{%oi+yz~|-imKwIxj?XJ5tPU-P604tYgB_GWpJZ6vICM1njEG=9-)w-c(nGPc@BUHmEzVw@qT8>V(D+0`DgkGCd9f#yzdl` zfg&ZG*yu-||Q zcAv>aHhc!=>@$+;fSmRedOa zvW#QT;PW~9tqTXLs#OygcDFu})M??=v(*2vN*FlVREb!&8#L%|ylU|MVg9v8_M>r) zg6^?Xf0U0vFu`voumva*w@l2M+rcTin^3DvUJxP>uzJ6DYSV&6#ByaKxT8kAJ%{}* z4DKbS?UcW$bumX|7&M6^ZgJSP7JUQ_Qhcn#_lbYyw;^E*LpV#C31GJJlV6dp>x3Bi zQBJTf@a*FmBH+0=Pqa_ueFY4OS+NHYoLhIjrKx9U68i^17~1u$&qiKM7#<5YzuDI> zzPrDfSnRiyxn%R{WjLD}$*n0+Rc-c(4OS#Pl>TP2*GXB1FEoL5V}?WJ@=>nr(hZf6 zZUu|odIZLaKk^h4aJb(Bgb?;%3rrp0+S*FpH{gEc3EmbN`PkcRmSPg&qm!S)^^PUj zf6h^?hE0ao6hClsh%tIdWP9^yaa3oULB_1)E_KZJ|5DWyEPtkH{rN+TC*D}J-h7Oz zY_Wy?YsK86v!Hpm;PW^3AxNh%fm=EDt(~#EZJxni$t&Qi(A20sn0f_ZcK#{mA z(0>1k8-~#r%Gc4QM2;$`?9CQKg}MH(O5nhNByIn}fb z$n3}t`gF<6%-w>Gtlk&W4Ol-*+wtpaI_VJd2{>(Np)8}3JB&+nX(j8l|D1d)h8qyY zRZnl%gJo-*m>oboZ`^WpE?1_OYPaCQfPe7h*}|a*aW!VApnlu(Os5W2kmKeG**uZ$ z4EG$W;F7f-F8_9BF?8!^T3&(n|w zhOp^Rt^dLSEY%x+9mA{8xa6x>LbKngjDomCD6)QUjWdu71DiN12BM`f8J6|D!otGg z9QztRnhDn6Tf={!Mqx=p`W`NWrdCY`-4M=kfQH-b zWOL>>3EpuVDhDY&)jkQUB@Wz0bvFD|`IP0m;34e*!kc2M0bj(~j12`Ne@P>92F@s; zU!i!~dE)p;n?=c#LPddi0xT?K6YI9#UU2--kLka{ESKuXT~rZAS)hIm@q99=%)cFZ z<+)-+Vq6@QwgHuisS31=eATlb(Kg|Jl^Z#tY&Ily$9%Iu@j&Q)05dOY5U=@ zIUgnDq4msXV3b@Mhdz{=}FXR90z zec(9oNs4D);<|_xc@RKJ?+JIm&0%-`yz98%%OTX}a45mfVVx<}Ve4Kk{*j9(>D*@Q zRF?os#1qAboK<>8!)sqIqG#}dan7~vOM<$}%#-+36=oU#mSPoVk$3(T<`pq-I)9#p1S_;@gu=sNcm^_%e*me|v(<_opJsX{#X=FvJV9v)f|oeRmp>DudZWNg|hl z>9Uc!(Ut{%e5cqu1}`W+=h9`=7-e}Je7rXrc`^KB0T<)1_RAR zBsW{7_&`SP)8&GeT2^ex2d0RIKiyVX38&SU%wr~fOMakpvv~!O4$fP6NdFyDo~t4J zR+<1;iytYhGY9#IbYtgept~I5r$j%Uqbd(!vD7=oU!FtuiQGh^ zg$BB^TR;_oU^?{axo`Y1?6_y%7?DY$aHC3O9-RfHUs+ee7dok2 
zyoqS$ARSrrQhdes7STE#$Q}4bb=Xa4CXOTj=x+AMV}tVvkI4?h20V(!W!pHt+HRs3rLU#+}>iE_H?Pe!PAk|iRYTOoEACqV~% zZGS~M-rvo;d0GC7sKE0=6SRjXcioZ`4nN@NCynI|-;{uzYGwz$UwbRFy?d_`&4r2n z#bqELVO+qM_Uave2~lSW(P5@QZkgh7R+L~_alldj@m?%+D*v+kFKrL72|F>LrXDvN zLL&_$;qYo!zUHDWe2x22@t@0G%fkZ1+Wkm&ujE>o9#M8Kn`M-9k*#t2T~Y7TT9HI? zb=nu95}T+?CyCk=tnun24$O~MuAN`M^y_MvS`&%>^UQ2goo1H{0|WMD9Fd79NB{zC zLq;)|3GB{XEB<_vt;}Qf``0!PUOQF%$pPjKtMN%K zM9wnhV9?gl}jqn)NvpLFlF~bGi_luGI8)PZ znBgKEg!&vL770Tz@;{$-S3B!>4se4V49Fo3ap+}Gs8s=gXZCfA%LeQq=rOM^x|Hu`ARijxwZ1X+vv`H(6Su>8c*dghNVmzOpr2NO zzp?BDYlt^~nxn_bChuIQGUG0&i@f-={sOqQBNWoP#(r>};Q^$#jQ7YlHWn^T%32aE zd=ZzBpdsUp$#M2uVB0E&V)Dn}o(TWx^BzEa>=bQO<6<^PR`0Q;?~C7?fu71RuA$Wa zIW%N;n6wmZytTE34R_B?^AKf~Mo!ub<$8nra1x{$JK$U8uTfy4YFzAjbm75mE+}X% zi|q^Lf|KW8xnkt(zhSTiu2m^s5h@R`un)drjFg5Xjy((!-iblKh+9%J#wQ*E2336< z4}hh;sCJpvcXA|sxeM5pjw1hj2~R*q@~8fc5enuFe%OPd`b*K-Js;!L_v-eGxjFt% zjGC-`vQ9zCUQJpWQUAly`5;GCo&E+T`Qz-{uur@E^#dEy(A}Ce=Y9bI3#EiU+YKZQ zFk|qrSTZq0kOCUN1QoUzj=Z~Zub?$(4OK5xp5~E3H$mGKw9kkCnP1A>jcTnPT;yQ} z5CQA%U}%&XOSFqV`8veB3!pZnP&CcZU5X6UTbeexL zyTivS$})14&56llGjhWYf4LvSQF6O%`8F7}AU|1QfQV)wCjBr8KYnpz7_`VE@L3cs zP<*)Pw%bAiH<;OH^c^ivPb`lT*jcOFC=wIIKbT8*rSbk<40uP%K_75(^@~R{0f`p_ zt55CfSS%E6v9Yx!@GyHXX-qw!)=4{$o%@l^pU_M5-uUd70R6QkY&clqGYal%$>Il8 zw_VI(n=L=ej;O49^K@UG7ihGxPFA8MO5A;NjcU?+mCk@!+B*<^biMC%;MU%bUM1t< z!~J`$%*si&*RcavMpTWq_mi7yE$04-lN26u#dW^HZRVy|XoqYL)N3O0&}2Bx5ZrS( zY?ld}nuWQKRdRa>SnVWWvBr)kIc3=!Znnlh=p?+>PLQsfeo>#*lI${5VHR>-Cw>3D z_Cj}cHl7JB(nnzN<6#w@{)L%`P_+9kU5x;` zVhRDU)!*j6z44t7P3xrmDs1=O?F!^3ehJcWFlWy1Vd)DUia|n%6Mt`1W@Y9|dj4zq z8-UL3P!kKo%ZI{s&NcpEC?$Sp@;D6YXBT5ZYu9XkgKYtE`jfnVbE>oMZJRLlIg`?NO9-(zclF%}QwT%x$k2Bgx zd$>|Vb^epPX@N^bk9Y#**Hm|lTbPb6PSN9veBz{u&bBkS>m3e5ZD~{rRr-i*5AMERA$8NGwfk3C+yyt#tDmEp{P<{G$miC8LiD(0lE8p@0a$P;R~ts< z+XAz3FIa5=Kmf0u&M`V|Hy3PTx#F~tlM6Pg$s^T&KFvf!RsyliquVe-EjJ&=)`%OL zhYX+Dd}@oIh1H(WSjAD%OsuEl?!s{VxKs-09%m{xKlwO!rFaEaiiO}IKDkPa4oyKJ zlE4QIGE#*t@K9R{D%~;8e?E#WYF7l6-0SG(?@#k(twoPw4+riBJWf5HbcHS$=S7Za 
zz^_v&mb|Vb_)X>MWUIDJTpjtZ%M{Lrf)kiFtoz7`Pv53ZFRr)S28RR|i5eQjGu79o|+lHA+Qeso1aGL;q zgq+PXCW|82ml$5j(28Xu=rEF=C_^(`OQ6I7spT9*6vK0RpBx&z&e;Ra8*e$x@a>-e zvH|>8pwzBX)H3+T>M6b;Hgt*bZ&*|ydcka_$`?|wi2sm#O8ClJ$;W)Jhj=0k5PF&H z65!2#fUy`DS1=2)8L<#>;M&G9+pdhC%iR9_ZczHu; z=dBnn#=HACiaW}l?K9M2y?6MYmxM6-!k9>78zVN} zjCl38MhLG>rrGGCJPeEEVb)=LslaA`?FqquqjEHN0V@ZkXW4i?zVgU$x|gc5uV{D2 zx;r=}GXiXJ=k6sXRo=@@8R|W_CY6lVnZr#NPs?-5S`J!t2TSe7O?P7s0#rh9YC8}J zVr~~ubNw$q&QMz~!Uvukgtz)C zs$7VEaFq4;D37Ln66RZ~xb2Yo>EjSRBN=4l?A=_#S;;`>u;gx>5j%dw|CNAYjwVZb zklb82PrUhr8Nq2C3;+MBX~LIw9MB&>EvTI0j&-B9lAN)(ZypsV?whxr^&z9V+RpemBGAsocXF!wD*+hNPDbXX;s@O3+$8|3f#1efP+t%K__2g5VK_%2kYi|zG`{?w zj>gfa#Z~F@s1B=9TOcd%4_O211QWpxV}>Y~(=21*Zqd>L8eA6jTPgTf8-RKi#8KAy zmkf&wdu7=8RGkuD0klV6M={E>xl(#rMcPDj$N@4Tih^%%{e-3ghvbBoV$f%v7v_d{ z@?!&18gU2C%G5=<;b;jYCYA{o6UJYI-R^bBoz6YenZ7&-ZP?DpdKx)oDmsvgpxiG$ z%WazQL#dun;Q9rir}+$F{FzCAMN_0fb@xpBKi&~{Uw={$vJ;0+N zAI7J=coD-~b~_W$Zw5WZZVt?yzNT!HYy0pcT0`$i78~E2(*Grqp%|_W+5N-5DH` z(n8-jI}*bAV7>Y*R>2$u_Z!RF5St-$aMfUD!WJaDGcuznRS`@3n3C1MVY-=^R2F-( zP&MrVp8y4(wG61vh?joQ65=4f2*T&O+RG5K;z?fH;r+NP-5Q)V!C8U)O|&odKHEO> z3#5sg%o601XMA-1x=>yW!d%ErwYTFMfumaO$@(WKRIXXM{s5J-R*u&{p*hV zI80ND)XG$sEKDSe@U$Z$=~M8hbB@=Dymw?s)e-0k70p z)dtj3eN3Av-#uC)JAJ`0nNh=^$zR8>c5ot^u9hR2q}i>o{)Nn7O$@|^!2T$E7QqV`QCAtt2ykqp`Xuq ztp|hJljon`t(>dXVAJ7Y+mS0S@?Wn)wCQexbDJ+g*ap~nJg zNcA{YvJN#BD1;HpKK?78V*ewT5ZSfEvsXVdX@xj*={_4`_}RS76U-RPvQcqXRj9RU zja^~&t&HoKD5U;6@tMTcb@TRm{&UZ!XnSu2BXhriwkX8vnU6fl6xogT>pI zS`f^k*X#G6VCRWy3`8Yjqp#W(h^pTYxYJv_Tia>*YrDhX0cE?*p~9?xw_v4ajdhDs zQcjV`M}E$%>(qKz{cbQnyuj6_rp0SX=zdfja%bQQZNu_I?rUC!8ut!!??fLJYC_G< z2EXSc4dfvCo9=yck98YtJU)< z8by-Z7f<2@=F4UaUG!XzdUBFCvS2XJX1UW<#FPmReF=*l#2oE$BpXPWI&n{WeB4^z zoLcvoa8#`xglyp*$8d-IkrT-Z7_1s0hdSe%%Gg7$?xedkLXB3e#UWjl1CB2pWd=Oz zg~oq3o@6rBbvmlCFou#J3Uvm<^1?P*YxspZ%R0_XM9>it;nQDj#6Rz}qcR;nc2YeUo8%P@q*Ctm6_0fwBRKt zW>s~XUQ1S_!|v<)FM5KDyN1t)$SZ66>C4EZ6gz@)_uT`HZbja5F-&PLv}7%P7EYt5 zKiJ$6*dxkWW?M!4o{f0!-p05}!M;xN1#G(G;gTj!fza!V9uw(|Rm&=>C->#hLkaS* 
z&#V|;o{@k${+itPVf_BxZ_BY6xYe<32+S+@Yz+xDJ7KK)wp;h5aDu&Tkqe2+Hd!;? ze*^8iU-Sh-gFfW;Y_oB4v){&ArT3XLP4#WfwwWxi>U!eFHd8IX-G$zum`L5OpX`TQ zbiKnA(?Z{suE2K4ks&ex?|$*HNv)P(raC0jto+NK5ncC>p_f&%*@hVjn3;V2ES$G6&*)%Yb2iCn{SZy z?AF_}>HAx{e zf-qmV*aLCWDKQ0RPBJrD*PO|DXpI=hJM{!yC=@oH3_lD~+kZ-fluck6e(8KYfu?QmVM)wpB^y!iR)E?v){8ei!{ z)(`pCl};JD+N@eU-_-LeQ8KUPT-mwMf}IQ^ z85quL54Puqcr|A1rK6oDvn}0aBuya9Q*qK_furVKQNZjo(Lqo7cAvv?9+~FqjST;5Z{Y zwzu(d?Dt{CnmLxrTXsK9#Z8`gH3WLo8W}HPG8=FT zqp`X-b{SziV~G5XYnuWG~*O@!i=jI-ALzuG$^bA2={+7#p6K z2fuqVpQV34VQWpK$Z;n(GA(t-gDDLami0T=AY#xxeFJMYx3^|IOCBCBKm?5gLD?>BduX79KNHTHhV6&Os2 zbbCp%JW!c$sKaK+$aVg;A^h&^P{D&vQ=}~<@kf88=*0PwnmZJFsOm7S$ zcfzI0b*=9dEd^(K*?QgJH4{J;OW{;JN1mq9rDd%cnXXAY4;nd~FLtbkcZ#2)dU>F3 zTJpzGr`~0Od%e5MuWew{jSJ_U`6+M+$YGu@0+O+mK7Ht*?LAduUksgU>-WV97BR4S z9~Jl3!pss%D`9Ah9??sOqeuY{J`)$en{Mw{OnyXc_9KUsJZuu;4->D>pDheFAt_{Y z??=sxoZn^>gdJT*wqlsL#K_BH74$cCS+tM&y&bt_aUWul?;6zGH;Qd=2?1|~aZfIe zCq(WRTVc72Sg!xZC8UQ(cETEEByo!<@o^cwP<~Wc@ZUoJfisa{%`71@Ap_kGhMiU; z^`Gt1z>jnYg8-VRA72Y^e{vg0xp<^~jh8rRwJfvT_Zq35{%33XGaCk%wrJh9@4xu1 z6fO5SbiYq!-qNq`)NHI%$eiRgyiS30?!NbpTi7lPBbWW$MVOG`)JtNr#)q@4*KnmR zkIiq`^{M2?fQ9kA&h>Dy(BD}ZS=V{0@M(C9m7*=hgxwJYwY#t#I)+Z|8hXFXY3#R4 zG5wM=AVFsLXYwUSTn28Jh+iKzAK)>DIla)v(6gDk-6-nKJIbXzsch9=8J|G4-9Qg9 zctu|j-bsuMx1fHxb26Jcwe2f67S%0_o530EG*L^CR2v@%RIjDPTM8Ct>0);;A_?Pa zv)5J*Ag%%6w=h!ZMp2cG*PyY;*}?j0)tOg8zCH_LpO@Bb;agSiOw%XYp3U|qvR|>| zxPn*9r&=C300&^lwVIr0EuY!kJcPaAhQmkfQ-i*WIbh89xeXu=*Yrrc+eaRpJEw3? 
z{jTDZKT$+QaTcU5l8@h;t(y33PMMKS^x8Jfhkip&uPg>#?ni2w zjkI@^;(}SsNcjY@(zD7{!}|bmwV>W8F(-)*2Qt{#7K>q6*`c)17l0CY!FhP&9Ey^{m&F z>RXRqd5;G*_qloYOkhQbMsEbNzCTr*>AlA`Cc`UN<~ZlA_E9+ovWfmC<9WI?Yp9ox zcA0-uUGEvN8&jia2=&8)WVRrvH&|{Jd57SNEiR)TH~-KUS7U%k?bX$AF`k{!W_qUj=un|X43)F=x)@h2r;M;Czu~gPQ853HNWHjEWI0}J zgx6LVezm{~uv2veejH@pS1oTiQ(^+v9+I~`87n{#h53|%w1TlR?gF>HAjNPMZISCF z>_U?Typ(QogiS@?efY}pJR4-0b(S~tGP>GfVm_(ym=c*AfqSs!9mMA6zg860);NDs zd^%J-W$RokTzpxGTG|2Pxz}%#6ob3N)Qo9OY=~5{y?thYn>iH4RJ;L?5%j zbNC@)$0A_^Bo#*d@Y*4%c>t%tAWxB&kRjwG=*%9aT z{+R!-SC=@NPNubL8x@Ip>e2jo$c}S^0etTH=W>H)$T^SLb4Xi9w|-27jYd2 zTFe$YLK`BZI8cq&>BLHpVXzXhId4Qut!y~Xd(t>}ra7dR@5Q71VRPkuP;9A0%x|pE z5h%fF#Qm}2qEeEZ@z3P3=yHB}-0EpQuH0Vay$o*j03*iG2@$YN4vbrZ;eqOMlJ4?v z;tIu@FADwr3ck=uF1^>8!W+Lb=#mG%T>5jnI+A|!UQTfW6H8BBBlo9{!B0>-)C5}H z19v3O_qS-}>v6b}t;4<$+q6i2v>uCIBoVyi?RhThj=paZb*c-?L`zm%kR=t1x82q0 zd$lK@{3LfRZrCo1Q6JLO9G~h%8B9rJRn`Q|P~B7_Ud>Ykg{y9K={MawD42gQ&2&d@ z0QWBmxyr5E@af}JAwu2TnGeu+Es7TLuPnHP>%wk5|Zvm84PyH-qwtrV?wgnle_ zTI6yb-0;N3a`t^!pLg~~#BVQB#1_}#804YQm=$eW^Hgscww2#rpw&TAmmrA^gY6X{oM$Jlm}M`YYhh)x7~}~ z@Kp!CpyMs&eJsaJtgPzrd~@A>IqypELcfRz_JmTVDc*h81GA26xT9h!VV|(xGZ;02mDKY zdQNvifM&%Y>M?xVzSUT*I0pNTsU{(l#hI7u+BF#+(adkBvCxTwlPop^qgs?~^^T%% zYLWQ~q7h}YO~1opYjjpb)7T>b*%;rY>>AyXDqr6-F~m256uP(T&II#tacoNG2OxXo zCj}VwWg1Cn6A~#z`+`!+^Tf)Fqg2SHGkHN4<+tWJ%$Y($G?`Y#kMJlrZP3VaL>y+7{ z=N^xvXd}VlXx)++Aa9v+>(ttKw5V7k>~1QF+Z$xP`?klKRCB+AvL1M-)91(Pzx98Fw=A zBXH9Nqh&U}>-_whS^W^z>xg?-DJXb88jn6B2#HIypm(vAS@@Ed!K1X)dTUE~uum4FRI?00iQzhyF-LeK~bbsQZ zVhdNFnq$*xBSRgS)P8+6{k^_H#+{p)(c1Pdc%(T^e;Z z$xvOtA@8m8)x-9%16NU9zqoO0Dw+j;mYSzki7DPaO^$*VZXp>+f{Ef444ONx;>$RN z8*(_!#JysyE*zZ@EfE8wJJ2fPguI(i>w`*kM>u@==>Eq>O&YWo)BII4bt>y#)KvY< zks$K88s_gK@vIOiQcacUYJiEE9T#v0N7QXq(-3urjw<}%Wea*A*YVZ(s%3&RR$Zw1 zi$Y162y5^s+M83P6m4^}C%>bq)LZ8uTA6|rMC%V0bBH+kiS@=a&!aOub(+h+m4E6Z zxxj)68UJO!r?jJ7bz!vr*PV|6C3Wvj8{&Ifo(iD!q~X|isXjwWjG+#Ve-1WQdJr@U z)L`GQTAKK`6J*cB(C#)h2n z4~S4_XVXAD4(tR*D{tjdt0D)t+79n>T`bG5U$YK;miK~hT=&fKqvmmg#-tZcpwfs> 
zJ<_EV6cK$^1Ok?8*g%P%xM89Mx?9a`(Yx)3af}QiH&!{D=VP(!`%^cfvk*^GW?k~M zVLj#2zTUUq!hT^+WbDgW`JJ#D#qtvpcT%OPQ4w=r=i)1&9$gm&Rha(sQ-#8PDBedcscxqTOVUuv`B>Sb_a{^qAkJUWOEQ8y$lT8PRo5M+d%s~qyMqM10XgYsS*uHE>I#5 zN3`nkLVfpNSB7-hgVee`CcYXUa;NGASuHc%h+=oo zfGxp{;#P}|DaL9V@vFWQ_Qx+HU`xM?Y{d(oaud-=nOj^@9Wwc0rTFc8-}IM$Bkz?{ zCk}M43j2{4Heni<17SxjUZq&!l=qc`rtYnaf;q$&jJ^F9bWinhE&uf+@nL^_P6HN` zRs9)nIRUSjJKlC7vwnTMq2SMt@~DGkwrkpq0M2L|zyT70TW|V1eb(aKrxvq%y)b0{SVJ9Ze6c=5HoZrG%H>c=ilp z#N`zQe_B*~p?!uKmC|zg%JUAtJN82vzl18F9rgDOx?vmQYj*H}D97}AOkZs8n9 zi<>Sic-~ezR--Mme8#LX3$yBFM1~zStv9{wuihQEur|qgw-B8oQv>yeb8ml0fKNB9 z?94~0q&2+R+l`wC^(mI@p1}WM?=Rz`T)#JNSP20~Nd*Ow6jVxD5g0;BL{Jo@1YziI zkQh>=WI&`t>F%x}q`SM3?q*y(-M_u}|GDp%_ly1E4e;vvT-SM?Yn{jXu46%D zS6oe-!R6NQeLBYBqx1WxtKi%SwSNboKd+(>V$i&(s&=fJ&$@JKtENe(zwSQSB+KjT zHqE!4vMLv&j3-d1TR_Do15>e4_&I4Ev!S~uo%cF(5)%`JpE4e7U25f^9j4&BOXon1h`r|A)XObOcU$@allBzqs*Y>6;13z1n(bKyHeSsF4 zmTXZoiuUUDVQnx#$VtE&sseVU$2*MnDN5i;?H;9dKI^ZiblpXp{UY?BtWxOQJc8jn zG^m8rJ({?pf`fWb{JI4-^%xqW*hj$ml_iPP7RX zt-V3=R*CHdR`cz)TT>r~b|vo+$E^9ZW4)R0c|Y8uy#TrWK^wsq{Db=&kne5Kbn4cr zY}2V_qnuD{Z|x5W;w{D1J~k|WyEp{5!S3lB3V<`6Msr$AzNM=WxJzEZ4r;3dm%vYz zp*HrDu0f|zi{b5&E!4|r`w>UW=7&YyRl{tT4BH+=p`M0ySMCg+yWn!ud^z@=vtz0^?RsGzkQ9L zU`>0HmZP)v7CtUwg@0gIL;?HFoa#aI`<-W_KorPYNO{X5(xQ}z9yT7TfIdcG9d-0a zk0d0x$DhLIG~X`1wLHGK*@y@4F|C~B6V~)Sgl-2emE+Agl{`>)K*GzFZ}03D>~NTX z^Ihh8;pNj4l|w=fnx#(5HPhvrn&@6rSGxr!HSY>Xq0EyCKl9g;r%{vu@jOw zrovi<-XL|gJ8VXDVb)_X4<=()+rOR^{cA1q(us&2>d>4gRnDu=k)kS$$xFX0tdS2v??gLe%J}o9ww5PGt1k~#W~s5h zR)?xmo@7}SuW*<^Wx{dIz1y)EATG$P+q$dZoxGrmO-*F$n36*TkjtG&P9Dub0Wow0 z_0nq{mlmRF{asXq-?RPhF-1xI0gE$A-h5)=HtJ++BG>gW&s0n)gDu}wiUV99i%7R; z4heAQa6l!J6jW~3kf}dbSG4NSkF}3p%RwD7Pd=~gN-sx$=|`EMgN~JV&-{yNuICz^ z$v}V1%O58WUkd;qfCJ$#x1i>9l^q8yx}wg>E5LTeXb@`LSo`BIeq5)m|?h6rj z*UbC6LHL6!@l%*&y7xFzk&6v>PqLH16}nn9 zfs?5NdqMN?C=@ebs3tt2fL26+30m}f>{8Pz~r4qN!8t1^0H(MU}HLeqnkfm&+ z)u%6&;zV1#P$Gly)+0i~_{g&^XqT|>ha|AdO1<5aS+gGJ#Pc7&^XANPO=HN^201V& 
z;@=G`R6@$U7CY@0K#|%Lou!d~XlO`z`(+ir;HhabU9;ljJs03rJsa0P2?X2YqOb6Y z!+7U~N1OZ>-U>GYV8wFPZ4Kain%_`t5zsCTSMSq!b0jNCaNxww_?miqOjr_!it6G? zcmb@MfE@&UZ4vF6Yejp(o6Q>)+ZH`*nyc)H)s>uYobyxe&;41Jp}rlL$2Eb=$mU+S zYtVp05C9*R@8xdCJD=e-G{v6TV@$TVB!;-@#Y*CqpO*Z5%>h(cKsRdUW1z2KGK@`a z0x;cP1M+Eu;fV=Bv)wz=z|}cDNQ6Dy8feaq#`HC7=;$P3>~SZer_UDCH7y4%`iQ@B zl$Ag3vdx6b@?#en-T5YSsjXiiU~kQ-5-W1>BgiU?N5!{1qK!qh=ytI|WI3yWN3p4E ziw=bC-bdR8f)`EW>8M*8{{ zeXf8u)R>G6w1aLir?HLZ7ot*p23`Ap zrl;Bhjmxm#Oy!@?IiZFAP{mbMX4v_~Z0h!7X5^O4aiP%4RsBj^`D~itaDIU)WcMaR zI6IMHF&O=H3|t`A7t{-S6F|c6a2Y5?p}y%WO``py`w5`H1xR`Am`WX9SIiOOz20)7 zVSj#t00+edcVJRuxtdvdqlRVU^-aLc&I;%}Seu;nq1k+k+6}K%DZEV4?C}ivA}emS zGc#XLN4s4YYfdM(DlmDu8PH^4M}z3`nE;&3=1ZnsPgi=L6o31K4`G8udwOTxjPzim zdnZQO(DG3Irn^VOwgT<#o2jpjS|lsB2%P&9G+&dxe8ZQlZRF9CTPOHzU>P0W z9!^lYoSQT0+*dzWQf&APnbNfS{f4ndw5w7imqU_i-)OU@s+A%Kz{ETl&adf-Kuqt3 z995>qEf-sjHe4@-)>kil*{1T3RzX%RcD_93j->iVeNay}8P>_bKUT5Ll3K3JA)I#? zb%b12-khGYqLWzg#wK#{t=tde&PM)N?k+*ckO?aeJUt{EcI}}?G%1TQz36m$YR4Pa z?qPBYEnKwUY%mubHhur$$!-gAt0{J2Jxcj1YV>faIG{cWo&Y2iEHvmKxEVYf|l{8soO5I}asbJWQLoTrz65vus^IPL%iAJHw1h(rPndvieND+fOm<;-tB zC0GjJd}FCh`;TN3FjLpefUcYeyB{_QOMszlr-|OwXcOye(w~-|_|gO4^Hre85DWRd zxVZSC6pKBocjE#S0tlk|6aDhw4e6>rW)n@#c&F9$)LWBZfFVzn7;{IyoW}wJ9jW^l z@op*D#=f#x6L&)z5<1t}=4jmHo7=t7b2-fGqvm9Y%>hnED!y1kwsU{cEmlZ4>ppn_ zBjjd;UmSJYo{-1jF7akagzg8CWkOLU>{zL$MxKNccYJc_zRdU)u`pot>b{!lOUui- z7~W^4V5YfIo!m>V`u-ps(rYz59ldwG^Hy%fp7T*cI0u#3Xq#Qlz+v-k$YD?8vJ?GQe^SQ4&o%@Y!P zX*_-k@w>ZcvgNF0H`zgPm8YNX;z019*Dm_lpXEN@s*Z2eT^0Ay2`1tE7(47duVIzn=SQ~!qI#*>mFUjB>=W?jR;MEkCb_@7Y15_7D zX@P(di`3eUSrTkkhjqjWjRWA~b2AJ?1GpupI`#U$g$QeFZhKG+R0@QEu^4)PxU__b90)^Hc*g2fPiUJ2 zXK(h_Q^BN0nLd(4k<5{%imJ}DsuNP+YhK>G7Eud^uqRBfdM-#NMSr3^paANYmf>HR zAYf?7VBjE>Jv6owkM=j8|ws%bNqGmJQP&GyHbYYdS)kNT2xEyJW5in9Bi49*ozG zz`+l_D5o|-btpK+*U^?6Xsw(hJiKu#?=)|xdmQiDkvdTgcKE6gV_J4F=XsZ7usdxF zUpyzY9ZSAtQ+@wr+M#h{vqFAKP>jOd1m0k`Ic*MZ-mrFKF8416XP%1=nue$8m6(rX z>G}@5WAJ?Ocz7?@;VP5;+H5e3JNY@s8 
zrR4c{qViESe+4yR`sIS*^;l)MKRh#=(&Z2I(lJb2T#p!xUpo?pJbw3Ww2$k5K~1%6 z487>V4nk$N00aq4!x9i1HFOc+{Qc?4Ne%G9M%%4viw?R?op|gzwTq~36NjuNpgrK(2L~yAn;2-X zJjXz_62P+tb!Bg+8Z^baxV!~mz$f>)`XsEDQlEG261wE=4Q}qFf<)J+TH7IK`IxJu z<~w$fpULV0A8~!~=jFQ=J^A6PynhjRHMay^*vxqxw-hu#imrm>CcZgMp4MSF8+|;0F3@58 z6!mlp?7atXqJeu{qyv>cu~H#+3jI}VIG25XkxutXX6lXDg@xADD=#zN&r)bDbjL|b z4rNpyN}o@7 zV`^ttQDbWwaoKqP|=ynOQ84V zZ|nC#HSvsDtMyWsobW9X zr@zNRr3C+>Tz;(uDL023PAZZ z$OE3X6byY?(cCdRPDUO|^W&5F36XbL`@by+AGhs}om|xCtJHtzf>?-0t=?{)W|$1B zWGiFmeuzX)& zDq=zM_N>SO6%2!STX~k~*2p8!;`IA&=qm8@=xk>2VmPkQvAsAIA9n0VS}{9%g)qYZ zJq2iSm~Q>VYu&tSf?wq`SzhllX1I7`wsU=Vx_P)kR}@cJfBiB}hOQr8w<6-@)hS^< zmNBx@vd_Kp#?5xB|4(>rt#eDUc135A&B_v9fIiAO`upv z8X~W|zGU9b?sUWFTv{5MLQDS%%*JZ{qQm~W(q4TO0WveTECcVciA91@ub70aT1ChF zJYyV7-cT5ousDMWxncgG{Mt?8G^Tdv7Q{=zC|-Je4pn_V?bp#2tlLK>i5%vLFqqD7 z9yXX~R)7B=JgibWIvniM5^D)W;(n^Qds%i8!`AkO*6}Azqno=yIW|NS+Oa++hecQ}#{L9={E=ZtSz zQdFU>hxq*mHyFa>mNAzUrBrQXcN-)Q>|5K5fPO<|b?q!e`#8=9(N8hy)Pwnf9c1%~ zz6H|FcsS+lXu5{w-`9?ph%J5+fRh9k3hF*(y38dLlCqwD6Tf$wwAr z;3=1D%ocnNS?WfFkEI11obD@kTi{V2z8q+O|1nkv4X8+2g3$`TsF7m1_mLant|$oh zcYIa7GoV2OP=H!vp~s)hHOnRr_62_|)*hWb!6|2rzz!5Ez+2NgJTrw?2KF&hS#ts>lW3>Gu`2MUdHk6U=6VL&t;e331#a49C@Xx$VVD zC;GG);loE%xiyIbZ&D4rjsuq{8h5-k4S^1$$Q3}JL~XX%-&%c(Sf#jw=mzf;j;OCO zGhCLWDexvp-qtTv@j)NOI!z7(h_=&q&1L}~Q6leNyF$D!&Euwx{B6hU^XM=Nbf(j4 ztF7bUgxTrF#0I?l67m97H_0qLGWQya0y?v|`M7i z9w}5Qj+ErHZ=i$?0KiU_exJQN*lPyZaM+y$mgH@8An?6AbyDG2p}NjFz?d%lRD3mRgD9{{nwOP95Q}XmM)60Z!cA^xSZ_-gu$H8$3JOrNB4}{v$f)3Pr&B=Om zR8B3=eOPa}q~C;Je@Us7`{Q>PqA}pEfGiZRl0b-%smImy|Nviqm+p&G}4S!&>7`0h#W)Vw@0$of5H{gogp&Zg4d z{RnjG>aqXUG0-ok(vqU_g7cp*;s<8u)vT*(jot{avh+13wY>QDs^N+Ty9=8+sk?;n zhv@qz;OXBo;BT+&ZE8J*X)FrRNKVdtwqr#$ z?MQwT98Qa;zuOR{ED2;{GrL5*TFUKv6sT1wB{T}kgSL80eTsZcvK9Xa{y26^W69IG zW$31s?@!YA_gSuGMNCWgm?GNRrtfm`RaecM0VW48*l}dOs6IFh&1ak=ke#4Q7Z@R; zDHws>vxL@~EFx;jvRPNeKiKj1g_4SU-F!pn?hhwChHb1%PHI(0XRzhOD`g#L2)`*r z2moNIZCVoY{P^TSRq<9Y($tXgpm&?wSErzgJ4hYPx|!ICeipH?ZmEAJGYY&|DqECiHO;`PkOY^}4Q#94*>WrDpfD;z~apgyji8 
zr#@?6?arpET?&RtiG}Sp^=zf<6y7N?8qAo!e{HpK@hO=00%39-d|UpMtI{7qgOH7& z$CsjPqXP8*+ningQP2w>i)fj6=myITlL;ARDPT0g3M@zG8F_&%j&j1!YRmbJffff- z^AyBMX!l@;#ku00<^@xXBn6mek;E`wa@M*WdmU#93ldLE{RR-Ubja8_G70!~d|`Fc z6ZH>uMJfACbaH2K!%URSgz#0?m#fAm?j@m2@0CWlnvs9Vt`~W$Kz=Zg60dsKn~p^e zvjC@Cc5-)8Po(y?_Fd6EVs-@hDyYhD#lQ@+TWikO=+yMx?!5i`DcdSqOIBdr*(qYZ z-EX94nnY`OS>`#nKx*Nd^ox`OUGgdFTJ3CV|Kh=SWb6#=a?fZT#U?_-@NRZ7S&CGb4`VZkSuXF z#UtW<{1r~VqEdI7-ziQ45v#l9L|=;CYCZxd;;0`#*-ERjj9>%LmlYl_vm z)vusNbZagx2=(LxMY}c?$Ijwq4zaC(G1nE$#)t!d`s~pZA1{#+iRMASIz;uJsi9Bo zznWuD;QPXKH5Gr}QGV|WK=SYc_OF(!YPqEn{^BWT#Rb~=r+T{hNg0#=0|vA&gP&OO z;rV06yUA6Xkg*rq>C$^gU62&iAqxmQ3lv+uIQ+)HAGe;gOyq&Tn%P6X53XBoL)UQJ zdm^(|Scc=BJv8)AlWDZb%x_<@2P99Q&Dw9ujmN{sAa-INwAUs#0(fQS$R*9KFljGV zeV%h1A5%#A^RM8Dp~ z6)WL3t8dh9xnYRQvI0VQc^QbrY2AswpF}h&IxOnAh;@;?RA;%gLPQv@z)P4WiDdFR zD1KmfnjEjxf8N9*zb7BT$4T?*G1?e~`$2IXIQg1`k{p=#rmybOQ6b8HLA}4fCBC~6 zu{42-$*P1H-^pUsKE3*792R8;#a0;+Zyp5PmKwNopgk#X51QXl{vjj(aZE?x;Ly`m zRz#a!GVu0$>gK{5j++&vshrBYS94`uKt{|9a*zLjD;&27mVAmm#VC$@d5B)A_AU#{ zR=>FFtM7uM=nH^es2slZ{M|KVN}Fs~i++&+l8M6*Ld=SE2)(_6L-`~`idw5c(Ms`H zxgR}@CK`*m8ZrVXE1Jupj6Cc;P@ENH-z(tOvV6-Ns z?62SZ{NsOp2|V(+F2E%Dn0&a{3JVIQQd|%4HhY%xf+f6)5n?FRZ@5XMr_I(e_<(dH zRQ*M$CJh$>^u*tAVms3yT+OF(IzgAcLh<80JNATlBM(V~z^-es%BM&fY!ywM75SYz z%&(4tJ`uklOv@M7ydf%IW=d;YG5zYbXz+vduqgCr0;bU3v*<34&%Ed+8krvjw04kR z)#u;mAu-l#dSU_*K1>n4Ro#Y z}i_J~M>d^?sEq_Woet6Au;+b2Vc zRD}X zSQP=|kDiunTTvIfW$63ih@Ag4A5I69#nOQi*2nf8lx`eTa#=gn6m;NgbU$vbw# zGGJkMz-I~UPRwbAYQkTdQF*g?1Os71k+8Z-m_(tBbJHzh10w(LTIGH=@t_l8PFa<( zI?V~jEPq?iDjw#9D*!Q2+7A!|;tdL&2%Tz&=MY8nD}6*|LIRFA&aV#OPB3p5Itq&P zXEHYn+`1jdJc}fo1dixVVf@CwM_pi*(E_Usp03O&BntL;W&Hgq%(&(q%q*P>`@W%@ z91H&I6b5nihewzTy9Rkr0x;dl*Ktbi1(vWjHd}NkJ+>^yGRoV4g}Z8@Sq2_kV^pdU4FZq zbp|YT?eFIcG1edh0HhRQO4wG3j2>kfa#>=g-c-wk^UuX6j-OA}N(vuyP^kYZVGnmK zaaPc_@R1LH9C_kD?7N=^HntqU#RgHC;wbk_#pZ}coIowWYlBQu@u@G7GM#NE@NE!de)C)hd`_}5~vHi?X^ z8c68^Cxc~m(%tsH;9=)J#DrZDb!`6e9zZXQFC)@3>&SD2^|KM5GeSpy#^YPdl+c)T 
z9`sk5fd@cPxr+bn+fldHNZDtgE85gl?)D2I271VvZw}-qcl&$iJ}|`Zp_rJUUaA;` zCr4jYEe=uGd3De5deg~4c`z$2V1rV}Yqy%==6Hj$!)N`4e^_KYmg@K--r_vp+jJ^- z+M%mUlg|&9mJ75lF1%~tRBh?Qt~WraXvzk?Zp#$;fnYH`f2ZIMB1)(ucM!biOmXEb z_axjU69v{-5Fa=kd&iQ$SwOStrJKYMZpAR5s914@;0~ zBE+5R{HQW@RDS%3!KKAc_!-vqO(JJIO*-;R-^MQN4wCV)N~AlMpS>=VfZA>>ekLa` z&6d+yO{KTiSk3&*QGOZ8Iw$dE_~)y$bUw{*;Dm_I=gytT?*%PRryrtbC8IY$A}w=1 z>gB8-KS%$Y_)5IIwq|WT1In@9Xv-rBrx0U#!=M--bZ2FiV8Ba{v!s?gm6L6FiGQ|0 zu3Dsp>wzgg`b3a%T|p0DnNcs&-Ru%El}Fr%GXG%sh6y$aD{A?3$xSa-!`|O@GE77k zq?x6;^a+R^@r&`Qn23Bs^m-L0CX>NcgWIvd@U^=*f8Qo)lotzOv5%W=>oUG5rA-BZ zWGk0%*D`t<6b^C{GES69Ct<66w=5D#7(9jDabI<5;i~H|yW6lUdSyc!DfXGak0;(& zuSmMd5H|D>6yAVaAMWuOm&vO@VqeV#Ht`V=2^5LOmzGSPWR<;shL9D4&IV280Eq)8 ziO#LwKE4myqVE9h@V?wUHNRg2C>L9A@&<$k`{x17Q^&mdJ)kxKzNCkN8tMv zMztilY5|2f##fGF!m7!K%FhMi)mPBw66}U3h{5GkuUy{QwWK6z8|*G9anN-)fuRz* zr^OioS&s+BgEm0@FxIK4{vVnBF#AnunL7EvRaJt&UsklZNd8pW-%zMMv*Psy>?JZF zJnRoP!hkWTbB$SMu+2XmCWda|!Fm_aEk+9|z6_DF_bl#Tu;QL-4rEkNMhm=|XT_>B z-n{nGqcjv%^dY}S)bI;p^!w8>4y3aYj@&Vsx9TYnxka+HP`$_f$NguD(^84T7g>?dP+7)lH zXY`ogDf|-pGxbhA(f>Zs^6uM-JpvB>Q!+Mg_``$cFUKRGiS8d9jMn=prFELd!Q@(A z_~8W0#0}t$6xYYt-lgMr3|CX(9b;Zc7R9;yU;L~q3=!vyp;R!t;Za7Q%zz&_pz99T zr*d)ZSB4xuEo3LIYWL_pP!kf3>I)QmmnI2sP5X-MBl_+I$0V+^`-+2SsV6>rGACPr zR&x*!#TzCnvol@g)plMx_tf}nt}6Cb8|P&>?n;wN9M_MhH|DDJ-3>yGO5W`sUA{aN z5_#vtMYC00w3P-?u2tbo5FaPg6+o|Ty1Y|+i=q3UdhG+68Ecv2KGl1#->K5exBJt- z*KtX3ZGS2HGjsiw6>3a_otsNJ)HmP1kF&C#adM^iP$z%(+X&>(xKiu&yX#+zP@_&= zGH1XDV#B1}1exow`R>DYBBD;HO_tM`7q>H1GS%pY{>UB1F(n8{q`?9vdWCcxPnMR& zhxiUuedVgPsvm!F%?!!aNc&SEm2qr)cX!HlWl6vN_It4>#wzERs?-zaj>|EEQeH3j z;EU7vuh+q!(J0PvT+l)Rt?(F!#M;@+5uZ&n~L6O4;8?+85sK(Pv7rlm3alrk=KRvFD3xz#PrQWCHA zm*W-zUh8iVa++cUAI1j1p6AvoBhsyxfnFy#LQ{7ysOdb=AiCf?)YJX_44~2dcribK zqyBvgo+q;RrD$zcwsnWOoRqXfLqoD*zM$=>hspNk`%Bj!DbxImF1SPgt-XqK#AQrA z#fm%O*}?@@vre%vl<6!+g3Ag@wGf#Ua|9BaRWXZ4KQE%qJ6S(v85_*OeBSGAa^@58 zZ=sk3=&KT_RQ7-+UpF$+fzVq5z1%Xv9$$E5RkC;-g9bqS%YH-ImWsLT#Azi26m9kB zXc8VKf4uv7^#KZ4douyp8+wlXW5*7^+qnUu_Tkjs&?U+PMI7DGan}|8vNQ3kBZIqV 
zfMkSuNx&|MQEqlcKmUHptl9!lg=2bO`J{$VXlV*;FTI)jduknV3rD)8t8wvGY>}9$ z=d|)A1~r#Zkm$=~@&F-SR$qsDQhVtP!ovoib!PK~oXU+%cH^yElJz105r0K1*e zVrTdwO+oBr8da9PVsYa^zn|^LO>&EpTfD3K)b|a4CR%|&${z(yT{~^A@u+)To(haF zL%V$a+Xnnq7#O61z>H(nci?PyN&x)&dwKx@Ib*QOJ<5;NjG| zFI4JRle7X8n&J<2>l08uDAEf2S!r>{dCmE526CTh;2nZL3!%r99{_o3&L<$3?xWLa zj$nnX;paU0N1p14|0En&gH|Zogdry{m=GfgnuOVl>qd&{y$p>hb*2I{_k!wsK02y&!>-w{?d?&Y|rnKBKf~QwIJQ&;fX6ec;IZkA<4^qF_j5# z^};+0HKhEwvU61|X00yx7u&H8Df!5i1(fk{x-VdQPH{#JlMETibKPlpm4>C&;w`^I zYm2+PRVU_ZNV3z-nmpcAMg|UN0K1lANCL4FVC;aZk4MH*$I#9RHXSRX_bf>6(+mLN z_n}FK6A*!KK0;r~nh(k;fCX~@+qTIdkya;<@Mh&dpvS(3J@2@GDW|NA8cPZXo+tT< zo~UrX!H=Z?03G3*0 zzE4`LB>!E^WQ0_TB=F&8Dsd3AUiQ7lFyi2|KZE;V@$EjZOEa)S$#nZc|6ag z&iWxBSSc^0=BvE)$4d(YN(Jd+2pF{ub*Mf&v6dw!hJY6KK(FWBj0Vib-e`h!N;MIN zyllU+DKA5^J<(zyaA;~hY@7m@yPiq=*>j;MfwSgY(Auv7Cw~SwU~6Cc-N=_C!%+`3 z@>%h6eTiWV<*BE%d-%{bdDufT#NxP{EUg{OU4ZP-y*fO9QF6Xz>4|GS8t&(feDD5` zM!x^F!vD0w|FpvYw8H6gNV@0oJtGL7(^>z!q>ib#!#1M}4uQcX?nyme5HS|ww z!2;<0zc35G)cp)eEhOR~C%bsi<+DDWo-W_uW#2Gv6UyF z8g0DE%Es8$S^BMCZTP7jGN#Lx2fnGo%Nckx8Gv#}T^tq$SO@zvS)>wOby_-}=(5a1w=Il5(6F*qf(#_w=?ZjY*rqw=G>HlkJmgx;hi3K4wT0LaG&vU4& zv$9;rdn0!fFy~MzjhV8&u z)@gx%)^{=_w16?t7}EpW(z7$8()K+&tj}VSfi_qx5+lu*Sg0`MA$>B*N<-ZaOCetY zK57qVwLs)+*OTW@x9hZF-d6=k`EhNOwN_b~_e9@FqG^G0Syj&|rn8`_N8Xg$K3$5*xeAxOWyz%cQyF= zS5ZcH0qt&UtHKIkJhv|$s(@MzDGxZ32NlW+@bI9M6tn5b2yG*vE6lD0u8hE-62Kk5 zQT1QCS$OeHN|AP(?cl2K8~IxN*Sa5w+}!oC((#KGC2fRI7Kd0d(8;bvK0k(;8pRUa z_I*bBlRTU(+?9Sl6d0z}#}@VzJj`id;0;X2V229;c9=07&(mYX4REHIz;j}m0MyTz zBX>*{7?oZ58msd$_=vMTi6G5fbZ-|4IA;Dd{LxAC{`)|<`!++mI{C4F!oSdJOgMVH zoi;!hZ+A6dyfNeYwU^wQ=|RqBS;ze9=ovZA>#_yaU`R&x1WD(}>}Q|&@=mxc zueX1A*0AjY8dB-7#G%C!3JmEk9y@T zqo9I>UogO$`;F1Td;6$wKi!^;t5trDA^B?w+!Vj4)6|+`(&oeXX|j`}@#~OW{Hu%) zNxD_@`ta5*#KUeD2b5SO{mC9S(p$bnOoS z184h0=Y{Oo=P<0WtT3bSJ)nBKWBLIGoKVzm?H-5T?V>GhK3UZZ zHh+1lKe#WCTI;2axh}+u?i6PDc@?v&4JFCj!*y1I`} zj2K1f=>iqPQf(s}KrIZ{Xdey}h2!QmsZ^5-8>;EwtZNgz(zwt3B)40;S#=*1UzR*u z-GH`6nv1>{Up+@Fqp_bxqP_ztB8}!bis&cD7X{t@0NWCLsPq^ORL%-ua$jFP`5VX% 
z^ydm;IdwwaozTodws^No;!9t7r?H;QQWTFq77hQ4hf%E$tEtXS16^74rCHCzJTtg5|0?rNJ|Y%M0)R zz(WQe0*u-LM^sfgZ;g{foY#@O4FhmWp3?qXYn*Sb|Izqx^^XCloe%3HeA4<|q9j#@ z1ogeEo;Hb@7cRl2&3>YZ^CfU8OxI2?V7I3_9tDe75+b=X!@$>5Aajo^52DScfH&Xb z>x8|A{L~dYnW^4xLRygj$`Wa4xKP+OcuwFzo!!P~ay1Z^4lYEBAe8=0xUHRqo@t4IfJWr}SNky@62$qK#uefNdi{%1#C6JWS9rXWqTESCHN9WywYldoY>#%)5KV0A4Qs}!pkP#3XE4H zlL!+jrP7$U zGG}WXdu}WTG-@jItCNdNy+N2-KTs<}#9(acUt%DA7t!Oy@ieq$O3$%4IPDz|Sp+`#?5~Nzxp-#@z*`?aTB@`6=<7zH$sZV*O@&m2T)$U&1 zI;<9KI$6*tV4z>41b=gKp2nJtpwW(6rC0y9w*YsmPnAcrXiNRKE+MH}NP!;mjySd1mqB59_>4L}SUY;( zS+`Er&tigsRGo&&U7)?)Z&3SPRwI>2&HMTf$#;40f=<@&Pb9!A?Dk|p{^LU_oB)x~ zIxmg?LncQI?5}Vs=toifqw^ErlJWTe0vQhhAHtZie{B+}#n@Tql%tG}BF*EjE7$7b z8bkc@WdS2c_oyD$CUS>1?vx*hGbZ(tbP9ZxODR`p!SOy`ciOJZ`h92RuAK-A@%frc zx2ZjE{&-vj#~+MHGQC76t7^l843pf`$b9HMe7>{5aY5&Se(!V8B2DyBW-OW)oq+v% z8x{NUNdC-7f*lk>m-F%ONTAr)_c)k2%w?tUy&LxJ*vjJgop17s8d`3-*wS7_R7OLBG}kQYb!A|GiL zD@7Z5ula$f6~n5&fYAV8DUnPgul6fX$|s2Dwu|uRPZx*aCxZev*uM!EK$DY4nT?q=`u{BHh5t?FUX6<|9*G zw*6VVerrb%Yv4nNOqE=kPuGaupP#dgsY%w3KTGYuSVkL{{|A;a02`}&+$HX`HhIeq zkETS^;=zS)y>v#jj~Gl>2ZMQZZ|}yByr9N0g3c9aC7-|!e1(HJbkx%=l~|);h0l@* z@fmj3sBb+CZ@5G+s})#SLA9&*`L15v4awD@^tU0vgs!HSO}N|yP@NTEA6;vo;XO^< z$0jZyqr*q{3}O($xgf<`R1%onK4zX%q0Jd>(WJZ0oeMTX;nL1OL@)k1kt-{6h4!C> zE44e>^M`e&F{AU(1<(((+^dr_*bEyg_~fzi-~7Y&Kn<_J;oS^^_!B{hE&cGgWIdO* zu|+PN#vg1(I)v*e2MhNiMYss)DtrtN>i?*a;~I8VP^RX}3J2m$Ncva5P7rWU(oY#b zZRQ(O$H4@d;*yd@IK`UHop4-REG6x~pQjO&_!Bn$a+@92T^7UW1#Ts}pHySd|5A;& zga4o!-zjpOS`21IE8JEIoZzu!RKJJ&99P7w>9i~uXLVH*&p1Yc)&cM<_}sd8a5ov8 z{VvN~p$(z@EnY4Q=m+dhmF)3?({QXprz&2|1CYfJ%+2mu(oa_Sv!~_}Auz-0BHj4V zke;q+APS8UpbtDx(DYfeGApW&curH9j4=r{{F`> ztW4lpxSd-Tal89Z3NZcpYExwVW!Th`ee_FirmMIhmE)v4bdSSa5djBNpgo>b|NHxD zbtZS34Sr%ugc0B7L8suh-j-;<-Va%1 zYO>L1kIP(Zi;Wd?N#@n~|9*cb#^R;308CoTm8%h7Ho;qH4`ahbiTmFjyRquHn&X$Z zu3yW#qhCIHb;R9~liWCyMs+1>k%x{#EN@%8#w#^n%nr|9Do#lE@f8aEI?QCIXhS;L zo5w84@dNB%U=s&JV&5*9MTfQY?sx6XiqI_YESDZftMczqp$AL;8Q zBh7vJKF&V##a_K#iktOcwIZ&{h=|~e^}C6AUL~Zxt{R!w?lvvR9MQ6AiA 
zhWv3w4Z5vmK1$<(Jdtm|%J{C>C`*tj9B)x~gJr!oTfWFCzN%N!WWgir{(bG&rpxhT zI|qbCNp5{&-GN(?h%f#WFZeRw%XP-0UgrK_Z+TwwNdAy zZ2ku^o=Smuno|_$19bIhb=!CZy#hSBMNH-jbO(GKKXnM#z4^@>L_tcoc02Cs-=A1g zq<;CBV16TZ3+f|RJ~N*_ne?UNd#D7H{9s~kP6qRBy2^Fk#}?}Zd}{<05dPa?VIE&* zGQvlsdtN6ONL0+}4?Z_4?bAy-k*zL*xYI-8pURSlS{Wt0EU^mNWR`izK3S2+VyY~* z8eN^Y2K2i|n)F@75y=)(@>8C7EWPMA@SD$FBaovUC^jme9r0ssw z(D#}s1**)`lYE-8J`UbOPjvJadLP_GgTh0P0|_!IU8TV+J=LSWrDFk;EIKv3mWV$V z>d!TcAab7%0(ZroHO$-nIAYIB(Q8DWDD}QzVV23;XZ6jfR!ij;_C}jt zGhCD6+Ck4!^ISLzFnC|#v^zbEELjON z530I0`FS_<^xD;E`gi^1Q9VImxX;z%@u9v=&7c@k;}$~dV4P9@g7~EVk3Ry(`^@Cl z!@HADLs}Ay*B|?1CDjhv&ZMK*y+Npbc%Q@ob;kH=Ahb!7Ug-lB=H(9Wb(0bljK@O! zi6NPr0o?y{2wcE2=LPHnmd0vMH;k1h9BVeX-Zdu#mEDwySIwg3V5dD`qgRYxr{NcF zt2u4Y9o5s}542i|yN}yPW_v%TdoZkY(NCY%1*SY38Z|t$>X8NSb?oZrAAb-8>#Ufj z8aq<h$0g8K-&{<)?i><5Hj9%O$l<3VQGaBJ z?hGo9rYWm08JlXapbe&fzdPP%ElM5rCACA3!hDwtTV?RTC|&C`#O|?!8U}_c)y;lJtN-rWkbgQ2}|Amgm(8%67R$9Ii|(zZ&HgBM9&6y+UVG z-3#wEXg&2!@%wzEeC>B3v#-8^W+2-wmmoMhgEsz{dMDp3l`CS_y0oj zUN{hR(tlMcGDY;c##PR+$d(U*_#e%hOuiUe93%eWGn}SK+3}wQyH0Gr+P!7Gnv3hc zawcpfJ^)Fos;iN9`B3}Qg*WtGU6SBmzoebEgV)COLj3x0TXBqKF?0%XR(FFqbfO0zCHIG)(s&b;nFkgDm9U`@ZIWcaRh1gI5t)aX0DvA}AJd zr#vs7NmkQ99q9H^#RT)A?rBqz5>j7UXAR!#b!;Huj+kk5e(mO)(6_qJ_A^+6Rs_EP zmXd{4u_W%K>tsb{)k42nfIrgja2S6Va>$5v>9IkPV~^yN?%1jrSJ_f2-O0h)L+A!yk< zl~PHG#F?VqTRz-Jj~;X6WR>IttjQx@{|?1<9Dc{w;v(_NEWQ*8$`lF?`&iB7M@&+% z-ImMm&qfI4G9$5b{R%tgSkMNgT?q_DFij?9J>Ne;W8e1Csim-nh?-E8prj^ZR|{X^ z?fRAOOl$?L$5J(0O6KM$9^ScT=e?kekq-?{GX%uGeSl`)NWrWQfvetmB4VbC{i%%Z z9s#NU;uBZ#Yg4WldO=qAn2C`nJk<={S!N~yV%q+q%21ZgPH*Lj@Ats5=}YAgMXksJ zd-ak_LW}i_B@`VU_0;y=GBQV}(x#Sup#+;a))FH%iho$x8*T2`?|h;a^3U8$P$}x+ zublqG-q@=msFkcP?-~A=>ul0aNU}j=ec$%&fR06?-gDq;P$DBhT=P*Ws1L*`1rF!7 z{1JeK(9@kucfW2D0hZM`O_>@PTR25wh`Lh%NhY^iz5r-gBwudf-&uux^kT2d@Bx&Nh^ zRoQWNiw?_p;<7kMV>;D(XoIza2I6;kg78tUBOQCHGM&y_z>)rZ^mY-t&B=-g&D&i5r1 z7F_CIrvE&3te70fWT3n=lc^|xIZ8v6PK1jG)I|9mE=_%6pi#R1Ps}}+B!7&5WYjof zvg#2}c;y`tO2x{hY3Y)ASflBU6VY|KpX+ocHC!NaV12`>nTLp%>>tk;hnV+ngaj>) 
z32BV>E0(F+$QEe3CMi|urYAo_^s!nR#nP`jbgt5t^)ke9M%5f^n?{GGqJ;5}oQ$lmZb<|=rr@h*h|c*Fg!v`m4O?X064a?i zl#f-Z*xzR8g^`+*+YbNR2DXdZn8egg@y~AYqs5wheed&~(J9{K3Qh`<8{5@~T;Cj{ z2bAE_qTI85M*99Y(b}?3wkBcAZNFI@2&;GxzybL2EB> zR~I(6u>lMkC0ZGAn-E2+3cDWxTHV@%YD(F%Amusd1x{aOFE7oh^lHZ5p!#ARd&kb# zPkg$*pPtI5cyBrhQUV?_kPWoR{x)U^J zarGRMK&cOBrWmW}(TIs?5zKZMy<%sAH5Yw8OCOgYb;5RP^NhpjPavgGYql$k6u5Oj zOTo%wFL)&S27^_4R=kFaB2Q}J^DcEHfi37!K8|SoVQNQ;*;e(7_?@b6*)`W%VO`SR zUm3+#=HyPYZ$5_K0UA4g51{}in3Xbw#cpUf^4&Odrt309l{Wpbggh14)?s&(09_iZ z;Ynb9V&Pj-jfJ9U+&TlxCzYI6-h4%)jz)99{j_%`tGWD^57UhcZdx@OzSnib$~du= zYk}Fw+^2QMnpdqeY8`H#a;$#22lEtoRujWDhMktv`+6mbj`gf(8XpQ^G8o?wI`7@n zB~y6!L5%d7$ZSj*A5vu zUgMRY%A^9XOksynL)IZ)L5^fp6})r*1*CE5?Ym6^~tY}zP6Xc5uG52+*T(5TIlP{@_1~VMwPW=Xc^a#-`>w**fBcW zcSG46lueW9u3yG=`4q}b?$h#0*n3)iz|!M@Me~sG_Ava`)jidi*{VA(o)i!UafKY;oHpmp_Eo6 zrDY8+XcDs!C141=vgh5KIi^k}*s4?-ZxacN<4r(cVU zrHGIk(J5;&962q&bqHh}dAAWQ=@^M-3g$tBU;a*BaxOYm%})4k*C>#xL}bo=XC zVSZ!D3jLuTR}7y-?e7?h_7>S|zRGXf-CaX}YU6o)P*R=qhM>;*{jIx&Ht26Mkv1TJ#Mk!O5+lTZO9j}m4}1)#H+hi{ z-*`B|BH}ht2s3ZHX5iAa-=#HBEJutqZc~~+cTURMg?J^$pMsyuQ(uYOq(@Kw!PfX| zQI|AR0dm^DB#KXOJK8alzARd)RoLV zbQ@IcT{xMRXa7j{O@TpIVVEMaixe$Og5>%lL@A!UT2TTna}8F1EYGvtkmlbHsiW-FkWy4i%FSV?;tvYcMjZUi zt~K~7j%@B({tCU=XI;Pefx)h_yt#qK`t)oG+(&>yAg7}4Q*djRHX20r|e1JVkUM8EmHdPrt z?tMXanp)b?&{ey-i|~i(t3OM1Foj(uUClv=PEELNRd<-~XVkZU_yU|`4svxAyBCs= zcho!(8Z-7<_Ik5#C3$S~*O*sZ8^Hl+Jzo0Q9jGpIVfr|<$g z2l$4Wk!5Q-% zyEo2(GqquGFXuL@?{O&*rwwrQ`@`b$8K!0Xn1}*|EmIaIyv5`G>k4=Zo{u1CW*$V| ze5&PzH$C5(n>6n|h0WJGb?wCxf_pK=+6|AxvS#&-Pir#QUE?aGcX?;x4J3->TRgSv>U zx7@tIQHu~J1Bx!hSq=4m{AqSmhKf%*H*DyL_@XX?C-NyOnWY8YONxiZ(V3bXS!?;@ zJU;7p#}w2y!a0A`aF(D&gEVQn(}XKK)1G?RCKqrGen84rVLut(c6#--Dp``+6;R(+ zty1q?E#S~!z7_eKlh%EI*&mw`c!N-&ZlV6bi=rb;##2-OThRK`*yu^mLW6Yalf@}fyo(m6ZV&90}!)%|X zt8LKn{JVoo2VpA_q=(bb-a$5N!O>y|oz&@mTkCw~qh>Ko|J@m28p!xEQmE;KyaGgW z;@zHiDLRn0*0riD`wWD#4Z9vjT<5ae^N4qJzsbgf%>+d;R-(?{ueH zbBhMTy4HE!7cuk!j$#iiE}gq6K_VI>_qbksZ~q3?LA8YB^i)^qO)();Y7^2)fD)9BtgGP=J+-RV)|cb 
z73GVtFPA7pP37oc!7B^|Ou^CF(+EB=AN9enZE***D8 z8*{gzHiC8Ns`pmF9iCeierP1e@t8+(YGZ&t53$z|axLx^haygxhlMQ0R_?c9MsCYWZnmy z>TEQM$;0Af|AN&&6tSiKWS2~FsRvszsGhwRi+nx0*De+ z(;Sti(ODV(NK(By+ue6MDF#ND$xHah9G{W@AY242hFs7U`kYliyvk@?b;fQxpS19 zKlq%IvnjQd-9;@^&rI-n?+)DsL*JFx;pctvnXJeAv zp(AqJEp;}n%+TWQ#EV)>M_Y%aONiShwL|QFzVXgk&IA~rRI?yCdpi=kM1*PUO5XIy za^R$EW)lYQ$e8La3a2im|7?7yn7k8tAGj3GfA!acQW9NI^72^0Owt^)z+y+G6Yrvx zh4O%#k8J!V(R2jvoo=idLbU6hbE$LE#oR%2&Wie}Hkm|32SI7FIoBX4h#@9l5 z&o+%DOyR`r&|jpOweEhDmx&mFXsyU3DT%L$n>4Q%r#+aNeXHR?MO`qXXq~3ZLD$8q zq~+{R4SE_nska_NqunwX|EQBt&*vBX2%m@IOuzn)*YC{W@si2C{@cSXW)`E24%3ZC z>67BRTP7_3YtRDT$T9kju&?ueZ9YI$3%Iw&-Z$fuvt3NHGEy>P7pk^A9J~x?r!h$| zjV0#Wh^Q)vcTGyj`M#><)7Bo$^N<(DN~X%(V-DiIu9oECceDC_MZnd+jB!)tHa^`t zNfgO;fCMfTPp zXu67s2YR(*k3}Jud&&I~oM~K{b9JFtHtzmKqZ*_G?$|=Ot1?R=2YtJLeS?#MXY0WMvVn zv`K+2u5nX@_s2BrlM&XZCp~RyD<6A#5|iP^N_0BZYL(GN5tN!u(Ngw+53UZ-^ zj+vYN=?^}-39ASDWA({GyNnlnZlG^cbX@w~aG(qJiJYKuPo0!6`--<@y1`zlz;QVJ z&ieQVwzA6aEjRU=^%G?IZg{)|zGzt|={{px5004TuH^ z+6A`A4-nc5D-`^v1Ww`|#JO=@#pn1VXk7Xg<2BGUjR-Na`9rkwxvx)>^rQ>+qoUfYdz)lIvwx=rWl+cwBaBE?6B=oB;8zGLCm40W)`fk8aTihP{vQ zyoHTVeceLN%yQGo&b63u%#13 zQrgk-M*b6Ho<4^?_!abJ3Y-xjXGn=K*Fum&cPZ}Q{A)p?u2_fYxo}bbGxyZ2f}ui1&g4N0 z`;(p)t<|~0aQEpE0#0PTW7PCnDM+KXjM&!Kk@(`TMwS>xLnxN*wjUFCR9pWX1cS%E zT8q>}&x3C=9uYHJf*#a0)8 z4rM!g+~c-)ka=JHcKqrM8&6v$amE-kLrg58P-s}H$ zJ7dP4C4N6EX%O%OBXbK+1k11Mr1$loeZY(^1UTfAF@*QoqdO*7b%QnaPsO5Uoi;uG zc#rsz%nrD>2>tEbfZMic!anzhVwe9Sm+>tHn}nwQbhR1oXtES%JIrD1XmUJv8;mN< z*A{unm`TuHn0=p1pDom1+M&ORfL2SHFdHj^Qe)@7WV?`*&j@guNQDG zQS`;-uxoLw{(H8iZ98w$^KiNu3h~D;RF*&gY8FE0Ojt)PePE&vHD>KZbNWY~v&x?q zMB#}|)<5~dE8I5Yf0$^(sJ(JSwMi?EmssrbO9elaCO+i;HS)sdOR2e^xN1K^5~}Pb zCX7`{+L9jE+fgSM9jGVib){+{D(*_D{MPJ$4=mL{`#>>bhdD-neoF$k-y7c8J9+Q~ z)>B1N@s?QbT(&t$Je`xaS$gjWqxVEpfqV3BimhrpG#qP|8MClSYJ1xuS;YKGUQp?4 z7$ucFb<$HQF)VQA%x@(KHwr}wY=$Y$Mu;R>gx^(S!n(OrG1HBsyaQir@&%u_lI`*n zs)ASSbTVleYkq2x-22WoDFdvJfAJJ%o!H75;ONO>c+MuvA;drXG8{VSe5#@*+zT>1 zdpVqjJtM))+A&RD5gao`6ZdVXRix?UvmC=*qwX7vH%*|(>auRdRT%5oWzH*MwX+?h 
zEfjvJYg?a~Q4=)r6G@+Ally9Qs*iC}DXj#$?I$zhf;;LaB)F9R#;H3exxLOU8{qWE zW>Rpu{mQ4rNPjfhEchJD`V=l{@892PPlX z8nPExFQhckbmi7mqjHxPH>5pytcK1I^WSMwAbovblcsbhu?--F4OHV%fx0vtgnZ7&VZ z`?0L)RHGMo6lsGbEX1^8#4KC{54d6oDztP{EyGTF1zTF*{-R5r;N{~c%qL&@XT2_U(HfAHnqTq^8i|X$6Y&+_KB1ASp_2*8e1}gw z*&c-=AU(&a8^=?V>(D;&vFJbMLZ5qV{K3@unZAW%llm+(`NBmiy4%Sru!l+oFJ*Y5 zmU&qnvA715vrkJ08092FE8rG7_AtwiI%vqCPPFdPjp6|WrG7(NXD(|wLEtffyl`Ia zVR7jj`D!9~XKnB}y`yKkgu0uKz^(0?A|}+cS_TPxz>Q^!;#7of;l)>Zv#lrWK^*WW z!yOwRA&`+fO;w0!EJ7Xp^PUZ=)^*-IT7R5?G!Ek~k(je`HdEO(BBd0bYQo4qBF6t0 ze!T;~W8t%@+K(I;q<1oKY^sYw`X)xvJ?#m0;p+*$!RQzmavW0%YM7eYcbwdxws!Fu zA9tMe7%TtjaRn(HI9e{b#&;Ccg6d|)rlqCN15Bv(q$OQ10%UP3Payo`)dD5F#fZ-= zT>@Ul;g|OWhQ`0eZ>}wIuEKmNOV=qDsICHhTr*{Q`U>(ze|$kJEQU4u zrE>3An~v)z>#cp$F~FLqH*K#=`Jhul<4V9sRot+NZ|F%+h{()$Mp)QQ4%E!oz^2)1 zPw7}AABg9K&{j=&Vi2t?#qv9n+6~FV3v6Wtb5o6!0S4&UV(9hipa*om__}K`D*MnNiaA5vK!`H z+)vpPQMKjBv*Xo+?1j2iC+_a1<<>p66 zIlZbEJ&ebrKUXJJ07?z>C#ryi|0oG9nlTbIc&mqg*|3)7Mi`?9&|ueoE%^yRbqvr# zmn4VJ#uqXsmi&rB;j;En{v%C}KZ;8Xx)Mc=Qm}4XQwD?h5g2Mvb>rxRWFU)EhR0~w zuV>TDodVtzGkfP!{J*6_57yUA27)7Z1I~16{v-9?&$tlan0;_T9CY?J|4nn$1c^v0 zJfx$T&*9p?EcIcKl<-vk{~hBm1OA^i{&QUE|E%%bPO<}gn-hB(n@#dP!iHzLk$fA zL&Ffm4BsB#_nf2WJ>P%uo9h~8vuDS%*R!5`-RoX!g4I;yuU@`&84nNds>0L9>Uek; z;dprXuZb@Kd*ZVQF5=-`QL&blRa20aWm0njTUgtfl^`CV&4X zW*~uyh5bzoxgOq{9C>U6Au;<8|5Q_XT51E>dlNZr83PyBi?2#vYSXlB>Cup0=Gj=N zg~y68zu`r99j%~O`k?2c`~9MpE1xdB8Dx&=bb5XdZ!i614<3PsN?*DzoI`4bN0WL1 z|2_>d^#cbN7s5B@*%*kY4;51`Dn9MmMhfPou z;yu2(bkAa?Thjy7TI=+asdbb)G3Q4wcVgiW?MrqGW}f6y7v3mQ5ifngvqC*eHe~AY z7rS!f-s?{xZ0heZ%&6Y(1m3qE%&nCJm-;%@)|`hLhdR#^EKZ9vr6WW?jFw!v?v(nN zxtqEND>b~^sAyE^WOa>B$69x9Ku4?RK3LT$Gbr!=)rG2|gN{4zUGbzHgRPp30(5n| z1_&!aHawa;CVg|y2wdK>h=fUXw1*J-&6Cr~Br^AY{BWb3t1E&q&&~up;G`fa#bE05 zKCdk#wOS^DYHzJXb|29bv0wepJ5E}1?%lKVvIm@gd1LJ^rLAW&K0P`2c_c|pxm*1% zMzubDFT@i_n_`OCXHMLJ!|H-;#9IOMLhu}~6S$}wy2G_`(|7~g(qesZB? 
zy`;MSu87wF0~z^hV77-)zNsGF8qx)RLAi3wJp)>?X-|)jYs11 z6!Vw9zI$yMpZ-*`xjQ(OV4a9;g%h)JHaDg=e3h)pCsj=Ihi*0V>sG^g34>{}KIA1` zMCxpAezCmO`MtMAFRTW_7L;VT((YidPerl{Z3@i9%f)N^R#M zLrv%;hbZ*}>6`mra+Z+l2#n#u;g0zfwryd#J3C>J2gQql7NPVzoJ8v%rUi^&yogl@ za4vAJ+Lw1VZt!@~u>NvMIj;AU;)Ali34`M{^0E{4ufsPgL(4*W@N&1qPY+St7IUz- z#d?jeL!lR&Rz0B5b5-N&;zhi(xeLZ5@(r&mZy#F^;?12=d{s9}U3onkfH!;l%JIQX zJSvk5b}uD(o9I0VpyY(j(%0V;b((M=NRn2|+gARFjI@)dnIW z83L&ah<$e+AT@eNM*KE{T&(%=6>^Q%%Z}0pk>|*a@%hZ6MlrHRgVK2U-2fPm0yh!T(+*11oC9jy=xCXZh;c~BhtEBGrlD?Q@>`k^@C_2<`@Bo&kV#~-sq%9S&FHCgh1`K+8MYalcI zXj)Z5#*R~x0~Ps{jUbAkGw8Y;WyGFBTe@<(Px@kdUb@PVe(9x&804dC99a<=ODs+| zJ^4;`__W1eZn28vz7{eHjF7s!c%LG~pHnN9be`u8Dgk~>~h^^mT z2d(p5y?WL8s_MrZSA%~%`snnL;MS@TgH2u9+5pm8LXTTnL0@4?R$K-q3z5qjE?C1J zT=$anVXqJL4Z*&_zKG6QOYa=NZp}N8KaxLQmwWP7=A{mQ^@p4fkDlCIBwu`xCLj9E zyr6BE(yDhncSP))t74vNLJmkp{F%5aBmiZ!NjhmL#zt?O#6%z2N12o)mgT}Az@K?B zy8ZeCS^;NDG}Q!bE0mji=C%0r7at#$g*4U?C5ZD~?syY^*l)D-82WlhT_tHqJNx)@Xblh=mYT%0$S7eZTeY(mQlu_1F5B zALf->3IZ*!K%Q)bp&0JnHhge8_tW@;8Y?7CCr$K;_(#2uryrC*$bK;Yk?=wF=F#V3 zr8DJH<*|g@0&BwN@u%_6l*SVnL@*x<+E+@QCzg@JNFxE(GBtsWiECxTWn?SDDQVN+|#@3bx-*or(R!0e#Kc^h-q6%SLvjcYv20EijS^Qhw2-bHhAfL z=o*y2Di`#E^lJ-JHm^)^Pi?q877sHFo1ua6pt9=|d_MVPV9bd|zR$=k6t{i#x$^SE z>W#I}x%^ePt-}o3@!R#iqmGK3 z_&PCFI=l3E8QHnkuiQ10BHyRjEzU9;x$;z1Rpg7Ltrx6E=EdEO&+|fNEgy_LtG%o6 zpDR5VT-j)5#JtET*rQu+!JmEG^|nfY%90^I1EeUH=URzDE|;ZHU!wIDAxdFQ9DXov zxAm=tY}Ijf^K*EQYRYVBlj--Ui%F!iG%xtR@pkDRI7d1UPJU~o;k@UdXIl4aKB|4z z-&;dJU-vFwL&XSWtnrab>(@$!5~=xi9e@@xtldy{AC zMNQFUZG5#}?di&`MxjyLigB7kpE`$y+S5biA^XPp zx(<5vYi4pU+CFx@>B1y@6y}>fvk0~79xcG?Z&cm{^UR=pVXx4e^vvSxdHH!=Zj|mc zNbg!6$MKr&hqI$d;c3HK^f1F(a@ab2eOQP=NC@%~RhTPh8(T4hRzEU4k=#N55Nofc zN9&>$$=(oj5j0{VP%XXh=85#LIbIvZSRF2|wDh#FA3nt(PZ5Fnfnh(EexO)i%In3~ zC0sjSIv7s2C|l`D^OD!5Ph-$`%0Ks*-)*2Or2=sZs7g9~8FM-sN!J)xZBKL%r}AAn zo0u!h>T~R;m)dr2IC(oBz)CAhOUGMdoC|if1)Y^m^#@H&Pp$Jo3>@kfJTr299-&L} ze6>}LNbB5CV&|tBbde&HV2G4h2k~tMp*To?jRWHHpN!#^mwf?s_Cc9 zXpvjv*7ob|ho|>3vhiq3>js`^$- 
zR~fxRpyODdebOmfDeU~YyzO>6qAzYQ4g)RpFyFE1H!g%$9LODkJd%!1TN0wGE`jrA z9T7>8lmqNU&=*Dp#%`$zADh$L*b^hD5wuZ>iT29bWfN)F@=mH6G#L$EX0GxK{-5#+~!mCqYyo z9ue>#CGhr0#sAmQ7vZS{|JufX4Sa_uqamxH0DNkgIhmU~I9q{Trd0+PfgL2TpXxf} z;nA?;-WL?q*>-^Ik6UZ%xacS=iJ5`zd0)N)o0{`_*#Ei?o`i=OuxW4Z@{-BJ-p;{U z%mc*o>j*Jm8@HQ}h3VHJF18>R9c48pS+J8ilQ8dn-uo<)mzkKDB%EGZh^aq*^7rY$ ze;^hs7nj#!e0=Wi?!4{-ykI9wK7LVAQNH^R_#QmC2OM$F+0()0rN=!7XV$+i@~`VW zHg`62vVQGi4R&C{UH7Fa*wqEZ!h*Zezh8fi)7-=QKX-C){`;|j2js(j!^h8ipYPw- z22Pd0?G;nA_As~8eQa$H*bKObr0@eliC@S6L%BKe>B`upVHzx?||2|nDT|7ME6?EGslU}(w95`6!jG|9_xZVREnyx+Ed ztf~oo0$zrDU3dumx%1a2uzi8B?!!VN7!OYxPvNnQrpEaEK~EX$!QtZZ#_8Ythg zy)uscvVCfNv@fx$M5*|sFR^pUopWE}$hZHj&&QbaB*nHcE3-2?k_qnuu{0h&`5QdK z|FNmVi!Vfs>~0I~`~Mv9>oh;6H%)y{{`VFB-dp|#4;3^FNca?um z#{b#ne>>^_+2#N2@*BRO<)!uOJWAH`N=o<}B+qta^Kx>6y)`s6K6Gs_Yb?30CvcAD z+6rK`s|mSNUZB4_`t?{Mv_p(r~b~<9llvcq*Pyu-Qya=PJ1~A@nWgY zu&Rq_*rZKf8`|F}`b@DXsbQq@bUjaA61rI}`_JV%AVxS@-z4d}t_j&~qun$W7fuzm*LV+zQto;1{00$5LW+tqHyH>K)Sl1^v!RC27M zp~|&z!y&2GrL011TsP>Ntu7HQ^9y62d-}a{!K2#xGa&pKx&aWrt&MH|SUrKQ?F;83 z844A|yRS~8Ui^<=m;Hzs8|7%55%1~-O$xE`*m~$b+pFiU0}HFLz#8vZEA_1PGN)TR z(xNp6x1Zm%%?UW9x_K;4gh~;3eJxSy%N3-mcyXh{9LO9sd*o(z+qvpAw7zfyS=6RBDGTxc=<>BlKL09Lf*+`V9Lmpwa|!TmK>WQ*Y8H<6YGh8)pQh-9p8)1(AHr z2XSB5uSA?~OgX@|EPyE-w@>jwRjMS3S<6uVk4TyFRvJbD2M%kTG>5f6EY#_?Q z?MKfODo?H%mnm0HBWhi2U-$B-Z(Iq^qdFBrlXJ6$KwO!d{aLiuX+^gqv_w1`^%%3h zMY+_GyRv)4k!dABt!>P87_q0)>=|%DoL7=&z!{N6`T^Vx=RFCB$(W6Rb4$MS#`aAx z)mGJ);Y|2Zpm%?m)Ye1vP+UXI#9fE~5w^Xr`^6fWG_EE?>{22lYRrzh4G% zemOom1$)3Lj`dH0+n%2?PA#XJ*{}Kp>BVgw?hTtdv+ua|)82V6L?3Sa` zkW8>xpYcmW^%y#2I6>sq!^R0;E@EoelF*?_b;eCjRcn3gyC`?h&F#c5+mrBfNIl4@ zJ)?4MRSFzu=oD$+h(VFu)&3vrdSIJr%z#MY`Hwf-X9*0C}U$gQ*-q=P{c_{NDsKkJm5uiAf`Xpqu_ws^vam$2E`w zvMN2YBNo^C%#t6s42k>M_B`0Ny3D2nyBOl~gEWhJ^7N9}{h!RTkKX4~l+dnQ8d&hT z=Gv*z5d=8k-`>|dd*cj|JPVD#MYrik*I$M%1da1H9GlV!+v!ZGs?hwy`HF2Qb+`yo z!zxlUyfEZ5^lsbvctde-DtQNcT##k0L8q@AL<3z1Fc)Zj*iIS3fhuZbatiY6B}$5;$+x`Z%>HlYMP=$ zlo@6-o*1q0`UtHv#Mi_Il?M&-~8_7b~t4Fo8vO)be 
zhvdVb<>@6@x7`}OWNfpaAvSOIp8~UzF^io$dkHOWtUEuR`qrq>u)9u*%KIulRRr}c z#Owq~9X$IdtecFo3~*dTNnv-xg*^sL#7}ai;Ix(WKa1XzPAhDp5(sy)OAKyne>`Uz zYHcY4Ex8hB6zE1yRK(WjND}*Rf=N0%1ssqYdzfhVlV)p6fM{eBU7k3=_{6lQ3L|I)rcKO zWud|4jpv)98^A5$MGS8wNZb7|Ug647!=Q|(!p&z~d}Ig2u2F2)ic~|a_LNk8y5w_`4&|<3ky27O z9Une~0;b~Img=2_CdA*`tV?sRvKN^h@ZC>HILK6d`;WvmpOSDgrU{!Rh3UL>b#lVj zwSRxiw!Wco?rE=Wa@6hQ^{#yd;y9=YN4Vw3TDexM-euJcm#a)(Y}#hM!``3RzIhq8 zM!m#5u64PWrIj26#Zp{F^Yv*~in=VGogkV3u+#QCN;yKrTS-kpdFarsisTWx_mc+ByK{vNI`)>HP%zZd0x9Q zijVPN$5z!erQhK4i_|YMzYGCHvBh7$YtA^m0)BuE`;HE8v<;C*eRT_4DeP7)OH z@!1FG2-;hoSYw|GPa`V1w9`0O#kL*Zdygdjb%2%ZYkKitP`ixbM3}k;(W^z4FA&B9cC^r^xR))oj;fzW?!A5z%pLPHW5S&fswx)kcey@H6C>@{e$6 zXS8ls;6Cl$S1QF)?25UpixAfH8DLd0QrGw9zulH1LwGbOpY6+{4m7A<-GBOzpy=nO zoT^NWgkm;~?c3?y%R?EwW@=r)Px~B)sxf(Ee9Fvf*~sM9WYy)nlU~^=HH7g-Jl40K zEt`f18L8^YyM?Roiesa?4Q}K*R`jfrtxAjqNuk5{gD~H*dsr%PT=za!+J|`RZi)+tq4W_){m(sDb+L0u+br z+tIV3UHO*$FU(a38~l3;Gt+ZmO}uh5JVQ?WY_YmhJ16WIDY0e#&r!ccln>A%_7X@E#}iHZt^-bno?=lBdhFuAs%yZL0fN0i^0jy zmO#0%SAj3$RVq25O*iCK)Z411nmTsKs30Q2Daf@qGU^o8oZ(o*l%o7R9#qhcgJ)dO zgc~EEk~8S-Jm`7a$K&~AVf*##8zVUysO0Xe{{Wi-EU~7c2_wj>Jn3xsyxjO4yPO`b z_$UreUt6_wH+c3-N76_^`ksQR`zS2)1x7;(Q)L{^qRFM6+ozZT%XF!zA3J9wnN0|3 zNPg*aTy`Gf+y;Q&&z6k)WhZ>hCQ#4wlk3}!Escj4VL$7f?K>ivGmea%{)Y!WNnOWB zlU)<<$-KAaK$PU+M@qx%4vSzd(C=&%<+Eg1$$kA&0<6inHm7jIy#zEpY-r%lerI^9 z#SB!Dm!j$Q(eb7vP3PFqZ5xynj>;H|sXKD~LcAiz0HiVtv@!UI6fjILdV7;L`BlAk zioM8;OPbQjJ6e!nCreDNfX&{y9hcw-^;2-%LM{YR-=USX9@Ugn=bxk*_roeN>KS%cs4=c) z`pJzi-hn@9K-9v4hg1T9Q{kpjxBV5@R0_uMAvmla8@_6{*Jxas1IA8Ss6eJx`aYXF zTQ*pd*4fX$^+W*~>F(it5MyLF8J8wHkh;JOm~hLe@bUg0SnLqkKM2IVpTKjPsFM|>@6cu~#DE!bY*)aFQ}nE`QJ z_HvZUbgB0p30+CSYelcYhY=&ATFSc4A&eT=hq`ljJ+;z)(BwU?Pb9N#sW3d$ZLc=> z{Mf&tJFEMi8Ss!vBHsrtN~LMdCsfh+fmh`3b03>P->RxSHP9Qz_I=)~1RVxVAty>2 z%f^j7e$-ls}2_KdB zg24L_ufdu}c=#!w-Xue!vpg<^#nE91?k&^u8QnH!JOzi>onz6v6|d(XB}hRVdXvP* zE&7w~fdtdRyMFfv9cn$VXtL*vz-G5GwsaG=-Qc$KooK3dqcB0WpZ>q*TaH9i%jCTO z{7f}rrz+TOiO)f2AxMercCWAVLqYCV}j#uM^kWxt)^& 
z_(19XI&<(74TCyf)7-4L#zLtQB@Nhjr03$Zi%{NW0!Bm@TSt*r4jK1(>AC&HtZBaeapGu`z z=G~==cPU|v7ycN&RX1TScQF~g*a!k!45NR2Ry3V3s6|KHCChT87#+H}$E)2DMGlEe zy`u{#Dt0;P&pXxngBcmz1vR-7AB5w&6gD$!( zB__rChd82k5$AqKC#Z%o^ECQRx>+gy^uz0!84>l-sC!?>^_LTq>o_t3#ku%ksKrOG zyDYVG?hTLGo+bMd==x^uIrFLN8cN2Ey$=pu)0H}I{%epOpVq4aCo3^Y0zUCG-iTe) z^fA#Uk<@Vmz2otG=I35O$}$;cyAMW4z_6zqn=1+6%K9V3RAJrTGlpuIjl)G>o8Q4p zGe%nLA}Z<9mC}1|m|4^)m@$$>L}63K?@kQg$hH!fwxR-Y|G^G<<$in#dqTuNX%zXE46O=zx1VKs-2|HZ z5^C(>_2Hw~B2ne&r-Z4S=bp8PYr~x4snbk&ep*RQQ{YB>K$9t~KNwPU>%Lb7cYtkH z=CaPO2K~99nf!7uBlUf(>FCZHRVy~#Fe;bZ=Z7@?T}d{l_&V)J((0?DCVqa?@^^G( z7N8Q$0I#UO-~b2aC+{oF2!?x|#$B>7V@*4?>TB)=@}I}J#3+EZY-wjxb$f}?SOOmoMsCxF(-$}-9IQ<75`mtuv+g+IALDm%_0!Dlu7 zQ&>(z8~x2{2qnLv8X0)6M_;T>fy`PWAh^>YP?k0Bn0H>4VI3QdW>r~S)t2=40-fb*5HBTjDKBGDQkq~0C0Z}Vp=wI*q;UW-4D&V` zxfbM55vgrxt#4nUWwBg?W8gZ%evRjtZB!Z~rsJCE5;^R6A#~=51!MbqqCO|!4{I{* zbmGQb1l#g*JD#YT#ASD;ndd~DM}lOXPiDO-du5TDFAVF93y!+=1=*TmkImN8BA-n} zbabFW3cI?Y{jAJ2&+)%t!EL8~3ayuIbdBl=C1KzHcGSBbxs{lD*VbTE73WsTMiK0X z@bv4J?({!-&q4=0?}%wiIwsM$q>_w<-aw)xZT3~sGa1H{6f z;IA3ZL>4tSOStR<)*I8eYq=_N1>JWMz?G&fcBV)}BVgA+7Vd zv`VlP7}XwyTJ`4^GO20bbM)-~-0-rjJoxv;E1miQXP|4gF2&AUUVURTKS{Co6j~Ll zj1{$f&8BgB& zi)qB8+_!qHQDxh{onau0J7j`aH+ldA9p5E9m>UKVc`O-<_1qZE8@K!tQu4=aHE};n z&Bcc?+;#o#!#6caXBV|&`PQA=Bj1&_olY^gT3e(>-qYTrT=52(vgBH$^;0*kV_J~E zy6m!_5#>A0Rpyt14{I_xFGzCihvhReK4kmwH#6?Mm2Nv#+^Bs;s!6QwJlW7^nRuq~V2+gJW8{u|V0eu?Ty}B@Tr%pymjUro=Y#Sr=7s$@tC}lDhpq@yc-{$V6JWqzwjrbl$Vi zf8h^rDz>4>j&EwP8f3wd8{`n6`sQty`3^ND1kZBcQ6N65tLtISgHVpN2!QGaM-fQJ z3~K|56}kO8>}05E&wBF7GWMuzjI;hn<{0Tq=jF6;KvO}gS_I`;Li;5=h^EY&;x^mc z8B=LMwPPF7#i=3Wq|UO2+X zfKkgiMRI* z#;+r?UND>M%mxR%zkKe^X#K7=lglj)_2Stt2jfJKu-}~S8C&<(Y{p_2`4*At(Bd_! za4Cxo)6iG%@%6o%Yg5&&5B`w%i#vv8(+lj4YkhW*qwC zfrxp|BwU`Rfl1U+X*8#SAb}w4tt~O^zRAq9PZW7tkkG>xhjURz=8eYq1&eOj=P4_ZV72Yju3bj;%4695# zQ$d={ijFp?w8!Fq*(!kQv!Hd}#^+*V|4MMwekh0l_gx z8snyJeAx}3I|QEd9;#7oaMt}=oYT@KP;Lt-W8E4)WVsWv`Du3g=chQ_$WomMuZbh? 
zqx;+&%(9ZZz}=c*4+MU%7N+{(mO!EL_szk2n4u7geq*SB`MK*y?z-*_BcAQ5f$B2b z+|J4AvIi{YkgS1$3_l*@TQMsiL~W+e4KZc>)*%D%3ZqExyBn4gZOz}CjA5*^Y%B|0 zC=x(}Sfyz@!bi3DKMvU3o{;(pCi33c07?WA58wXMo;5?7eh#K>4$3oDWTp7lsw>{- z`(t?y^bu<+o1}<3XCD514%QA&TEK_Y+kR3gj`89W6^MSoI)458%V|>32w1D8oKvRV z+!@p=_^88wC_`I5tsd-hCxvUdLd8Emn7WiG;q=B>P$Qw)X6~M0+5@yIXCR=E>_$@r zrMYfy<3o4mie>7NSTwNuF{hm!(YRHq`Qc4-3$mHduPqtx&X zNOjW|1JY1{%*_#~OgVhTW}HzfG((K^lT{yOvkj@e(fkkwNPKR9#J6l4g(?(n88!T} zQdZnBOhNZ6L3=>nt6o!ar{4^^7k)>(4ufeISOnD3AUIU@#yGMf6<1(SRVTc3?Y0$8 zL%VuQF#Dgz3j1X&ezU;Q@$so_$Kia9@vX3OgTg5J8b}sTcFAls*p*}Yl}P<1_Nx!I zR+N_oJoy;;7?*HLA7n%!M7v)iHvCsy#BrNS6KMOiP?wQ6(hc4FaeVnlyf}`4g-|}+ z>tNlCkiAy&vL#TPaXrk4;NykJJJpK^_(dM@=BPe`c@IVx zC*x=5WsN`12oHs=ccr6t-uHLg@N9evp)k!P7Hv5V4k`18PV)Q}2E9;6VWChP`qjpl zjSn|I^q4xo1F&697vD13)51Uf7Qg9Zb6QikKVz{Wd3HmSq;D=z3N9g33SR!wypiiU z)VohkxE{Y;|Ftto36S-M$idw{;X-mllcP*v&8s{4u!gLsQ`Yby?Xe)JT?||xIn*cC zH~JAQ07HA%B@vfI)042%-)zjcLPHMw$w~@D9EusSJ3-d;g}C_3tu)ugQzQuD`B^#T zkhk+piR@2LA;NhI-Mak}+A2k2AG$D0C~LyG)Te=e(Q#G2YB+E?6;OtybAAerMVA9;Nj>Qdn4^WRlE z>ykTAHzrx(HV1{4aOdf5Ds~`kp2bUUkTRT=fv$^lEK($J&uW#_oAQ_&hUh=<#FD;G zlxk0{e}@Sc*t?0`zDeXe;VXEcHf0tL3R!$|31D^7>?9Hg1F~)#1yz&I{UGg5^UrjD zCXt*tF)#ceD6#7uC>TcRnPfLTB>OZiyVQ1ZDDMpH&lJ(_iD0E$xaY|W?lfZmc$7K) z($G0Q9qak?hO3%T6s3LN$|G8_?^@Fi+#&Ro8su*VHV>mJj!g->tLfDHGQ==lO48(o&2ci66z9M%vx0Xe*~g z>M~;6X!d=Ye`)RQ$bB$R4V3mZ?x7*aC`07dEW$j&u2i8!suP#O zWV88c8~X?`!*3w3UKO@E6WlNupJ_7;*89>$SGb%^ z7QmSo*Hw7==+iX?Lqk~#mC-#mRo=B*F@&WzQm$707)hobm6Nprgt|P-dyd_E@!Un0 z7{XgvWaGmi93}3Qc+rLRJ^s!jZU^H~;4ya{-(A(}cMk#c>jFLzC@EACizYzch--HhngCp2uEku$pd85Vlqr;O1kPFG~+w#F%?0iQf`L*x4=;b~RzS(O%^_t^38%}<|xkURb6I~>df$Dc+ z8ycBQag57$TYrk_UR;+h&i{&1#p+4pxuQ)egp1!nDG)cQfeI2wwx=&z

Y;qNXrNT>9CME3+;$N?$K585TycGoyIF>z8_#+rrfqVozfcLsZXY zz2fet>j*wCkUSkS#l{`JlY31``Ik_e!aX&6k#uEZAgOk$+Ho--V2ciEze+~js&g~$ zvlV~{H!)oU{7`PSFV;AM`gf>K(K$Ek$Wwg#aX0~Gmq%( zF2q!mC+_KL*>sm5-fEP;LG|xKeMs=$;V^;m35D7Xd>eZ@L|%8U;2b ze|7@IAibbB{)d2gnSd?m3W5~0Bx?OOI;P?WRKGZS|@VuxPIn%?qk>~4RM zapU?U)3ng8d5R*^4e{i+dY@?dOFA9?78V|mCUe_EqnjE4$$mt`-}-3>PIxbnHW|y; ztsGVn|B@)rxa^X@=bGJ-t{J9F6jNu403)$mlxJmEGwYv+1^!w}<)1J~B2yqZ=#n_IY4a6F;5K@}K-)ZzpT*7pz}^Mo zaw>c+TDZLD`_C#2EF9__9$9bO_1s0^KMZzy#YSpXYb0{nKLl|kAM^{d!MM*9qumG8 zJX7OZnMOUcNw2CDt`#PJx~KOkHoXhSXJZ))IHZJekz!+FR+x?+qx!)^N1t?OJE4>MpRntzP^{7eP z%5Gj5+!wuRnI!IRWD~}vkqNi2nxV1nPm!7xo^jFGm~CtXD4~PmVFo@v`5g*r6TT9r zKij3eANw3kU(tTYdNSeRjuIKzWt*qSZ9C#@Y;>?O zOyYj)vBgy*xcjV6 zTH_c8AnqtqNt+nQlL-Am>zd@LvJ0EG$VBN+TCZ;m8Ip-A`+ifP<)iJ29$=VsFH4ba zHfA`J!-x47c^hyGdwPe*=NCRd`rIXlB1hEU*6>Ktqg~aV^%P^Wq%^{;4F_t*83yFV z;3w0%0+Fx(=)!8hs_cy~@N4jteh27kZ!#j&w0IRChBN_)3-Uc{Sn&mdOK@DV3|AWq zAf=rYX*^LDKR??a&3GEksamPXThrRG6TtVmZUE-GVS&^99;J`##HM&Gkmx)23Wf65 zG!+KZI{&2OQ+rDT%FO)65D1M<00nO8uL{uWrRes8b*9^jaExc+9G7H1)K z5^!N%)j(3L??3Wt*H2MSSnh*wo7yZ7A>EiTy#x0(+2S>%1$n5uMp% zJ}($&oHp6crQl)d^ZjYD%T>V(95QcljV4*newdte05l*^60XC?buH=9y(t zl+!TDkgXD+oZJJns@sTf*EtjKU)GAwGCo_N-38f|4?aP`W-+|Y6HE4&Z|(yn)(3<~ z{ANGkw`}txMH~U<_5i>PyNkcWm9&ZHzw{BIk?{o6V)qwU+}LSfFB>M;Nn>7yJZIl_ zQmFgLzyhj(b$b%e4MC#!^1`s=L~JMH-OmOY${zDOt*15UmNA&#+GXZpK)9jAyHvleCBbcSm+obyW{`YkZMq@i|FIC&>RzP8Av}x)2 ze#&`yS)W)pY8HB4C$^=x!DD<9;&VJ7wCl6tEVA+V0)F1Z3xp^wZ-Sl0-$gzXQCx|+ z2%KFNX$HCkgB0^9z@zJKa*fb0mnx}4LKxO#LlVMSfAv?de z_3|alGG?UW^9*S&AIF|jjN%{NgAB`;(Ha)QGf#Y9pc5Xo=GUteBlo1^QZdi*VT7CV z44$n<9`gb33kqi!hKS--r7-1B3-0?F;-3P3Y#B<+7q-0$Jd<~iv9IWQcA65IQT=*u zp4~vI%(C(l5NFk*Adx)Ys%uj@oKf8taz8uV#27_ zdTv1%>$$GDOBoHmt|!Bo*=Wnsz4dQ=*2wwWDMvkBHI{X!=|Ha%GbM%Ucdl2i$Ks@@ zlx-5AzeChF6Sc}y;%`Fz*bK?X+tCxhSSY%b^&;FQW4_qO7V*^Oh7c;Ag z05P*tp*Yry$LEQd@-v&~uWuYZ>NkTO8-BBh9$#b5q-bUuas4=8wM|FgW%=yHTX?2r zS#Yby(raBq@>$+5xCSC-xc-m(VFi{@Z4NS2I3-Su2hU=*+&2%$jjMnd)#ZySOZl=; 
z&XXTHgqi{~B9?fbnT)?v286`^&uF^Y<0ldygf4eiXQwdKJ_9Nz@c`O_``+Qot^Y}L zc6JGKd5U=+e;`EFo({R+WbsQQ2n5++Q=}`OV zOnsFNY98a`*>~iJOFc6=-vmQ#aZ;a>h#OJ|D0gX>tv$$;_{ZCZz0w$+aQlDn(Z@Z*G74)=W-r^xl=kJ zCmtuMLB?#!<4cz*($x4N$dVvKU^TRP z;Xqnd9?uUm?D&oL4?9Qv>L(L*$aQrhL$NG{b|WGUwpCpBh+;=LzZy2AD(I+Q7#SV5 z{WjFzX6S0J4)JJRHLr-pHAoh|@^OsJzH;szU3@;II}fyK=9)uSUp%}7dt9F6j+j{b zUO1AWNE;pJ*&arpR=1SFj*tG#Z-#7_!a6Fsu4Tkc0;RiKCr7i;SrxH0Hptm-dz$y@ zTGkZMc%gGw)MIDPD8Lz~5Z?Q} zJ5kzCKL-W|?pTZysIbSVrbvQJGmN2_!+@U{T|zgXea>W$Z=kcCmR;xkDuh?%;1jxu z7%mND#kf&uN)_}R>((nYes_O0of1D!34JgCi*@*#gDen z|Ivz>2Y`mqbf6)Wtul|RO(U*6R-HE~F4QneRg-ZhEg>%ChCmdl;SDn%B0^j%9|T9M zD>k>^>I!WBl&R|#2j?n6d*g-z$l;@Og_b&1nKoO zx4CPH0qMNb6HG4PWnxo91Zn612qGn7eH&=iF`_m)MQ>K6;2I9b(%PdK9cX7dq)lQu z0B?BKcy_k)yFchS;8b(!cQYWkcr6j6WBE&j(b@IqDo&ovA0SVfjdSmLebTbP3DuAg znSTk@p?A&_!1<-kBKUPMJ=BQ!jVIb-EmLeEDWqn8TRZX0P% zREW-Cz%*PEZ#5r>+r0-mQ-^ell=eL}wu%cy)z!}>T>QkWm~pE1wO=|6JsA+4I6*mI zU6Dnm#bmY@{Xh`eIh@kS1SErH6u(0?p5HPv&_7uJwc&dqgGk#&w{sx?r75dqXHZH% z5xVAuKAf(6hljq(Ko{hQSB61|PcfS#=(1ME(;P9r*eyxp=)BDeET_UTLi6D8LmgPR zaV~e9UGf!q+BC_8->;sM==!HW2+(B3A#$Y^ zg7Qi{-#;G`C_h?AoWr%DvlA=?u~dn?N2oo0vIje+>Yzhm6kDC z>G`tjO^DHtwWPkT)A75OfrrnC)1%NIZVKch%z(PX4P+8*1lC@ir}YJ1uq`xR_#a_< zJp=k}NnXz=1=p-f@^`Z;ppqjS7j~B9WvMAuMS*IW4Se>5;T`5@5vuh6vG&$MRj&IV zr~*o4CEZ;rNQiWIH;R-X-Hp_ubCLIX@$7TdbAI>!b7$6` zab}Nuzw24w=Nq5+zQ5q7BN?45FJnIcBfQZAOGPtjnrnurzc-|Li&vZn{xSx~ZHzffg<7fPe8qMMkedj@AJw`%pShHJ=V!2` zdKZ*FaqNiYlegCqB`;rX4Ckwo$Lm=9elYwgfMfa4M8r5sfNwTN6h`qP{i(0k|Z$67jgHEi>KzjLzpK0wH5bE9Z3%POT zhNDk=bvWS(0YKk_{DiWRBQqyDu_a^ zMTe=!hvcs(Ef*w-2xBkP1j>{26HN7}@viQS4DAfk8>ufEEc|omV&V^j=*>q%7IT_ z{0;d2e`C)n5??!gtVZe*)E>q=R0~%4jMgGqcC7tq_{>UlE{MIShU5_NVe&z(Sh3cp z*KYkEYJi&p>(`ai_85PAuD*XE$fa}2`7aop#*701PS+4(DV~~-GZSz-7WDu;f)li2 zt4r^Q|6uTxJ5bARd-4$3JeheDSmz7-NE#=)5wH$e=vKqO@W7JvlYElOJJmgAH%?ERKRvgHI3Lh(l>&|{ zX3P=@vcTzx{D(8BzP;9$DztP@$KcN}QisAQ3E*=}L@>hIVZ#@E%l?&y94!`gBgZsk`x$w)I#0A!*&=>6tS1lSa?~ij{{3O1 zCS;j{N&CVI-J@6g%ty+g0cTj%vYy@Im!9E6(_%-o`O%a^<(FVQRrCBh*1zo@ydR)I 
zA{FjF&K}N|IJz4~5@S}Kv`%m+VMEY^SY)WJR!5GrPv)^KS94NZM^xf`v5;Z$L4+s$ zniDSp!Gj?Y+j|xi_$qu)k=})Z&87?0DWyv zD#=M={d&51&5L(o-+U>E=FoN-qbkWZsHlls>f#e7SEW6ugj4U>Hp6LPHx5NAs`YdiJp14G7dwgwt{d>8DoUPhLfDz z5188S(W@!1rvDXQM#vET)8!4S+H}D#fLpn5x;V!B7g1wT>e)2Li_NK&6FI+LKi|D~ zJ8+wxBqsiL&x>}+X%P&@*l=TUMk=4duK_f9*3(~4mIh1e0s^J%y?=RRB2~5r@gZiD zXCs54pp>p+l$5R$G6?hgqSgV$rafTM#`9E4pVVOw{NADgskA3Fh&lrZ)H&eYcRQXa z_k_SI)V#KBG9Ukd1-Kxh=1usYf=C+z@^ZZCYB9%f68(jK(ZcQLDw@ooo^|nh&!5pf zzjhkE5!!9$KW8US({e%7xbAGf`uwrkgn6ZU5Et%M&ixgG-)Se--%pIf+_$~RzjoQEUaa6Uw^aG5idh;Arml1#|5cOrTi9S?qN|5<_{17^3Rv=md@P5vo z3?+V|+JHI-t!f!9jaVd<#cBA|t^n(a8LZX@i_=ZsM?~z(4Fr&m9y9B%C+MyRl3aJi z2%Z^8rpIY%{F$5xv9fBK+{+;%g+wP#u2miOO5x5I2@E>!&pX)gh~9c3Dcw+(8c~gc zQV|6^)Mat&0rGXMF*y}M*EU@x#cKR5{eOI3kq_v2lwTW^vutdAOC~nc9M76|-%lep zHolTf2jNtK-c_p6*a@`VO0IPG{=`#WxnIHfPz+B^daENitV~;Xpdrv|Pp&W85vfc? zrw}Er&V?2nZc9nGNWa;4fBp`mC+!z%%xuvqBK$SmN{pQF7EJ{EG})@GOrQIJ5l>MI2E{v z9Aw*B16o4_#LL;XkH5`M2hK7BuS>KxHTEH8^ytTcf$WrXH&|sF49-|N*2AWu)JKU3 z5nNG7Vcff-T#FGxW}`*gv1)$Re};~|Ak;GgPe;B;I1#{FUaGtJ#Kaq~lDKBRMZzvd zBR1*vIU%s?|7NNfbC)XI-k6(YmnypE-s94fmFNuG=sRg#>%lp;@@U<%Z#}lRB~r96 zIReVWuA;f5@zT+tPThLfVI!N8a)teh>QYE-N)hme0sM(|>TgdaZy?mdeW@_ZDugE4 zy!$Y5Na4us)O*sp|4Dp$3EP=~N7&iKA>P(<&pqd*v!>B1Ns5bN13z?^Sz@_qZl zK$ZH3ds)c8pbqpjuU}B+_@P7dWHqaq3_~r@gF~VQ?XGI5dj=sLh(kFJ5M5oUN`TQ= zoUXO*uLML2jUElD(=Wmf?!z{l3&^z9cbz}WIK*@@iEU6j$NUX5h8s2ika4VWQGO@t z#AiomaOAZX9LSBcB!@8^zQXB=2SG*21SsJ)zUS1>5}1l;zFj1pdtYbOOP<n?s6v(=J>caW0!Ildcf#{wgTly?7UD+lk^`&eDxrTV0U8w3BdX!v5hIh^&!u)eb4$Y zL1XjWT7l%AN>sp>UEY?FP5*q197p~x_t{>!(Wr-YMoCX9!E*1>&BNtqV>1s1rc0ua zPF#Hg9`L$4T7+Hgu2D`!0@NC}e89U)c=DxymS(En`&)lJR>Hqh0~P2?G9AAeSF6#4 z+Z?@gFn&3`(oxBYLtmPp5f^nc>B(Nk?t8LKO5#CC*Qoj5x<(z3ORIq!hut3MU#FAK zGI%bI7lx~CmnW^RnxrB@lkn8k$p`)vQ}_EpvlOECF3 z>0OTkpD%GB8>yB5c&1H$AZ8TLV1_*`f6eY-ADEe~t(DZ@4-ygWIH{QHPsuUXNvLS5 zkv(bUn<0ZWzIg>*f0KzBTaUgOKcJ4a@8vlf1+x*XY9XvEkBto~8Z>@QfFCfB=7oQT zoMhaKZ@xUzzxR7N$zwvwVJHd*!8ClJj8N5PqtumdRRc?Sv&W6(Ut{28X=Fn=Ac_5| zd|9Gl20)=ehpG!ZizW}g98D3&YTdekun4w 
z(W76Ca;(J{eFF$GC2uCvfFI7YS9Df{?<6w6eE7BfoG;#2Nkrs3Av5nDFdOw5CcdBh z?E(P1M-G2V4MMF>Q7y8I$dMLCH8)N_`Zk0*0UINM&|$MSDK z^m3~1pa5b06N_|?PalA`=2Xyy@_CNo3M-X%JX^v`>r7nU@A?-1W!>xH9R zz>%>ZwDfPmUivU8y5OEm3jT^LV9wH4hRsQ#W^%6?k0Xtw!SKx_2jwukW==XBlrikjobr1v81Dn zPlbkL_QmeaiiPNB&mR7B_g}b z)WvkmLdh=F9kox60gb-M*4dUx=s$QE+IarIRGX(BcTP$OT^3U-+KF85o<8ec5&8kl zIlV-Vv)5|@>Uz9w)va8z2O9djjf8uEuA&SS)o*Bb|4XC-jldrmu z3IpgK69%t3J_b-D@W0^4V6LC_LKoE1WTKVi`24$a*oo!J8TDIY=^}%Mvtl}z{T1!U z0Cdi*aSoPQyP#vO#xCf4D2`oXcRSPW=#!#d4o%=vRIR)^K911YC%ODe=JX%goGsrL zpT#({R*QC@1U~rnoZ)%8s>NXIRz;G!^A8kA8ep4N-(5@OdQzHv-d!-J<5tP20g&k-BICaN8+C7>j6 zUt@gpzx;E-{)s>-44{U|>#vv$D12&vPA$$?)pg;PkhM*dYdm&J6giC=cicx88??nz zqc9;$^M6X*QoF2MUuG~^+K_UbT=g$YXc_9M%L*$YOu!?+xY7Mw?_(UzIlR{+3>lYq zrwym#U|4km4AsWKUacGa^L2smDzLqjabC6m1JZov&ivWDdJ_PFxY9vEx}@qxyq%Um zR|e6xU_+oCuD!piwl?xT;n8ziETR4JB8bY1|eti9miy;Z{Jjk^K-aLKhecdScPc?s;V3;vy#n)f#P zoz6V7upZa3;tbY%3-2(cs;3dS71+oM_q$+k4mhW?DDw^eTKQmMqxYbQ500FwounT? zW8}eiHjBswqxpDM832yzGj^GW_eIYg@V-#8Ed`RdXGk@G=j%1-`JitS{g#qM`cqOH z52`yDuu?dY$Sl@{3c#9#He}6{Ym&r1c3aatI@#-pB|v2`!+TWSI|dBy4J}F4+dyM5 z0hvI9@6iR7=k5)ZKKbL@7L`D#tOn@*`Gz>{R+Cy}edHdG*v{<%0JIlT%Y~78ZSE=6 z#-z)VoHKUgk5IU|%H(&3sBnl5L1z-8N$uvQICzF5c<3t38Y>n+)$54_|I0^Ew_LReNxRiU#{PswVw3*HK~)Pj{O%|DY-3v2iKKcm zAl81~>uZ0!aAT@e_3&Yf!XY(5J!9Z*e`xeJkwmF`GoAiyp;Y_DfCaP~jfsFDt_+?I z7`+B`m=-D)5ufVKM%m|!s5Ovgx-jDdpJ@DIA0ycF3iZ~&&I0n3vMZ+2> zcX?*5_Jhsl<78T$qbjzu`a8;*Jo%r$ToT{n{u&CF{MJmGge-YzXRr##ldc4^c(!Uh=N!|+R{c+} zvcFSydlki#8s)M8ms2yX>bnD+5Bx}%Wd~){qaNvL%|H)amIO)dGJqn54Z%g31i-HR zrV#&3*PjQ$SpVc+$d~XN(rF{5MD&T;>BouTgiZ-X!3>_bkocv~r{ zyZrQlJ7NZfKV}y{BqANV$^t7~aq_P!2lNApD$mT`p~HZ$OraKS;3<$JLW^$Aff_HJ zjUH?z;!hxXYC22{6=KOo^RYCML(4(dy{Vnhe?QdK^O=W2ym&4Us^Vc_mB!#F1ujGW z?<$7~t)$F|UUl_C2PJk0b{)}$iz7U*F5X-2UT!>Z!BpTe$&ztk^wEP@A1bj4(t{l28Q3XwB-`lB<5)3 zM7ETC-gjuGEH)=`72=tGsk_>sWKE&-Y-Li<-y3Ml1M_F&UzIfit#rj~v-_Z|$nz}x zk3qZ4HjAPJ>{eP%L+w8;8_(^z0A!uk8!19$%m?p~=B?lh1&6v0>9O2?=KXFa_Kbc1 z8&Znr@m4`zem6Qc2@irw?JC5KMift8QfJ2?dvx45R?|9?{m{NZF!-%&YT4+@Ab!~Q 
z6@?*8ufnJloay0L-BH(gXQN5Rrr6>yeE zySKHV%?yxYaTg5+)(2P66jZfy zXC32N=o6=SiFwm~%+>5E_3uL0Ze#JRx~1Omdt2bNd$&`o8=EC^Phw?51EM!~K-Ujf0xq<`# zZ-M60eAU-u3~UVQAvyyYk93*dQW>V{{|%h7IK6rQ?y6VXYn_!%V)xKDXf+467+QU% zj3jl9E1l*P1(8iod?`0wcvHd5C_amc0(|?MCE()@KW@1iW}lGL;1dvBwNTI+plRoI zI#s|%z-*IG6E})l?|_c4mTs(`N*XMa2Cyfq3uq(EF3#W;s~HKeFE7sdY`THu%=l`t zh~up1@RU#`&vp}HMjtzn!26jOHQxj-a7^$Y*$9GnJv*_opRBUJ>f!)>x^aFqQwh2X zzJ^|)HX9k%aM{@1?AZnS@n>H$JOFkxUx~nb3GZ$1Jq)#UO;h1R!!)9UECSCz*!@-u z9`*4;M=DJOBllD%wRen{h?bqU?EE>Ru<&ICuf!gEf3TxbaFXVBf0Jks)zpeNS@`mg z^dTvH7xs3;^Zq(s+;z&Y-)~%uKMWM)myJAMUIshW!5W~5&0>~b}`acM|?Qwa2Gyxi^u{?F(^@7^(_LJ7_>1)y)OLtEpa6+N} z$-ZY3@XHf2F2}rBlVC+BOTg)=V%{Rv+W^Ipb|>?1OlBVD8A$OI^DPVKk(+4 zYx)_MhDAf4W}j~QCbiq|)C$viyvd$X2cz>GXYP+_HqMydm|eJ8jenOpn7{W2GhJg| zU2UoZuqAP2i1tI(-ir!63}|7+DCF!tL!56o-9*2BI)DNLlJBG`AeVcks%i?NsA>c3 z<6!IsNRMtKyvI^8qOTf!JIpgbDi+gLCE4DbU#KLqhV6HXP{?{{~(#BeUwX1vm!et?j!- zJnH)Zxu2q0O@Ci2p|wOQqYg2vWJM!G@uGeK^p$;9IP^n?93AuhjxkrOJf)4P_`$YH z;VW2N-wgQtn?LZ$L8$mofQsL;?SJs3F?QfdZ>uJ8+pjGbMcD7H`!-5PXAVyy20$hy z6<(9Z9!&?mim8`@CENP zYXrZZ?Z!9Wy6)QjjuEtl7~gN$2d)G4UbH!Ioj66S+jNkVEFP_oSW-*d1s;G--_j=% zUU+#@s3Vyq8}QIKQH4qDO(R%cW)4m()QWj2Y3+mbp8du+OUuF=*r;{KTnq{DL!!WN zg}T~)%j%d$@sKw7ROqD$z;n622)*oDdhHZb>rmsxd0PM8>iw#cGmALBV@`KMry*Ey zufe-AHv-mKW<}r4N^*Ka{NUYKfbg774GzqZHN;LZCJZs^tVI{95OlKLgH)Dee{~&$nr9; zt*x!H$gVET7t}Ci^Kj4am5g`Rr0e*+)BayV-HPb@urXCB>#iHC(z{x{H@3C_d1U|f zf#KZ+KKjSk)iFq8`RPCe>cvU!1|@Sj6sX$`DPkc|SoV&n#x-34lff9!aMnqBs;6r0 z^E~khiE8-)18^S}_6xp+k|>=ni4~>eM9>&5S1iS;(ho_cnoooLih|dQC>`G-^!x_U zt1DsB%m2>ErV#$7#zif|?oy|VZ&@v|gmx)B{AZWKGco0%BnG&KX^>G!^V4~re#`!EI$%AYZ@01UhGkImG#63bSiWkde!}{bAq%k5oJ21C zAxA&(;a926e96QeJw4j8L@Trb>2Hj&{o!8rup_Wn#qYkp%<_cbtBwgCX=2JEnmA{o z$8KU6_Myq#PBxAcT>}^17`)#y%X-*mxa;O{yXv{664`(7<{Mo=kl5nJORxX4S-wL2 z!$1B6a`7ae9egEIwOfU;R*yp`8cojo?;=!pZi=nJ6~2Gs@jt=CGhU9g`QMzx<>eR` z4u3+$DaFWumGSlg{2RAPc7O=i(+_I=u*aWp{n;f&V#j{{#_&?A-R#=g=Cn)tK7GDmZc zy*MECt$lkNWq4Nc$FmEKs)8xB!4dg}R?cxN)Fs6^0Uk}v{SPRWo3MUfxb4{veBn&K 
zY>xr>jwFklF2GiqP9VP51C4dD3qf@B!Mt30sEB4-pv5x8KyptWLhKXa@-1cdLSdAXvZRp3@% z0tGu(Htogp#D(jqpQEqcIDFHeX^#Eq`hPxD?nwPliVO|)yRuXobDIhnM^nN3J`Z`a zrH&4nScvB-68RO?9rVl93Ua2&na*%tVzQ5m3e4QDvOESVXUh7NC?FDyFoU_A*w zEW5Vh(sVuejN_tiu4y@ZFI=kV`LoLm&YN7q%mEAT_oGYnyE=h-B3&U{?!q$IX98Fr zCEZU}DXY(RyYy$9JYU_J>#~Q9l0cYgi!*zqrl%X&@^o+W26_~!aAyRGkRXXBX3!Gw zAjX4OZ*QX%3E2lNWKTgVaLFT(5D=Y+0!L9CupNj$${5L#ZQVT!p8{9o-CAcXO*+tQ zV40JC=NuPywWqCvG>kJb=6vu)oNCasnh{(7EIu}Qanl_hM5|siSWv2%`^pAdX-32+ znvw;ApW#V=nb3bCA4G=>vS%484D(J2je+Scd-X+y8}@q~O4g)zjRoOrUsj8$r@|`G z>KmNzWMAW~!uhHQV?26;JEAE3l!WM$)e3hf38LL#)Po(f)LSpti31*&S-yLSv%3r~ zcV@2_o7nMQU=LoRN083NRGD?XUx1kUtwZokVy%9+$tjU7!`isNV6kyO10Y1S8#mae z@Dm=!%6q8tVPsUBFXJfWijvY&71Y``@0pQ$PJsmxaieZaMjEpoXFDQP1R5tj32bbZJ7~E36S|&0rfuaQ!WkE@ZzcOFW2ibtJ_V2? z-$VuG=A1glmWSiSDJnx9AkWQA3t#2GcDL`gpKl9f0w_TTkN5Wryl4T)_Ln~b53IV zz961AJ^h|r;)Axvp^;~gi&9>2!N(_FHo`3}b2})oU_&d+Ql!$Tt#nb*_eAt`v3DnP z=9<9#UMA+7q}elh_B~5k6xjVrP_+}jX!Tl#*beUC_^|t^dn(`F+EAeIDe*L9YddHi zjOHnnVZJEm*J>9`!)%`;PwMoMVcb@0ue_#(;pOg*Q+I;(>gLgPyYnJecz&-9nT#?N z1oy-${=8@&7zoBVqa*pMhF)z@5O6+|ppB_v>Q%HxOVy4@)KhBlRaat}^s(VWd_VEg zzi3m;W%@y7GyE1$G{K$m0$gT1i+5N+*ZU}AEK3ObHvW{Jy~NAJY-lhEA>$7oY?jKD z8}j!_zux5J6Mc&$`X~tGN~%ZPWWQxfkTpfcADJW6f-n3%ju}NJ5AGycDbMy$lk{13M+`D%X@2;L@327F|%MNYZa%e?HoSN+= z;_vkrs!?J5<4A`^$hBxN2y z;83IZmMV{nBIi_Nld4c`k2a%>t4~@1g<`zjt8j7atIuu9^VzEgLxdRVjP4WMq3gwg z7GGr)B#N0&KsGeqj0UmBpM!FYz%`8CGL9zs*8`D7gg{T!N1BQ6T`y^H!;arSWEFKT zCv-W7{mzz+j~QbzTMk~t&<1zT7cxB^6?t?EEkid$m|B}xv002YtJKp^Zc73etS^5Z z%-G|fGd&5z5~t3%)I}4PjP4-L@8VuZgFS!#!5%uHe}A;L9u*m)z$(5`y9#}Lz1cC{ z=kl>HlgsL*gGRLTFwHfLbv0=rjG^YR%>4MieDvkb@!@&T#U;3T^UV!{WK0Q=d%gR* zLE)1A@NUHlDN*hwxk>ayM@#B11+XFr7x`R zA-V^4+=O|m!gzS=yEAjEv11$B7TtkRj$3?TeC=K!Qj=SfJT52eWG%82=zy&%GB1zT zW%5`6%9Pm@Z%5a`!kXyY1gxS41_n7j#cuzp?@+Dx)OiWw)`d#A8x|{9W9Ebx))XUd zC&sp+%9l7%L;O}9gT!Xd7>8R*UTNz!o;P8v!gT-RTv+F7CSihAp`GlHTvM)ZBf35* z#a7SkqYeoL-!if_7J4zD2N7lQ z1cgT8{Nz9%w4-tvlJ6A~bWi_^nJER zoRqTcLpzf@HSmq(!r3mxLEyDWO9N`Fb7C%uP9CaW%>X;WczMZ7ZH)5grzfAh&g^1V 
zR%VaUc^K|dP;7^cd9}D+m=!d0x4kUPboRrp3 zjw0xV5ywjOgJYg3J{kHs%KOJp9Kp{>8NupFNCX=@iM^y`7M4^AZMK+RqJRlzS@%2m z+}+gI{V{NhZh3tvOJ`1Bk_tm%Xi7$+264;bxT;n!R#No4Z0a=k{Eon>fXOs(fM;kW ziANY8uScTCQ#vE@gR|O&!VQ~+2m1QJQ7M-$j%yf`44qVj4nkKvLp*p%P6udqKkIcw zQCEEZDfaKkzxqQEmuDI1#k09y3$?{`Ce2)KA2yoIWyy& z@3@}wnvR`(ncoD%iIQt>DhFxp2h^gjBBo*xCi8XQO!Vkyyd-rUxm_;aS!O)&8Ps%Z z19yW+=;F`eC|8)hXSSGBw%?qje(>PLPL$%$fCIm|Ac#0F!8Se9fOm{2ShiYD%iNpB zH31j%8zv#Tj$)jt7|yjG^a`Xg%Ol&JM!eXpKdX~+1s*cJ>FvTud{dz&kUKlUe{X{Z zdv`?O^MVAj4<9T0Mcp!8n@?}@ZOm?tpq6CA$uh6vR=A2!Racc1{6h^g|4ijn7gu*RI5DgQlTk%nEq8u8qwtG3tH16ifb|gnp@-`Q`{muGGnaw) z`b*`3g`E&`-%I+dhzudQgATy>$0uL2sT!7{jK5>4NrNA%()Vv;$%9`^c^ApjA*FbD z=r%L*Ye%Z391$l!Ck3JVY*N#!$*I_*0>^l?Ede>({20Lk-v(qbvS4Zh#%)8OEY1$2g+3O}tPPQ)d#vFsr1FDloRLy(I}TC93;wj;BhQ zm0(!ol(tX@&)}MCM>DCGd52Txy~sW4wrqz-HD;+KJ1%lN1l|g3BR!u;OdEM{E*00)7)zFqMehyp2zWjz=$xY5#!#>Lv5nxWfh-&s~0Ryq(jU8|aH@BygNfz&>^EL_eDB0=(_-+x4-rp%CEVo1g<)_OiALKydJEYE1z57?FOA?J&kFb z7Ic|JapWI4?OJI69b!<++$gW}@#)XY|HyM*`cXcD%Paf1kfdzJ(IIa_)E1&e zJBbVn?cND%6QwL+nTH~T8v%53L80YucSSRjRLktSuy)^!6g`bQv!TK_QOAfNHW77z zv8#C8e)<&)~raXrB!_TuPBDEO*Lim*Wd&ah;K z+*eXWPK%NM*MrsWX!LL{IeCe=j6bc{4-aJQ6Gtl*)qj zK8fpM?n^6bRr@}WrsRq2Rd6R8HVgp~hvx#D2szQTl*0IqV=KFyWeYz#*wf|uU zu@@uQgxxrs=eg4aM}B5-@^j|0CzUYO^L1&6j4OLUDwxMr>+-gN^cT9%w8VcL}tD5(WBC0nu>5uDNZbUIu2w9kq{B0 ztL&hSqg{&a$CzA) z8u4e_FB}?%lB|}@tr%MHtGf23Z>V}DZo10!6ix;5#354?k)Pkqdk)2RJ7_(7lb9~= z92=?|TN~>dkCmkJY1>L%B~6lTKp=Gi5jF#{$C`I?7mwzSZF|w66uz3|G4p^+^q%W+NgcPg7n`Z=)l8O6(wC!1Q@vTU{uy8{*^EuE z{^~*wIT2RzMh(|VGvDKWY7(SXDu0NU+1c=t?MUcpN>*hdrOi;sV6;zU{;BqiDYG_Z zJ{`j}XJ^)WZ7_3a#XR{qr3V2OCp-Jaq;A`CuUquZy&KEKx~Yh1SwCmUXWZxk6dDI@ zb(hP3UG5mox(9XtDNRW3@Fl$Jl~2KXfP$1OHRwU8Y`^)R!QiUETo57k(-b|vdD^X5 z^Uc_aG9+FGOYO_{EfluCsslqS|_^V|*tb#jq?s@B<9Fsh8Wr@|OcRuHF=PHc{zsmEMv)~0faoO^j z;zyfPtIZmV`^FK!jVf=Ef*AfAIh`x}{~tN!U7oqX_dd^)4~qoohC0O5gWX~w+DliL z|L#3c08dB&Oer03lDpsq2TyOhXnX2&{T^9l+iZ^_>^?t<|7Zukktb-%Q 
zNx~Epyfo51>P6@1wGh2n92NIN8u1EDk1!7oVz!FHSk(lb_}<(?OGz;pqnPy5AK-CF7QMtlOvj+&Y08v%F_Gd4`8g)Uv%IXEA%kmgG@k zK1N0OJ)bt3qTZY+9ptIl?;p7)Q0D*n0zgnK2{x%wdf&VqCBaay0LG8U@87@o9pauK z`7`m42BUsf1mk-rm@(XyJk@oJgsOe27IUpLYMtJHuq4H~<3=e1*2Lt<2j0q(E_kTN zL+cy#Sb~0n56!Z?UQO|oX+J!A`9L8nZ<8PP=^RsO5^wj&%Id7z&=(rgNI!Tqo;Hr{ z3K!mEa`;XQ2fI*~fF>>uoVRG~6U7!=&ZDbv1&#>@P5O8>n?P>BJl=Du2a$6a`X2OU zcqxjn$p3ui<62D7gr+;aDX^e1d~bC;4n@x?R}IC{JI+-Q#<)Be#PvzAurMisXp`8K z9OWnGSBWfa*y_elMgr{b*3)IS<$5o;RgGFd_-aX?E)wJGaQ)tqP{AU+H_tjKKMdbh5D0n(-HBwuUf?UjH6vKJ#yjJD2v+j z*Xi2ZYy;*gF8sIyTJ_KCBXDYx=d#z0x;pgcuzw)%Ksj4;r5V*E+x239>EUwneorHg z-$9gILVV}>(Lwq3p!iZHHEZ8DD}4g5%S~{J$|fI!BlMhl^c3tUeE{CfRtNu?AeZXp zo+c*`kkHT02Wyal0`?*}Pwe_5TJ^<2?omQ#N>Ah^yRA6oI4**?j)TOU zP4YK8{CrPCI0#LDC47;7c%cL)$ z5#cFKRkh!q^xSvQsu1cdmHUbLv& zdGUZz#|ON5j#2@4CPJms;$sLgA8$-M;h)VP7`KF>aqD6MK|1vZz+B!K#3@tR)px2X zwqSig>=1hukb9>`t<}z`q62#<4QyBjOna-OvMbCZG#T~W##R zm}*4Jw3bH@ zFn`A3y(b;n-3ZDCm2R@z>zC|vK22wYwZXp9(8fs)paMvD*qXKhZ)9t~rvq+X(@fMg zpIxwiw+qOi9|ICitIaE8=PfOGxuoE?*@?LS)rW$Ra0l?lTC>Jl6RmWWO=m2+u&0}5 zM~lTi#}>0Q%95INnnu`{fbV@mC~F^3WoaLkJ!WWcN%mQ?k9pIgQg+)RFB5PS<|)4I z83eW`9asIIG}`h^!gpGlLMc~8$c*eV8Ra&cAPZI(M?i2@0;@R^QI39RjMyvRWpJ+! 
zbr2HREaLzn{_1NOAq{vfLh z>>~=>Q3Q173}9p%1UC_Nt-j6BU26j0pf}^*cRtqQ`2&4${A`I8jgmj^LtER}F5vK{ z79Ztnxabd9EvF}2zJe);j!#j=4@p#u;n8*%LnBH{a~*h3nq1n8+ri};xv%nFkIXuv z$nT}O0n-P(pw;V-o`fI4L-UFi0M#D1v{Zi(ByDDSJPzN0Y_A%y?smFgIpl5!DhK;= znABtcjZ8WwF_SeaW@)Jszg)Qzh=eKMF8&YR5lk@8Ld>>17n z20BRvt+4VN>A_kTJv|+f1SVSyU?PvsQOxzD+;QCmqAx=xAl-%oMNT?U%=ecW%X5GQ zu?}K+y_wYaKnW6kc0FJoH0#CC?7kfZiJzj^3mY|{MV;ymfD4Ti0}EenAh^8nyU>lg z8hw7xlu=i8ALG#M{KF;=K50hYS1q zaj~w-*hc0>p$2k1$@w7igx6@Mn8bsC{GrE%yq}>^q*f~gaASRMvqA*V|^Ts)Qu!5e~q%3Gtf{YblKQlNr<3%e5r#~ z*(rW{aGoE>-%xocC)QuyBr%mj00M+*AAbmN1w$Rga zQ}DX>x;YxtrNvBx;&{?~W5yvG#hgWax>-scCJ#VUmjF$c3K)A)-+;^J53KSUR(BAH ze7FOgF^?V{GXlW$v;Q>>gE?|MVT-!s7WAieC^VW4bzkJ#(My1VQ#g!$9)$z=kI~J0 zd?9idF?1{po&xys)m83%2>SW$bC&>$cP!GI8s55?q(7T>Usmn4JrMTTvBP&|9N|us zoEmi7Qp@r|)1dDyoFjA*yw9rqPR?eC<_Y2)1H#eQk8djK+>A~dZtMMM*ezmvsCC~! z=wjLtopJ8C^t#uwqU>?+gCr7O4@Z0UOW60bTDL9=V~3YF-m{p-DdZ_LK6S#Iq6Zj# z|H*WDgFoHR=LrPCn{q~hNMPc-VB^)KniR|pVc*9f&eOwjYv((@0srf2ey|st$^VH%4cZDqKoMsUUaZU>&5A977 zX(6|i5?d*yC1x$=B6P7>sA*z2w)&UC15&D_yVVCVffqlbqhdV_y()N~3RrJ(M5k)( z^ua$Byh1s!BK1G}xGNCv#-S{9*Zl$lWm^?tFBI;#NwCpPe%^WdCc3wrKQIwHDvi`h z#;`JL4fA`XnKwqE&xT((Nt-ISz21!>N1hC~D*-_}9C?81!be`Vr!hf6Ys-rM#^F<^ zO%RQDf%18MIng$Z-$P8I*GcG;U@CP ze|s6(!?OuUMGBYi*s?Sr-}~;Bm-H{3b-kD+Ri8;AG1F}+`)y31(?MK~<_!dT}#H=vTRmoH!rg^X8u$+Cv8;ReD^a@-^ zRUzS48ugwd!v4+&G&JBL;}B{S;^6P!Hi50!luGJ$u@@J;u?6b2;u96-I5CswFyn!Y zu}UhS*Sc#wT?gM}Yy0rU4;-+LZLVYLA5U8(0#%lv%7_UDl(V50OOa(*LF_9Vuvz$$m)ITA^TH}VuU4V^)nN2 zk3i(eB(N3SFbR$gMh2Yjmj&LPCx0MHTGAvDuRM8ZxFlZm4&H~YHHRT-6YkcK%R3Y; zZ?KnBDv(lzrG?qI-J@nXpDOgr&tBG`dXTS2%6-@VHo9)dgz^uWb!WZ9i1m$83RmAU zVs{RKtD)?TS09xUpxwe<1}G|SBzmX^yz{0i@+Oy8LjWfl-A?Q=Dd;q5(?ur-y7HM*eTM@X*%=YO01-;#- z-YG8xZ^kWrluuXFm!dREjktJMgTl4o)Z*!f=!AJSr&@TPo70)z@S>}B8!eW3X3(svthf%i6(N*GP%QBhS)LNHUuu1o z>^+mfus?gL`veVvgM(ui=!%q}g%M+QPNn->Gkkh+2rejJW(@ohPG^3;D*Y3`l%cvp z=Y($|9d_qg8A=K(IZf$Q^RER-3LtmPHsG{8igez6=YUNRJQ`+ir#Pr)v}M$yJ5xrZuaw?=bU}s_Z@>VuFH%fAFn$Pr#9m|!3r>;!{bw%Ql51K=#}0%irZo*6KGFM&lZ 
z0m${$OWf4rmF&ROaxkdM-&uhSus=301W*bXVzMfA?mS6mMW7ENjX+c^V8ZD>30c4M z15sHeZtKr}Jpc8%%wgi$%drBcik$*00I%o(NANufkT;FH-~S;8?9_uTo4~1|$8%^l z0j33g2vlw}OqAJuU<6UBuA}-koeyY?Z}3o0LSW7WC3|ccmhe?jRPd3(sj?9kRnscT z2i*Se`RmUV>aiF0B)p|SvDGLFAe-imE#O}j^4u{3_;%Ui*M`}AUH;3sC_KwK@Ik4^ zYZu<#%LcGKtPo2%w**S|KHgbPgE_{WTO;6OsxcpRx&uy}t!$;t znrxn7#y_{7U{CG5-}0PeyEWnHRK8gCgduv9zWu`3pS}bwt1SG)r)}rXhi_roiWnJA ze5sa+%kWN%07bo6b&7!+&s{z`y0d61@I9MAh2zxoOTLRLuGywFWC(;(x0N^gZ9!lG zxYM9$7Oj4HD)UwoWymHwRj=P6c`yi&8*k9{pf*jO)-H|!dOQgU0Qdz@)YOtw0~b}& z2Mt+$)(uWoka_VQ$j!S!tuZZI3KMLS>*WQ) zG%`yS+bLFdt5%tT1KU%~r!0tl%aL6|9N#jeJkRwd2JWSu-fX8MHlXU$(5Ci2~7_+N9gLa?k_ z@E(p?Set}r+q)`75#Mo_%jsE616AA>Rr~ zHM|oDzhHRVgYGAeFUK+U0~Lp;%{I8+ee9KH7eTRVavC5c9*!`zzyoHJC%9D7z4J5MoO`se z0KYUG6sX5ATYgpNbv}^&h$`^CBq$s829~HLP|6V&F$qqE<1gjE_=^_*av656KYwur z=6CPPSQY5Em6#8qS^@#i*G8u5{cHSHuh8usgtg1Gb%Auofx2*3U9dJq>CLOD z0xP&rVYv7^u@&SPBPP73UGdBW{{4SKQp?Kco`w^D&MDaKLk{UgVzMy{Er_XBl^+DC zh2*JmI6C*);@M}PewU$0BxZMJtA)N_=|uBSn;wZnW_#`FXt6{D%ljqy7gqh zkb2=9WTV^x&qa2`S)&*mAq-@7yAFU?4N$w_nyaCgNVWiCFVslufjI(tyw4rJ0Fg61 zqlCdd%PF=n0-|2}uRuo6=MFzmjI00pGU%LW@_vKr8(@Pl`VE#HN zZDUUbcBXF!x6vV*MYMzgaKfbG-n?X`Mhf{-g&2|1b8o>swCaMU72kb6*Ib{9uy{ zlo2(W;Qq(ay7lXyycelFt_~%~Rmn;$f)Yk|amD;9w?P5cMo95=rkKXjSmV>6EpO%2 z5|l(`F%-{2(_yGV_C8fY>JRRb`&4!O&f$XC%+mJ{{Rt^I#!p-MaH;mTVXu+Afywjp z_Zok*=|~+Y+07O^XB@%O{M%Jvih>?{)$*ZL>>F_NsohB)5bZ7}GoH_+d+!Ew$ExZ0 z$%kk}(BAHhP#gg&-<2NMgXx}^Nc=?l@k!)C>(56SE-W%w;gbnnNiy1ds6-z9Tc_~W zLXzb~b5HZ+_FHgty5~f+YgZMG3wrY!-x^x?{k|^kp|kupR27g~R%mJ$OEFH_h0EOk zkOD?|RVEok*9o7m!XP_+!tS8^3jqgs9$rj}XtHl00UaKs$^Xz&^wvWGhwjU?t=XAN z&wGSROy&Gt5*J#*(O~v!{-Wl^IzXKj34%5x_HJabdE)0lE9WBUNylf-+v zlaZ8V`v9SS1XK*{pMz~6aOyjs-)DJJ-w;#8bLrLEOE@M!W#Wg;{b1(b&TAmk@PckX z0DarM9|QK5A4@az+%p1NV3H(G!%O<(f4>6jQssOAK4PXze)4Ri(aE)nGdt|-RPhHz z#ip-2a%V188S#Nn3bS=R`?6`4I(<3tqUCs}WEO++vk$)}r5eyVR8nl&XZ58RyT$OJ zFND`U715!4=uJs%)Zx;O;85)*mmZY09cCTiNRzZOYnL4be>%SF^T;o-4pL2j6uqb( zJ!hJ7k^ayOVJtJQp&KZ}9?JZ#V;}Z&Gf@a?u3&$s%;M%ZhxcRX$iTT4ukV`~8gZfL 
z{OQQZiZ<%agDJty4oxJT3CwB^?yTS+9n$Na!5+4ccSMD+QLw~zN zP-*jxWEvJB`0_!y5(=MishX#SrpXVZzuPMUyAY9bjqnbUsWazxl`HdcQXXiOXs&La-LFHl@FVAPC8vl<67$~#A0XyI5R8V{ZFOP)H@Hd;oaUl&CqsiD_%}Lid(}DF}D5@jn5&sHT(Qb|DBk!%5^xML! zcpntXos&X0$2{f*usv_cgbt5wc%=Ri;GWd9)$Se_m|WXeW*Z5vcnpu5af#Nuh-U$xqK zTE~!DjY`jQ`LJv2Q~A?yTZ!ZM4RLuh#Mco=lid^a{;G`7Bn`YDA#8H`ZvE_a*1bA3 zWpLdhGkXz-Hc4%1+}saFURQk6EhXMkZ1DJ$6qs4HNdO8;SP_^zW&VEOx~2^!%Zq18;*Yhi)mB_(Jxc$IgDU1OQpvR#gl2iKx0}JX(JSZlQ1ecYp&3D% z%IwASJZ;5?k;Bmt{|k*cHe)xJQR#T^B2yA_j~IBU8QjwodvB*7O1lw^0jbfYc;bS7 zmzJ5xSSH3Uz4xWhd5yL8si^O@HYHqM4$DS$92Rf8u1gGRg{B-3G6BZ=^3$GwG<@|hGCfdb0 z=K^Dg>wG<{fR7bpF-i>Q&iB(l)W*26IQ z54Kp|?-(_PF`%j)ZZ`|NAA@AN?amvdL`34&WKX*efUA!jG56Us*TAaC4tCVv(k&9? z#7&*J9z4}VfpkR7NJ7gX+*l7nP`MpTcEu7e-tR_E1DSIG;pX0KyJ{%d=QQVBV#c>9 zhAHv!F|>d?S8D$nRw0GE<7Kk=sG8|0?SG|^* zTpkyv9w_4_PpIhD&08^UQGE9NFOv&_oJ$yL_?313KlX^Cic1FSQXAN)o2p_@dLm?9 zTJ$-cYCvXiz<(a`4tF0vvuM4EjoibN?=0dV&1B0uX@Ek|yWQ+`s&e(mdeAGnhoROi zycxW;cW4L7GyG1^8AZ}Xswf19h;zV`(N}?_6nY-Oc}462IOs_>4OmO^<`is!V2jjkxc=Pe^(HZ6_JTEL7g|O>=yzXayZK7(C-ySv*MnXCC zLoXJBJ?BZhA}$)f{^CaoLD$v704INvdRDH!{&Zxg*J3{c+7Y#@4B0*p=IKP2{*Tc} zs_B(ngK`H=$ZX|_fpILWdeEvsQySnwj42pu{ZS{BIRj+1>6A8pK(!M8=@XqKSlx`P z-2?+g+!Q=7iA}`*1hw zD^3jy0uCnG-*~>=30xmKwA0SLh=f5`Po!mR5(6pK2hNfiZn;4JilXA{|M3|8MTB-F z$~HD*5e8_ID7aGODY%OuIR#~*zvH0y?av{1AMhHXQoQoQOsYt)QD4i4g5bc(n2_q`KCm16FxCQApXwR?3>@>oi zSwy~X7!R%`h|P^*;HG+ebfk8BrhG?2n?|CJx1@(z8oSU7gY$qI;SKS7nOf^T2h~M~ z+@~cJ{)1M=uWQu3VmtQLZM$2HEn?0>m|_UT{c8`^{SvB zKomWgz#i&V^ciSqjaj=m^H^+mK&j{OA+bf%!^8TEXM=k{bHcu=Dv^rJ}ESse? 
zTxEIklwkkp(=)|4IKD3{OLYEYy1a6I+>j_+&8%HF^`7>3)+|wc-}N4zV$F@m61q@n zH6vf8fmZuvjQ3ZOK?g{{T$E{;htq}- z+Km@wgqkmxOf$jxn7OB4oJK+kjv=*h|u!p0y3IAep2Lbv*Cl#^*lN|Z|Z4wDK zk{|`36~QwMTLg-y(}1I0c@*n+D`ZOE&#R1ye_qy%&9J4G{96$5mMuxsS&GCCgF|I( z_>Wjrth2h@-BU!>8$oZ7EI}DvgxA!Cq(aj z2bf}JEQ<08uQ>M!ZH&+zd=6E=%68)b@N=`W+8*7(rqxn5lC_?aTi4$`?ikLlIR^?W z9g3=gsjR`Ji@pL^w(<6yrKOz&sn3jT_PTtBy|-pAzyO9ke&Fd*3cbbIURonCdA3%7 z%|CEV*lJP+OPpri_WQBuTk1i;pQO7!}ieL|G zI=DI-KfKiXTr7}}jZ5At{0drHb@NnQ(*lZ_Q=6s~v~ z@_W8R8haE75P`aMsTBmiX4FJ^sJIMuTS$di$c$M~a%8ItiSXM~Dh+A2y=vuZaKKvw zXtx}?l#R_b20J2v`rtZm!W0oE`Myi$NLb}o?qH6pA*rVky!G8pjKfp_5S_a&C+aFM zwMsRpl7tiz-I*5l>I;sAdP8rke(ZolDAPvv?++_qz|v$dmlqC^0WiPCEVxifqbSn@ zx)Qi3S1C#A0jo=0-UbUSITO3uv>~^4?ar%^Tv!`}*8ejb;D1$9Q8x&_ZpmxEIf=xL z#%F7KJ=|;DulJUBbe-tc-a@ZjGVR^x51-4QzIJtw(`qs6PbZMd!!8zCF3*o4xI|DC zGxaH`0_n-~XCmo^R)XV~<(>+=pSI`H^Ha{E zZ7(=(_>%O*LK>^fs~d<}z{NqOB>IjVCOM#Q9-V8rhtt9Z*h2_vgZ%j|V6X`fP^MP` z@HOA(bE45_lM(U!MW{%>g;ON(A1#AWj_7?4jbZ!)aFFvNuowB4_G}#I-8$ZRJe(G% zX@a9-fyKy007H0-DC zYSwS_dtpynTzdaJP;dXy5TK>)826#CUalT{hDr9bI`Ve<#_0YyRiQXmR%Wy=)K6^j znAWg;G>^vfj}~VULf7`he7x*&YkZ`h^q#W#Y>dRwvG)EcPwsHQ!UJ_!9DVzU&`xVH zMQNKZ*t_(cGDa@k@AU)^K zla=a{DTZ7*%v%}V>htPl4QUEm4w6>_K?R8F4#3Ds@`J{Y_pjGK+5_NsH}I-En;h?V z3GD95zX43tqU_&-?SIdvz=QlRHJW4y-KvIT(D#7kfr0~`l%doy$ze0z?DoA^y`shU z6P7(v%K2`)NujoB7KyidrA|Kd&%)ILFzXDZbm;HCkL>M`kbdb?O%%iE&-lLR{O&#` zzJxclh7AjREU?t+GZ8lT44wLeWJCV2Rv+aBI-nJJ@t2MSeC zL>l6wr}VAe7zV!0=lro^`~jpMq~i;cSY1pW^A>&eUCB^|mNQFNelP5nteuKufIJ)r z{4&!awaw9o?f5^yzV9FV%*BWi*P}&MC1n@=?UEN!SX`>`tL|pwA>X;$bk4e@3NR6c z*+sAU%d7yBkxMp-r15&xe%g+GE+&)njq@i!oJmlBB-+Hgl*0Ub{{r93303S+<$&}AgP_kLMI@vn4#zKO{DUZpS!6$5-^ihH@_o@(f0og9VbV9}G4`Dd79 zt=~1F?O-|}akQ0CArCUeAPY=;q4{nJ>I(X?a;Fhjf3{-d5^fguo6u85?&rVwbVS`W zsDi{6fz&s!>g+A&n`gzTnOHa(KC74Hk>Hyqh-yL z#*tg7uZl;xpBY_`h@QDZgBklt=~&K^^rVKJQ&WDVUJC;hAwACGb^O}f?xDu{3T()- zZKczlFM*kZ)AtRD7?z-12DV~IkJ_q9mp+-bIMYd7et#DtqRPEZ4`1 z!(-6JXx-yvQ$Bn5sO2rRYM1Usn3e92JNi0giCsGO9gQ10yD#F5S)i#AEm#M9**S9A 
z$#`5;GLg?vo%u`oEeo{DG~8eB^t3IOVXmePDooifQASm5RNTJ!39;wHQ4dCc8oX4S zKj%vKe{c0d=Oc%@v~l{c4G(Y1lV3D7v+$@^GiZiHruS$iea0VK?qI~fJre9cTk9?8 z(v~%`)7joCZ|3|xvd&_E82#=1aJ3t#%-Xiyz_B^ed0{+Y6cg@b) zRxEvvGvo~g2GLsD^Hn7B1S<6jJDpOWZ4&>y+3rnC7!R)N{GB>dQ~TKgHN@-iBC$4> zVTZoJoLhS-SZE2SVfQ1Zra)HbBeslnyextH^;c*gxQj-{&JmZG0@c$qHrrVZc zXPO|tI%Yy1YNrl9K36m^dV}hd6doyO_h+Qe_a2T!-lW^BCX>qaCNm~^j6NX@u{CxG zPm+-EbD=hiBo$K~^4B2Dq<={i8|D$c+ej9OIW-N4pPwm=utS6pt15m&-@Tcy%|C!q zLCx?>@qNP#^*i(Jeck<;VY%Q_bV|=1Z8-C-e0ybU^f1gh%I~NTm!$Wo7yhEdbgH7w zNX1S0*is`=dCcv-B+c)Hc!Gq^3XIxCW{x7J>re0Y=zjg})1+0|2lttdWFSmf?r9jr z%(06QG8iqm;599el|JW@sClF+vDuWLlk#rIX(x&i!ute2+8`0Z{(*QIMjxXtT1`RX zMC!fgKa22G<|XPlzX3sgmqo(r=_GJ6>uf36w&=sbP;@D=_JHl}zQ>$QX>jZ0S?%wA z5w(928TQPmfA{XVZ}(F(r0*`Tukt?>Y(H2pE#M@c|KU|%y1`yf%)O#!7nI#4oc47m zLW=L)a`Xx^7k+eYA1E(nf9ypWMKQk;xn= zd7CZJeEeN=LKA@cbW~HXUZg+e>@~uB&<~GJSY@f?t#1gp@OC|=jV4EY!61t<{O5B> zjjk^m*Ak-bbz8^#o$(jkD(XC=yw6^IpIZ&vyRwKrsNmBJEgE5|M?W-Sbj$S#fiw3_ zhst;%J(!Q)SwV}MHDi4EsD44-=|VOOGR5%ZyC|pZN(WSnS0p_=f8^@fa{JCljc;mE zcH%I)y#d(MbK`@4<_sE~fI9SeyZf<|j+8m+$&g!|7WDZ~ozf!QD~LkyDrmLwIdgHf z8RA$VE=fX88y3|5E;JS@@buh)@;Q9Fvu&79YM3TZXeAz~=MCvOA-)>;1p;z_p+Y;* zU%VY!Wl@hOYWD4XcN_NBVW*3%wr5kUMxM`cN);jM^1hd+^I|u$p~iyasXo;>6r7~y zqkH%VN<{F*b6FB}Pj@a3YeD@=y?1)s!An&}T3uZ-zxYH4GeF{kjXp)MdZM!+L$M#A zdi&j_-oZTiZFWT*Cs=5R50T zlS{bwl08d_q~4pd(y`-Z)ABP!roFJ0fJpQ7yvT?5yDF=9UO6d{pVSYcKf*gRoe}SS z3#En%uNQBhHW^7Q+&W$$E33PAlFsCn?{_w-Md`TEppe99s@g{OgtaLB%H^*_v5xeV zmmj4n8q&E}pxZ@!%*+?%Cim%y-NZ7Ro!4?N{mTarPK=kFJ)iB8)cd~?xW_L&e0xVV z)VOZ6hd5E&DxI}`w19s`Wb`=CI@0oUrK!v~xs*DdN58CsUZquVTSyC||I;kvceHR} zq?dXce9dE`*x&HUmzvL4GBEKBzFmo_7oDX3lG&YN}-vIL8yT!aU5+nKa zj;u?1Z4SRhrdDW|j`jF(`mmxqi%cT4zsk|qIl<*AS8H0ulezwmKsSl)10v}_o!dXXMvkJyU; zQhF{Nh@pa}7$wztT6ba8cFcdWa^AuxDJD9c80TdxDi`Iez8^Cg7;fu7c4Bz|UbCa| zV!^90M8D~P?1hKw%sU~QKE}>LB+cJSe$5*oROK0fRxTCDRxe&qhd!G8H)ZxWcXqe! 
zG8s!zfbRd76;IwjVl2ln7z?y=4u4gy6l#?a`w-;bM5e)%rPdPhZMi0$wFJ$2bdAV&!Ze9B^ z@pZx}8iM-1a0g47($8cYO=J`rd8o21#;$TWh0}T!aVYihMM2T_&%5TP_#;t_?M5wp zR&riD+5E13do+pB!jQwLz@1Qf_BLFPnk=Gp@Uv366?bj<024YMI=f_ibiu5+q;M^D zIW5`v3w)s$fjvw|uJYfwqDFIx-Hddvcz-5jW7oe72x_wP8LnxAK`g4{~?Ehv^cqxphfZv3! zEiEbS=fly%11p4wVDbsSjxzIDt2+7JU$d&;c7T;q3d-9RSb8In{0Dg2OZIj&7mK_;zVy+z-7e*oM;+Tkisz~#-chsxFP_(7@rT$T` zjZ1U#R}Vbn@7K+5Ewug#YcK&PMDJ@c-Qe%v3FR>z@8U*@^MIGK+`sJ>9#!4``l=5Emm5?fV zT85~n(wkpK3m<8#b$#sqsFI6j&2dj9B+c+TadDj0T71jeX1Ml_qdlEr!GM(oK=UE} zwC1??fWF)`FJHQnV;w?#lGEKpQmf<|=zrvE zGIqEF4Td5gBSzg9-t}PdQrzc7UF-SgypDdjH{nzrxLQ0XI=+QflhujCZrd=&ALr|y zjaZb^{NCXn^YFiqL}wgf4V&NBo@-kaz6yVnv{^y)QoL5X8PD6CH52@37DU{sslUPt zyR(mIpY0PC=+p3hthQyeFEumsxK=?R7B1*p;)A={l(F)bVaPGC0^JI269Xz){cQ*&iV?Sm8)8G$x*O~xo5B%kvdeZ95O@UDCBw5Mo_47 z@cOV1%_DJt`9%ZnI3qJ>Z~OXoSW~uN9?8m`$F!!ue5JYD)4l8KdG0Zk>>{CG)wFxr zHo$Pb`1ZEnBb+#&fiENu@+)5bu(VIn)dSztp98>~bltTIPCLoI^bEPWx>n2p-?`)C z=6$|YCxxDCUO7@#>S|07igFC%c&36%>7caKfRt>`ns>9AswJtGfoPLxS z5pv6Jc7E3Hko-5L4f?^9r}d2JIa+bS&+C2Y1!A!fvD7Bq)d9-IvmYmNvuRCV`j7*) zuj|$ORf)VDepn>n1dL5ucJCbj>t@@+`#Xf?4~1Zks8ZDJ5o6Y~s_f!X9mcK*3P;zG zETLv4&3n14b9&6F6xgeguQE4kFQ-Yn@MO5emB-x{e|qhxm}8wD=dt+a9i{JmN=k>J z?j`Xi?DWP&Qrd4CPbxdr@7j%|9A%D(x@|S6yDY|WFm@XQKYYLi!o3^6>U2_=s?8`& z{EQcq{=w3IWJLD)tzJn;wr`PZ&#i{vg-NG8}5tV}iBsJPs(HQ0K9fh1e|TfJ3r z5@8(L(P&C1;l1o5g%&w8g2LJ}&d5eGem}ymRZ_2W>`N_svZUz$GEtge4pTj6fYzY1 zMw!engA|H|h8=Wpw7Zj=Z4-qc4VdB3Q4qO_?=w?v_jkpiq+UN29qOwO<+!Yye9)*} z4M{zLqfmKI!luF&RWEt>#hKWt*N&%LL{Oq?5+OA*T754Ca9=Y0srHror`xMG=t9%q z3Oki+;mJufMOx)XAE(84U>Cjb=zXG{u)&|a7U7E*IO?<^n#BHr z!2>HGHI5yqA_n(D2|YXhLVN^YJ^fOIZn&YHgg9m;`{7FUgt&55BJIePcu zZ8)7wZ_7~a1vD96e=sUcu4GgQ|4U=sBnvqrz)fKPQH4-1Dq89U#nKBucpckuFLq{8 z{ATe3+fVbmdb`*o5}wv*HUIgip9IQur^V)( z{2riWJ@_c$w3{;POD#rqjRf-?uOyuZf&iD4ac01&$o++>le2vAHvFot1>6w_uY;FH+K*bzw420vd}Q}yekD?*KtD~1E}+h`h=a@J#3LuhC z4QZ~HBL5K(atU#2ZvD;IUe2@$TI3$KCf|G~!aV<~r^pl`=gTUO{6#ga7O4_|->3LS 
zrRWNxe#L$^rIQGZsGsXIWkR^iR-cgGT6;TH8KQi;JY_l;R2N(LA?{9dzqnB;(#-OH#KZGJ%>T%!L%D5fGKC@ss z;&lOP3{1Wgw=Bs2O7x_rMl)Xg<|%%Y9f`dWOrc6 zcYS`Nkzvl&T7eorV+XeP9~n&2gN*`rM3JimB6J&3L10RjYWNjzqnSb!Urylh@33#z ztg`N7kT+4@y|5jF#~o@m9}~y!)N^0TVd38YN}NKUEyfOJJ|LN>cDMh(Cr$xbAuvoF zg&|ZY4)m6W&wnWP3l_G$*IWv@V=6{$8MbG6J;~|#Cs9#-!_K8ss|W1n*S(5>dgWxJ zIN$p5cR5lN3qndht+K{_JftG5vNKi<)k-2fpC023ZN|YGma7m18ic2NA} z>8q&Dl_d|vjQj}*&fryOnU;C+Iq`S>x=AfoLp#>8!R&u_p>^N@#n=itRm0JouH*Z^<%|%^6bOjI7U^yNaQY&Z#rr{@ zZAh!q;$go%n%D3QcB}xCar&Oj%z=#i&~KMg2C19H-h@#GW6BO)jJ;!_`I&;4%|=;G z%OYK=izPx-?CHzfdoN?ubANCiZg2dFn!m2T-wwULD%C|$D07qVn=fr}?2`+1vOJ&r zF$#;&3CZ=aaJy+@b5pZse8-+M6^9)l`l9pPH;huNrRCE3HtiG(jis;UZ{m|`qvDA6 z*TOW6vn{o2Ga_n zHVvX-spoLA{W)}mr=HYEOR|4nW3M_5f6{(loSwdl@UL^ALv0PYHW6_ud46fXJYL>k z|BsZp2-Ah!76FF;N+h`Z@iLI0(6BjgxO-MAUXs>W;fi9M?Wq z^bTPc>)aY!Pfa7glQ|zy?()VxISy~T&2D};eHG~IYMRbOM4o~VYbgj7FVKk4#C6^6 zSK;*Mw{~dMeDjCePr2Yxsk7rtH3ZZbZn=60cU-DF7#$*4F56{sqeH(VuG4K>j@@iJ zZdG;XtA6gpbXM!k_rrf!V;D+>uVY?bhXOK8pd=7pxu~u-2}upxqhfC&GQE@}_kSu6 z+9yP|rgj7bv>}TM_G#!Gef(*v%BRdqKZV1E$WJD(70L}$TPOLUIZK=)Q`qrq?jW%h z>3sJF#jpQLLbl9E#XJi>~)?(2Z*5;Q#7a~l*@fl~gN<~-z>r}&9Pk_26 z6V{3MYp!vJxP1{ZH$=XiadHvFuD)@95AVn8r}ql$Pn^Wtpf>mXoT2luhw6QOv?OMp z;d~TdqF_pUO3Yh;jgcAVbevgrma2Ryvkrp`FmvZ~zV&ESsFm5Ma@>l;&!2SKoYJK~!z!;C`A6fBz=~~QGp0yIr z&RqN;s80T2-B`nTT`Oj#@^C{oZ?(+mp|^rOCjX`;XM%`x-xS5T4sGmXz1rfcHZxsL z4UMQVoXNTF<{bbKrtZ=GbUh==?Zhm~mT#(B!G|VCxCNdkt-a!~G4_gf^RcbnBrl*G z%ZuiWZUl*cnf%zGUw<%=t%%#e$-Rw{CG^&uv2z8Zo z1C^bTWOU~RvgWo$QTw4;`rY3^coWbDFQMHU(KaKWY_HpNZzU@6+b92%lyClYC#idC zHq{N?|3ySy`Ls&#n^P`9d#ApdK}!_WA~+uQc=37ahZrb=1JyC*VuCjFlM#F!3SuD& z8W!PlQ}_p%)z_96!AXwCg_T@fkQL}D^u?(mizUR`3S69b+oM-Sv1RhVaa4;JS*lnz z#i^CsBAR!oJ+TO!Sg3Z3pxrbRdpeBgg~I0N4(IFkUo`$J7*^u-N(bm3u$pfQ{UO$R z^4}9J4vyW;@zRn~tD`62Vla-qSt2YWk_rC10cbUX%?~N5o+@&0&01bb80wayF5w z?>^Fvk18Xh$X<`0YTT5GN^4f3WsN?QhLMuubR^~VNTEa&#bDYr=Kncw?E>;_vMwLLO_Jp2h)GSMm?>rD(3PWEsV9V z?oJWRS}i)SXF{Dbx^{Oze%E$|o*gWy*iUDs&sM$K6;bTHW3Y6C&4LHN=3{4fKojuC 
zhWfAl0VdfUZ=#dj7#6n0#i;x)9r6#2AGE#NX$KGXFQ5kAVzxk`Zrz0TS(N_CNT|!N zg2_^v7m`>h@prq8vjzZ&x38oUwxF?ne|-PRT0O%?mxd{P!PLjrciQfFIW6~F7)s~O zKFnONuyQ`QZ^*f%Y+G3JR!n;hl>af0z_%1PGNsSJ*meC`H9slKk>^0NHNe z)Kj^Eyy*jfeS>tz-wC4sc~*2@MtXTNZvsl-yhL2Hb%}} z+~|?*Z}?@H?$vS~_6s=wdRx~E6Y!E-R5f}HISCT@sG1RmrCHWF^*_VaFB!PYZQNc8 zandY)LpPgf?3*|}X7bV{@1QBMLlOskad~VOFAb`^Fz|24UZKrj#iLexS#B@9tH_bh z2YOi3G(WxZXLx-#8@AtuG@ZgG_!b0quzXRjthq(6X&r@qOK|y|yqV+vok>tHv=+MSp_O33RK5(oi zDE)cu^quus8wBPTE-iPqk+l$Krz3h9ohh{**r!XD@+)VkAl4tdW>vc%X0LIAhIS(* zf?89vsBljLxr?L@;?RpT(9CaO|5@mTf+bDv>WUQDlaMK!a>icX7pymN|D8Qr!0&Jw zDDaOjR*)Hgd3b==u|C4^=d0BkGByJCWIM0foJG>0=I=@`Hb~U-tg=Jh)*sI(EHwo_ ze*WE-#bA{9R!n0Ig`n>AAVIW#>Q_uZ%un@go&a4cg@;883h}TL)7>%-c{f6xbz|u_ zjUN=OOUr$p8SZvy_=se1XqSCe)02E(@|(I>oL0(#z0wKXtmbg3rk^9!Ex{=-!@OYo z;?;Mk0Dgb*`^7Kw{Ojr1mJ?*65)(kxOYE}T?pI{8D^GMB4cMDL9^nwtR=WW)^!G&7 zRB~D%O*1mQJz4`yPNBZY*h`~pOs#+najRK{a507@bN?qIS?2aPlc(0w_xDw!m;Hl2 zH|LXcVuLFBVZ{xyf279`g|SraW@FBljPCCyW9lHnA0fX;KZF-utf^7G?I-W9U*Y3jM(1)Fm^u@0${~#v|7FO*)Gu}S z@?T~2Fydu0On*s!y&skzU>N@@ZirLOjNlY41NV-G=Fq3BE72vkjkzU2Gci&0;Ebjf+TsO@1 z2Y-Bk-5IzHm;uo$F%2DY9C#U|mnpBqbmravc`ktOeXspTRw(JPlH1L6o{vOIpE{P7 zIfZYp9w zW*j`I8(N+CIw+}F-e)_joN!HSab1D5kK1d5N&Cl>{ zVCQaJab-G<7-b3PPK#GWJ%7i?zIBSEi&#px38zKnU3;lq9=~>eD0Q7@dlxFOC+hK8 z)IVSOKzF%_F7owkjP>uH3}lHY_2d_4Q2Kqb<*bb5fX*v~XCFCSmx1pXup356Y;(KaB-ZzBFxVS9r7dMo+R{l^@df$lS z8;pE2Sd$PEGkE(-_FYySoM+k!wqTJr4;Y%^l+BNc~&ysrnRVRTqSD^j8zPEbm+kiLBTPyTib(y-8 zeV|R%=x%I$!=eE5B#?v&jW96WHC$$Le38C$|HCGFW`Fl>Abzj3u|9u|Ji%7mx;K;O z#$9~nhF4#-W=5%T75IyvFtMl~g+=+VpOHHo-aKKgT1!h%dLme69Rn91?hyM> z?6S+5Q&1BFQJWAKeskZAfP;2816+wza;2ipLtWfDD!8S_b6lxMEt>DOY=pq4^547} zyhz|7;cQ=Zks8))xXZN-pQ%p_(X>a?-i(Qtdw%BGst1~PEdRfJFDjqy?+vYyX^+a}-G9dWP7%TZL?;~JDZ5P^!M zK%l^%aW}O4?+ZzY6U%}Vz&V~J)*{d9Q&gHs@?oRptA#@SqeQL^u{ROSnYlNM^Khen zzcUKoW@Jq4Yr50emQ!lp^=3u;&uy*<+5@xp7i(ObPB3iGqc!w%c`fGwa}p2vdWE~b z@+W&m)~&h%Ri;|iuO2=5S|5YU?Owor=L}C{(l7Ppy~P zz#`W6HR{5wcSWOFbaK}eQTapMeTFPJL;298ugi909BV2emphBtmzG_jwd(!UzIWxX 
zFxwzRjepH5Y$a>5$g^jH0F@l$V|$758|_=Y3{bpH|#@7 zcC6!g;l_Efl9I~muqW;k^FuJ=+}3iFm;H!dXl@MT{Mx!lfr=X(VK zpH5jf)zPiYXrh_TW{rHBDs3It$L)IRoy2fMat~71JdFrhCfan!`HODTce0~=A}ITb zeo3QAx);vsi@)+JGmMd%b(PvcFcK)9B42vu7|p2~Qj>J3jpS2b>7P z6Z7wl>RPWsIQmc8bi@Su=3PBOx|4MODe-M@lUxFcwV!xJ!q#v2VW$Eq^iwSRyRdIS5T8M`;CW45cYX|!Pt7h;Lh zcOk@imOHu)m}>_oi!KKVV1`&-p~PYm57ko1C=De*-IzXHGcH#z zIEg+)1k-PDOzH|7@4mp~Jo>`C7W@~c^WkdGgo{SzjQj+w#96A1|Nh3G;q^xKN-alD zY+1NBe-p#(iK$q#uf#+){&-S3*-{T{%A#p}VMvOr`CAcZ%S#ML^ z_t>HZ*`!w&78lv>-o1PK$;0&jJi-6IMfSqk=1hZ9MaIX{n^|-9KQ%mh5_*d?UT}+1 zm5~HX)NqwxY{~w}sui_iK&2+v^!5|qj=T)h&#s7v8ROP#jaZl0zgoQWX2@<^Ony}& zr8BQ4jPMpmA)NGyvS8nL9I}^%Ui9HpS==4B^GTqUmq?)R&l9A3IqEP%GiwKHkU#m5 z{-Y$j&n-oQ^tQ#ymxnD0cggE=TgLm+kDEOqhobK#PrMch{(39|$5;k`@^7Uu=tqXb zr(9R!MOa9`T)UlLpRaFOYIZwT-ZLWk|KsZ|prZWtzHdZQ8bpxp5EPJZ2_>baO9bg| ziD76&LO?pDLAtw3TDrT3h9L%Ko(s?YJn#9RbFX(T*J8PV#Wj2H>(~4H8H#G${f5jP z*E+|15)*yG=LK(B@xfH0n$P}59{GK@gUbIAYuG$@3gV>JK2d(Kd2UzAV%E0$C0x4o ztiR<~8>|Dc3WZDr0q1l_f9&p~D$b2TP}GG>aUWSP?K+2|N2)(weyE|U?%Pp1M|>PX zNfd1im!XW=89*#5)E-2c{up8~xuD7Ky)^fRpPDO6<`>8Pg}-cE_Eg*3Tw1-cJx&PDl{kT}!lmV}WK^MZa_p-MYT;8+@C5y258<_q{UDF?qhT+27o2P1G@OAdUba1N9=Mr z0PFu?9{#^C9)ScW934tzklo>=N20SyrKZXJ9pLhTSkvZ?{hRnK0^ymF&6dLz89yfDOkp+8AwyX0ND#cQh?l)h*9NSlRe z^vW#p6CS(L0dBdTwxYAypUQD+%{Z*4utuMU7Zl5hl6Tf7q88bARp`c!jAah8Naz@kR$WGcwPwzLf@Jo&EK!?P8ehk6Fj!cK z&E_i=6gk3E^T`wE6F^VkjUI*uq>%*9pkPi0Pr;;BanYYIf9YK`WCi_hRcU-OCUAM9 zUYO=%XI25%P}oP3fk5wChF88C1Ja~oac^+k5==jW*x^sQ)adeP2qApe)C+Y)x4hm{ zJ=g~LVZtA`xccLGNpG9=u$3t8hI5sC?+Um)M)+(Gb<4!{@Stea+Uv?it1sozlH?f* z@x)|1g_bF;Vaq4((5&>IQ@0xfO>3Yd2KnyH-Zn2aGKi!0x0dlSn-ljsgaYKr^@UUn z=(X~ebzNYvfsDj$I}K1zMkGXV=6Iq;;m!q6y55#RsW)JK5lh?06kcbmZQk1!0`;C; z-h$S_ecHKy_ZYk>oe2mInEwdYfFsgQ|IKd&6Uu_^Bu@pwl?=Wn z-A)g6n%TtMC2O!;PY#=y7~0^E314WCp&1uFe&PJ z-&KcDX`w@f!>+I^j(4EYA;}1ZdxIQ`tuli}A)6A9Z}S*sttD<81OsX~UhEMmLSc2x zhq&4B>T;q;@lun}<`P?bk@k?MT!*!B$CvSy#_wPJaJ%rI-cemzP4Q;)aG9&iV8 z-A{Yg<~GE4TV~eA>}BJda^D%qbhVvOUe}LX+qokxA9G{An6e$VIC&p_vp@j(!R1hz 
zoc84y2r_kwF;d08w0H5{JLM?BBqD3w9{!5?%EGyL6d6R5kn49q+PinkBUB*Y+WFgX z3n;a~b#cB7qtBRiDD_mF29c)QFYHS205Yhu4b=Bp0S}s$jMmgP-9~`8t^!FWSdF#= zH=t-37^H_4BLA9g25w~FT4e~JlR_@b#H5@qGO*a>!u%g$IS|dgK=x&v4X8qQE`JlF zPfIAvnAq9MlFV1w<05+o2?4ddEFC9x#b>plhJHk*Dqzz#||;e+sb%mYdMcj4y&`MHW=RT z3>+wJR7s?H{xa`g8LCPW@?dQybD_L?Il7_+y9oW24uaHif1J;pruvlaA6ljo(84(| zUeAbw$k2Qh7}i48xTKMw4iJ5Av3D^Qdu<_JdZ5W^dP``_aGQ~+H4aqE!RO||*YnL7 zQ@;iY2R^&y$5kfJ?76MGMU_pz3(9ghMYPWcsq5#-++GrCW%yw~=k_uHrKvP6DbnT1 zMOCQe(t4Kbl_6AFma4HBQy1049w2DdXT_6x5+wU+Pu&n1a(oEd=u3O%ukk~mOY zAOT}(ko?2ub}kNwm5tTG+444MlR}Fk#LujZ$kDDo1|cr3wN&@lbte3Z%0850QNA1L zk@H*^w>4%9HF!Po$y>TvDOB*$9`fTGK?3%;Ul1=Zc((AApAo-LnUJHfP(W(TBjr21 z<9ljs8;##3KN=y#Ereg!LvZb(EhXZvD~qlCcO?uaEd8u<-Pe`yL;uumDI7QZ>y_@z z8gH^>amU29!B(s7bXyv_7jax@g&v|R*fntKTvS|X%j3nzgrSPxayb5Rs z7;RDi%`yMo-&HOtBth)FJ*w+!B0@qMVXJbPEHtlP#kuiM z6mu^kY;rqx7m0Vif1^`N#$*jaZ`z4=eKqRIOf27>Xk9*I$&X|egVyvedTwr|?$cvz zTh)QNlJ`d?c?MZI0dQ`13eS(Lr$?s4&XUT2Wd{vlZDf-C@vLPSm)UPhyp`ROyyqp-SyGERt{sf@IQ` zhN_NU>$1rGIT=cZ_v4_(nKrZpqApx~qi#GQcHH{yp7dE{M|5e29WSC=IBbkE_uWXk zfQ?0NC8`lS=Ht(WAY;8bcYD*+w#I}frTUFXgLzS79!lmozo!oIF$*VhvM`fN5%B9r zTcp0Sj>^*IZR@2Skv~#f4qmE(wQ$_*Ozkr4RFg#;*?y&R8ya{B`igKrlg*Q5W~Xq;NqUQq{Q>A|`U6W;weE4RBd#a( zZDiZ3oxYBG?%(t@?tqXpwId+mp2O{w{Vo@6c!0M;V`|5`Va&p4N{vzTdg88)Y=Nj1 zwyr*-bo!NdR+TO|<~iKE>(GG%>XU}(sh=3T88*)}?l8~vzBwBMWGVR%oLC=BcKbao zCx9si1XV>T#Jsf`Opfs6EnohJF&A;exK1;UTK1DrnhP-ac$^c+VlJ!ruCVEqG;U06 zA*TleW}J#zj?N2)Y9gItpV$(#ravpwp0BLRZu#O>Sl^7!bF^p3K#Q?bHu`(KXx(S4 zaAAwoZheHzvq}+ajzv|$+C-4ej-wdG?KaOIiqQS5Rt4UB|4{fB*^7lab37v(oQ2uBIK)Stza&aIX_p@sOP5*~1ik z*mDFwo&}3f>D;?MEmc=mySdR~TP-v2Z5Nw-;K%X#A&vXdV(*I>l7~QjHJw1f=axsK z%#ae8dP9@0R*0VP3RWX~@*lBW6eF@P{Va!JGX|i$5Kw?)K0YBNEHA`@%Z!;M5QCN} zy@^dZXGu9n$|T<~BqvW$k+W}U^JC2yX*tY*4=c}mQIgFy-6|X1t7KZbT)j;e<4XlK zw|jHpa$}W@@%#POiD?Idh+qhF;drrCH%rz_(K%E>voO+93sHnH!wUHJ1jLv+(d3dE z)=yqziiaY8$rEqMw*)EvJ$;@b|C*z~(JtPqdN+t_mOv4rv)1PF8(&+%GZT=DQAxQV z4Y>MsSnNBz(?%wWFkj<9f~ajSX3$th%udin1)08vRl<*mL1bZ$@bAY7;9G>xV_;Yx9U#*O 
zk@MTN=g7sTK8{cQ>+-L_?SSSC(w?f-0icBm1`tFYfWE%*f#zuDpR13O$^Ur<{uR$- zxJ?`5;Raj8DfE-Z{jd-Ve9lTNX6gA^yIQ0I;zHdrm86zmPViKav3Gyt5gKgIrJnwDk&40Q`e`?;TE_25Un|=Ph z+C%F&o_F>ogUM`wL7D7J_U$%{Ga79X#y>71PeUuUCh?*V2byP>C&V1@HP4M+o3VI* zl8B6ryiGjD{R!umhwT#m90_gTnJc_8YM{WQruFnWzx^ zV62wGB`a2X@{G7V+#&-ZQa;qj|W5^j4!Z^3o? zD!Z(uXpT&c8npQ9*q6foEz*qd1C|~&FvEHDi)r$u+Lj;Xbl6?qP3c?i{%(&FA4327 z*7yfY{ZZhLJ{t9D3SDv$f}`7O#c3m%NjH8xry`)J>ecc%_7Vih&SjnJ1^M|}uBGoj zR!`~Qd*g`gLBRwqnMMqgYZ<$Qkd=c%^3ga-?&YkNB)NR@=6O?*ztY?lw?(&G4W> z3O7p81ekh#Dk&ueh94DtD1KILyYLK9a(mPDgW<0? z5v4x?1T7^rG@i2SR0dLtM^Xx-@mfu;wlJ}}RApWK%P^s+_T*A$XU(UrPp2E$FaI z#dR~|LkQQ|ca-ye@Z%{QR*TG+%$&*`&5{js8d*s8A zL%eyxXh=;KR}{Z+N3^c=Gv149QXz_e57n709BpYI-1Ek(b(N180+_d;lx0nsR|*VRIYiJXCH`E<9; zVnF_y_E31@e$)pcC$AV-!e!J|{itN^SdU-i_7B!KLasZ3Q{!3$zMC7YtjXtS1r1~d zh{3{6bQF(N@%U^PGx6{Mxv)+l2q1jyM`Cnwcd}Z*^89ag$OeD5#+ro3dKw#$Wk$+X zNN}6P#={GF8~)633@BZi{(BMWe{zxXH|;>Jm%g9Xs@%NWIQ6M)RKMZ&(59TU4MQmL(!3f>G^SiJM$jS+CsJPc30fvM-HexHu zw+uZQUL_(qxUyYP&-MAc4Y&xhc*!lA>_H;3vo7NkEBW-D*7XehD`sp9eKJCo*Ih3} ziEr6sAT3#A3T8AUF{6d}^c&~-wZhXc%>=`-%$2lVH&m&(+{pt)wZ08{8Z@lgry{@x zddAWzMe%Javc&jEQ^_vDPBnUxN74Ry=lMqbHsLyzKR~oxj1_++7 zJnu)TrcgCU(lY0uB4&d>NXXo%a1pQ0nR>n2qX3n);o~MEgmZpWCy=+^JWigH3Cy$+ z{K)VnTpjW&=i$Wd*H@$B2#QeS3{fw>z@VUp#aOJr2dSCX;bK?SVzHi!2=Y?5UdRNH z@g_{>u|z(cuQ{KNSo|C2{Ie{HWXWWt4^~HFA@NTONTnD*TYUD8!}=ZMDvqVM?F%nyFZx1fLi zjhq!^yGuy#8&D7%jOh62r0GD*A*%#8IklU#8`Il zNuCqU5ub%OE#j6x=Y0YggoE$C&Y3Duz0i`pksLZ}#OH&)qeST%x0lpk<_nCCd`kf0 z-t*7tAH3kgeCz=gq)#V@6ts_HM+DCTC6aiq`!q6*HL>mRGV=;v*%gpO^)#8Y0%<8Ht7Y_NWgp=YdpW33Z5N* zWG3uY(I~`p&xmR+H-)@2&VM@Alwrz9VS5DU9*v7haq%BSEl)eU)fZl=%Kq zJ(T44;9XcZpIFmGpXhdOT{icou>Pu9u-^k|}x>kHuAc`ha(|2=m@_%u#w$V^(lvdnDXW6|CSk_6YJe2^QhWJ96xU|w1~>u!2u)i5B~!Q?IOOJGtmB`p!Rhfv=c8qXPBWb3c_U%H!UD0lD5g+*pRZHyHQ6~{=?ybie_zg`jYP_a8` zL=rZ9@h$=JGEk@SPoYc1a>wf(@i-QerlI+O7EQLeP+D%;Kjjp`cu;%&Cge0ufsKe_ zkf5>6sUr{CfepAs6@7VF%!s>57W*k#w!|qAnRwsz3$@A z&u$r)_L56(*`~-L5W5rp5d_1R0X&(OYuG(f^lcc=iCsReO3kTEG}$Rc|6WJkU)1T< 
zm-1NYZ>Vs6yR$pc+=b}SeU9*#d#ZZsUHy4Y+eu6^S6KJfjozG=0!BdUG<_K9sF8yA z^?w%^C)K+V1Pm|NmXZg6!D6tXe_Kkr*k zljkLUjJ3h;e?D-OXzh*p1z?VAdO7Qy9sG_P*Uk}|~ zQhmgEWC`!R0>)C#YvGrd{FMg81()vx9>!dK5+ki+!fE|mfYub@^(6S~LqyR|`RIv- z(3cAfrr9O4kKbH(hEw_~s5HwAZQ0|S|Cg=K!?=zBIz&++6dfI$o{!N4aU=~)=^Ud~ z=q1t6g96@eE7MOBe=qQwvZ7%8Zm&x|#8o51og+fTJ*!SA|G}m*F7!E44bYww3fs)~ z$=>n2{U*W5wT+U4Gb9@?3OOL8_^PqS9h{~Ugz(*4I9_Fl6UdcnCE#d zyh9B%{S> z?w$-wP_5rHfmZLE-gJ-{I#)w`#3D+K0@mZnAZ5nU>VTsczzQP`$@$)M_nb2JDhgxy zl|3=C0ak|Z+p(Jod-fIas~u&U-ScA&!ERl;JTbYPf@BxdBo^u!qq`(^PLPTC?Aa29 zcXcB#`kk1Wf91BH{N;SsGjHMk!1)9BPb>9^v8N;HBG?(ms~Se)vpjoB)TGhIb%~($ z!?v{p%zN6;%hL5e2YJfpT$;bXQcEo#3ln@z=rS%PrSOyuKkHpcy~1$%dLVtZxHB`* zcRzD2FQad)fTF|GiB5wmy_zf3lD`5a33j{ z+kE7Wv@{B2xrIC!hkToGY!W;ExDAoMAq-h;n1IyA{a1g>k7m{1Zw12*80s2HrxcS4 zOH?edA6kx33?`t>D$h8E~CQR7Tssx@6$ z;(S3zdZ;vkm+3@@42&6sAikYc*X`f3s`Gnx&-`g;glj*(n)dunKP;W$sm8iQ*Lnel z_ETQ%vLk{2sZ0=-6{5?Jv>tzkgXnLd2ffF}2Y5zZBeTSYmvaMzmf>wr4RPdK=HBeR zS6>sIuS-GLGGOv^WZ(>P73?Q{LZACO1#Zp>|AgPiKUQ2fHfSc2j#$kr>&jZG_2i-c zMg_T1bxtwn>B^zeeulZMMEdk{z#H{w&R6=} z$-y{3zqvmodpv+rG>R1{m#F%r(2a{E9FXB{0v&O~-^<%&^Yx`|_Sy8SDFXr5nlq;g zjxG47d%v<5D!kwqe=dtdPO(o6oRlfw4F$~mF zS>F~&KXM=N5^3)BD)^LcSiF(Cu+$9dy)kNuL9*VKIWNPEG;T&=1KI(%Z!CBnUc`BC z7?hpoM^Px`NtS5Jq?Quw9Ugnuz!iwn)tyAnru z`5^kgM3Du3e6vD@PerAaP#Xp}4Ey#i5NL)=0aQY;hz9p(4Qg>)^55@)(7=O%p#IC{ z3qVkm61eb=322N4wEDm#-R?lj_LjgXHK)_2tbq4*d0ml{(SLNy9}=B)?Zgkht16_7 zCa=-VKD`)TJM7PvPz#a&eqlmLaE^<+Xj-kWXI_25J)Q0J5|_4)jyz1eNY=A5Gt}PB z#3EnUq=ViB!Sp3EL!_C6a3nnt%Un5Y=Op8s%slpUS_LSx>iUKph8L8v|2u!&Aob=Q zM}|0-BZw-r8nDwQtpt)lKaikn18e|#7OZA+UMVps~ym#lL+Cm-q<;u=kA9C zZt17!p>E+iTI&&!Xf|aQ6meoS!P8V$g>rwq?{Sw`qaFw)>N&g%@TUBoXMLOl)zM-r z=A%c2T*kI6VpW_Nz2bsgs}{)@XlkNC&vGvv6ht!@Bp}TcZd)3m9`{K@Qdt%6@k+JMrYGQv*6%-%|{K)RFe{)u) ze%Dota4m`>>GctM@|C0x_oT>HHB0d z$l`t;|M?dG*|T2VgPyNhrm0bu08eKO5VEEjb(j0EItC4fhZK;C0)u!c+0u$Ua^0pQ zMX7yZ5z zK?uVLTa5~roz7?m%`bPRgw(FAGG#&8=0G0j(l(@cwizE5UbF~bwIXV9@q&{k@;d-x 
z?r8tY@EV{G`8g>B5Z~%}kTwNKBTR8GK0Ez5xiI0%Lmy{PAf2_3=(-X@N40!Ig2w5Fyuvvq6qYDh08j z)r`JH^II1|c`swEqNAA5VMjS_Dd%wa7Z~cOMY1_~aG4MF%){x+A+s|`nk*Bl67<)D zx3O2sP-yA#;^DN^I|H$-XJ|YqlbO1>i0N`vp?c46r zbSh2P;Gq_yfXpWdlKVO)784sg@Zwq_7+}jt0Eyjd7<0Pc%@DS_B9y4|E7zc)k9FXbCXVPI;z` z#l^)H&2MkGm_zMN=bt8G>3XcY)D_=s6R})}=WA-BitlkvFa~<`Gb2nT-zc1P6rQ+M zsGgq*2sYduk%KDMqbfolZ0Y}0e*Wjn^$z{2yT9lYz%+iAz;iRP-;D~mo8F_K>*P{4 z+)mA{qMMxKPHP$86C*H|Z0>yI80O#hOT16GBOJU;ez+C= za5xR0czF21jt|=q!Jo@M2l84^zd2fLu#}J|d}xo*^!%$mQlx=tx7<9ie9mg8g8g^+ z>EAQFcBu$I5GF8;Nmg^jZrYh~%f!ekuCsnVTdZHvf>BC0zbAVa$vyt#75pdBiBLym zNdfhRFGCs2&ncq4A%ityqGOL?1LHKLGwG+WZ@g>4tEU_&q>ddK)2YA2JrNd74ou4; zCovRUZsWEGxle)yo~6Sd=;)u9&ys6 zj$}YctVs2v+p!|yrJ2SDtuuw29Y91yBNIt+cQhdJu&Lm@2XTWijmtBoI{t^Fkb`ia4xPe7QN^2(IdZDiG$( zvJzCtTre?zc4x^qd%CbuAW`GulsdmEEg;L9R*w!$>CQR7mq)V~5Lr@v7949BRV4JK z59@`yOW^ZXx9Lnh6Qd5czxEQ3A44N4=Dx4ocAd(z4R&0^Mw0d3Lut0zliTzyE7!lA zkq8D#Kj!9;1ssZmgrY4cEunEaV}yQNV;0Y@Llr1jLHi#~UBDr5Jxu8QKyQjQW+qH& z84PR512`KS(~(8eO(BYcY;Y6C?Xj@O`2YiCqe7)c8WOSVXqVlHv_3m0XjoQgaFBjF zQaZ`fEk^4f=16#FcE3g*WG%5Q?#*n9ko$grb!#MBj*!7{TXx@icr);@AJI(%V)l=v zs|Wj9(9xPm|GrNDw#!tye;05xQbLJ8^(`J0(m(g2Gk#X@XBF>@1ZO#R+yKMf##*fO zu*t5mTo0xgW%IgSK^5y?gT&uN3q|HW7Cx`dMCI$@0}Lv2r~bp<_8J6C2SCKDKivWs z4ZjFd&ByzwiOlYm@ja~BNjpBi|2lkA{2 z5E{?Laea_sVS0Zmocj`2XaE*sEd|Z+ng^8`D!uU+B>~1U-kOqKyw@H9o+x@9L%-1l zQ$FDpPEJqy%aj-{MaOKDe_NsddC-T2QUJznCM*E^(l4PD;quZmF56v5&3QAT)Vr8> zT}_x^T?tp8c)#wOC_!4Ox5`|Z!FM11cN!v7ld%;)W=VkE8US9$sJ5$>;slOzbl8S$ zd6_}eGw9J$F#>f zIzG_;pA#f)G!F*ZN{RyICwi0A8LFkdQonog>$4EXqNq@}v!D;%jf5)s$=aPkvyr13 zpEKv@=ndQQyn?OvV_Fn?#MUHpDp$)m2z5aF2Hzj7G3c;Oi|P{|6zC-(e}^1dF4jA$ z6sj>^nKZO2qwv|zqu9<@gB6orchuT0998jK!prGtP+(nf6tlbk2xHm<-nJtE3J{R9 zJ0Wgfe&6bkC+Z5TE?XaV#l%IH9o&duaCtq&^6^D)^Tbth<7&zxiD$Oj<9*GV{Pdw% zV9`9YR?ng@^9|`e5tAI27Jbq8Ar+a2JV94RxgBs1`R=>3K@NwbMZrEJ)%)Aa05+{s zKYR$TYz%K~42qY&K1FwTw?n()tpnX&MilTJ#GN?<{nH1Z@^4!VmwUL-!i)pG+yQ3H zHQ#>Am#jq_I63qtYn z$90QpMy8)R7!zX9ek8PmUzWC9H83Y|?raC9xU-qhZVbn^%n0tZupMkpS;e6I0aVm; 
ztE)A7mGoFj!sQv15k%fkm);JH7@&#eQ0_^TP<0pU+_Ox~e4Ag_kNuHu^E$;d3z8FXOwC-k^hybD<-kI#zGVUq?+ZeXHht_@e%*8&O zP&M?q{9p;t|4i+YHza^C%nVS7aylrf`%NH2F-}2XdTH(pNVq>hJ^$prC-(&4?Bp)L z0Os#N`NyuxKkfm(DsImRv}#Ke|l5>Lvi`x=DiQfSJ}VD?6+_%P12e)Akj8`3%04Quen9@41ArOKtElT#TYMKe>!&{h=)|(F zt~HRF8NH`%tx3cn&HkCvBqh9XO*7nkTf-15=fUbbn(XaH6i>#S*__7+t9BeORxrnhejJ1ww zSa$H;9g!*)TY zpH59Zj#P%Hg2o#=8#iyy1uQ_RB{mE&J@uL#Ie_=%ecHn`5*$7NO zffkER%K3)I;xiKl(2~Ff0FFYQmuW+ZT-k5aFQ241WdjT3rkm8_a>^^t8NB?RJeS6mkS9J8qWq#F zsn+xu+xWewC|uMj38I<%jxa0!dxnOh+F+iHa!5d}sIoDXrE-CpdS)wSKfFKxZG z0-HO_)-Ov)2cyHl=u|8u`VQb4@Qkoz>o0>QO<&l;%5NYEB~AWSw?YV}l;rA75q2-V z*d3PwNKAB=K+}OnV_=wnh26;K@`t|X){g^Eb*agoQRuL}uR4Htp@w}!T=$7@0~tFP4hhj4HI z2jnnJ2rZfypa#NWqh3TcQMBwqiC+FmH!^Gozvum3gox}6;)k@-uIl$e2_q#Yg_-hf zP7I;#YrxWxl%$sPM1wiX9(FXQyp0#o0%Pqp4q1iGTbIp!0T?^sA^__Ls`Q<$v|bYb z9TT_GmwrHhGqloC*%hPowk$nU6<$kG8*6m0&R>K~FE zGzpjt0Ew_rv^XtyJrmoP$AnDPvMsp=qTv4He}Z?ZX%N zR_Q_YC->R8VQ&k9+=sG+CT^vYV^A}4Cl}>Q0HfUKu%=qGU8QX)M2WI*J|S@IIe-l6!+6;a%Cvok;ePJigYkYdWmG?5wjnR8Rt4 zcTp$ji!~3deK0hdR|DSzIMD{58^;mBoqi7A)9}kiX>6Z6UKEpNfCXdT;`G@@7@&15 zj6{q6!yCVGri)H&$Bp8<)A4yYm@F@dv2sf{tv_^2S>Q4-f1xiGsusnW$(obEkcptV zY7m)0JB)USFXOc`upXR<JhKGxEL34GN!|CP`mMEy73%KpC~a+maBml{fC&d5iII zb>}%r;QgZnV9aqz8W#L?wgZR5_$>>qcg6Rl)j324;!ei*X3*$KYmTv%2Y@yuPiOV| zCvZ`p30OiquGzugrjY8lYT6pV*7>AG%bcVz`YKqx;iNATw)Vo``#eXv8e>0QGqh_I#H2bAza5X-FKKK* zh5>_gM$B-pmb9B8>(YjItA|vC(t6!L@HnOeNrYN4WI~gGL8GK-{fzcVXa)%0aCaI7 zifg;u;&`?pdJ&emh5waXv}9)}i9UrbnLjZ4+bAk6HKya%#+pTR9T(UKP|SHoM49m% zQy>2ftzw3tjtYb`rULjdc_dmJKIXNaiqNvr$&&|fC!!9O{fD<}P^94kiyi_>h+Yfp z*uDFnrTe*WbH)fQG%&uWtMNNu=PV$eB_j>fM07ni=;}r}cCpUf*ObRo)ZmvR{bBG& zB%IaOTmlsLZ0By{)|szkD9p|RM79FwyTs?hk33ZyjmUkVYx|zXW@r<4BXLY{ z#Pi4D>!Lc&A1I`-m2dRq^-(x~bA9xPykdzzP6ZEHfKX%wa5bhk0M>X@Z{;>VY#fN` z?4Ry$m3QHCulYzJdyzgy=IXWzY?82edJ!IXv-L=1B49T%n%N_4Czw#A5wQb;n@jbi z{J<-_4CiQ_-EY0!FZRFQKCPw&Q>Gu7r@6)hnAz)t#~x8qNO-gkC$IIq3&(*q>;4i* z(pi6E*k>eX!;&BQ8i2mNviiuvfgUB z=0#*z69uiFB4qeFLPE37iARqki6O7y*FJmmT0^q{S4`zEM+ik4IhKXkh 
z)=`mPLN9~-=Eekwm=rm`lWq|)E5(%>G))%*=gzBn{O;x)b*bCMYCp5$l=b&6K%tCd$h)BqN zz`$WYB0^v4?*m@G=K>-B*+%gpJWqfr^;;viXI z7FaV$-z1JjHpc~J$PFGAge4R>c_T$|yX_=@z$5R|ult*k-U@=%~nppk_CZbU*^@EF_4YVQt8aYT?@;lvt~5`Gy4hr`R4FMj}I;+(g? zh>3}x4N|akqeAh-uHPt$oJt?u{88ZGjq*RT%#DN$hS)o(J>+Mc>biD-jyrvA`uBULFRPE*Y#FEa34>)NYl$fP|I%#fN zz{5}lg(lUiJehym$TB09ipsh)sF!RWp*kE1SzMgW z^0Q2`w?^-uaTa0GtAEb6c^%x*PD*j+&VjW0U@qNy7VzgYAPXzWW(uYjD-cYGhvUvi zGCc}OCPK+>8aoeF)$eKMc6JS0&xnEuoXJhtn=6IgAQnG4MKby093sGe*lXmNl3%Wl zmfq_JnG5&!?bHJZ6aLAdxP%0mt%t}E=v2L>fzzd8pd1aMr~(tM-&NcJ;z=FpKK=!l zA%G1(%+1MKO6IkO1m!=l&kuLNG?CYbWi8O;ngA9YjuTu;A?+r76I8Bkf+imTnlg=> zWp;9d{|t>zkg&DfV-<{U3aDpdcD?n&7{QqH4?Kk~Ix7<$oC-?Srwom~R^50hay`=q zXEaZB4ED)FrlrjC+8$0ZcicG@mbtrD1XKP(!=+&H(0SRK^CpzE{0;~kK8qWz{;9Rq zYuJbs{xy0l_Dp0uUpd3>?X`~|eJf{&k<#iQ)=;iTShvJA_{@%_1T_+60J#5rrq);Q)uzm zEmTovQ`9oIiS7?iXPj2vMe)#KTQ$5U+P-Yo6rCx-(s1a}WTv27&@~qM8*AxoEz9*Q^97EJ?zU*wC|2&^P=EMvQths*Aob# zebm7}VpKQot3P>-xGV#`1ZJi~#n5IywWhkwTW(L`R!|tB&`f&+>H??_6&=)yqYX?B z`xby*I311|C3HQA9(WB#_m&0J^sDhDLJ?6MuM_|Oax#xu_6hW>CLU=;@`w(*} zG|VKQXqF0kgf&BbsQhW(h`HRHA5A!32t^U2B+0P*Gg@UiI2s#Fg1i?Wsq^83nWO-U zp+|Vg7za5l5r1|~we@^NI3?k_cmF55HxM#(S^IvX)x+;Z5hrgIc;kgJosx3W^SJZ3 zALhK>$Y^bYkNNcDSY`2hjn>l$dz;q`Di-0R=#eL4AtYZ!e1Q5FQ;hvS_XZZm-8WgF zV;tdANO;?KOaaJxS+dZ<>746Tyv$kkktJ^ck<+oe2MRE zBV;4u>_VmD>{#;|3=sQar-%+;*^pm7#(lj0VGjanuQ64OM;SVu|KX8nz!3zpipC+# zq|Ft*bkd3o>q6@SKig2!^s}2Sl8A(|+=D9%-o=sVUJE5h1*UnH&RF3md5fOiR7mp2 z7hB+TlJ>7(e+|5ICEUCFgGsQBwv%CGPe4>Eo zeF$EgH(kC3g|beK7OH9JW>2Vt?2=CD`nPJxoR_wyZXRWQo|2Q;bF%%@BjCTAi^%>O zk;P20l1M6fc1468ODD$ruL=#>AiPw&RWY~#UD(8kgyf(}C^1RJJ3&PGPs#P$ z?0ZXnARO9~>{chK&McYSHH(jdh1QI%S7jUGG&AXQ-*a=vqpN321C>V5<2rw=ae z@_Ge5TWRlsU>`GRM7&gdo;p$WE=4xWXJ%zYC23vqnCy) zUy{aJ1v!2WpY?sb`VZx`_o;8zk-Z+yu}wY|M8*DOG|(}0O=9GQ@6NY(EO5|edDWNrkJt$SO+DsK;c5lV?_c`>K^+DC*<%!F?`!0SGuXY zRr(``ADFPuKel1p0SB5a49TPnaD;k5b&#~H?R%m#=|9f)Y&s?bTls;4(qKM&G#{B> zUG4Nun2^C@PeWukC8N!}JLMl0FUb=U3<$oTq_Deg@u}#Ux8W17iJ{4>KW)`SRQsV4 
zu?c0B!hP;>Kf*jOWxCVGWZ`yvdHFOu)DITNG`q?aq*eASz5|^^;qBX?YWo$jmOGEX zLX|oUCqS7?K10kWr?FA@2^LnB0AUl;U%t@)!wvPLK7@mz_Bv{6bbuVPh2A>Fa9X@| zmX7`#GP$$AS|=?wd=md-dI+^oZ-W|$08ZoEDHe@URslK`a(aPW2dG4uvW#<%2fm04(JB4eI;xdU2R)S#S^m5_>&VNZooPHPhkvDt+}V<_Pf)c8s~D*J z()`rxU9X_mo@b_eK$UFH@&7&-5GXJ|R2ClE0)Xeqx54X!|Kq*$L#cH}4toO0HOo~K z`|zo}`2W!M7C>39YuKm~(hXveQi_C>bc;wMsB}n3cegZ%0-|)IbR$T2gM@T<2z+$c z$9Z0G@4a;I^Us+x=gcs(*0`L-_r34)+;LskecdR$_&#=RYZ20u^&bT-jpbfYF$I75 z0H-(zQ-I~542ZMtPxxE+dbRZ{G&U%s@5^(nYRH9>+kHi`PssDU_kl@w^0JlfZ%%ri zPzKj|BWi}U9-<;?AiZxXwGKR?bm`B)2zU5|+So+4tj z;;;97AD!-ZDgKN$-zrsVK8c{P0X-;+Y@|0m35@5bhn$}L*@%UIhx<5+(2P1w)huT2 zT;(J(SZdZNPyNkR#s&3XFPM^%>R%6F@Hg52V%?g| z3F}%MI;dxx91JXNxPKYBF5zz07%NltQ$$l@}Vll_}AegCZkPyYr#p+(nl9j6%!87kqQdX+lx;w4JEO8!C=y;=8ZB=Kk zw!RrUrO&!i<8oQCz6o%F0z@0VCqQA9Zs3qH0O?ofDijRfCUw z>hkkrWqU4XNCX+bGm>r>Ka}$I!s~KWzUegu;IVqZ&vJqryZ?S5-cST}Y8H7Qk8^N; zA*f?2;>D)D)0V%_07^1xO{lE0!}WpZiZ%?2-n895P5Vw3r(3Y}eHna0tFKO{7k&#Ps{d(0ZLPrKHExeBA?~Fp-Br%v zACN`7=MCCNR&!6BAle8J7p5}_qon^Rjv(@q3IEwEZE?(l%%DA*^k&&xJ&Gkyo3~t1 zIeW|YwQ9_bWhh$9ox@fOc@KXk$ed%~C!G=@*Yh)dH~D3kUUFI;FUInQf;jULU}+LB zafft?(dmo3&Ywv4jU9tHlv$h}31A@S5h zwr|o_!Z{dPtv9zWC+?$?wBJ~;rb{*nB(C)eNXX0Pj#x2yYG#$!F4C;m2D@(^BhZS% zw{vLI_U8Ta?3IvK6nRLhQym8wdFuZJf|c4F|gt&D^Xhr{m;0i8)6i`Ye?O z&;6C2M%i8Cmu5Y~7jb;~$j>$Q(uufgP_UH&&as@12$F8d?U9ymwVx7iJ=~ObS0*dF zNYJh?hf=SF1=b@N)@s|1H=_m?F_g*RndW zKkW0_83c6SR3k~%%U@WlUV`7`9DfXm&`!h#`XPTz4l{l$oO>v)?BLyDdCz& zz`D5@Q_$6eN>oL(p!VWtae2MeGD^j9r&ti3PU_&$TZPEFHB~TN$w2xrBKlv~PQdM? 
z4Gg?L7`gPr|91xf{-rJr6-v0@^)mq{FnkN!4zU*Vxzab-2UiBuAwd5@@zsrC?o=pXjj`ouRn|{Sf`y( zP-*phjdb%ipPQ35D&`m2&sjf~Gm{c_3ddej4DexWd#2d3EzjmH4>c`U*35G!dv5#; zBuC?5u)50syAi$z;<-9EVVfKV2VvJ+NogQBjk9V3%A7*>s1TXPYSc#^&296>{)Tq` z-)CDKRR7gQMG8RQ@Ks&6fS54+ov(sYRSA7J1v_RKLF2Cah~2&pXYzz(UkaCxof30c zRql=MoszGQNsBdj2h<(2hb8``O{~uA<$K{H z{AeE;_EKy*4^&Dm;e%pr+_J}{|81r1w5CW~7HGcKqC_LhyBp_d2yO5Xw zJ=kU+vQ`^&@Og~%EkVr+`}HiFkC$ECorJoHa+_4GT6eE=rb<}f7Gi<;A5|}fXB;&^ zt`ZKkl{~A>tZt|uHarh%OQ8wn!n?FtYr<+{2_DDWUR9J|Fs@(FX?rjW*gG80e)R9qkJPxy zIm8E5wVpSqEP*hkb0AYuM%$u5b`gZUs!Q8ObNbeNf0pC%g`(p{<*&2| zIHgV+<7cnO%}gYo55iZ&%*RU2= zf{hZ!J15`m-@5XPqu#;b)b%BqrbN2sEehX2z6QO{?mWl42qsTXKe?S&8taE+@_+rr zcM8)=1;QIv*CR`eSxZ1qk8OHiCc%+c2w(uArr3B;(!64Y7JS%X5{C(^BE5F}2lVt~ zLwTbKe-?8|x1q4#>MeCz#ClsXnVFlhFEUP~>4orcRl5wX5>Y3Hvuqo>=DH@8|IymG z(56>$u#YZ*AHe@ao0N&$Y@6?Yz|PH69QjhiRpb6#7|tVQYi|h`TWfA;Y8r~FmXBKN zF~x$RuFD<3Hc!8$wMTtKTnJwwdQ5J$&ILc3*Z^trM3VdAwkV1uTq*GFAkqHa0|EX?4tV20kC9ZMY zo`yL3;i%(^QO%Hzb?{IxA_lXrh*M4`*;D=%fSJwCZ-jBMQgWw?R68~4Hg3-koUsIW zewhAvmELiFc%`)z)H3t4yqbzX3*fW7;5V0_gs8Kd{^#)1&B#sn!^-CZIMCGrNJtgH znOQ+NEC>Hwobkp9j=?75Q-+{iU;q}hyV18Z%(?O&x zkjt6GXlDP6{S|L9j>~kA2ToRrm!dVgg3CfY%*Vk5-=q~P)#QkMm;j`uj5wC~6}I_> zGbC*2p+NYw=6Nhn#Kn}$CJ`zOhkq~B7Kr9#Z_8bNPQ}aiCq3Ri1`P)0ZSCV%l{DyMX*)tY?P zdT3EigHurLTcU}6kOCUHq}Jhlhkeu{D+=u9&_qDR@Meac z?61)Q$Mn9oOU_jbW}Og9itfwHE9IB|%wkpQG8uIp93do6b@ z*gg5#7^JJFngR(J&jw;GYHs%|vHY2;uAvP`fil|dyxs2e(f-pj0fr{keM2yc1mH@|`N8$;!Jyt~}VF|1CtS+yH{20yIL z-xcXUaRdW>8s=rQH-L&#ZuQ|Cvj6(Xl%ZvHnt1??9M44IMcUw*52TrZW=ed2 zms9rSVEv(zx|Q$eh3LHy@{gPA1l{SqFR%b;?IzGrTZYRC=PjYK6n5dEpR5qIAKxr`!%TT+rw>Z9D#J z5lt+l=Lzh4C} zOS|>wGdsD#WcOY#J6pSS9^8J6*?Ra{C#Q}46K22vt{jWydA)rvTyGqnV5fc8X-7`3 zEx~2O$izbGu|?IkU-R<>R{eJ{;EihigQc|%$9)SC)cE5Owv81aeT-lCg!U`_h-O3H z?%$>vPxdFHheD$3rpxsBoZJP_{*4EPE9I(|G@Fh7qi=)WOwut#2(%{dM6M*RfK4%1 zz#qn@^)i1pGeI$fW27it)`&q{DUWGcHEn;6zML%fZLqrtg!@_lPu#Cl^a}UuJ2>1s zSL^WEo-dUk)GD(~23h{w4p6dp*24FhfdV=n?ErTSu$}kyPljK#iE>2)w#sNQD;!8d 
z6(BXO(a#iL7|?_q;CSGLC+6p$40=#fhJZ7ry@ooXMR<~y`9g9F#|gqSJ-{ms2lG@T zf&r6YeR$~Vs&QskRv2^bnL@$$*<5U45~pb(WLyCMw@>i%)A6yX3OO2t!WvLlAKd=_ z9q0RBg}$Kx5jjwB1Y$Jc*co zS;Z3Qu+{>3@xe|5=;D4LRWt_?cNXb)BI)C9Q7i}#c$cBnL*Jj`Ho_4rhL$yuw8kW9=x^y=V1@oLAXLUW==KG&2K8W`)%ye^^I91+qMl#=YW~>m zAbhpko1d{jwCVAtb;dB)P`uHE`K-k&^qoQL7r zV&1)-oF-Tbx6C;vA-5z!-f4Z`tGDX|;1)T?ciqTi z^N#PB0P5Y|vH!;A*vxAT=JB#aGhGj+U~z4Vb^rR@rh=ERNNS%Zod{#VyZQx(6rHk2wNKnysx)s_$;_iwv0FNozsOP| zYARkF;+A_=AZ{7v+U1H=(K)=gViR+yNuITNt80HIXwUW7f54lH?0BT+ z$xF0x-G}11Q>ek@$28{%}@8`ZR>7DvHqCROQUrJ0jXF}Gh#O}$? zRZf^rBuIO)MTE7SnEODGyI}sLzg+nA+sgxo!FR^o=>~x>Uc7h)#)iFb&t2BPMUvOH zCr|fQbhoGQ!u^_CU?z}j;4uutgw!6av=iq0ICyY6{iki}mC;707+;j~aMh zPa?~U%XAe=n;Ez)G=H_<4ZvBxAN0~DeMzHZyB;}(_1Isc;dprn9cp$I4ocIUqcK+| zHQ)S*ddN;!hrX-VCtKJoM*45x_Vsd^AW~B)3{I`?*C($bLa}@EJ6A|!kq0R z)8U=;`p?MGVdd71^@-j{--&^?tu%6)P>}AAU(Sw&(2foZ0K4{JxnRlDf;ID3aYNWd z`=YGKFg~+8G9JH%k`2S}i5K5@?V+R37uel`aU2&mmqWX5^)z;|3ubd&5c)9Egxnt! zi%uXMPVA=nLAq5Z8?tNYF@b<#^Q7Fg0Y1c#|2f%>rz1PRrsEOTzU&J$SvRqJ}L2VPQ(H4B7fFxd&;w(KL*Wla&|bS@<`bmwsRCY5jH zTP)O}nOH*!F3Hjots7$|&n=b3$(fRC==ltZk%XtkDjYbO{zY^qwv^NkzLe6xg>3Ip zJr@izUm>czwgp2UiT@uRq_y`SV5T#QaJg3*DPN|{;X!l^junAyQdVANg=+q*mzz~* z@-x&{Rp>;646}77d&{rWol3I4FRyvXCJcLJFrD9%uGri^+1yf*zfiQo9s*PMK)gn z;c~g2ocQ$#QPq!Z=3yW2V2zRcenJMaMAv9^80oEhEn1%-6337PlHmHYi9?vdzkfif z1gf4I+ZBZegSTLdcf6|$WHO!KizZ3velSv@)f6V=+~(a$k{QMl|4886_J~7M%!c2A z)ve-bn$Tr=%&f}ghhTz)Hpc>8VElv9r=m@dZd9uILu-iH$$(DcLu(AxAGDef6D~x$ ztAQ?_1IOOKUt-3}X;Sw)0{|u|wcL947d4{i0#?@meBAc zta{$bI~hE2eTW2uGGXHSk?cJ>lH<2u=t00DG>rssAKEDHpIGa1^W$260fAbPQ2X1f z4C$6{8U>5~0Y8-sL(&iDb_g10pnr~H+or6Es?=5as@cjhl1eWqu=;itWwx4A+GF#5 zY5e{#dv~9Ev)ME1)GI$;En_L1KMMA>x_6~-NTk*F$c|L%=oRJZ_2!>HX;lP~8HS%! 
zm!MGti~sZHfB3h~)xYg-`xKK7nvcLyuIL^|zFo$wszlE$Bz3h-TQQzIQ-E1hY ztSND3IIij?Zkai1q0=#P{Y^Nbe1B1Ut(iEv{>@{r^V~f3dyOZoPo!ZD%>XF$d`cnY zS&c_wgZhu_1v-1mKgFULoU-Lso~V{2pGipGL>%j!u_nctuv`Iy3HL1DzY!sPhqm4( zrE(NqJP{lt%hf^B7;ggUDPj#*%2$@+%(*XzlZx~_fLaa2;6Cq3Oe_eM`XC0xs=6PS zxc)$%LhnM*$QN9e{+p7*u{y-T^`A}45)ktM;Ae zu{y_|cZ~eoy^+!q#09lNoJ*+h(XCrO-NDjASgMV;JQ|CeP}P7z3Mzy z<@*!AgVZ{*Jfn$E;P7EX7r!z3%b#Z-WX3D*rkMF5n1(wE>%ZsL{@{r^(Lh)1gp1TX za_X}Y)hKLUR+)(gBA>^hr4;KFvm-uE{P{0??8w0$FfUe_ytMqFo3}#d?{e-aC4b(P zQwq$($g6GB?1k4oPB+V`mLD7cVLGN&voMfGars!J!DFF9T(P(%U9 zY5Ic5tgCK6NHWfSO-$YYrcN0_gG|%U`Dcjv{?&un>y4Ugl+(2+gR1opIFU2?ce?Ew zDmem6Y#2F2Q5KJqguxAj^WM*^-AC<(AC=E`eZQ@lTAT}JQo%%u;8#)a7Hzpyg(Amo z9iST#-E#rvC8-O`%Tgduf8j$l1+-BFx&98+E~)@Q(iDSx*BRv>Ja$t3C7bm`I@&Mb zb`UO$Sy23L8LDoTV3-UnGVQ);37i3^4(C zR8ONq-NBrt6e#-^z_3<0nAcPXx@2M}FvIp$CiZb51`4!d);bJJx@QH ze`0EK$ap>LFmYS+dFOyjRBen$bLG|byZ7hp!oG8eO_oRy&p+$5T}EOPESOdyCYk<% zLI~RR-hLAH+DLiww%@G%?Z8HQ$qf7+IiT##^EzdW5GOuaw;nqIt-Sn%c)j)~CknAQ5; zbDF*8T%*P`6VpV(rTBfYk)CG`|UQ?*3dR%J^Px|8;^-}yPS(^x24u~{YEt8mENgQ3Uc{#kR;1s4H z%%As7?SjiS;8e6or zN{qvQB=D=%9CumFDH$2T&yK%u0=-cW-Z22M^#ELzPY}sZzi=IpMzRDT1@zZy!vDPP zy))3~?BsYN{hD`Q+rRFE$H$`&1k|Xr&X~+LyiHSRt4q=5Q@`$WPTSJCs)*$h`#Kc{ z=P(bTcdz|a9?HK<+wl#@+rB1r{!^_C@}wnvG;8Lh@kQ@(s2o&wjQ3LNX#c$2V5_YN zGM~je+|EC)(OaZBq%fip(95N&-#WLlao&Uc0x zw``AuY&}`v| z`IeXeO~(S_Vn(aE@_tsCBm3*Om`&v|}Xi z=4-d;t0nRVlA@Jm)^^;S*GKqz8Pkb85x+`+}iT|`R1uX1-sqJwo(5$DWz06s{*JLTGo%iuk*MSAG z$$Luf8-E0e&^mi*iszn_P6g@6_5sQQpYzdF(~NGbD^~2Ga+@2xeoYbR@)6@*Bx(BH z@`?(vC+nZlbZLE6m^0pN(qd|S`ToY*mF~_R3|*8**YPuqSY;DZ$;KUSfgtT){`LWW zT9C}mIBPK6RHtcLey;ISk!JaT{uAJ}q%z^n1ecq8*>7BL(%hV^EPe6vrNr~+&vh}| zS^mgxK;Wl)Dey}LN&X%3YQ1JwUl4&-(v(P|x9l>$BreI*>RwS(;z`kg+(EvorU8*# zPx99O1>}%8H%_TfnUV7;YQ#G}eBW^O=0Xm{8Nb|a1;UYRrB|IZ-cnyvQbfi|EjIJf zeyb1AT)|9GAJ|EhnU@y<#(H4bEq@EuzPmsjGAbUTXZ8K1Xv2FI4!+G+Pv7yjwEz$Y zTT6vGC8SBgI*;v$~7WE|vOQO$)Xv&#^H*@}|v_^$sQ`6t1efLVYpeK}`| 
zUHyP)Kd$33oTK@`fPCN;wC$Xf$2K7L9vLWvLWPD<8B~j(L0CG3U;+h)ffO53J-Gdzocq;eVA20*H9xhYZ2i8#BnAe97oqF6yd*coD!|vFyUzns^7>Q%75f) zzyx6-fGRi?Sk-pQ@~(gW-v|m29#!ci6K0^e5blWSTS#R&RPr5(imZC~VrxC4%c;D! znvuyJk-2UfQWZ5|PPRQuqF-pdF(C58($&&4LmK`%%o1K+ayiHP`CKMd!P+?_VrMRh z8FQDxNYK+eYGgTa zD7oYZ$xVcF1r8l0ZjCy}9aKLher4!3dmRm{H`gR2VO0)D8qHFE?aiFobOx;ToYP=2 z+*l7Oe9LWybc*5W)^N1l2WF{Gx;Sj0Af4oi1|^r1!%YhW7et3HsD1qBk98;n{pl3Q zX?z8&vweeOLKX+T-?C_#XG`wIj-8?j0(bY|rxziUAtBGbw@oMN^F6ySky=$^2 zhc3K9IzNP%6kaF%TaNXqsMZ^s;WOREo)4^iIib7GMhZF&DzsYw(C>VE9Z?TV>!^?g zPD01$A3qBS|E*ao`2Rt(_79eYT(lhoXrOkCt%ICEyisr>^gAQ;UG5+Jr7H+Uyv(m4 z!EI?f%rQODOf}{`#8=>LE;m-}&EE4Me=vb;DpYXe(w3wm_dK{)Gsvrv=fn~X_>u@$ zQlOqN^;K6|{f?Djtg`0({P<&K$8Xi8V`bN+mQXM9pQ_9MEkphPSF%PGa_V=s^zLI{ zrNicbBx_LuZWR+?=zRyw7V6oN*vtOc^#(myB!kL5{q;`v8r|?J z?7Xa~K?gQecQ;U5?uTYnxz)k558#wHw1=IacRk==sH#U?jM5F>{*PuYkbtfOsA;Hy z*W;W2Efoc0$-oam0f>7#q2btAh5cLpLZ9B;=`WJ5^15$6=dd6gn|C+(9^GE(YvZbS zOU@g5C$=A?JulC;Rqa z5WD=pKLp@xO?^T4#y3JLN9WnPXxB4Zq@FqM=eo~79Esvr_h|J=re>-WeMY)#j{UNc zF{Oa~XY(b#iG4nSJFO@oTpzDtZncrya$QWs6hohThOm608wy50PM z=KzJ@Pz?l!DW~*0JTP!d*1jS17jEzyuTy&UN>V~1@Usc#fmez`q5SH*;#XqoK7PX1 zI*th#m3f-QZT3#GGZ@zSoi=7U$>w0hzr>_pc)X-=sR8SotVk_=o5|y`OZB{sNqa6~ zp5W=1+hd7MuuZDc_hGL%Y!+jtHD9(`@A_QEMzNXI?EDBY4cq~CVt(U5I)${F|MfZm4& zGM{*v&oirdbe$)j?E=LDNFGm<3}#6<;U4VB6ESUM6(-({fOiKHp^=e0{!tnez+x|m*f+0 zI~(u<;}OoYEe|-EaIXh@hj};tVAXoBh4I;j@oDAfX4LLsMWo=L(g7kY$)rvXFa$qE zj6d&36OYy{vta~&Di#Ec{G5|<{ft+|eZ=~vE)I2)V+N5Ha{k^5=@L=6epQ&7_- zIHtTO;bv76EztVo2m_q~71WylXDSynO)p!j!f3hz7zR>atHha4Y1ZuZJ|q2V%bXvZ zL*_1?cjrV{=Dv_KY<%qcPVR_*?CeoTAT)91Rq)8v%JRINSg04Se2Y2$_LSe_v~!IL z?HYAg*s$K&kpU!dj!L>bDxeEo>W+U1T61Ebz)_%kyi?_2|EIMAb!aTq4B?Fzxv2o% z^S?(;Gm{La7N*qhFy9xm7QybAC|}{ly>Pu7xAssrmnm50))TXR^~2;z4Z2rUZiaU8 z4P^Vx+k@h4yS&Ym0(0Va;)k|& z>S}af)#OxBm|4UeaUyd*qP0IDTJ54DxEq}BDD=3c{jKMRPTGHsj3Isn+M&&!tWY&( z0nM~3J{2(VG>I}30o}Vh0YWk|A4uGfW=}*G4N}1!#2R>5N$J@|S75)cDgjU?z?_Z` zuZrDV9+@Y{@7{&gH}40{%P(15nhhBwD94A^CcFKj`litqQaT>Pvwg5n>TmTuZL|0B 
zl*Jpt?V^)lQS#_@5%;RDp@G2C{7tU+-#Fa-zi+#SnG)~Jn|)7{t#Wf~ zeI1v(!dv{|Fm_a~DyL??SNu(7D7nQq4!gPaHD2s_Q<>>xv#RMI?sNL!*9fqV`<^+* z=4KLDXiZz)GB7#8Hk&_xb3nkHy~(xrag=GHb=*`Y-tqJ#xRFb(-@!15Utwe!z+^|I zz|8C!ActX9HQUkR;NlltlG87NvKyDhSByrscK=@9Dchv8oPf$`Vfq4IN&)KyJ`wucDZWKt ztutRRW`E9F?9>O9W0CbD{bYUwM|N$PU1o`W?FU}-(^zhge$VAwd_NrB_+kh2*9 zT`VeI|M!teu9HgMCmv{=&R5Xk2i{uwn}Y#6A{bh&(`GvUYGRo$aYCW7)Tb>(6@_P3 zu9$k4s`LJEZO$=h9;kbVR^LO=)x__;z~(I{^9IM29L~2_R~r`znochqrjv=G2IkbJ z0ZgOfZ1)Oun$b@?{g<^IH8o@QQd|#4iTg5bqFvoWCcmzk)6mNkO0*s{1O4WGvvI;_ zv5#|8llEo%R4>+x6){FL^)ZYJz2W0lD5KAjL;2h^jXu^Q?%`ISi-ht_nj}p5uJC4` zji>MVAstdP=?nBG(dG>}!hce^L|Lup);w-9BQ=h;T-e-}pKQ{fe(ZJ;bSh+$$*V*a z0#Kwxg4f?iNwy`aue9P!p*Vax0Mj*+>23Q^i2Le7DMdBw+($VTKJ^w~qh8;P z{~{X^Kg1Bh;xL7W$#V!LpaK~rrm`7?{Rv}cIlk?t$NP;fT|sqf7S%LB?Hk)2D=3j3 z4D?3NPdjGaY9FGwZ?3IAd6A*i`3tRaQhYWV35U!%1P$jkzcpIVJve+kaIDQfb;M82 zhGafo(eQSUxS3Q5r^d?5_ax&e*O zrv>6_?n8hx3lMaE@C#6p!}4G6cM)%rDmC$Z)thra+BMi>TEQ7HnKNa3TzYr?t!1zt zuc<2*uXLhlK&d0F?Opb^bN9dS8I(80Mx)aXZ))zBYFdZAue=aA-f*bJkm|V<+o?%I z{^$|kx_PAFJCU_*pz1DfULQ?z!gB;e2a5nD2)j62p0@tktGk%Ph7(!TckeJDxd-dO z7LV+i<(&2EBrnr}TkWa`yI$+6`R^!xm z?4Pgw83)(uytx|B$iwtFrT`)iBD=W6{+Xp&00*6E@V;ErPCbG4^B=v7t&(k z6ewhlNr=siXzXA(6{uRkGbxC7E@(84UbJ57jGrl;H)reWoWQo5N47hCb7yM#cS-g@rEP0Rl#LiSuN8gwZcoa9 z7T>VtAEH9x-0$ZjO7FP9`1w z)|>nupEo8e{k5>KVh5z51|tpX+$&!{E#Ls#wx?@F1x^Hx>m|br1BxhZ<{D$|)w+HX zrFrp$&tS)hJyeb@b@5&vB$=mY$<2;Wh55MS1ZT+7PJw#Wp87w@@Xma2D^|Q$8 ztuFuU5ni~^Pj9QxHfVYg$KDGX>kM+B=5y_**%`CJkvi6JU{!x#>0<@(paMJ~&9;Fb zFUKC9FqTfF9J<0=EUCJEa9LaJ{Q>Sw=qYQKC#ORtyrjMTh9uSCW(}Jm|7o zT(!>AS7mmO%xa}gGU>Xn;&gH_7;LNST=}Xs;3D_E zJf#maSP=9Uii>R&Z;4~3fXAox;7cVI2I(7j6NYFk7xVKz^P1PnBslLWtn}NER|v2_L6;}YtTgX@y#2E7dXxM>{jPsD(cm`)!U{O-7zKxBiUn1dqUUUW{YmG< z1-OfouzFX>xrgoHtSOeM4;wC*B^c9ulwDj~7BR9{5IxHogO|r)|6#mkO|JRjW(P=1 z5m2Vct3Ghp{s<%uU4N4RKC#cD1f=Xa-%)vJSK49?cpaup`Zp-ou^N6RFraW{W9QtT zxOkBIxCjFi6J?H;>X;O}zxvem^nOMX%6F+@A-q-x$8~G5--pncj_OCDfs60x6hGH^wpg*vEEpekSUo+{Y^Llf;lXVpd=V=Zg?xh|A)8R&6) 
z+P=wYGEI-BXRk~i+HZKcRr6kUC~)_TL`xvQd#c%#Lv&-G=OjH;Jz6ejhR zzIE}G*v0rZ(1WPEKGK@|c4HY5;S>vwN1mVmN@QF7_%pKYjGMeYkS;FiwXQ6LB#ni? z1w&V!6LgR$-U*Pu3oGe~5f-lVcfFL?0L#>IP97&X`xviGYI@PCnJBWelOR9LVzXN9 z1dik2SH@l3iowU$lpwhGBQ?Woisud4m*6R_8}$zmm)~-7tX@pnR z?6*{9!g`&j7(NH3Xc`{i{N|C^|FBqa7}n^Cf^D$&n6XiW=F|lPqrs6N$^HzPCrY|# zN_cFzEB8q1-#7T&YYwqmqSmo6b)&SuSxD-(66Vc*~+d$YYz@Hk#NH;NvG)KGeQp!Fe-Bi-mNsSl+) zQ`bqfW5-(r=*J85$7mteuXg zF&ii9By-tF^?=vL+HMR8cXV`s7SH~sY+6#1kZOrB4RC|{OzJT4T(#Jc5_0uB_imMT zcdt3eP{Lk-lxC)+*D*Y1W7F6R9>FOsKs$I~0a(i-u<-J<)Oe6yrBKKBg_4rg%a`F0 zJy!&}*kjnY9%weIxC#-QODI?n?#K;9?|P9M_M;Uz&9AzG>wr8#f7-2<${=A#@hO&| z!1-D70e?>%<6Z|D(~hJ;BTCk8^Nu=Wz<$Ne zOFVH!^-^-uf6e~WfrDtzw9MfN+sRV;VhxHx8<-nn1lHrt`Cum3I^sWe4fckF8#f1n zh{_^V$#y79M1M7#WwGUK_|hI#8wTs2bPSQ))k~2M`S~$^t(#nh!@+pk+{P8RA@WYb z`xGhlQOHyI75pYx#xac`8+WL0!Y_Wk^@3;?Hq1rSsr?2ScLleFItni(| zn870Zb)^#}xzJ}(0BQ-Nc*|V8B}QDcWjt9?;)dqu&*u7Is8ClNGAnjnH_`e8W&qR< zrae5stfb_euJw=gxH`K*sa!E<|2V%h^D;%;d3SY(K`o>@9aYx5zNF?%BhPN_L_}^MQW8FXZtDz~ zL;a?Ge-wTo*OwL~wgr9@J15^V*DPcP@o$f%e*!zgpGZow=I`%oc}8DyW*@|6z}Aw; zM(lMei}UrHD^n$(1k~zmCIs0={4E`XH+IX`5{>vW@bS%Wuerc``6?yr5dHN0bP@Q-t>Y=Ij@6G zB*9NBHT3>fg)+?64~#WEPinFQotGLHHUd}n@qyPkaR`2}^IIbX1X!j#{1 za^GQbClzQXf(b{g#_J&+&I0Nl-x0-%lO=Ka!Eic{zs(5m`R6itIWgyoojguFDz?k7 zL_t`6gWLXw8|}JEL&tVbiUIDxx`n8fRk!M@RcoAzRP+3 z?t6UWwS7_#ONVJSmz`G`-HMJzvq7)+?b0dMlMIH9QVQS^nE;{9DU?neoN66V1VD_-!nZ zEJHt4pEXU#6CHLZlB43Dvx#9vY0n;Kq$L8DR&R=MZq|m=amMEA?01CxxyERkNYd^; zm-YNtn=f|vYoGartgc?qVQCF1n`4@O^5zk`Y{tKlm%F19njWjQa2_0czW|Bpw_89; zlk@=B5$wTONb<)1eBmz7v@SKiD*kTz;#BTd?Zu!j$I%H64r=!rLW;w)pX)5FDF*LG z3TyrI2Dze+?+7`~_mI~p1t+@0fwcnRRjE^4JASxturhaN1>R+&-;&L?zPJZBZY4!q zz=UiftG05$7JwFQ@Gi5!W&?ym^W^Y(GC~Q#xP;igbyiEQS&Tw&Z1d$YR4;DUofl{a z&XrCsW!3}z)@CoVeGhJ$^OqL(q5zFO6qe{FK0|#2)GWU=nPJ8s**?)7vXaWH<>&;i zhg#?`cu@nOriBP7A31KR6q$Fam!Agj0L znYV$qils<5+uZta@^FlA!4Jtg5CE-^ek%2aZb2lVDH|TAp7<8uaW+`{ zz>IB`7=W}*l2WFDyIK}dwo@Q>u1xH_WF5K;s9P*vbnhhHU@a6)A;j47zW3i9%}YF&KPp6iJqNuQ3OgUJb} 
z0^X<%uUiZrm<9Uf3Z?t{#w5Eh_TO&a2y>&%Hpe)2rnD=x)<|$-H7w6w+#K#rPdhTq z+jI=ho~H+Y9qahF>s6e7^7-@1XQ!QOa9|%5&<`(`(8x~TTfWU}7WMJy>#g0lEn!O#gk68Kwlch3L#sNofy%uv=%F38 zC0)P0wu5k&`ZtZ2d~PQb6`h}x!*1~%*ga~> zzo5U_p_Dv#F9!aNM*gX~(&Q@X&8COyI-w0+S(SEJ+^_WReqJ7zF+1weVw=!Vtq98x z6pWoa3Vv|OT-G}ysfmJE-nrKET^-o}0jnj#k*+jLAjR)xN5$mXl{OlFQ#i3z$;N4Q zkuP`qWajzBE=nHO{rCI^BO(5D1Wn3v%e;5dGf5A}R_LN+1U-H`zhU%H7m#m^96+&c ze0$&ENFV3f;5&ueoEx5LR=Ra^!U$MT0A?_$QxF;6K4KK~wIGAbtFlU~lnoJ!bKTSeYxw@>&l%79QI|s4Osz(D4iSX$0lRP(IH|`?Y%x z=dVLaY!=*&)2i3M#|s8eUQn?|?Iv|O!h6XP2YVZ6;+&rI6OK&9Cnfl14BsSp=tt%f6ylItIvtYpS*XQ%Dqh4XFeXjP83s-g{-On@EeHM)kAe@QjpK%8r!k`8mKbeR*w?wx)!$kQ0&JTrHQRYe=fC>9Ba}(~}UGPVQ(4 zZ50)w-4>4b$_2@^@-4c5--N~e^y?smzLa9W;X-$8y{+R?SgvUZ)xOUtSP;N?ZxJS+ z+1nrRqzM60YHxoOWH(!(U&Q@C_K&ZPX3U2zt$8@Ygye1kvp9YX-KJDrcNu37A}y0sfk2YWFO1_iNQs4}{VEnPb3u;1_m`YAOs=Pf3u zmC=VX^DW=5Z=4qFE0iv#le%p&%qA#^Xt=;FKjbiy@xlm+C~)%+&+sNybSSc8-Qini z9FgSAbin4pZhb4qTr4ghXq*EH{?h+75K|&g&Vdzfj zmIeW(1W}Og4(XH*>1LSu9eh6D_x+c}S~GLs=UlO`eeLbERo4<5q!Ryk|HkF2db+oj zXE@bmSg_|y%mUC_yALXHlAX$6H?*GR6zsGheMr?HyCv29nQwl-cR~nzivCxVkd~B# zs69k}ex+Eztsv@Np0FJOcRD59f`Nt7W((Gxxr+t3<4Zb1?uR@|C z(?`J!4d!%3jWT70kZ28p@lOGY+PravgE`zIbce>vbC$WlhGOg2@zUkNTe~IE+kHaD@lNsAogZukpR$2TP41Li~a$^>&N0V2Hyz z^5jkjl>kYGDivpYq6GV{5#~a|f1_GJAL#}$Kw%nwzWfX+HN+snLF$B~{aB4VGzC}n zWx+9RS#|MBbFTRv^@cEg0fmpk06NoLO5Qn-Vr0wxN1FS!p#~63G63DhnbB*xt>uot z*Aw+?kk239(C#j`h~*^h5(IQ02fU%3X>eF(pR+C4EUCi081E(v5)nDt#hf{b(=cd2 z^S>s*CXVGAdFBbzV@BV(OSx6%P73mYg+;3x9aFu@CwJk&D?5Mu1UWh=o2#|rYVT^| zR&9$i8=7qi$zi^M1(afo=Zitz?dGKo*`KR3pBM1oR3j*COl%|A6=(T)1)1)4(itV3{M#Ob&>|8LlfGA=gsJb8{jyG z6n$@}NM5)6`MNf;WbB1V-$8xvkPUT(i*o0PHQy>1{`Kz1`^BnN0wH6EtTVUebmm6I zUD1U^)}sB~&u5gZ@GK4Y-^0nkG3ZhbqW$xz#kA1i6z7kJ?IV;hfdrpxvJ0#tNS{As zO<9qV%~||`TZmO9CH9N1+#c{s8%;S$&fp!$t%GFT+1s$S;q+Fit>w{~(XF@z>Vs4F zt@@$KWDK_^Z4Qn6;6bK;zc*6cp4##Jx9(WudE)lc&)3okh(?Pvuz{6^J>o5g%=yv3 z_WO!sC#oo7KSL5Ptp`QXr4Z6{Mn(e!<+5-db{uQ$p+A#F)`A5D#fs_Y?%Xj>1P_ZP zoHPu#L}TO_yxUEZ!|;2e8F3Etbel1#^Vq@45InVO$lldJJrP1~l&u0g1&N9$X5Lpl 
zdu7s^`Z=f`gjE?7U(BtGG&{K_1ssJ6UYLE7BOH64AfMQ6M8Fue9&+&$Hw2d@{p$LO z3l~`=hWwtTo_|_Y#@bd8xI*RI^A?yY`*RXRQ*fY|qVf?Iw@oH`XPuGTdN1Ia)y`Xjw(#(KKFOs|DH>Dg=pW25LQt6zO~*)4nt~{to$6sTvj?BnS0+q#zh_0G2Mb3^{~gJvxU2uHChOM2A{aUJ<@*g=Ee_ z1K!nyAb`tcDT*F82Y>l_Lv~O{{i~V}hqxy|g2IB#zZ#-t5^nx=FaB}xPid!8#{NWx zlk2djVXV@H$m`=b4;u1 zD4jDq@|$1d6Xe7{hfhSwq0g}M45SXlvY~iTxZ%Yj5%2YmHID7%*0@@%gc%>cB8-$XNz__wR2+Ht%mmTbEl5(w zTM~3A9nn}Yp$^5szojRk^QPwbyH{^r&)_caJ~zEUz^-*4`64D1Z?31m)!CdcCw;8R zhR-%7ULLnizxTE}T)L}sKp@>*E(gK7GMjl8g#Uf<_{R{BAwfd+H+q9FfUEGw{uDN) zr_9=)fT5ojK#6zGCe(;(V;e_1$tt<0A#OhKD1Hj5E%p-br=K%bPem}==LO&dxkW=q zjrh4XCp{T7Y0k{ORQ6t(*ru%Coh8!TVh+M-%d4yc`7>%vbgfBLpQ+oaFJ2?AW;!MvhFp}GxSh>UnSXO`6G}*~$N7B^qnI)KJd8B>TTm@L-ZeiM zFY?=m;3>-Z)DzF7fcPnWb7_63)g61}eW9U`2s_|xMqq7STki0)Y=7&^9nErY$vY?I z(}>yC+4;T}TcDU+escPQHJMpQaZsTB1nJ_f;LUact+GJiN=NSRx-Q#4=kv2NarZeu z5Ouw%f$_1jpe)Z8MiVz)U7;G|&^6v1tV9qOtDZZa}*qvY_cJ z9dYDW1}z&x!)bt76Sw^93i3{cq&`z-Cp^I-(nmRK37tRTD_S56Odt2`xqyc`e{G2f zkUHy*9@z;=I2=D5m>!oQGL#03m`wNfg`c0j&7ry}hmg5J{5=qWPQ49grJp=cH#LB9 z==Qt5;5O%!EEba>Ej6)o&7DhWktQP=+GY z{)^a+B8Xo+Zjd1|QGYKHN3qNx>yQYgY?@D?xWk3C+79xJh)4lqRsqnJmhODtx*8`= zZRG7!yKOB5=&fJEql_}2210&dL1dRK*mBf`jKr%TWaG-I>op!LBAeT6ZA|i5dPVD& z7vy|_v{RIjKOVG(1!Mx1%l?&Dndd~<801hd1UcI2$lz>L=`?NP6-!9Lc5$YC-uv04 zx4>c3Vrt_4jR|n$%rv?%4yADg-R~}4EC4r8>-5K1VM|-gZAOK9Twd$R54HbxYNbCW znlo5e7-M!v)uF9=UH$v9ptN3s=5?0I-MH8+`p; zs6W5_ca)HEfFB3tHgGm71e~$tP%Lt0XESkbOd)05-c$~;yPHMM&pORM!7zt?ZIZ?w zH!(VdUalG(mu4X$Qvf4{c6ARsB^M{$)q^1_?@Y&7?2mZtXP)aE zt_kPuow{SSWE3?UNtw>Vg7+_Hh76 zFynRB6IPR@8m_<@bM)$Xtqc@ko+QC<=x3DZlZ$UdM~X;3?2(=fCzOf%NX1D7Yi#M} z)3wtfSAR4fKc;<1^4kZ?Y|>+0pR|7`ScyaxIV^~&kBoi>WAx@JlpOk?5{nnY z^kW;~Tx3Hz)z%MS!cM(+XNxfYon$T!jq}~#8*|%n|IX45b(Qmm;_gf}ts$^jtUh(;L=16aswHFO@NBlV5Ve~9 zJwc!SIWo-TU%{zt)pyv!} z9B%O$&crzRy2uG}*sC^;AA2cBR&uIXwoin-=uJ;&q?n+7d>nW5_jx9wDxxSUh~Qrj z1Co3;)Pq|YcI|#}RkE{qEM;}liJ{%>^1q&r!6A@Vl13K!;WmokArZhEd&LE&3xE53H;4%yC)a 
z43PRX08x1G`)}j_om`LGzYW$vF#im!DvTStFFuW@ISz78HuSBW5`3&3{d0E?FiNY6PRkY`%O=!hfOj;&D(^5{h3t8sGK@HOpf39#avY2&(Qil(V2o`@d_NS^HIIGDDXJB>+bk^qIG< zOZX8Yh%;{=5=_cF3QSVIK#=h*OGHGOkwFr}d3vwqqzN29LQD>qTcDG!5hMxasJGT7 zxe8m$o!#)Zv7SQtz5m~ z7U2f_{3ZS@)kPuqQB0L=u4452$0US2bL9$Te$Uq8#58AXENYG7H(+`u{UdHk4p-NQ?zekHr z_JIYR$M!x}H6mL$4+B~B)c?MbF*eG9$~IlGpZl5#&1>g~-*1vw+G~IPcb%RIR4MJfHfJIZ}><;{IlDukdLh6(5EvM{@v z@WhaCWgY6?_n4m`=9B^Ui$65dxh>2KiuO~B>FJn}ft25;kMv68Kc2oVvrYfApj^&q z_nJ%x9%LRauWv*ZXL;9VnYXZX9yT4v%WC|(eM$Y4toS_hTo)n{^)3xo`498pP#Y3+ z+4PfXsPm4MjiV^Y*n8(Saeq~M>5xaw4=2m;RkDR%sLh&>=mg$7eiK857D-S|wkG|Dif~?o{ZJ(7ktdN%Un|LDQBhk zF-*K3M_xf8kL7FAN5CeR14no8m41ex6K)mi?M;hO5qA-0oBg`4eQkx<)pi1+lHiH{ z@E*;&v1%H}Z;iQQ=|{c?$;Rl#@gpPsNzPc!@eEQUVyB(bW%BmZx zsB6KfcX$3n;L?SUX-V}=@HX%X0UnWq{l5e8&;!5{F?Y35hUTl-f^mhe=no2McBaJGCoR2qVS2mC zoJ!~YQmVbq)bkU*@PJpA=w=im#Fjn%$VFG#vvP`iy08zhojZ4rK`|anUkSqlzYw4& zY{(4oFlhgT%fAoij(PCwWz(NXmTjPdn-A1P*e9-CgrvUU?ayf2%naa{Zf(c;-l6HE zL`;Sk%GSLIWS68W4m^8`sYqy~l7?Cc*`@1wyw40hXo$F@`0UsS65HK%Q&KJ;I#j#ohi{wm6&&N)EMpHFFgKO(#p zNP|3QVv6AVLN?H2wjiByd7S%DwAV|Zucw|)>u-icdo;mwBRN8XSn`2KdjbkTCnJGM z1O3^JPb}fjO3cRf77zso^v3U_;8Hg9g8V8@RdkQyN6`mw(DEi{lb`?ZCgP@nhF2v^ zAi)oD=gzEMs$M@3Wr+lWAryApA0f(>kVwXoHi))0)$ zVOSqxYW^g`KG+!6PE9tAkX<;W`ivT{FNo% z9OW`DGZ$U}$lz)&*ZK!KZ$f~FQO12!SpKRqKk_L{caskrr#mhu$$np*(+m84`ltb+ zfZPjMQ%~)%hCt70Zf3+*?(Fx7wVC(w9gqZc+g<`_deESWo5|l^V`o!Z8=Ea&`ZhcdDGXlb~>UrnaBvrH#&y+@k4_Tc)2wpjta8reYOk8&19y=5M4 zYHGh)CJZ*B-b=PoY!UbGVz?Z#R9FGvXJz!dXE0e%0C6z?Em4S{sV6sUI3HZmkWHua zf3X?|9o2+l^K6<5M?9(jMdLq#tze5?TIaKN-fBl!PZ3OyAY%_zm^Z}h;y^0s9jw2JZ(@wSlCuqHVNyt#OY0oc)_(N#_2(&tHM}*vJ~1-;DatG)KX?ZT zx+=&36Nb=?0Gc3`&@^IKZog>V9?GPBN8>)s2m`FkbpIgyD{@H^KR5_gE1mZ6GW`-3 z(W}w7u5AyA9aJF(3+K`D51y9^oyy(3Q361Kp$-emyH>s920iDC-Ckd3-^|r*nEA)A$ zx_Szm-Z0LLMa>_64|-)MD*g8++KJ+$+veq&M&RJu_)A{;l#mIZ!JCyu(JW5`Oy77Z z7{?h^0{<+TgQnmKcfa{o8vOTucHjS(A4UdQg%C$gy@__LTgyyR-Wsc- z@5`VqJpl2>{x3oi1%Q!2q{3%vsnE^_8a7cmTpY}c^>I@MvkGO0*!XvPd94%79n~N> 
z$iBk=bsoklesvwT@I{&Kr543Wdw`Z)`cyji&`sz1P?iTzca7Sb#bO`QDoY#yZo2%jmn=0bS8yyZzNdRQ&&B(0j6k<8?UMttjs=cyRf zy*6Yikzut{9NW*~{u1{YN#qfL`@dbgn;%ZO$_|b z<1dN>BNC`@@VUxoz&0TbaXi8^IpADkkNCM!d)+HuQ~@o7 zoiAelchKH-x1Ql;h~inoJz-`1+j|ZTx7agY`qfF!68`%AR5~6B;xGxu7mq>@qE)gO?=9!NiQl_O6W@B7?Q2 zOoH(Z*O`}5o^g|T_U#lU*@Kp;5pho02fbg`9;63F{TY$pzFIr}bZMMFEJpq_rVv?f z`V+A5#^(R>zZe(UT08QRWupb=Y?cwH@+^=%Q{mg;WHMUI9SgZuA!_25?0b~$2=#uk zFukESFSCNh`^U?ANy%7#(69zjD`%OTAgC|IPbb4W8L(Q&82G-?qk6`-?R_V#TyE1n zPF?L3`)_a;=70(tX}fMvE5Dhn?67m)DN6NXv_5C~*QfKN-izgD2sqKKK34h=hK)=$ zlc9x>G{`^2KbTouu&yt zJTu6)j=CXPOHcqDzw!J_B~$>XuW?Y=?k`jOS5;h5aRQY123VC?A1pK;mr-RFF+6FD z^@AN|xs-ujR*iuJlb}gRG^)See3lJ$wuk?ng?V`GujWjCcY3?5GphBk%Kj77 z3Wy1^0M#o7_<3guGTRGXoFkhmgTTe2whf7q_ee&xt!Gh=n?8{yg

6*SXg>Iw{z{ z{2&{{Oo1StYxmc05cf&$p}&g``7?qd{jZ*LfLl9CIM{ef3EM0+H9EYU{O`le;#&7- zYj?X>6TJV? zI$|NLdR6`a+EoOqlu>Wl^f`p`Aa1T}Q}c794J}{tjS9p}0?E{v;T;lb&f5aP;f@J5 z2?~FlR@5GM$@1|L1*Q$+A<0W%kHuC=jmke1J`}($4H4iSw=czVvG2d^ae9Z^W_=eJ z^539vCVcXmG3C^uq`Qyz;CE^nKs(<3`X6TZC%P4=>MN5AxyHw+uDwYn%ngEsqj@`9 z&wY0mNoLoXv??orhrf`mf?=ByA?V&(5x0%_>Ki&r)SPhkLz*rVE5r(YEG+`OfHy%9 zE4eN4>lTx(Pr;9Aw$SL-c{fM@R*u*EZb<7^wLBoOGkHJt5wQdTl1 zICQO5;D%kz+7-78gSm$*uyC!lV>{EnjkvNXV?Fi2Zg^w)@%Ss`%NsOqAtIRuRfkz0z-413{`g8&@E}G|ywUAg7ELaD6r;nki%Lt3| zy1dlxCdGS$xxvK$D|m3P!1hw<+n!jAa@0geRAKhev`FjmB#LKTVX77GIpLLN#nL{p zD>j7&xIWaqfv>}YjOmPDs#y$uSPhOYjC$tulnj)bPQgY~K=si;>S8uST;kR(cQaVrAXPt5eo_hFycgO+t-jz(($dmpK zSd08G306d;wdlr_XwIzjs>8R?B%%Wni~MOPdYR}q$P*{;nY&cgg~c+9LYdP%Uio{P|sSDN3IsBv69)5mdDNR|^pb01w?U`qVcxsNQgx z@^+apD^3-I8E6G!6}j86j+c`W$FeijE>hVlQgFFfw}md09@1inrVO+vIjUjsw`?Dn z#v9=Auc_riU(px8G=RiTdG2meS%1a1&@9os^XkzU@&4jOo6zlsVkp$v&v`8vCp;j#R$={c9Urx`n4G7{9O~J$0@IKDX>`& zXSr+RcBf~a-_PNWWvIrIP4H4odMrGF43yOw%UaoG`QuXCkk4P9kh%7)2ONl??sq)c z!5OCc!jcqOg^{NjtSrJG9nPX}=VuN?oLd7+;!EYjww{cmg3()>tiOrBqbpWq%j2-; z*}eI)J-N}?_XM{#Jh3^L_t8p=>#e>Fm=?Uu2P;kkX=C>jICa!NF%R~Ij9kZL>Yxa} zd$6OaeB#6f5&+hdG&D8w9Gr!6J}N$4eJwRYnQ)x#7XF;&M67N|>OIoD@_Jx`JsMb!7m&tD#_CVhox@7teRbD>Id*f(2cLJZ z#=KB$m>VE=TWI+}vLPI4EMm6c80`+55B9n{y*TfIrSunqtsUdtJZ+&23^$$NYFgv_ z<{ZZXwF2+SW~sC*hDsGm6&AVhm_hC4sBgzo9&<$>R^=0sxy{w?pu#KRw}ZG`A7ES*-#r}< zk3bRX^K~1RSaf~40&ku?$im}=!hM(x>MNp|{s)aLL4bp^*kqvs{#>mk_EYvZ0!zQG zuLXnstK01P1XUE7ZF(hiXRsZDUz0(@orW zQ)wH9o^zT!?bxF<>d@vTbP?1m_q^ zD%x@=GtVO@_70m7adpSgl~$;B#eU3Z$l3y?*F9cxMH~O|qa=}$N8YN{nz3OqK!gIx zMFSabA9u)(8jk$xZDoIoRU}9^l*(ITH-C%KYvWOVA(2>HMrFkWDJs#+(yb z`gwoS#_vZMEK{ORCy~#&xa6OmSsX5t>2NiOs5)6ThAem5eg`L;~^+CUqZ!Ih}=zU=!G?XMH4 zfq7wzENa#%|D&bnZZdzd!_`7~YSLdt*{?>by8X}u{xH+=Pn&})Kh=I4afUA6Ag&!I zK`Yc`c2&fvaA?E{wvpJgs>KA?Q$*EDG}^mtUGumfpWfF#L1jEYPY(|ag-f*x%x}QD zVX5oRctT=K=zq~09kq)1-CB2ybx$n$Jy}j*@{`Oa8ge(9H+R5)J*>z{ORf6KJ1)`v zUPK-}n*0Y^gp!aa23h1cUx{DS3~aqG@VTh3N%xX{DDb(g%4>TgYTyZe3UW2mP+Elb 
z9XKX-!WZhZRwVld#TtHT;m3mHP`5kx^=O3Q&r6M+$jY4wHL)U4aV>}cuMP#JAs9OB zG@TCfJi@K!csWnMiGk@hKLFfi~m@ZYu^6isI~jPTS3)|O!v6Xh)dU1NkBP^ z>}ahJ$6=c+kP0;r!j}^yp3`Ae-;wwi~wSkqEfCSEz?s&5X) zg6h3y{`$KkHQ=5wYe(GtS8C8AQ<;&zu#q11VAroPCwo7h?}CRkEj`H0Fr)NzixFQe z_bB`kl$2GC)icGZP^3FcHlu_XAqgjvpLPkb%$IX-lk>37*0dFuD`w|{9=GeRj1RIkR={Z$Lz&Oj>>N3GCX)%rFg z!5OsMUv?8ar9M@|Y(azMX54eGFmjE{Wh_2=?NH-RA0UrV(-OjS^DmM|iZ9?)DRO>p zqo56UKUh#${IM)EQLIuBW$>Y(d`?ulhk&I!ke=d{^{BstLd_kHkBRTl6&@mQoj-UP zpM}*&{WRR1N^BT|(w4674IO!Gs5$`;7i44v49`NWII53G}p`d2Zzq$b? z&Hisc~Z{)5B*6_-0IL7_4C4RNti3Vuj#oP=P0hEkK${ zYnItgTNRu1uUGzknv-tr-&U;ZdA89N)l&HLusJM+%~)c{zA-H0&0=gMeezSJr#bN4KrzMd5n&t^U{QvO3)PGBf>1449}?pGsgw@oliDpp8j7}HV{m64yMYmrHW_1|(p6dYpV9F1F=U%Wc23wEn*S;IWR}+x{!82Nk z{I%7R%zuw}XR=hkPmOqBu(NYNU<$4acvYnZ z+O~9!Kg59WaXjQ~J|yFqK!85=h}Z_mTG>9H9(Tq>!rAX@%9ZQ?e4M2H z*-9QR9d&g~a!&-QU&XPwJAuBmwds>QC}bLYG1Fk5X0D5-sVzUwv(c09SIfu6_LPTD zuR#8|m~V~$_;pJ%?^7t^N-fG{d4y#DI?d$r3-s?5xE)j9zIVh1-#W}s8M@kjtN#pn zTRcho=dz2)xDZSd`_PUKn=g&tT=f39-pt&&J!~Noo(IjA2;c*5hqt~GPUk{RG8uWM zI;(nqk7HG>(n2y!i#`sFmBoB56hgf3H@`)c>M7I;=0HBeXnpvnA=LPpX(ZfaNU}f4 zNe(%CpYP~%hO|C;zduveZPh~h)8!*}#EL*fM`fl9;~-Mg)B*t+K&SDQGnd+Uu?p$yYO{wxkU35^hJ^lc1}~4bxiXG=peO?; z!y9U9e6F?GGqo}GTRp7AI0M6!1Qcm_SOr$#Tt5PmijZxR!}v&GLoMG$vY)6QBpMl! 
z#zxAZ$Xr-EfPSG!T>aU&Kl#y0P|r^|XXWJ0Q^Z;Ka;)J`&#$%fKot>sQH$y3llMFp zyD2YI7!FD+c!zsgm*c9}TNKcHC@)?3TIm*f(|BCEmprND65Y3F752b}$@ z&v-Q#`j**GQ@ub0ru=a}Wnev&@gjep6ZrhzedJ-6-x`vvjQ&EhFX(!Au|G?jo78l< zbbHnoMmZk(VXt(*dZWze_nh_*$rsE2F>3%*x||4&B$JL^8-S^la&Vi zV?y1<2D2ZoM4rBT4LfKPjNT=Is=TA-Dv z-y2OX%qMLdTr`=eM`IG={@Wyu{&S;o3b>_3*0l2`YRr9nGvt;Qc7iY38^8{#nq~`( zP2rf z(*5JihU4YFOkIZ4&14DtL&EyD;k~s_$CQe>4Yv-57-z^{ZuMW6&KodKmk@HV5i&u| zKk)YR_Hw*`0b$%QFo|a}mi0SoL+5e;%Ll+qGvg7#b8)JswdM=`9x@8~3or;72SX4J zW9RNBt=A?w|7Dr2@r@3ZYY_A&D4{TFhc*z0tk%Rbtxh-D>Z16S7pq;+|5}i0-lnCA zRYAqb1+F;mdY~zZ+(RinCSvtN_Q7`5W)3#zw|)9}u?A?8K$ zTB)|?6+?++ir~oG>+z>%=mZ9zo_4HK8@<--=MFe0iQbVy6=b&y%(km?5lW4_6hRbC zKAUuyM{302_>I|R4=a_r&wJeWP>PgAKFf+0J2qGRw}ry~;VEGJT+xZV5z>7^GtnK0>T&udB8obbTcl za2N(42aWlfoP?oN*6no`M?j3rL-*Lrn*_40(s^5&4&*1OFl7M)IdwwPt;&f# zx%-Ll^!=RQenjk^L`3q!h7PsuAXf=-*@D0g*P@@f>s>$atiW)CI^4VBdRpfa2i^fm z^a8H8-Rc|t$@h40#kG4<%Z%%YY0wEEL44#k`Dw`St4LI1OJCLg6K{Vukc*}^S&Wv( z`T2us9>H{e!4I}owRHo*JmH*cB0l`+RA}AQQmO#Q4>? zF$@3L=ij8)hm>>mzWm9o8o`Q;PfJLC4~$*M>}hT&S_l>x=2A5@sD7oi9{ka30DX4@ zjh_ZWnuuXi>5|c#7=z~)YkYn?-*cH$)be+K#})1Vjx8ej-G4-;pVA`FzVcq_Vvi^u z`3qthVOt<$442*Gx3y4Fg5O#Se_hrrYp*UUHS03BH?|hq>Fn1o->e=^lQ3}Jym8og zU+=i;Q0I2Xpr|mb$OLs*yX!d7NM_zB_4-|-U1=lu*`VQ3GOJPWd~iALj|}TslOBc= zIr6Se&8}dZi!X?lrYST(D7-P@>&~mW*1rqQmSW@v^>jL)--LdCrMP*KQlzE&g~9!; z*QIgY@gX@B0nb!KNTv_FAo3PydI+55c*f^%!C!;Uw)$i0e6GE2-pV|>JKplzUD|l` z>x+(Pc$Yg&jue{5k|KQWuvo8UTg4d2B=_r?TwN4n+tOEKu?#-DjI>-&VEJRgKEPCk z4m5;|+1~&DEaT98o-z=#p?&vS?GLqyg)6NZozd+KvjRi@aE=@ag{F`jBZRI^Wt2*%LzF|cQ!}MkUb!8TKf%6h6)Xwe0u~h4$Lr@hH zzTHRorz-p2oF({DnHhN96wO*jriJ5@?kg)wydWpbE3G`$7?yZN+>R@!MBR=Z=91W+ zMI7TUl~G*THq2o525*juzWdypl<`cI-+pGh(G~htcWj0e{o_%3$=r{-&b*@GN|pAx5oum^?${bYPcw+bw6^H)Ps ziqB~n&hF9`6@uwQ`Plh1U*X=@mtnDQ7e@JsWw?I9y)P*~MQ9O?4mJ=?1dc%SL1yA8 z6|{e51Y7V~^PDW|uyA!YWQfi_&LwR7mHJ{Pt{pU=Y2oi2erazNNuYg2EV!qR?m4g* zx-TObq^6&8pa|9dOtWt}mt@u|mY%%jXg+Q@KCE?Sw;mcuoqSrq$cmo>`>F~CxB zH_nb;QNk^cX=V_xQ?)U@us`HUqU=Am^T{6K7vtS)$tM&5+f*e$L>L^q`Bu{@1?u() 
z#kVYNIUa9i4GQnMef~795It@>+=rJHIQuivxAzd-vfr)BFLj}V?`xf*ra*8J`71x~ zdbM@QUB|oBBE{s7m?8$iduVg>GfuGOP=*ZUi4h%+YTh-HcLZ2#a!|Ibk}CIN%rtlp zs`2KV-Ey!vga@c4YZfRvKG)|UrjFf9IoGN}y1F7m_9^C75nv_a3=F!I#IVT(!~w)! z4EA*yRj4T>pW61`TLf36r_$*0+Rnr)COwx}{OPVXfhH_J(U-_L0S1ubSBl8z_X`fL zKqYUMK}Pi5t03}=`D4MqK&C*cDgt9ZS^q>2BvqghV+nXJ*^qI@gDLg`ouLk)7ry|)!94bZD zgi)u_E9}JmzDjO(-L>4Rs>AW|ZXJ95d4a0{jJss$bxYZ#!>B9C)jBU~m>~5`uI+&7 z^>#>t%XWfaqSf+CtBZHTzbiCymeMA*>hgkiEq$Sz-!W@Oy@urxV%KNryNi}IY#`c7@5%0SeqaT4@7Zgg5iSAO0_z0P`{lB~)^Y>bWvi%l}Forl*mU0!Ge>JL2 zx0(J+pwU1-GZu!xD%Knc>lss0uAmffeR^km=p1&e6}kb&p(d>iATwj z+}J(xeJYoGejCle7vmPAU);e9nEHhMxMO7N>b*Phr>25Pd&s>t*6ETGhP9R>#ekp` z;bROIKWp|NY19?kWgmMpB|eymKDZ{@OswwqZ;ItBw(!C!)|ZWbM*SYUg5~I&oa#^D zlsy3&advN9P`|O4luzH_JwakA0GGvtNH+6+^%z{1_!$V9D(dY3M+<8PB~J7mR_4{& z_84uuYcF8ON&-dv3bDqlpi**fbBLjHAAP{_g^Mbb4q_GGUp==@FgFY6av!@M?yiRV+_)Gusk+HxshX;VgjBwl}b7{Jp(E4Dt)K#jSqUI@1Fl6wSc^BtPDKQV*^l1S6Mv!|Z#@_*sgx{4Q~ z{88GEK^dNgae*t3v-yds>4Vf&Xy0+^^62iH7e-G66uVH+R0@!gP`w+5bw}~<9E#Z8 zV+s|t#4z7Gyi*%(u}<_F>rzqU(ic4sibaz4Bbr2r+ENs1HE8gj$Bf+d)%q-*WPo5- zDqJSNdSt1femC|7;Z5nMWH%?KW>A07?=0Lz%%aOIRgiNiJ;eSpu^Q=N_W5yx?=6MR z?@i*Hqr}5z9rHeH7d{?_%lw)RMOpe8)1YjJ#nxj=Uby2OHrEXHOQC=q(wCXZ4MDO} zn;8aaLyb*G%HB2;Kdfi&h${UIXpLS@ex4W1=f>GLm{K#TmAc*Dte3JrxK*MZ_dp3Y zad2j4h*$o7$VSVv#!+yp=5-hM;f_-(2|$WHy78O|bn;sxm5s4*_Sj=Z`*KoP8OZw}|?0?tio_byPr5UX`qpdIkVg;fL6^ zPh3GE5jFZG02f*KP5d=ZeB0>jm;aPx=k`W%(3xUlpj65obp1dbo7Q56((0S|Ojw5* za^Vs9nb!9WTsNtMyhjDb#Xa{uQSBvDRElc0kODcVOYz%pa-neCS_Ugbd&pyYEHS-7 z6Act}OiHVILMBaVUVp%FXNz(?D*%M;kKJ%q;Q!Q4rH7=}^r;p3A&J~*TG(TnnY^xG z86raCdrU#}s1?PQwt>bM$RLlarWpgBx5pra550QoG|1W(Tfg@B$(vK4T_KR)G>8fV zYm+qsk~sjQIm;7&B0c!!L{(gdzA#wd6Eq*~QHtDsEXyk-A#IRMe7rJsbmC_Uim z>#JARJk`IzVMBd^9*Hwdy_i6uP_gD>Wf8=X&FlH{TU37AFS`zD)s3Re**FAOh4JeH zzF80PFG1#)!XWC`p9$N3p(SLGXnCY}a*gx3-EFielTg?-ow)s;*()>e`f?IF!`1TT zcjD~$>$oZGG%HWQ zGs0I3cg~g#p!N%h;|gQtPHh!3d@l8U|17lYMu{La8gHqK&glY*Ma0j z6vkxr@26FN-Msnf#8IpaUyuD? 
zufl`hSXs=oeEBYEuJt@ooQK?)xqVf}KRJQkANq+t?kn%Ac*+`f4V!J^Te}BD(p6Fh z42jR$zgUW}mjw{n(UqVQz>g1gtEHrhHT+B+|s%f2JHDo~hj{v57gWbe$LQ_Ze2$ zbo`3RcpB=4hcq>gWs2LwzO*TZ64{_d!qBWzruuWG+wVN%-1e06hyXQ!B1;v_J49uW zuKy^AW0!yNo{XiF+qOoICw29miCPC5P-?hD3;yj;g@W;zXaSLwWH)&fh;BXfqi!hk zlL{u>9a+Z|^N2>~QQy0gncxo(9sGW{OdFL^&taeVo-D1Q*6#m5s?Itp%Iu=aRM1 zwa$Ic+2`7y>)NA-Blu|4W!Inf!U9E(aiY*4huRv&3hSOO+P2TW52=GucATg-+w@e{ zl+6I|@`4^@?eOX4T#Y%NpzSrOgF-W}{@6oey91YogZLK-1zIfOW919p$^P^Gso`jP z6NC9j*lKPyQoVai-K?f*yITwx^pShCLu5$>IB;>3`_8xvB}W`<3x4rirf;x{CD{Qz z+pH-YwDLt)H{pm|mb$wD|1|kUp~H=L&k{8g{KrH8dWlivgZZ{6M4tPc4vspQ`D9gy zu*5sZ!$k}lJy3t&Cv{a_!{(;P8r`>jZ6Z|s5o^Xc)D|~}`a3&3Zh}KEaIL%GgfYv+ zXq3j{zFYLJhkD1$2v^266MI~9eHKa@yuc=a+;Aqiq6QgxEdcc7pVROqapqq8-rqbU zf!rTkKkMC&vMI)!osGU|1vv>>jR$Ad1Xk#U64ULJy`hkoKE)$L=oXZJY^?fwk6#3q ztx-hI%`E-bdsmJ+>Fs>0x3G*0I>n~Ea>n@-|3u2n21bAtJnoe=-^&dNRnLuLiNYzf3o~lE6r;A15ux zpfBP0-bb4J{;&!x=w+yyT7`;DtagbSvE8#~#wyRFZ`|cX_)|5#yMv^N2K9CZEXGH| zm)OG1#E)~!8XHIZ>=}+_{Ww~BVvH}ivQDsY8)v#b$o9^YtDM$FUKln#PAzj(svt$0 zHxbo^e|se&+<%tzY@a(!U!C9uO}<)^xKtC78_YP7T2s9G3Mg#^9@{AfIra8zm=qj? 
zuAlM7=#^pTG@yiqYBTRTbC?%lZWZpE9)J0=QGL~ZvKUaek+}39ik$C#yFST zxEV4pW9TV267d&vp2oc$0@F|ww9p*>cPVYjm7fklZ9ezZGU@;( zEH!;&Afht2d4f|o-618tj|1xw9IGazR}`8INrH`(9*eSUpi-Jw&jE?5|DncBgycM>Tj2T@E3lbL|X*pKQF zuw}gc04X)B7N07A7hrvK6c<}E!FyB1)`~{%UaVwlUqI_ImF3`FT_G~ zK^s_@sBCYc(PlhDw$k0MN4qaO0DsFx_@gGG9y03n%aj_gvz_DoJYnKgKT~dIxJ;&eY z=ze|pzzDZp9H>}a=PfHt@#6GKV?t9IL&Ys~yY3e7rwZC6$O+mAehc%D2ly9_KP9<# zTDKyKG?~(S?3YhsfM*PR2(OQs^Z)tOyJ9oZn74H<~&81RZTt0duPcl2AP zEs$$++k4@3DL&;O$*PI(X6s`uXmW!kK~0iQINtskBt)vVRPXPc%c00*t5fb~Y zDf`XMII)#qKV}FNVRm5-1?BEh1@2Op-3GzC(55SlI-CTf(Sm4rBn_or~ ztjN)AJ`(A3d3uvn_dV|NeqKxeB2JE3+-1-u&V-{Y<{-&-3s9wlAwt+%$Y;sHs+)t- zRki9%s%Z&)@FZ`{Q4=yCk);fS_Xp!5H>-`DW73e$H6Z4)A2$+Rbu0|jkisO}H|;%{ zt?>+nHiVs^Y=h`n?E()k&cVwopzWLi2Eu1$n$L|A9$4(T*edJ}g^E~J6=DqP5vDnQ z1g)3H$goJq%?uZ2a5|548@;rVlMu8JQ<4ieuK&=Y=qWQ+!XgM=DYabyKk1NaMO$p5 z0WavNu?7VV0>VkWPTl~B$KipY7hauUV3^UjkK1~AO5w(#$10hUzC~VVN`VV6?4{d+ z(h|uw1*7n_XxF_K0E^Swu12uQ5;HQVy@WH$*W*V2Xbi{((Ioi#=y`K{WMiUq{aL2~ z^BNr{1q=C9Mh5pgU8AzC_cyd2$af0$0#*l#QhbD^tDqN%>IK474E}*%C<1Jmjr*co zR3X2)a%x-cYU@!Df29!8U12e-htw&sQT>3R&#FM#-Svl{lkxESt4WE~y%RPF=f3+l zS%^0X`){m{Pr|PIxi)+_U94G%ZLhFY2?^MQ6)Xwm0iDsAIX76qR=W;>V3%k`M_zUi zBa=Ktv0OypSps7AK>ATtA4Pu>zXkDBk>*peE~`ZW(tp2Ye~O+W4=czjj=9$Mvup2l zj~-kx_fGRV_`NDk#2=CL)d?Sbw+a7ZIV@Q320l}HTAr?=7$?y==Pn#q%poQvlc*sm zjbbw)z4H2G{azn!EBcV{Dk7YYwCi<5-Z}$mRVE-ipS@od%|$IWV?e*Wz1;N&%$k?z zTvTOdYd&K+Bu53j9LtpXS&rJ+(bpV051RB?DxPxemtL zs!~Zy?9I3iI&`1w$ygtlc_0_=catM}oZ|H>%hzD$6t~Ux@MBdLIun>&cB^#?m+V3h z%1gHT?$&=jl#vs^RO$3b*KhMg6sRI6IeT_JV}&hnp3v`*_gr5?y--p*&FWMRb@<@M zDwgg1Wy9^`&u3UiBM21RD&ZdfV~jnL4~{wAVAzh><5w@0Qa}5R>z$Jn7NB(>|1?4b zP8V9&>}o}Za0^l+U1qFoEeG1$IhRh#qap=Ms!1*9!@iKz^f!{SfQ-RfDchtQZ%y29z#}hpw1y>fDG%+1mp)xJfMChMe){4ba@)>|)Y=$CUUSyV;$IBCjLpk-^?U zhJ?3Ps*qhCx!c_U@)qgCSxv?F0)Y$2{5z^0lfT{(_T8gfL1iw^1}C$?%OplEQ$Ni( z^E=mYv+&?%^<|QIqn@zqO}qwrnHTRLB7WA=dNO@832GIXGiDeOf)CL@xJuIOW>$Nt zERy)zR^)d`n2C{Ek(^H7-9I_}wbT`rU6BA@Mj%ACN|C!DyR#Q3dD2*c@u(d47hHv2?Q57t 
zIaU{36bvM_YY<}CGLvPZ&NFF$Qr(R@YttO;d|d)6$EhcpemuN{!71%qEcFEW1vsp* z{5-H&O4wF~ydXg9Q8 zKF;?E`HF+a>TUuq8gc|GvH!wM0TS^MuV+Zj^^f@5ac4okQG^lx1*9dND3v?S0KCwo zs!*Y!LdMs510w$9&MBg>ojU$u!m11XtuXIPfYhd1we94v27878hYN*Ei(_$M#2#Xpt&`L)SBV>TS@*6WOGq)H03|R z=9m#nR35Tgt&DH8s0JPn8i;{z&pbONxET!;MSHN)iVrtCjRfB(|7ftz|M?K>dQ6dJ z9e{>juMZ?%tgZZ5qb++P)nR}C^Rp1?w6rwZ1a6@GsAy+6Z7iHtKoWAd{GE*lRha=Z zq5*9F41%6P1TBQsU{BfBq{c z-eaQfAkMi1@-{y5TlvP;U8kjCHEPT~lt*X_Nql2U&THKCK6ic~*Rz42$CsF<`zz$U zING|?(UZV6@F$7=%9=4b@53iSnE|Shixy#@Tv3PE(j{+1X2`ds0yXt)_obcHwum8B zj7(>S$8~xz(OcC1Hrp(a6=K959pG1U=S_3kn}nB!GMhqs>mx5Vd1#IY+_~SHz+k6l z!hVExu@mQ%g|MjA7LB&TfeQM+c?5KaEg)nMhmKIA+21p0(>l54&{72W%HDcv+*Zg= zrN(t-{`z;62>(6&HVsMCH!hQhV!ejWV^0J&heFkK+Art8e?hPI*SM-0OQgWg+bcCU zJwI(<%CPMJ5`EyDq82Cdt`gH7ufs|sym|$}NC#ZN> zx#gQ#+_h#%gFiYqu>Nz05Slc#U!pVQ`g{$?vcs z1>%Rlns_x!q-El_c^gc}VKu)}`jY7C4y*Sh-ORsw%2RJz%Y*;ERK9OOK0JcaT@ldr zsFxrzTT3@=`mvlH85x`ipi47QAWdp}&~llIzd-nam*Bi}X&qV0cwjf!3u;v!(XV-z5Q+M9DVio{Nn_Pn`>pD40`AvO<)? zp}UG(U<5QpUr-myWR?WB2ki|p2GVV~9a7$vY{hB&-~?M6xJM!`UBBe#-6Y?x+a@R+ zR6%%1DXU9+7}!l&e(+AuP(BEksQ(Rb3R}>>l-&cYgbp*mK#9UI*Ew;Z0hCD0Pe!gD zESgdzbdQ0k=TS~ygcX&fh4V`WhefO2PwM!vm~V66Vx_X(3-bhU@)r?1?q>gvHB(0>k9JR^q>&K4*&uf%pQs&H1=ZJo}= z`F|oMe+xL-(qr;v+iDDPR6Os_|>FW7{JwwSKrH< zJs~8VE(Eb07fVy~bg}2xcvB99_|zB`hm&tIV0$Z@HLDfRKT%s&50sFBRM1Pmrd7h7sYAQijo&F% zCaMk;-ZI%5?KD`PSzKH=J1FagWYtLy6v#NTa(50~m+gqxUVv*1$<}?AqN3s!AYC zahl#cnmti7SAh)n%#X-z(gBI5{Y@$MNvNO60ke1oQ|dHOS}q`>r&_(AWsgLE$_}un zgYmqd%exR{JvZ%i{VMx#@X!18rv{PqQR3L85#`v(pYD6LJ^v|&Gt!KU*kbgfq(V%W z9VWlCfJsxK_x;CGNUBbNEVnE`c1)oM-#96U;7MT1*bZ*G>LTy0U8!)lz6ONqc+0g= zMcCC5&Xkhz=ZQRMmNUVzbrx0HFk5ll2%=T1l%rF zE2Tr~FJBvjTi;N^(AreO|0;{NOm*1a>g2n1RJT$JWoj7vyWmY(q0#nI%I6%>@0qHf z^!|);HABnKJ#xY2*wueeFOI|MIUgkQ&cUc&!<4lhs|Rg(J>!R(%Whd?Pc7{**9(q|S#`#L z&PaYSh~x{RxuKu$3?VO(`17Q90BdR5_vDbt!=or?ixw1%Lz1=X;U(ew!wzZdLn)v9 z7&p}R0{G&vfkyP&5PSsC0r{t$6LbxyZ<||Cpw@E+A+k?3{;1?;x0VRu(AM9L)75Sm zWDmp6A&lf9o$r7>rdkYb`VIpt6I=b&k?}!v!P@DQZD80}tzg&N;8Z@^Dh4IXifU*6 
zur;^6SV(S!B~k~KS@bOKvewUoF5RhtW=z&+1sl+lW05fQc9DoVS$riY|12b{#8@L zn6MHae4A!1WUR|3A_^cS#%7o7crfeOyTpkflJFAeClS*H@PDV|Tm{-rp)8?yh?y9k zaqal4x#HKsYw*@2gH7<07$eGp7zwJVp7ARgTMo*F<7DQ@@?)o_=X{LbB-OEjm0DGQ zPY_bFBpo6(lEIa;?6D}`q6tR72Eet}dmppne@J3cSytbn`jZXyka=6CFIewz*p3!? zB+XcRq=kJxK3#6e!7Xl!3B-3>`|9gu8T0$D82s;ii)Zy(*TBzHNvT>^w`hjFd;1=m zSmZn&CZubSQE4eAeDw_R8ABzUd?JG1mo>Ay@9%T~me0#uq>%;wDV*Qv&9b>>i(1EP zn_Axj+mC{gYF+KGBSuxz((ZGcl&_~Pw9XAsdyLES^Cw@30aIvyJo}eUs}O&Z5#?vI>(l829T6U^M;8~EQ7~f| zI=l#aCzZS}ii+?d`HqxPq-v7*($ceE289H^^ny)s9z2E4`QB))#*?&s_(MW2w2StLKdwZbpY9}f=R2Va zRgn?1UpyppBCjoA1>74_R@EoGEtryNAc#CRNDvOnbd)l5d3Vo+J>6-!M=D`jiu1+} z?_sMS+&HT6Thd_vc69Y&?ep`}fFK{B26ml=m<08E6p*ftbUb_t_FQ7DD2u74FBewRTI1^ z31fG($na$Cpk+0B;|~uF8YiXD45W?wF1;gnQ~-poe3>?M$6=X(JpC6*@C!Ed*em}h z7wxo;2(B;KCkkg-|@j^~>|U~(t#A-)gk zZ%4+t?5PML>9^-%_^qSoK$>JUopy8+P_t1XBNz>2Udp8|SXaXN{GS|m!bE9ztYM|e zmI=m8WEz2VZqza>P=NEPYPhRQ`l}KGfa?zU`p;njN_a*}HqdAO3AYfmJp5EJrGTLS z?%0ED8e-2wL0MooEew@OVs2av)Zk>#Lgari3F_D`RDMHEkhF zZUZ={$Gu|td;rLZ#B2IyO{2GQ_?cpEIeo@WWYP4p9Gc2?4^=;nss0Kb{hD~; zRP7}ZV%&otL5nHEfPZ+8*DqnVhnUeW!;%@@ZM_8+%%({b!)vIEsTlLgx=(AD?6DsM zVHFt7KYG=Fx{p+O9QAIrbHT0D;Yz>T)_cv%6R<2pC1!z4Fi7QFbJP<9!o z*HB+17k#W9`_Hv6y=xFk;;P3Zj=Nd@0lr_d)C_Z8-=}%0*gFx;F)?n8$7FYZ2Quh` zCE5z0khflf<~L%x30;Ztt~--Q7G!1az3!K$Q~glJ8Q>=S_<>6LCu?s2qIYF4dLvrWy!1CCYT#4-7@TO z+W%a_Cr40i?myhe@!8X8a%{1~ub)K)iptpI2aO{`#cLF^c}ULpNW-l=`$@&k1%Zbtl+LZVA>d4zn`i)W z$T^Gby5FtpMZDqE{eyzNkk9{Z?b~T?i7a>B+W{9o*F9T|uE}F$TS6t=_c@O)O zb32a`Ywn&YKCL9Ad?77{&R{dn?0?-960ZD4KL~gA3{TUcU%q9n&{|F&>>x%V|=8*Gsss)BpGIe(d`o382~_ zMbJdl@r}#!3(%ht%5)R)?Pza&ZI|XH<4@;!;10+SzQ=A8=cH39#pn^UKvXwOu9I+s zUk-BN`o{IuZeHhyipACKMzZRs$AacGn+LqSneT3bu1P5Jv~LCKrFslw{PXD-IcoSx z7wS^h<3;7{>_2XgH4~DS^S+MrcFfJX{4cHIh!#u8>Or_XR4Z16fyj2rJv_83;pM?l z0F^-7Zsr*JQ}CyVRlG|oP8af6Kif||o6-;iS>j%=9;qE}KU)r1FKRlGV^bY)XDI>S z@o&nMBjClHQftdxV4wHxG3iOOFq9q!=4E;|P2FIIKu58lJ+lGRDLaU>4RGNIJJ%GP zlD6|xct4YA!~>(v%rv44V|ewYC@KE#74b>9)!vV|pT`12%U|zM5hkE($(i(u(qbW`7qlgs+wq@e03XbWt}^6x;YAUgomtJr18=sR@t7 
zghskHd%4>#bFBBxh&OBt>;3`tL-?`@qAlM1KWQ4uxnDaKf?o3n)8aWY^4l2GNI2p5 zNcDw@^d$Qz*j&~2ofP{D1o}$Y*#4d;q+*hv;}dsPx>Abnx@IYayZ+S;uw0M973bv~ z#TCOpx5#KpCr`cx9$Ym7aH*uL~(W?l@g9CG1&fd={Jan?9Zmu?<4e6MuHZ z&U-sIntFPeIN65Ao8p#V#T(wn%+~TSjQ0g!`9VKiz67$(r;=dMPZvgS$~{UdSY57< zn(q~9{*IlmyW3H}@vcXc;``Z}SC{56f~6loCF;*Qj9T8C%vRaw zG`8V((K3MjZd|->Eb{$II@%nUJEdcpj@n@Qe#1}iT;bCc$ZFHEk_6SjtLI)MwFhj| z+F}!ar2s-)sQ@~z1h<)r^4}Dn+n&KTqd(qrd?m{HiMuaw#{gN(bx-tjo(NoLuv9rA zBF_6+UceJtpBK9elR`yHJgGs6^gKj)ToypPRSbWIQ7l(%{G*~#Ry!-R_7d96ZU@8QIVV}kyYF=%cc6iUB zLGCBUDX8ITf5AalOLyKMk7q>(F6Zn$kPJFtQZ$qsq*yqK*T}F9L|l)<+cY+>E$Yyp zV(g9mG6RkpKTLQ{V+l}{3X74J2-sDu7M=Ur+3=G0DVV4zssR6R9a|h>D zxZZakknE zuI~<3dtrJWVoC?4L=7=@d(GycJMj}PU&1!M%rBYAksSH?&tlRQI&Fyu4LW^tfy+1Q zwxg4gj;?8R0?cuZ0Zp*t|4$>( z1#40D`KgQYJ>cIVLprEGkwTvEqg3wqL9O=jVQ7m&^a5ad*XlCp+^J+^7~WIdtW>7v zMw7FFJI_&y@Mrs#ua7!wtoE??n;Lb|yAI?9)4(p`Hdq&$ZTl28v`>HWjmRxxrS}LL zUVE&fXT4W&<^?&BVMjYZTeL^x-7D&>Y>~;k7g|Pw0^O??cYQi^jNd{0U^$v3GRo*C zs2l874pv{l6!0%!fJ!$q%fyn5H=S~@o8>?JFU|tc!@OI5!jAtFe2F=*jq#q$=E7?w zjr1ReQX0=b0gk}1`&f*yTWdV&3XsP0B$lr>dR|dl06qh(2}h5nGAlAzu^M-ZZhU_H zZ9#1sRw*rU+OTC=>SJ=-r)t!HRMW&Wl}DEkLqsb(hChqHeH=C@C&DZrv8$MzU_q(E zt|~<0!~8yIAt}tN=Sc&@2L#5T8k$3s12@L_3&Kg9P*-2u6+uKsTj0!qiis>HhdTj0 z?%Q1!qou-LVkUPoT1CuAFWA1+)QIJ-n8dVN@>a9-= zbM^6dkm_Ov(&3GjqP^lYI2~kQc?vN`I;|Hz15Z%lWn=JFTUzo6%lJMRWM4ewFYGI8R-moh>e6yr> zzyvihBp$Lka6a zhcaLo?Fc+Aw_h^kzB`V9Rvv&k>@*Pcsl6^~Ld8&9y^7DwcD{(YtwT*qIV}!g{K|d> zKR7Ynz4_&sk+oUZfn0vt_bc6w{_e9CzR;R0spP{h%*w={qXMqIwFCE^NZ$&xT7r6L z+iC?P^7H68VH0MeV1)CKsjw|*0q1<{80K`=l|<>o4tjcL{Sw{yyx~H9lcqP11*Ek}1q<-z zC~d8N-fUDJnm*BGG^1P(Lb#gopU;G&D zVHD~MWnjNq^~*blG9ukLagL-aeMz324C@sq?i_KvqURvF5bBt*$zR*O!ks-pwGTKC z(4Cisv}Buh5ghlVF){JxW0w5Hib3Y}X(L>kV}q)e_HaEitRzq0r;XG+&d0 zOM#IwH^?UZx3$FD0mhT+;%8zAIH@o)JeysN_aHbenMJRJ%gZ6h9uZ9{hcXNM+NgiP=OvPI;S@LT? 
z$Km6+pY{x5{an{qI4B!cy3U#p*k2n@(;;BVM75x#_C*Dpb74hS4M#2VSqmJB zGw^Xu<8fhYdF1^y(Y7;dcPrnmjEs%5oR8r>t#8ycbc8B(<*YGO+(s{mQVm&4bSsFH z$96mL_Fp(az4SyTx>wMkg>@&-@kom#e9gpU8@rpF}eJV0-w7|_Q( z&cpIo?xyYyf0aB}ek%Rd*v4dS_PpsSIYExrhkI-L_SajrCZ7M^2X$OXE%-f;hXOuX zo_G4R&bg%@KDo0yY&rEbI!}P+sNk4;2JZ3PP_(E6j)cqftAnMI!~_gFG?nH@{n=x% z&FSLJ_DK3^K-S6eOl9!;&VfYF$3J;EG?>p&Ond#>jtzsxz*Kfc&dsE;}yC3HKDbAPJLPlUYKzE>t@l4cM2JfWE7i6Y1@? ziYCLFDAY&K0Z^ERcd+TB@w^L5a4Hb0v?d{4h>C(y2Wo&T|79-|9~%@WqiB4 z099X8^EotH$B|P@*d6OW6|Z*27@nn`_T7osimleoU6s^jV~nA*lE$(9F&!&Lid|FA zKclxBEB$`4jS!8|A%N{_iDD|`|1GCJWEtJAvgjv!aYjg@=i@#?<%)+?>5F__Db8go z1i{GPM>s_86eKWQ& z1*EwB<;v-5A5*o?F?Dgd&WE{>I-cSs^S){DKE0cHeI(CxyFMl;sr5ncwKL(910&(w zjUd0w*3?HLX!{0FfAM3u%~ko(!$sff(3)2s_0fsW7&*3+#n7DYrReN~0r^CU_gO?L zAKRqeA&J$6izWWtxejbw2i zS7x)TgSNPY3Pni2gp4=^v|7tPu7L$6(T6)Rb-5>RTI;8k%C^{vYgmqD*jE{S%t{j% z_9*@6{H_BF8TesjK)-@^>RU;C7AlUljGP|U7-XnoXNdvPiy94!W6-@?$D?9(pm~mlo(JGxs7YKcq||wk z!>LKWd%r4i+qRpAdi&@ou@cViAJ*d-)}d&L3#hv7rlXe#8vcmSd5g0Wk&A3}`6W9mwG$>auv-Dg6Z4dt#}sP=UugyouyR=5gHx9j_DiFb zQbij;XlwZg}kwI9|xUh#!b)3RS=rx1Qx8(GZ-1U@9+*u zIUF|*n&Q)BNCi#FyFa>8M`MD?k~6Ce+>|RpDDH7&hDxoJ*D{Q=1c92AqsXD(`9NFE z*N29R?`TuP55mLQB3U6(!0s)N@@m%SK5tDQ$E<2>GXc{cU2}lithb#e4^@mu6%L*U z+ibQFW8zG9m{}zd%^KuaX0{cw2bdA)V9dfrYe9QOrm($^MqB!!e^Ukjv?f_>9Fk#9 zW8I;7fevyG{-J9%RZ!w{G&QoFxV{P7oTwRb3M*!_W&gp?y~V;Eb@PlXX7e(y3D$Tx z|MYksql4U~2J2BBmj&uP9RU8oiyq48L}Hd0{NjCynhm@5OxCzd%?^Y$TmnKR}_WbkQrk2?h3VZT*7oXSFBKYpr#a z$qpvlUReE#-DYAhPJX%LY{2}moImYXUb$?gRo8}lQ<{X1PSgkqyAK}nfIpMumD}V( zUVcn%;$hM zQ$Mvu$BtYoDk*uF>D7ls(7Kf%>K$Uu3Tsp?bB)nXn1crIu+tU{$ci&$qK)JuQ=*|y z#9V!yc-|QlC3?}nH7yzLazZ;tUmNwnx2;6y*GJwZRMeiq*CcSmV(qo#_{YY}<507$ zLz0sks!T+q;?t{}T;!P~JXF$2O8aTX%S^STXJwI1ENOYIx@A>AT8_t;)*!mxYt;^% z1t%JN7>QO4%R<~-QzyYfhrPWs0X3!=C6&Ffa14VIq7T=~?RPBD*KTNtPJLqc()h#a zl&~$HQ4;p@agw_PvvdxwZS+fuZqp8T-z#l8aH4JVHHi^$;Z(y< zVuX79+uKAJDa-Hwa|owN1LN}6+mQRQk2v+Ev2Q<&V$w?>({LSdm&O3;UGd)~BI1Yx z^1_n>c7j^pj2Hb~owBone}r8a;vUNha5wm=(vOZ9Z?;xd42&CuE+)wH*&aer{A(9{6dF{ 
zcVQt8^sTLdF%myX<)hy4KHSiitDEl zh8DOP51nO(lwBw*rMwR$iy`l>%>3@6GLdt#qKA<34w%WY+tkGVJ5Cn=X14bjvoo19Od{O8Yg}x?|({N zo74usM4nAA|7LU|O}JFR6#= z5ObZ%&S$peJ$`XCZQk|$h6EL%a&kuoTVZ#vh7{FJwgq5! zSVPqV0rZTsq=CLqpOA>HirQU&^u1$4CJArV-rYDjc}tW(f-$=wsLSk^;-Xoyk>UyZu!%{sxuLxNuZ~Al=%BSh)A8 zkESmK8kYdH*%1lLt>3S;($Gz# z_PS@Y-#6GEK6!)`ipLe3)Xny_p;QIOGO(#7KH=XejC&zs&3r!f!j$dwF9q`J8Rm+r zTN%%Fx2oKD#qnf;=1TafZs|uBj)aOB1~&CX_KA)1l3yRc_|bUEy8otfziG|g*PG3B ziIUtZb6l1bYBOw2shTS%%2W99qDsL%4t%j>#pl_dCL*>}IlaUkQ(jLzm}q~0U_oBB z5vvDWCF^{1cD?D?h#KzbE0r}p-wRB4pH*KFc^3~n_6G((KnK3GR%jOtADzhly27KA z!$rSS7d>i^V^&788YAna1>Ao%h0Wvxw;+GU*H|@vt~wtdq@=#M#<2rox{j=WCKvqxBT6@5pWOoFWNa z#@w6+CX4p8j7rF)HZ-SY11v%#VpS@9@^nma0$M>n0YAO=sCJK<4UM@y@Q|n-=I-+O z&X7~gy5=h3QAatK>EycdF*!aZ?T>UM>@bRT-n8ExZw;6n+dsy($OIh%*(Pe|WH;}} zwAJoz%g+KkHtGzlZQo`W{nDVW>UBjQb@O_`L_V_2LNv7#5vt))<9K)VV5lA%Ju$NP zZf*Z^#psk%ID=LAi>Uk#!owus-5yin`alWEIaW8+?b*`h1a@QFu6A2Ev>jKNd8{p- z=df>gt#o3~AhM4aDC~}1AA_~e?nM<0RrFg_>QB9S=mY8bKqG`9+y&c0i;kd77fZ85 z9o7}im}Cgpb5k+D6DBrrHjNPs=^?wlHoNhYypuknUMvGiobkLgG!r~n$nMcxuK%fT zKnfQ}t@Tt4U?4P~B>r(*Q-|F@-J8?1tv~tOvAgK{WO}*biu@nd=kY~Hd5+GZVa}`g zu38d5`=5^1J?}fK8gQ2EsB`=_-c93-o>ECs5RZt&ub{BgbgNMHNr;*URv4P8WH zup*-m*ODGz)5iASPD@UkLm^0q<}vQJ6_Z=7QV1bCZk?QkEHaw@U{N#s9IbmLcm@K) zt}YyEdX2Q*>n{%6Ytwu>1F9^K#$|EN&4DVQMM7;Nh+s^}HRmYvmFUV5o4(u7PVMnz zJFoM!lu?|hi^|6G7;Cvy0s4x&>9E&zHmYZ-2DC}>0)i1TbcZ;s-2fj!C5ng+T>P8O z{JYKZ_0dQk3VK1!n&~GA+CM-^@0dUQPYwV^P36-19CkPZ)3hw|P(SP_0(?K+*JAjm z#^FZif?{Jkw#Rw6M4uK=SHo0T|0+GjM%K9t(akWD^ z&r5iT_3UhHt+T~Q;&+c;tI(nvdVyde7!>pMz<5bz@v&>@+%yXPt1=|PZo2w@Mc-Q< zOjI|RdwPBp)OO|erpl7nl(j9xc|5=w6h)Yyeav2t3f#x+yt7#|^7Y@5xWL#1bs>O> zWpB8kd1W`5`wuG&3Y*T0{-UYOq`GxSFH_68+~|G2)SNOLCcH#{`oOES{eH~JZ=M&~ zFV`xz)1)`OpFn_6A)tio0XMtzPLM&L9t=8IK7I=w+g+b6$cogN zEy(%kVMMb%er0AgYbM69yK4j9saXPE&y-gwwIt%I#@zdt^+r^Mm|Zb+xHo3H9@rcX zB(KnObliO$PeDUH@&72(ENWRm(J@|ap<%Vu=Bq4h$h^k{NyJy64){m0Q6=4)uEx6d|hSSN9*ZNE@lloRbmGU_Y^P4bT-M};<3TFrZpq41k!B^&Wh_MywD`2t?FpW8WlIx%C z_BIC_m_5JJJk6$__FOGRfPJh^3ZDzi`W&Kzdg@|rTkLO|1 
zz&v$xIK;$MQTCiP%<#`(at?{8T0@xT2cGd4YK$)n+bCcO;Zt(E%_T`>!It2X_@U3vX7H@Jx7CIe zQJz-*_f(%7wQ0O5zvG$L$r2XXgQ@jz-Rmk6rfgtJxgoA*Yzvx84P$QO4Niu|x~??o zY}eVLLsT1?6_3$5;zDQFpZxY5&`_~FfFbDHWyS@{@vPwcEy(?5XP~vyKUu7;(y$(5 zLmbe$_I~`~KUYZ{PXLf#K2SGj&+$|-a#9LHo#?nLfOo6GaNl)Nb_Ie|!}r|Eb=BOJ zA6A7(BbYy*PZs9u{;=kHlZeriV$zWtkoJy((s1*)?j zYAnQSEUspgHZ9u@h^DP!1yd(_4ujVEFV2@oEF(~Xf#ExY_ulvQUF-e>-@5$7BG#PPd!PM0 zXP;-skl1#z+8m7SQ*Oc#~#7ImeTGLNZdo)Kf2_aXx2x%5k; zjjy)T&q!&b$G!?xcnd2%!v6ADtLtky-ZQ#M^?e(q;YSX=P8GB)mQqFLVZ28PDz_Ir z821vecUH5fAt$2bk3@DocU#WK?+ztFw53DS5Ph16i*1;frAQ8MTB zk+N~nsz)`|BFaD?Fk0earB`4n`6uLHZBPPnm=A3O!t$DP7X9SE4-R~7H(0#Gm`NXy zn;S5gQ++{>H{#_Ny#msv=)_09t}>cE&t}_yzT@*D-MB8K!7=tV&sVASUMU0LN|TnH z%#RvQJltJ>+WNT>$2#Y7LcEJzOy;|}Mhybod3s9zwL2KtH1ltwp}Qjm8BKU;eHzev zyytk_nHH`UE2(}HyewDbX=q)Y=Z!^xC_^AP)Zqq2R8x2;uD+BRg2>zsi&h+k-Kg{3 z@e?4y7Yd}1BMkbBZz6X}u+HsyqFtZ3o~53{h~UFuu~vE%5TkLv=UNcLzBAMe`d!I;~Z)`mgsP;DZd2qKZS|4@b z5>xg~NsPw#Kh;)g@8OT?`*hz$_zNseFO?(izSoVP_@!U&PjnFr`fyO>SpB#P2-i`v zmGh6Ldt47zDDXAj2||jaE2yzX*jpkauP$4XKcW`{ks#B7Z}oY7*mL6SVAI~C99hJ8 z-@$MHe4ld9I8<&4@)MZM>PzXe!%y+Ch+Mt0+v8hpRBI^+>-6QU*JVb+kMbW%5|@4U z!NTA%!4Yy}yz82o5bZz)*~1%tSfqO$zGHmOdReG@p$aoC*%(dri-!t|axcf*y1CEa zMB1)676GqQ!AK8K3nT|c*o?l^>_fLuT$_9QkZHtq^za9sF#VP@FSN~UUC_RTF3ZRS z4g8-6j!L0{FNytttlo4uxRQOQP#Emt?j)COBo%Y$$G>2w?(UJBrB<27J0)qU$uX)f z|MD}L$Qoo9&%WdCn3G0((R?}=&-Slwp&N;0#P$5=giz-tdERGkHmsN70eJ*%iq2Jnowr-I%hAc61{zRTYVoVHKdwmL? 
zvi8|X2<7YyfQpul@%0IXHi84gzd{KX;ls~y){H}M$ya))2GxL?~>8zpjm(8fb7 zE%l#LN#9|WfH>sDuN%^#5FUBe5OLKB_>(`BMKqigapQ=H@`|`|uwHVEAAa3zbCifM z6F-Mug(Jy4Or}wf^Sho|yb6FUCqO7{FaZ zQ)TtB=g#S=vPQ?KG7AN$l88SXphNw*xD{*J8IYs`AZVnWGf(O*7} zd=i)sI*pQA@9~ehV=(Q7py#x&ekIMl81AIIfDRljc?4CEnQvy@ty1xh}>yk#2vfD-6UT1FtoCBq#UcxhGIWkN`3x1}nWOtE%zF z_mKg=YW%~h07kHj{h+%ieH^5I9M=}=J8*zCEVcIQ-k1jNsG?C2s$-*GYE{JT`tC=v zeBC?xTriuhK~2h1zsZmG8^j;#xJgpWVZME{R`X3BPe5aX=l2ev2PZyVE*FFsYsNNH zRG6(mfq@Rl;1@uouIKVMr`mGS3Rz<#wPp zLKUUyK!_Sq86`dHo}>GNr%%#+hYEyGbUe;ad!SmPCWXlG^FVS&;PNy` z11K@-X$$f#MeE;Y7B(HiQYkf$QOMI!&6IYNTeDK`fJX$^YeJM))th#0zB^iM*=Jg)INi^B4ys8yf)tK=g~T{5 z24Z)4@kN;gy5Yt1#FMxD_t_}ageYY;|XG~Dft zx##`a(}lcmzdv>cFbi;a;JA&9(sB_eb3c$B1%#17Z?a%tvY;0m#tqME$!UJ(-}kbe znv$R)?4l2`($RGc0yKX#OI=7z`brvt?VIqFfvaS@m02 z+3(IQsKLS43I-wt6#!%`7WYeceCehvFm3{p8_3vNYL0@`xZW0<)DPlY7*e;20Efr> zj$1<8z(7$w!Nta@vJAi{u9wYsFrP{#kIp?(%vKCJo+_6(nlPj3w^A(WSH^Lm{55fN z4;8ngh|Q(fMt9hy)&FK~Fk!n&UdQdFqmOzwZBB4!P(1gmJF^e)DGx!Z`U(0WYk`@Gkoc)f$_CyTphT-t!USy~d#a z1t8rTS!xj!cHgr5Qb|O%f=&VS`py0!U=HdI;})E(j`={DK5S}AUVMgg>h$)_z6nDPdoD(|OP$S=qe?rr8 ze_lp)#X|>SLAc(7uYLa5>sBp8n)aoTQD;1jRNThns*OUwjo#!GfO3?A6!@Wo7dT$n9#U{Esu=^RXD3w z*kt6Wz=Sq*_l~N{P8GBZbUa6e?4Q?#)rfah^%EU5wU03P823mQ-Zdzdqq;8dFKQSlLT>_;YT_b~3&yJyUcP5;9`j z(M0vKFv^iaOP_wCYK2UB;PD0;!49YFq{@jzqO}p-?zciGerML#I_GqxOWMV9hTCUF zpk8e-+x|FbEBP6W&oQ9;Ufhi{ed%&Wq8=Q9W|~N=m%{seDigQOcFuhL4ct457EaW~ z@q)gqD51*=MMLWoWHq{jb|FVN=a+m(#HZs4EN#m;HDBm#CL|#vWkPdFC&ZLT!Fq6* ztx)JS6xYz3tXDXyRS1crf|nCrrICWqIMk9m`EeX(Lz3V<$4XKjc$lcZ5#|6tGXWa5 z7dR(?nHv)bDAOjH%TFF-!P{lQWu>n1Aqty8_ngBKyBIS$kC2y;xE#xJKOCrWsI<%4 z)PpzHx|nM7B=9)qorC^2decg<{Is!Qsqqpsn&l*D4HTo~L|Hh%oTgy=y=zC8W?)GF z80iBAy!8SICG~r!0)a=0iNA`4L-7hGS0KB?3Iy%~Vwg%+x%8aWxg%bZ`mEH&53=ZM z)F;$qau|-eJR9T}CswUEUp5Nby8x0-Y zbf9k-UXu0Q_`S(k*T)sjFs)#uCA;_3dB&uh2PVr1*<^~aT1`=QIbX=tIuJqiKjtSG z9p+9OB$zp_wb{Jsh}|1DlYxBDN}svO$|6U9zV_6zs?&D!8$?Ok`<+bRH!5$tj?Z_Z zX$iilIhgKfZqnb)v!{6WJxq=OE<$_rbJG={LKTHS^l(VqRicj*Po1vOMzs`V2d6V) 
zgaW#r{!XV617#9(ShW49SR)+=oAYy5xEPtZ0(f)H^N`}*s9}_N!1w)hF+E!6=yJVoAzG)fO0(WrwXJUb{J42KGKd;$wn}pzoe9U=iZCm<>CIs%#bpI z>^B5dhUcq4aksm&LoTOff5qT3j5Q&t*>M37FMKO#4 zUZPs#s=;#=K#^w;a!>_YsNf8t7LYoBk?5QnwmQo^>hy|FHbJOAGz(I}a34b50B{t2 zc$|{u`{9n!m$-+~^A^I32NNADCKHyX^5PO&*WlgE;#BqY_dRRxqjM2JI|aZBO&4Au zY};_%4Zmi75(^^ZYnbb1KrQqDW5Ambt_0`gzXyQK+@X5d& zvR!ca9(-CBTA|Lo(yPk6KEiq!hy^@~8hBKu)kA#DKZcrv>FGG_uH4PDEg7@dsP8*R z%@}>KSwHxVoa3>uEsoC9eqo#bhX<@Af}cM332SzJbHQDs(c0v-F-(@lAJHT&iF_)I z=7&TPwQmt4KMGE=ZxA--cW_CtT|Y^+*j#^9tjEm_@oi>zU#{HD?F#tWGeCrQj1saI zLBy&0a`WehtWsjeCb*qf)4&kpL-@Mq9Kz_nrC( zve~Z{@7!tyHue#WB632k^7;q+>9|5Xnit zW&6#r!Vn{BD}E)pX+$c}!)jgj_m>sBk%EOzsuqM#W>()dI(ff{#CoCA6G|-3dBs!y zCmf_gegKL8zRjb&y~$YDqDJ= zc;0Xl$8M=AwfPCND9L$klN9R*2jz;nygS}n;%L_B9uYvo8+;AA)f^oJ3{ima;fykO zy(GL`4YCocN@O!TGSAYeusKtwT>h`oaSp5epwG{+tKDR8Ax+tvbX9BAL2|5nv~}{s zSE>P9K0jdrDVv}tFhRZ`L!11VZ`iNQ{i^66FQ#g4Y1sw3pRfb4yZZ$z zu+&3E2Qamo=GDa%*l2vl7XdYN8Il`b>aN=2{-l0ZM%w~Wj_)OHN=+Zj@I9+z4!`D^ z^9XpEVp&-CKfLVz324@^FynPEyiI_Yd3-2uDfMHA3%y7*e~3O$FJ;Rr9>$|`I_}b8 zUIW$CgwZXbJ6mT>dy2C=BwfeVt7=Rs&MFp2QvQ2niRK1TQ%&EKjn_93w;B{#jT=t( zW01ZN!yWi~Yu#$p?J|ZF8uE1Xf*#!B1Q{l|AN{Hl_juuOA8cVBT-@p(TZr}6euZK$ zNB0_g*{}w``}_Io&AfI*PeH+wch!u`JS~NWiyhg=U&oQF-a9qM?*$)sY8UV)%Jb1 zrOBkl>SO-fw&BVE>0C!WVysHBL(-?n^X<38(Z<#x!QL7JPy4Yp-{Zk-r7u+6`486Z zU_+!1Sf7scPMMMXdZx-~y+Z~~I-kt`Sx8<)ryq7sMe7T;9_*LI%p~glvA!3Nhof=N z8}BIrZk+&d>pSAQ4 zsKf(~IGH&fTb~sVn}oNye!3c5@XBiS%$1Xt^s2dwnFc<`;;`njQf1Q7!#tuKdM#>Q z-ipg^XLQ9&t3Xwt9dq!WTu01EO#XQ0hwi-mKQ4D~don%-mgUVE!F*M&`JdH#g+n0l z{s+nE68h;`5r3h4nv0goC0Ae93ZTNNMgK;H`~IB@7rLUtQ{w5@%qF!`#d{k0&l^|v zbg^%V61+xm-<@Z9w@a5<)O)nWei`n29cDde7T7H1Z3yzK$-7g~Oz9W~SJK{B&4I); zXk_nnqU(MLk>7!k7Z}e-jXw-O$(vE&F7f z{=3H45+r!r%*|OZRiNg)!W3jg@7vGR_13K{w}?z!Z7@yx3?4e4#dxw>24c0o25FzF zv-;|T|5(>m&cciW3dlk)jz-m)D`#{n7Jd5V4lSpzRSuPUnVLEUJSG^w{|!^-ZdPRO z>Jo>D{9N&fsMGhi^DZzdVTxkH;{Jg>UpFSd-;p9mfa6Z;VEAE~WcB5Q?d3bdlZj7z zgTvEep!8#|jPt(_f*+OtbOFa?^6Z_n{Yh@$S?`isM*P;PqO6P}({Oj1h#qVrD3Q=A 
zH!wFE>n;CK*y^GkO?uVmUymT3t$Os>t3J*Ft3kf|kTTXGynml0AZ}vnY-*-3)?EHL zt}nCEgD387jlgVr1<_PF(UI9LSPysCRg!(ibz69?no0g8?Xt5829{f$zp^b;gO|Fsmu50OCP=eAbGsC`~FN_;wydGLO|{!Xf!n z!pf9g&Rlsn?w-_?I4YC6^tw9iV1}r0DT6Cv_tDZV0ys4j13DZNdZFYUKn?&=(BY>; z$9?z97&ePU5^HnL)!tdQ)nD`5t5(kOFUif2_Pm*!q5p?HzpzHvaBf;;Gd(~je70e= zXFSKz`Dr0K%&2;nTDj!8-%gt*4F_|1P|!)J8Rvxvit*Kdceb&nvRpKYLlpG5{BfZd3P0@H+E^{ zmS|q;}hF$!s!R#oY;B3>)jjbvsHkpFdZVZL{x^21CN#}U@ z62xLnsGtgtXuZtDy>0J?Qy3XEUaSy?wmG3Sv~-JN@Z-@;?W9dwWuno|M((2UzE|!1p!VeOOBIL{u8Iz;~>jB zG@uvuHfF=@3rn@7VI=mr`X9WlSU5{~Zr6MI(pP=osSKZeulaQFBR_+_rBqI4))t^{ zH5r4(-#wdZ7vB7&^yqL>{eagD1K!@qZ~u9e+kXBQa#%gecZMEbB!?ZA6;DHL>>*o& z#vjk;YjpeIHT*XAG;Vq0;{v)Jl;GZFanjc1?tdaS(yDU2>P~i(S}T?lsCX>3H4|OV z=|{74`Gy^B6&_%^w&N6Lel<%qt4&;PsGtJtWtab^rBu~S19NGsQJA#@k_t2Y#$~X% ztm4fz9qZd&f{ygNe&1w@(7_m}52cz22uY-h1owpJkESDKv<+0D7%#|y3VB>CD!O?Ha`{@vrN91T`R;?0TVNr!I{|qi)OXsBv#>-g6Je^z zASxH&%au3%O)urUH{^N9ldYRVrBM)B`-@aU$aO}FRKV%qQDeu`cyQZ7O*7lD#7oWq zKG{Vn^>#2@N86Vc^W+nZz$cIm;<;-~@;ecxIz$xh1ZDJPAEP^$PiNaK#Td6WVhTJO z{I2MyL~;*ngbg@?Rm^dM7P5ZruVZ>$dw-Yk8}c zW4W8+yh9Hi=cl3X<7!pF;}>%l4zBh3x<7^t|loyEp z;j^XaV1xT`@+%Dw%(?PfHM#T*b;?AGJbi^XJ*+03^N~0owhe;lokqDR7vHXqU@~Id z0n9akIk+m&hSZrYeRHaSU|J$S^_^!Chh=rgNxMl1S{clbhV2wTf2@`sOZ*w%e{Z@7Y^zXX_O|IcSx|;>Y=;2uk(sRXx{SHARJQ zvX0F3w&Pp2{Uv=~ceo=hoBH-%CQ^$CRXRgq#bStE6(`tTCga0uIWY{fcGwl)gA8w+ zi%gVX&c?l~tsd+G%(V?|rAd+wcWicNu<}2ZGwRYx&eQ1*?oP;$03GpYplRtKZ9QX% z5$Ges{@Ygn9wZ40^08_NrD%Jc)ej1w)LJSDxA)m7aJK-I1Zjq>mxZERKC}J0QdI1- zY_UVrhzzv|i*&ig+zh#l+zh>@y+Y9ASloM$TP{WDCf|2P?V2A83kzbWB)H9zI%SvC zedDjZ<%53S)7Nw7GjOG2!hh_*a|f30v%NUqEE!YAA@%mx=Sx+B#Z!v!R?ZG&Ws8QI zgP8W!m1S6fe(r1QV5}FT++r4ZWQumOJ|m0eFF;Dd@11%|*2OJh01(B4Qp8fTpTvTQ zc`kpwT|h2-EX?STNVQr7dQfgUNCXJwk{p#1HSp4j3d}RA3A)mgno+1zj~&BqmGWi9 z&!D~dbg#ds-*BkYxlq6Hmd$`{ORIGY83Rp%TOqg}9HnLeuij~q;E_T{?CM?OUywElYZ4Mt0m8%G)dVHE=vnMTse!(=Kl4c4&d+APZfBq zC-&yJzzySkN`yJt%iDYcndS24c8+v`NcD z157jqcu}Hrv&ujG=r6wq^Y}0M@*igXVhMT3C$i%GEoA)j+y9c-{<%EC&S2GIjQwx^ 
zot63%0%8GT8RTA0@~gj#y8iLoe@(?85!lZXp*~VT;m?owYjTCb;BWAz6e>~wGO)jX w`>*l;Z;Af5ME|p*e>(GD8O;A{N4vzVhc$)&eEP%x8u$`_B`usMr1Rnb0IC<&Pyhe` literal 0 HcmV?d00001 diff --git a/docs/images/visualizations/task_explainer.png b/docs/images/visualizations/task_explainer.png new file mode 100644 index 0000000000000000000000000000000000000000..71f16068b4ae2543c6aed6fcbb1e623100e59754 GIT binary patch literal 241373 zcmeFZ2UJtt)-DW!BBFq(AR<+Y^d=xRARrwSkX{5S(o5(qDAGhgdIzP7^xlbriqt5* zBQ10SgceA-JG}2XN6&xn_mBVH`;GsOalbG^c2@STF|em5Y=S!HGXIm{{0?6Df@d))Xm|-V$GA`}XRcr4r2zla`35iu!UU9-il|sx0)W zzit~*lU?H8SZE$k5@QbLJMB4wpTY+qIB_IQ+!mfp82pp@ZMXYlUV^=R)ZX}8g2Vi! z>!QVE&k3mC5uOvEzIa2(*~5c4*!ly;-#73Y)5pW~Jk^aH46g2Z)R#-l1j_fmtg(a& zP%JPKe#!_H@|7V_pk3m%f%oeANPlT|UuF6-!j)P4t)DBi{G0v-$AxFUlpw-j^=lWG z(g>a(KFBs@>U$+|`6jPx@@rP@2n_RKe{TlgOCRPh^*=5QbU#`H4?+jK(HS-;m4&iz z#ovrnUB2%APJy}iMjsY5vO)u^7Zw*M!Ae@duvtl$l1QP$i|R>6j_;HA*#3OYGNhJVUtXM#|Dqfr!W^T!^d*#HGN0OJmnGbBsmI_6Pcmii*TJ_0buxO%%oH*7 zwR0IoH(%3B#<9&SkuQ{>40qqJn^ks8ryip{C+S{P_Bf1@Bv3G-Z<|j|BP&J9Qk5Fqgclq4M+wHq-Ab=Ie2w|IvlHVlRJj;t6Ggwn zL9+g4TG(7qFG(d7Tn27HDtVf>`{=c=TP$g$^e3wc)$C1}9Dk*(L217qx>+Ap6U9yN z5fOWGaL8rztL0^-5!5vba{kkb4+MhKGHJhH(3)( z@`Wg3=Bvs)@o(~((;}`vW&y{2RpzXwE|&FT68TQ?{pu2{JE5J@HWNjh^ODw*^y~Nc zbZJB$mi?@{OR}6h|6EGqe7}P2yUqG1z4xa-{PtP>NZVd`y@UOfx%~9jO)}xm z+k)iZI&&+p^AIIGLsn6hUM}fauVil=7^@^NCGiix=Egqojeee-{OdJWW*MTK*osfw zJNB&%plI_>#)YHQ7uskfWPP;C>&VA*|74Q|U=$ls}zdeX{k0!dcBEn$bnz#02(@x5WOGCw2WlCOB zu0`Hov1q7l4ZDBcPbQG9EiC9YHW;gyP`sAkJ$BuW??>ry=~(MW)Jr)F1Hr~O#cv)w zq+O(36v|VI%C#>0Iz;unf9&J1M6Rb=iB?9jw5Fu0q?Ui^q1hJMq^Sfey+algecS+5 zR+dDO2ZOL+;rWEF>q0cb-hwazNzce;(dK{|#~JMzgy)op|AEK8$pO`Y&DxW7`jL&N z7Ca8rIZ3@nUnRabcl&{Qe6D*Z`V8+q_RK4f@yi1l&6%5*n&(2zabZ}0qI$WvaxtH8 zg}R;r^lsx?M4ZVRu8TBo0+tE!J>Fo4htWV?X&w6OYs;@yJ=K%XJqOmK>!Lm5 z541NfZ1B+q(zR>6*C^|kHf}D<*}6Q%HMQZTAQ@vCGeh0NefXg*BQQCz0Ao!u{CP%h zq4Mkdz{?hIR&K7PeiUrDZ5LzGb*{^(D_oPO=wcB|(S;(KXS`j7Z%l;`r^jIf&I3&Y zEt!azAzB*I^5-T)jUdxa(^q^+d}CchT_myQ0_|>SHGLBOstvudDQESDNN2SfqiLBY 
z|Hj^#jp-I?9_c|TQUB?MRrg$R?)xj!q}hboWKtQ@bG}Xf%l`EHC!JG2CA<79yvK+$ zNEb+3*rg5ykFFkX?yT)tpBAE=kG6J%k=Kr&prrR2c4$zDL&&$fMH}#un>q1)lJ7+1 zB(3wlyYO92V#^oIFW6o<$w|nO%H5Eoc-bE!9O4k-FF*YN{wnh#+pDnfa?-Z1VsJ8) z2zpi^w$j(4sMBuVZT`l5EX(GdrK=j(enwhc8`Ef|vAe+;?HFaRftOGngi7w>v{GvjrG)f zUW2VFDe+el*t(-R^Ao=$&Ts}Mgs{7$PN%IW8)f=|U-u^(>hP(HWTs0Vtls-}?{lrZ z-K*OC+BQ4J^N(M0$(c)l0>OS2Z#ky#2Jr3Cl+k!b1%7e(3KI$t8nyATKCvqA-5l8K zNnv@jdkv=1Gn1n66pr*+`ui zdDi*H%(pS1;oM{O$M@hH9gG;gg0elj<<4_+FMD1#2w$^hNH369lPK}5$80Xw31Xv}dtw~1oKZDlJpwofZ(uKJVZ=lJtkWb)K{{JDHRhWp?+@XyIyC^ZMK zvyo-1)qH%{+^YZ`<5EK&{`R_I|FwGSW1;Tq`sKwG7b9$wg06eja8qsR{^F(i*HLpZ zr}gM5Y!WT^lwMQCWOI6>QS%A>7F1-!p>B-2Jh0Vyq50%s^MGvwH!0xM87eeZ?L=2h zmqaJobEk(+%+T4WZS9$&-q+}!;2umyv?%`^+9v9G??@Tec%z=yg?r{OsKx4Zi=J6> zy`;3H$BW9FdNZJz+ik1~aer=PQ*_$2`E-b3Ejwm?e0@lSK}5tq`mp??qC-;M%&GQ~ zDN1JN^qWLiGySRIsTxHvQ4bLm^Y*Z_KV+W7nB0wg1oQl05#HI?$#$TO**uvID-DbJ zw)E}rj)js@dTRzbZfSog+olHIljo|ToVncr={R((yHLs(13Ic?PaXgFVI zOsgx?S@K#C96j-?rf9$oMh`-O+fgsaLhsOs)6nrXnSXTgbdW~ZOu@pZrl;2V{Y{)( z7kmqf10S4Ll?3T)nUS@69ZKL%3h3g*CSBO4TLZZd^tzmWZk0K3PX6 zof+{B7hFo_+-9@L&1Sd@pxBjx)D( zudNMKY&0|oxPaq}1jK~52+jjXguqjVknW$y4+-xQ5S_K3BOnO3BOw0!98KUA|9A^L z@qPY!C5jItAOYS{0Z*TI=l*f_1%-D+|2QVv2R+E6S!|M#b z^VdcGaUBI~@H2NiR}VWEXD0l0Ei7F;J)~J!@B{tx@z?jX_ObidNY3EDZwt6Ve*7o= zf_wt}|GYNPRSMrK@yO1{+R;$K&Iy<^UnbX6@At01 zee~~LpMb61j>t&my*Vi-@c6#o*Kdn-iW)-uxu3wVb_WAbGYoi)7;wzirs@tyJw}p< z0U0T+(Sd>d5>Z+zVkQE@i?RggD1!<9dxtkI&m~-pxV$@K{rrD$#^3!sE>coju>6lN zgg;GSpBT6nhSw(eKfV@lCG!8e(tpj%&@3WBPIIL*n~J0LmdM> zTJgUSNtRSS{hmtH!`}D}=ZPk5VlbluCeJC5VqA69xBqJ>Bf} z@B>|r+Ml}Qr)q{yuTI`ejZkb%>RZ2$F05X+FBg3fXs6S(p0sy%j^@^f=tt%r|EG2u z*z^cOS76q|(|D((!$p5>d(rjq$&go%P$~(kW zKBoHteadIjXocg?u%PBOvZCfUb4=zlog~e-8VmkyS@1?7Pszlz7v6Zqgcz`=7xQec zdrK-cwlD_puTMdz)LZw!y$czAxuM|xeleKnkmF9?7^{Q9+JgSw-mU!J=edvLm!;L> zf21Uf@UNu=&d-Gw^n9@&d|L;+KjN<9>+WgOv0GK^O)qLLH`{v>pX6UOh(B4&bC;i3 zwN$NIs7?1)^=LJ`xV$7KAjxWZw)ozDfpx!*DVppB)kb`&I)74FXOS5XxS=3Jz;|fR@m-A0hsxa z^QHHTE0(k#uvSW-8!x%Nt%UnVdE?$|sWG 
zvD^@DvGLM;$Tq{IML55C{+z)!iy2ej)U(LRFA!9X;f^=ql{66v{Zz<* z#>#alNNfNBd*VOjy(MGig>Iai1|!l0-$}Eh7Z66-@7)J1kGIoT z0`1>8I5ic-D`4!iH--a-C*9T9B2Px7#)?rN>XwsLsemr!I>^Tky}U`R>-tEOB6!|t zvN4y+a0R?cvtOx>2*L!$3W5HhOxuFL>-jPF&-|TJzx9&^1l`uSYH#RbHobe zcTCjf0S|an?_9O86=F80UnjOg7(gGWsJbb2l6~X&jpS!6^DG9_3l1}$U)ovGlH_}0 zRt(8)VTsK(TZ$yu7%n6XR_;0wYF)w11~(UvxURiE5U~Q=Jps!=X!uNo7%?s_hE47w zl8POXTC~3}_E;RS2{$g%44wI>xKG*A{(XmAa~T{?0t#MXtL`R41}*%nN&fbrel?>y z(Gi5Al4qyMsU1VTUDN1jY|MIC)8wd>wdd?|9*|kugZ(BOTn{7+I;& zTUQ_Cp)1xmQV+MtNOo|-tbTA#Eph7a6y+V8Jo))}MnHcmJ>1EtyzU?`ui2u!UW8(* z*2G~$2|tjN!~e=anKIzd0qAb88t0jrOwQb2-~ZTW^!R-i<0^Oszt6@mX5M$}2te<4 z!FQDqo0+s!F%>CanSXlw6*PQPtNsy^-0OFYz}AbS(;!liz0AZQc~v1yKPj@B0SZE6 zzxS;srvna0XG!N2Rmzd=GuW2{f@aW8{yrkX3qru#&=N41;#;SciNAgO)_014x{Q_@ zJ#~gUtsVb3HNNOEvmoRALNGbf%y-^xdbjSuEdPk>!6Sa2k{ko}z#T*fiFGztf0LbB zX1Je)p?H_j8X4rcxAa3%z-;1yd|PEHk|#Ox3Y-zCWuqv)j4Ui=To1L*vSpcS+IWYu zMz|q2YhY=`Iz`D>m_z>%6E3jJ%33{rbrY`vxIUvCM~~>k(WoK#ZrAoE&y7Q|^}?@T zO&Iv84^|2cC65+8z~a~$DpTQJQC@M|W3m^SWM@ERX{jRv%fy%rpieRIxhG_}1ejMU zt?)`Z31fh(Wf8&XpCI4{Y=Cc%ADzh)FWArR6_?^Lh-b)Y;f*ck^`Y|O6)xF z>eg4Go_b`SuI6A6Q|C|@UlGq!|GxcR%4p?nhcSCLb2dXq`9M3qG7It1@)6Z`)#~xE zlR*=sPWMvNk|clxR9!hJXsQicHfYh~#t%&QpD?gj9dd;~?FfVp)3Qza#T4?}l6)e* z3b1B5CP5;+g{G1Q9M41ga?jWFHMb{TvaZiea)^l zB0q44-$jU<-n!5?!DPy`oTuOd1kMP{k-D0GjF~?=Izr)WPh&6uhe5*AY_orMEC0z_ z)R_p+v0S@&P3Lc?fF~_u<}Si8oOK(f9JSO#nL$0DC1n7IHknpb?M;}2M)7cvO1RE+$2Wf2@6W&dA=rtT>W zWWRMes@5(p95bfFszsHFc+CfXSUVWK0C*quNFAr`SLq^cvy-WiKBz?+ghMl1BqyM` zv-M=X#HJ^ab3Ww3AGkvaxEaDY!e2knh?l^%@dlS1)Oi4J zK|ymt(6Kks(W$45KC+Cm?*C+WXQWORaC#SrmA(JAEg*Ohq8qIcYT#)v7XIS#*&vy4 zCjNPrY(jR*#yJNu$=#P_h`BOzltdr|efmgFSZy1G$5r4t_LcWxQ+Kluu?&LD2DxPT{(2GKm#Y|6y+TF!r+I^bQ8B`KB>0W>o?{0vnv6F8qf`#Y z7Gb&)^PBzY=j43j?mc?mrW}RaOe!tqunCOzMiCpbU+R13VC+{*gyXYX=_^idWRFq~bD7x6q&_$Zmq-r@Lr)>(kSu^@RFSWOs=?p4IFTVfW5c6nu1vb&rs@tOBNaon z+p=>&OWg{!JSlpfkztTw;0ip4a`iC4(*YJ4MwJZ(!H`EYhk-Vmdl3_jyVxCS?LU2| z5g_&qQr(`xBmHIJ7ckIiUmw)>%XGb$id!m%SY}u@LuOK}e`?%*h~uE-K(u~x|1rdF 
z_M2XYOtE=cL)A!O?1>r@->ibMZ;LJy4IDO4$2V)O>u)$Y)~)4X2V3#Y+E62xXC)ep z2PtrTbHd4pADyw&$p`2?d~?ol#VCFZCCt`j;P=e?;`R)_OOJ2F;kou1-#A+^B0SU( zNHya(3c$b_1NY+$o6zhv_RAd%Ao^?2vfF&v1^zn%$sFSo6EEDeJ`8mR*NDZ z^tfnd?>vK3oYWYFaZA!J1s>QL@f4K&2nQk|&lUh}OIT(H9yU4SI0yfP^?ljkU}cK8 zk6z#jUfxxoqZ#kM6Mwx_#VF@>VyO%C&}AH1I0;qUDe zOrWZpwX#=2YF9K~!S2{%zrqyOir4;eiVqg644aF~E9w)Q+Y;t)-LYI7W&WMY5D=dL zVOPG^(+~LBZ@hZ3^-j9m89de)D{K33<`K7mQ zhd;{CjY}|6B;B6bf!D!oTjvk;#c#? zc*giI?dO>09NQ?HVVW7&6+;I@r0&>=8bJHp5xTcd1_AA7Tj3PsweV)5)jHM}wXEM$ z{`OD*83+)vK^nfqGyi#$*bIX4#K(=P87uwuM9J1UQX@1WtwDHUMmX`*9eFN!>@nH0 z)QH-~BsU<`V4HA4bbCuH%Cpp}+HJMv&F`mE(ja}Hv@5=QU{)70wqi3Q{h3GG0BHer zpid8$43TRqlKq7m8T(%UvDM;E#-M#VTBX9^m4Rvms%Ot2wL`HwsMlWU6a|*8W$Qbb zI)2X+gf!TSc*t*Em*0Fm23Af~PwfOr6Bwc4Vu$Kj~np(A_@eLZ|_j>)UPygaJ4 zfUmlAF*!bCwvqS1KHk?@lL*&AjqL|Vp;mHeIjYBWujpMx!xUr$}YCB`^jQj2^b)dQ1JS&8ibDlIb_99mxQBvmNzAQ zo(tuWk7>U>&{Vl8(lHqQ8MoO)2FKt6oUqs<91VH4(4W8(4-lB-U#ob(|I~ogq|I*x z-MLNaJKuE02Ht0v6-Xj9x*esHdxQT`WPb9n&5$kFq^0IvvjqWbLo`kzCpA?l6cB3^ z`#_9O2b;8W%%vcnGExv`!Ymy$b{GN~$bwqU20}8fM4)HX(^+9R|5UDSz5<4qTm4Cc z7p^F&8UeW+wF1J~0R%NKP5c=vokgQ^rs(LT_KdqJjjjl+?%$=7|8yxxT9%{^6{4u@I`b?kKv%b@Ft?psB^ z8b!l})kcsJ&gDrhWV&XthIJkGqfSp<4qPNlJEO-Q^Q0)z0o3mxQ6QQ$a#EtnITn;Y z=Ar7uRdS%8rCpA>_rG_bZhq5h!Z`2x)G;&ve)TMgy8M{SzXLbrY6!3cWS+ji1Z==x z51HN(#_2oCTBTo!SSm(;tKs{#bW-(YEy*#_%FSG#dt)AYIyKrsf$b>17p^J@<5lfrJPLyO-5Fd;tY=L3RzKPM6BV}=p_iNqa^f33z zWeD?IFg_f&(o=WrSxKwO=}m(`T2bD1`_%wv@3$E$)UdI#va&3?@P|I*N(4|jccNL3 zFuZR0?%})Jl-)D~s!>hFrvK7px**tkuSga3M}`jn|4J$U-!vR>bg;58ozd@d)Q>R0 zqC|&R&-9iE(&w^EPg>-yKF%=b_R7So4W6e@aVT*>LNb=jsG z0R`fFOjHn|l9cm@?DQP5G&!dj9+iYG%76rNEZcno2x#R2_55XfKC%9?N~HvHKxykj z(#U`4?4%P>p~#%yX{|A4G#YPj(Cj%kF?(} zK9^-hTj?E#1xQSvoUw9@v^AED`Hi`t73hV8Ds6dCf|2` z0$Ng%RPh4Gj=9lhMHLxBUZbKx6rYb%&9nz|43`2@$TvWXxH5&l!$gIB_=leK=@)=p zmj@`x@XH_eI{1P@hn7wE{|21{RWxuDn6S&af8eAXpls77GGWfZftK(}#Icsm*C?GJ zxnADb9*)PXO-3K-I^EK(XW%e*yrLnsz=-XG?g`%j1qMZqNQJ`7}{8$ms-;f?h2PV29Ct!; zgCOI~R3PI&lA{85foL#xU~28uVuH@5Hc=KVn}5oZ!SYj4tsi4N+$_PvtI>_x4SYg@ 
zo1sP}CRPstnJ{nmAnXA(WfSl_JG^PuyT1vy6Dyx4B%B`qYz_QBDcI9jxefh z=&tZrD{q>2w??y#FSB)aKpTRU@4M?WB=3gbiOPXhvhfQC0ht)KLUGU2z_rm?M?)j5 zIv!CmW`qBcA@MTh$|G#&Yl>mTLo59;StOf1x=xSP{VB3x8Ddk@Zc~GwuNabD6U55# z5##u7k5P1r_N8xz*ZWtP&<@UD;(6o$gYSPo=>Wv~7_%;xrJ5JQEh%MH8boayO zNnH3_X>Z4H@U*BQ$0}0NH)?62Mp375K%=_=a(^h;>GOFM3C4I}HLl~F39pKenM!Sl zXx2i+Vp1a+!QU57-Z0{@vl7(I$jZx*_n*;P-WNh)&IVpJD%sYg5v{00n;N?zc3~l* z+@UwOe>V)u=PH17pyujsGsx|OZh#5s<`B*GH|s1kKY{^u2jp-ZTP zu(Fw+kZ49+=VwUi^858^+34&|HP2bY?=pKy&)K_*XIip{mVrKS!_Rb`F3Vq^*`w|PPi@?3DGq#CI%m3n6 zQ9|FZCcRici}9Xf7V1vdo=Vrs$HkZF76yrEO%*US9LQkt@9O`0H*X&CMO}5+FQFLd z*0vyejEg%)c9#ymQT)T39^#`8TP3YCWK#i zZ~{es4?+ejuOt|H8^GL0Fh#DVMx(8cWinXz9GkQ1lNA4X5Oj=<{cnMy`xN+|yV$Rm z;`nqEP;;W&#p90GF`E5=+?C#Tma4qD(WunOS19FOd=1OKw%vE5|4t1{ct1!|p~w3s z=kMNY5--Cg-^QHDaGZpUl)k@=WEGOL_H@nP?41;6k7k@>N!a8*DAD8X(Kqc)d2Dus z-0o5no4E%h&+B{w1if45jpQoSKDJM{2CsN(tGP)7O2$X}om8Tr7vd?5&VMExsq>=03KXiT*OUfNiM z!5|)veXYBrHuMIGxUITyUS4{K@%(BK?l|z1Y!~aIBNsQ3lP%hhJRzPG90WPW#Bsa| zRzBAKe}QqcEns|1(gTUsP^n)RAh^>)%h5{f5QH&u_cbJ5}SKJ81?}mRs z-&-K$ix-x|&&sbbcFGG1gmyHRB_=T}amz)*c8OL3!J2B?N}Z)~JV z?mgM9OQLz*yU0JKV`@u-o1wWyFQ|k(`ei=DT;07*S=Aua%9jOfOQ9F81L4cnqOduH zG;Wu_X1HKSoF;Nq#JhU1oMvq9C=!BGQrhCHZ4k@Q$Pk@?8N~QltNw-^l)kT1VZboLNU^*0s)$In09lI6WMWq}K_rVyu!O*Zgxa;m_<(95q_sjySoQ z@f^wop-;sRY~jEc8$?+pPp`QTieqdUAW}kb$W5k*i6xP3=&6P^RxS0O)(H;CHdvoy z5ocii{VP1z0KAlghsP_tm&zpW2>yPrMV?ioSvjGnz|2AIV1w{#gFNfjP`?rgt5zQ< zo9AoM092gl9j2&`RKV^0vjnj-^nJw#T=?P@jP|#Y;O}+xEKN!5XGJS&nDAS5^^Yo= zN`cA~5r==PJo%qgJAN;1k))SNdtw>-N0hsThh$jHStcbc9w3hPwh{S6DWOa%)o&8m zO<4y`LdgA(=o`X6A~Z*$Gl4Z__-KY>_n$6{hP08_EmdfRkniRut|z&pCAl!uIE#6Z zsMkcR830G$SJmAEI?@MjXMtn#GNL}NSk*!8zfQ!8WR?ABpB$0er%hfw9Je>=gf0@| ze9V-iQDVnPEm_taedT}dRs2L>#B-G&dHiQwh4t+P1rNCr-w(}~KlJA2(()CCS-TXp z%YO+JQmd^#RXGp?DyO4Ek= zfoI?4Q=k8-8*jlqJ)chH4X8}SYqg3r>^ zgl)spoYF$=a?#>a)b8PCpvKVZ_zmPYWAlOq zO^izxJqvS`fd-~)u(41A71(Bl*tj7anBG?54%({gfo5!a_SCgVa@2RYPk8Yhi0t}) 
zon@_}WG6K`$uHCIw=UsFZf_c%BBnz%QKKRasZEFN$?-8|fDE%OF$+D6?3xZjTTvhwz2uj)iz6buyt5W?4j>u$Y$?5yZJ2^a= zsF9%A5FBwhWU>|NTc&Z~2UOq8fapc%f{vo}x{!5VkB?^k^`ghD2{HI8LKuj#>hs&E z#l6Cg%?aT?C!i=Vsh5VDh`S)jZzkP)NIc+JzXtb-6uh*ma)<2sOgv9IP(C|WYS=c; z)3gj9n4hy?1S-7n^*|Y`$^sAX)5`=5W=eV1UnBV0uF*uykj!uTG?_2U1>Tn(Gy~5e zzIM0`awcbxvWUsK?_{+LO=i7V*)a>6XOkmQ8I<>mX#Lqq9;X>DgS|T?pFHy-q^p1a zPTPz=sZW_v-S<+XrtcTj3iVJUd`jChKzYFoY4a;Ts?DkWJtF!zv&B%dJe$}+uORGl zL|pLGR0jVg%^IJd8t*?8RZot#4i}FWp0wdhlK_e9f3-A;SDY~J?PcqKdTzW%rwPQc zzb;qd{rS@m7xM|@0$i8H%sdG>#$wqtUE(KN5FapBquGr=N3G^jBB$M@@Oy$-4Bh^^ zT|8>O3u6s9n`CaqRo`s~kIB_^RX0z+Oqeujv#)Bh zoq0VFLznc`1?O+KI;Vz=U!|{#nRA~iVwl5BwsWXsKJ5El{xj+kJS-qrSD3cQyStflcjfTfP=UyQ$FXr z{qa1Y`1J>gg5N=oI0Ma>Ox?{*`!ew^sFWOA7@J|_GeZjOjMw1(WG6UdP9Imlyw&ga z{ag$1@%~=>XFfiWxETl+ERGr|;U}s&fu}K^q!EcUvz7?HQl|77ZJ#U)Lz;2L`o%7y zXu>$4&@bJ|9A|a71sd&tQfGQjhU)sROMi>ow=q`s^~Dr=a^tOoO^|ErdPUAd!&T@U z-+qf0%Lt}qX1|$&QQBJ6Wx{FBoo# z$(S*#bH|n2^5EW6QHndj9668AmE&_lBhpH%Y|#<&)0^_Rw4i%t&6krkDe% z8@gQ0t>9G96i~hXGaRv+nG;vJ^}MK z70L<3=81crn7Ekw{fGx!X8UaNAwNT)y!I`^3i+Jbwr1kVy|lND>$kTzpYBRr(`iP= zSmWzmIlfuq20tJx>gAV~X%J#SK}j1}Ps386M8+AER`>j8t3&b-+y+++e|j(-Ta(i3 zi4H-j`L6`+@Dsc9?Re{U&??BX=F9z|g@ph#O6a~k51v-t~gF0!Og{dG&eVVg(!FR%ScUb6A#bM2pwTYXPN?xzOAEd-zP}rAH;NS@lWDVP& zppo396`1j^=t~u}1ciw7GUUn(rjbyUO1dDN8f1q9kIF?$wrKA@C^!CiUSV`xD{X5! 
zurec3vt^Ug(8~Mh1FUd&EIMi1Q-#|2@qa=g9av+4(lsp>%pG42GH;t__P^MW0nzlJ(msLG@%B6~tO{Igh zEZC2Mc0d*mu6gc|9< z)}j$LjD3+kO;ql|r&)*75kSS6+Q6)9vUQyKIHEqsDn4|-uojy=ViPT`qkyp5!ZJ$j zm!LYf4eV4CIJBz6=VK=K$(?@@3#%Ff_XmX`PVF;Om9ED&G-b~b;zO6$#B&4aA6ULR!r!(Eg z#Y$47Z%H1C6nkwK5Qi+8P(lov8H@U^h<^%d$pUNSaA#O?6mP1>o65#N!0qLSA}iJs z3jSW)zEH}#(9WM(e8(Q2aO`%kM9}Y%;K(vb%B(AVH#gx3-Siy@6U;k0*!>KLn~Oiznzv9`su zZaYFSTQGQ0PV7?u80*HiiF1X6-HNPJg)RfM7T1WB<#@j68K^Sw+UGFMzamhAn=!*) zm1z;m2pPM195=)6&ezS;a-UC+H5B|=1#NCo-!CLN=qehz84w54?cPqgpH$%%yL=G6 ztfxx#q3J05$wP9NF$eCo(4)igUE#ycH(Bub8P7pp3=kZanR=Vfr*>A4zYQ6}9j#G( zWeTaCvkWmlnrZnq8+ElO0iW}G7N$|hUjIHQfgDY;3?q+o&3^oQ>CC^$#Il@BRPE9X zDKpw`*McWIjGe;27wR+I78CEj)x_mspH7wcQz)~3!o6yu+NHa^AF-TXYm&0nVdk9uMtGtzHoaWLp{mEf-$pIs^B4p9T!i@bP_q^8~8 z?U1smbc)IxC+{Vxqs_o9&H3R+-05nrzfAoFjt%C)wjEeNwGwU^ci1mZgWSi050Pfo zIz>~qz0F&ckD-i{TJJ=%NbbC7Xc;T37M8V1$RKNtI7mUMod&%L>p2G?%$fprl=@pLzuJ0cYy~GORF|eWB=98q}_^^y$7N@ z6e}hM&Oz8$a|-<+uZUMQaepN6FK_^AziXJmS^h;APlPI|s1(e1iSVs%w>rE-JVI4OT zZBG3dbi9)>K+7BV*!Ze*hgN({kq#-=gDm1ZIm<-xofi3pB~;g*ds#Qcum$DUCWEDC z)5_O-36cH~SG_Udu&}~N7`M;&=nuK(DL%LHEp`^~C&FU>%58+YTr46g=cg2Db8}K3 z;;7L+b=u%#Vj-J#KX9{ehEI~mDINwyb}_bbi&{Bo)Mw_>LGQL5eI7e}ghcnn_#@1$ zM`!)il%#8KBs;tar#&RtS;1AeThKfd#=VWay@UbkVydemf8j~N(55CqRbnBGt3{rn z(l)Tvp#8_;dKToq4JRWgs2RywJX|qM!=G*o5dN5fC!x0Q34PM2nH z=Hnb9o${>uqxar0bkZpuHzQ-gPIxsfz+NdUG)M;J7%uL|15!5is+7935;zVPPe(_! 
z{Yai1Yh81s5hqgkmJ{@nyOu4y7Wxb&!iE|ipA>1(#q{P`DE9y3e*t_aX~OlCZ5{7Y z;4D%h#0u#_p?^rh@92RLdaLm#86b#$`ayN~yu$QTYhUeTb+vAw@b;tGP(#UP!whr$ z>Db0aq>=2>8V7i&KmDGkQ+-(VILlQL|)aVvx{pb>#!&n}oJ?>wRP7&F8cU0I7 zt&jD-d`W+kj=KMbQf>vqhm?5Bi zc4k}ep=;<~&Vc%{LmNXITr0Dhv$Mv=5<$Qp-Q8+=>v5K1lbmE8kwaahjI`7Ky`=2Z zJG|YgzQvx|-J8GcuA3!=)sQMd*KFV23F|U+Eemsu9EcE5)m4?E&_r&qim#M?V?Yr;ZDby>^CcM7QAt0bvK zaCwSoP1M-KVcsNNV`v&l?^gp_L#@U&epQAL#QLuPt*NwO(%gGnl!i4Y)c)UFGOoEn zE8$(Vs+~{~9vIwz@Z-=WW)a*^NF1Rmvp}6~Rh2cF}rtzYOO{cYI9k z92XcTJs#HPgj9o%3RyIqSzdA=iF^2G#`G0C=kFOHf^)n;3doKyn(xen(_|Sa-SRYO zE?c|(hE3Dwm;5KI z-#n;&V+X+F=EC@Mkl^*~yq{JL&+Z=Q`JHY#2UlEbA*7xJay)l4HFDVxC(m#3hMsq_ zOEKjZ-rPfQ$NRqbzVYtwj`!ZU!+$W@*?aA^=A3Jm&-|>_bpUBcLt9$%tpG6_R3>QdKsjv= zaoPs$cdsvQA|@6HdrnoECRy`mDIY}QmHZkz;clg40>y%*10$xDl0GZ`5#|9H^WoyG z*^9JJBq-?ICu)lMMDSx6_tPG3_Kb&}h?fCMQ7_!72vTX<8{U&yl{_xHT`%}c}tFr_(+tt^&}GI2{*yA(MGp!?$L-G82+`Zu*!!G&BOcAzpErf<8}j)C zavZyG_Vh(*g7OMB+b}PM+Zmrf^1MFwoOtTIw@2)5>&%Mv+ff%W0e8; z1l20tLPl-eG1zHcLzobnPjg-f5qI(%i{i7Z=7!(1`4|EzdFAuzMj9?cXMG0=K-Vkm zC)BbQonrvWgN|&d8h>8H`98n^X@G!%g87+KOFkRwZ*AV+G^Y)LI62anbkRclcxEbW za2a1h62rn@LFc7h(s~q=i)@4dd9!&QL(g2pBzx-j+L{EhwnoJoNmT1m;lB&nDcb>J z^Bx5kCcux*V=VG2_U#naXJEAp*7vCw9gVPkB6r97E8+4CK8YuaDH1mWA?ut9dP5jJO4gm12o4@&b z30J4e6=F4W_k(H%5JGwT$z?7hsAk|M8PpWt=$X^J@e9R~A#egcb4PR{fe9s*YJu%O zIs4^QfBK}=<|y>z;D~Uyp{j2hwi8yZd1H8jOe?QYv|Pz~)Ehh>^qMznBi}FQ z!hJQc(oxEx%gjJ%How~eE-+Ntfi-25 z=MaZHX5J@EAt5`8Bnp7#@~bc{Ae@Ni-!B|UR>5p0OCc_fRa`EN1!?t1R)&C=*}q&R zVM=W|B@>x69iA6YN+a7ovKkGRpK;0~AZcL8N3H|+73nIh=&v(<*fzlUCqr9Knrm!k zi{LTXnA@~8!Umnnw7swU6C9VbMQ~BTOBDLMsv_nl;IP~`ISZ2x?ia!&ME+F>(|^i`0%~vBx5#qPt0WqKm-?6OUr=2 zv3NP-lwiiBu0k7AkbK4Es!F(|$Y%pG2bBo&H>uo>y^Z|0ZuK=|B?iP=R^l)BOmzlj zb*A&lh)aXLm#HuhEJR>(_=Tf{l`IX05<2|))p?O@TTGRnhb?d9Mm_AgoLumKif=5B zQqn&GlZGgH#Z%{cNy&K3Vr6zZb6v>aJb?+~$p`slYOSBXZr-BbYX?#dnTzY51!EW2 zW5?*fSy?)^E0s3%!)ZI5kMPQg5y{aU-nbI_Xn&G znmc*~ih&*vay;V1@F(k@jHR(th6%%p=-W8QOn3hqw|fiF*&}4^0f3(HZ#^F4vP;&6tClIAdJSXkbW=lI-%OV#ND}y0({*O! 
zGFB=C1Ir=JZ81r~XiyN?4uo;DySKbam4i2H?kl21$5@wHI&GACYXYm{Ckkc08#KxSU^TnGy2XS7@~Ec>9oI*W9En=iHmw2iQBz_vGCERTla z&Qc;(tleQo6=1^3a2=k*-%G@Ts~+A_5z2`3y_b)HnCl70C^+U`C*9j|@W2BA*1w^g zE+}m`*%(96;TDswBp`RQ64*ryc+c5t5|MJ^ z%jL&MiGh|X<>}+h4#7TLS?A)NCNvxiw8-ByGna7*VTF>?R7O&P?w6skQhM-883Em? zo@?M3KujB@*-Es0-MR`CXh0kQayP%7+vM#M4y!mjwq15{pWiE)Lql^UK4V!ehVSNd z=xX*Bm8wxCc6JCz75DpoP`G`61Me()%88jUV|6VW|)fEd^OXdVE<2SMqkWI84R@sqdr4( zwj{^RZO!%pr!A(?U26JEc|xvlZE=oU`Y7cyNNvq^OfFDP-yVGaW+OV)Tfnt@!)_^i znT(P|wc3_^xz?JTS|uWp?|BUP{Lr@=`R7qp#wnX%{zqI?E#Ggd15 zQO6zktqLH6DI}HVukT#~_ntvybn4G7ZOI)M_)xTxcX2j&B>MHj)C@Uzw*#m++E(ZJ z?x-7cEGPSI>mnSyn>|*cHZM=by#0%xX_9toolw1`UAz2^n|cP!A^DW5DKl7h%RE01ryDiw8Ccq!BI|45%#NjLL z&3cqYznZ15K!n_!uUH}rGhnvhSf;QjjM&ns0q|aZ5%}7rMr1Lqw9^b7rOvdS!gzF# z+~TkureDGgi8t1z?;JoD4Fh$+^=Ag8=ij-FtT0`or&s^>jAsB-T*Fi25pHFPW^DCx z3m>B|Oeq(WS?X)Vx4XKa8dYYJXjLR?I{;P#Sx{Dl2TX(@x2}74V|x{BD`+sbsQ@jWRpW+v(1SRmh%HjYPo|(8Zh!7T4 zJVfPilNXr8`)6fy4r)4A07ovY{-dei8&7sgN0%gRk*%hC%jm+r^i8ma~o z*5Ml?JHv>X{oS*hec`P**R`~?Qt7YITZ(%MA>)X#W6Y1al|;RVSk}MzAyeCvc8^tk zBBUC8UtQMiePviiOSnio8rra`===R1gHu;+qi2~{J~3;5CCEWU-^*7*U!s;r;{h!% z)TR$N2jrJ=k!Yg@$Js#@M+wK(lFxA)stI{qr)uGZiGeMuN0#lVGOki{Cvh%wr`~-| zC{kUaBg8X$u^#A!GGD5(P=I?(ewgHa=a1- zBG;D;l|`3Ls7T46xDRa?tVArl|HPY%2ZAY0guS^OV^)7Vg|_=tX{JR^H zp5&M7F&^zE9&=TrZp%HMK?RpdOmN#%X!Cp-%vG^jQ$>`ksK^Kw7s01@okBx_cD1Sx zzyy5i?bTq#*V+n8vn(xvByo-vk_WuapHt1h)Bi~z=QF6&#nI*=um4QE#K|8kzhK{@X^}~7h`LYf0Pm_ zR9iZ{Q$_zVPq4nBo{jGGWQ9&N=rCXiJO&( z+_hcD<&{vllZ#(376t%mus~rkgGY2TYb&0?ZAZDNi=mAnJ=9H`%=uM}{bDpfliCzW zP;%eXPsE{^ZYkVRZMsL(Ax|!OVajfpp4lbY%goDY+g~!f1^^yy=TG(PJ)lq-eFQ|Ulag@)EZWU0ThxWm-_kT&&i%xamF-RHL-BX@Y!=Mb0YBF@)4gn6 zplm=gZga=jHOP@Hu42*gJUi6(IP>{S>_Cb(Hc@0kQeC6_jcH0sOnXQb*|3UX)XS^q zokBZJm6ExP(K&S|rNR&*mmHo~B#Jyx5b8`A6WQ@n-kN8#Q^zV299;(d6XUvzY41t0 zY`&Udft^99cMy#CT5_khdi2t+S7}Xu-kI6zF|>^=&}@aodhLyMZru-_DtIj2?sY5s z{kONyczUs?!`@exPVKpY!lvar#43nx@)j4?Q&XpD(_xAxXqb$8%;k6 zfW+Q#Q3?^=?(_aS5j!|U&unpqCTrJ1(HX#je%Kzoh9+TO=P!_d@z;#66N(t@^2rPb 
z!+Dv`1JEs+NP&8AW1VfJJxd)m`TCt;=U+@FvOY^4v7#E1S{E}O>|s-3-jdxU zubS`ZvA5X1Vq=7l0FB=+{rXo3@*Wb51iPvVePKHNvvL&1n;-9*&7ghFl1tcA6nk3K zQAEx%u~YA3$S!N`Z*J!l`c)%8^xXg%BrKiBP8Tr-dP;s|Z%Hg~zm*_l7ahlIF#b6h z3zP5wa9UN{bhqmA2U5jk+`kb_R|`7an3XCEQ)cqSv|>#WvyiGreN6Z{=2G+u^}6G6$yJH|I(X3Qk2Ru1y6>95Kkt*VCckn7Q4}`$z;=NaJ4G(Rqy*tM^fn@F~eZJqxp{vaD ztEpABpVGBf;B{Hb4a$#Xf|GS47p3Flo;6qRDkQrQff=$JFg!4b8L zXlzdpY@}@@^~M!qWL_Ls61&@2ReWRAC|^ELsm6_pBdZf$FL(oeH{>>@U6oPbo}}>%N3%T)y>fk5?Ev)UwTL{^o}MAwH3__kr}z z=hM6g(6Vv>E-lkR5vF3+N8N5|qmU+#+CKfJ4FF6VK3kEjDdt)gV(R8E?fSH^K?{_N zYqaKoq+hgJm zDS!++eTb9bUq%rCuA*;8BcJu1OwztD%wJFa~v!OYAW6cjA#R}C=;<2NE)Lsq@*S4NM;==bD}`L%72 zU@%PmV(f$ZEk+KU*>0>9f=#@acX5#U(5=&!=}<&O^yV>sw-a zZ~b!p{61#jCHP!n^)BWkEi=|p23t3`uWdy@%$IdIYp#oAm~MV-Fl%3EnEI01<*nWC zMvVvrkj8D*Hk}k<6&G1aa>NNaNYkh-Qs;VyOq%EfT()@e1CFLrg)7R`0N2_k6(Tic z3Te$-rq^?B*^l`DujlcYcSg#>sOZHv*MT^LY7e#J|p9-k|x3r9Y9S#H62cKdfo z%OFq*AwUU#!sVMXuGPoW;I+_JL4GD?=)zY`<)_!IkR9}7P|c>rgi0N@ODWvHn_y}xX)c*04+kCl+EWHybG`^w?A zTU8}~lKPhkYpzwt>MFkANMRM9+AyGr zRNFkTk^bySLrm2Orc!MoFNCmCby>V;MZAUwU~L>jk#+(ccx~b1YTMWgBlCG+PYgiH zcEDNncmMnKzwR7nLY+$hg5cV)ZwL88d#X$1Z>pcc;{X z3{Qd(a_GL)^^^Awb;5BITRupFZ4YV`QJ)6~W3k(Cf{h`7Beu;c5wi2rD-WHzjCBE0 zB$ncw3}!->h<`PS0X4(Bz7vEycRdhmT!FcTi)7t?~ip z$1%k-6YA23FtHSKA;62`pS?fe!kiA zIB0~Pxkn60rqX2KUPKdl*XraPN*)Tmp5jMTw!>x=Y%IszzJh@@oH6I{-o>5I_Hft` zanKbfGOq=QtnX!rk8(|JZ|@TDhL!xSWMLakfy5rDa>%s+qO--L>uyV+shRcg!yWcu6zc+;hMF|y6h|X7BE}%UtPCX(mr#;%9DHIw(|Qrp z3B~su6kHwel}pRG>ZS%LSd8&H1w6>(hQuzwips)v6&C?^q_r4qD=s4LJ1hwg6I-6J zN$u3rb(M$(Sc;P2{#c+@+Nif0Gp>mNB$CKVBPjpdLxG$+6JXlT-U+Eaklj!sPkcKV zzfO*+bEItp8gmLMcYVL*uv0|R$RGFZ4=COl`20|gkGmc`<{x%`!GrchAyd%x77$&{!MPK}>Sj3!4oTRDx#5TQ z+Lm;BUi!E|Ow>#bin~X-y|B7o1-)33k|YD%Kn1%O{K@KN4zU&%c}4XX@U``&$cS6L z?v8n00_J#5u~}GxfH<^Wg&Yo^U+o-nq-oz{^fboff%e(Jp#My@1}0$1F)v3Es{cPC(P=UOqeX|naswB29?}U((kPnqR2-S`aI|jTzfnOshw;)IT zg;6=}wM!P3(X*Dq=ZFx(~qE{?b9BIvy(9=^o36 z<15{k!o=DD%+==3{JR~S8T<97m?UMqnZuwEmsoWze6qv2yDY0-vNjO9%^tncJ{M}P 
z{TfcOhVjFqPER|&LV?WAcU*?Jh~favpjr()gHqOZQAucn0l`YV3K(f2XfvD&zB7GT zN{JOstyBI;+4N0e&NYNb;gcom3fv^j1wr$y_%2!>Y`v`6 zNVGuWP#k`3Pi&Mxpc8xAXgyxoREj_6LmF-y50MzR5Di zMIlh}Ks{*WD56j$c_HAoA(+^ae7V)D!tE$xG1J$9$eh1?k=UU;3jLE-jmWk0S{y49 zUgjJA5R{v33oz=>A{mYF`-MRLr-~5iwvd&v`9I)VgL!Sm>DXi5=v296d39< z3E$My6dUe@9sj^>l!t62H(gI?vXtd_XU}E2j&o=kt+BoP zG!}pE@xanTWwYJ_GeXpsy<$FSL@l2+u55;X4P-rm6fcW~;^V@)AGM4)xI6>4teyP| zZ#W!jgEVjCQQ@N%vZY)4l17A1{iCT}fdBT&|HlWHeU?Yc5*$&=v3o)}^Oao;Ja=Mf z6p~(Z(a3YaHt04MIrtdEj?zFJ%SaosFQaTvnqsA)-E~j>W)Ek|_V&Hd^Rkhy| zS@Bs9=<>g>)I`iXabnI2`4~3^$K}`J z>Z&*DUR%GLV17ERZKddpa5hi{=2)DB+OF243z+U(nhy98Jb z=16Gv$18-E^lh^16z-Jl%yKb?8|su_?@(S$(~**TxFLP>9WKpt<1y3i3F$1x_9Gmp zPo%zLh(8u~UiZogHM!0eLZTzmYqfo;ZHaZLy<-VyScqAvavrx^k=Rc3Fl5(wBm6sV zL+X(T$#)7Jp;(G{!$StBULuUmOHtS`^1nLLbyRs|N7g#C7$rQ zyD%;ZePaV`+=g#%m-qLhggS2{*#by|#6Bpvs+_#-mSmWYk-KA|&ee z;cq<}A#_?#8Rbkp&om(?SrF6Xd#o*zdrQ|OJhlS0hYrIqId32FYgYK2a)THcb=?16 z3&lwbBCWYEOL{(%mz8x{d3yOKPk^q1cL5t?hl0)x`00nS;q)bmep|=6cg$;8ktp;8 zm!Gfhie7GuG=cQtJXB}3b-Th-7-4jE7ZS^UChG!2Nn5642>ddiIf4{abIoJ5#`Wr@ z;NP|Gh|fjV=h7y=-yTo^dM<@xy=l>Xeg2!5zvttKl2%a}UBev9u@4aT@Pd`{{K$-l zx`EjbC1=@7LS`mw)sodLc=Ob-u^GLR(E)o3ZVG7)`~NH98n=| z3d_b7|8M>A-T-2?IXrD_GfYP>s`Vb-A(Qm!*a`bLIY;>45-Yr!r;e@G?&Sg0(ul%6 z9vGMCShtpV)5*jSZS7>zwQA0hZ>==-uPLBX_M`Jl1>@I*l+$)qvxIjcj>pnOc=4B@ z{d~H^pqQDA@_%2WcfI;(UggK@6zIds{n+Mu- zaHn5b!C=u7%@sF0TI(3`{hS`=Pn@2FjuJ&eexRS^E?D8w!gr**E?GIL@3p(`eBK64A{HQyG6oO9vlaX9E zFzKdHNCrVeh2Vj2VBa-$M`TEDW|c0J^&TQc5oH7BrbFyV2C)Mdu^)zXXG(edOovw2PrKjx}E^>zMQ zIytU}nW!6$3RWo5;K=(FJg7OUXDO*4Th;v_t5l)kVn5}`TMxx{KT=06QI0;%tnxb1 z$9hlgvat3>D+;n{!yFg2v}LV1SGWZjdD;x-Gau@uO`chNW7ANRHe2so!AT!wnf*}T zh`u1VysX!={HAMK%8$WZ`!65Nv*Kqhw<@4@n8F@qF$8e@4=I(%i(g?H(E-(b3#?sK zovabjP$t1XUkefE%FgfC64bUk2OcVaN3}IW?c3TwgMRC*V;y10jckYsc+4PHI9PW# zRr~eUvcjw!+pT~-hjm_wrE7{}FTndD>*m*(ZJQaV?V8%b`8ART>79veVyn2J!K?o= zd^R#i{N!kTQm-65JBmoVFw;N->59p=V;!p$cqrWN5U_T&)_&c~jZsl8Zv{8N9^<#w zK6aFl(>bFv?V*abuc(azJ>Ve+M7L=N5dzpe@QYSIL{P4$zk{2l9gf?e{{|I<`vXCw 
zr{ZPa$1?cqB!`vEMv6EDsnjzELrD5uPY&j)f4vp0BzQ%~p6#|GsUCNl5GvXrDoF)_ ztWmwxL0y5t8pm#NV*C>eC2&(3>z5doc~g0c?sfTi1PZ7gK?@19!tDKUIXo^TKNq4 zcj@R1y%mMAZkA}*Z^bjXVmrm1+%er*9c^_4c7hzb3z^7dhxVeMad3?|csD}0L*Q6a zYJ@a7{JPlErQpSTu9v}bd;6Rb+^n!w3AoFjHsVNL}C zWhU9xQd%o-SG!$#rWZNRnroeGa+#8Zff`LchCyBpA~;6_;bk6d?ZZpygdsVOk{J;J z6>}UfUSVwA2glA1b-F2mpEj8v%Ya*#^kpQFvpqh%)d*e%)K}@I8j=6br$gn@e0LhU zOPahjv*dO2>qS4B9CPy|gD8D>AAZHo*2ps&tpk<(!=REuZGQv2Yi4A=vjvc8+KjBr zRYSI*a)|%m0PjL_x8MVYFjl)7K1G)?XCW_$RvGRy)8Qj;imx7}W41XR@sT(nCzoSI zl8G!KDv38nyv(j$zEut(&Cs(w$`=5td1ldP=C;&~9-^kHY>gqs)|ng8>nKn|YLmG_ zqEhw)pG;Fi147jnib9Q|uLbx)`!o0?sar1%QSNu%Fv)4%P6!2gQr~J*E8IbEnz-fA z;rKp;`B8qXzIdw*ZMMGFxx0}=j%t}wQHrK_d#Ks3ar#hUH|g|u3JbPR4~dxRcSYx) zVcLae0Al-S`ZA=q@ z-|i_GseB-uWJuHI#kWdO=a#E9Oi58h0-Ij3h#ChGUebjn0U}KVxM?y&-`$-zBu{8owIMz(JbE~^+b}+7Hs!w8f)9<^fOBQYZ z)jrOCo;s!O*h7qOmM3>Esk!q7kmW^2zuhOul{O!^2vcEgPi3tphnA-7Qz)loI^pus zM#d>}thyn7SWG(EeO5rnJjuC&FE#Qr3)$}qx-17+C9%3ROX4#(r9O|dqgSE(ih`r| zudF|fU+He>e;)3;qkw{O-_&aD6VG*?Q2?o-tgH+qu;;IiRyq_zJlSM;Tl$}J@m!=C zPH3WO{0!{)^{sXr@a~XXTq<0t#lf^w`J+2lYR~iGyh?`qoezhcdX!9SHv6qeMCK3W zE2L+B2FVK{h*jgEa&z2Wmq2)z)q*3A zTl&!9|j2( z(*ovX`E~SLVnHj4s~pN_WlF`xWt8 zsd*RKrT5ZEa!w;Iva45a+Cw9_n?RlS_P+R5h_|)Q7*IxYs=eM)q|zq-k?you!iT2@ z>9E?tF}0$1%;pRtyiAJP1*h^t9l#vxzb}@yFVRuGGsu6Es4mH;2a}kAG*^6EGM{We z9q(8AYcEf2az8b7!7HqK3U^u7hW3lAtdB8cZ|Mj}T6vp3tF85n-3pRw1g+gLqkl|V zF@rEM+M!k2g{&u$U~41D{gR_L8FwLB1)3Z@om`}Rc+I|@y|t8(F-r8fN0G_r&qoXK zyR^-3D6c&}Wk&SPeo`s`#`Y#KRrI*yUE&8uU-{A2(hwRr^sl*hJ6)mEP{L%h@2}>r ztH(`3MP(}&ldhHCFMFIIvGz4O#wL!FtI#X}TF^L4n#0?$)s8`_>z(P5#LYtk!P6== z2xLP*Op9z0s%4L{Eebg7sTnwlW9(?*O66h;;PU9j%gOx?A_jaOUSKHMmxSAiHaAo2 z7$TZG0w{yGw8inwyxc~{oFt`D06B+l%=akn&M6LhY|ODrZ037x+wOX9=IstJojZ4~ zyTpnHDdhAZL-r+pXq-@2bB0>E%u_h%tk2vP|BCj@kM#n{Wz)a`nCZnEG(36i{b8MU zZjU{m;&D%(CVSG))M3%o!Oqhd)KmZl6=FPeDJDNfUwAfXPg_!D*xJYPFos2Z7mVpO6i6FZP;5w*etuPcX93Au&E86O7JYF6G+)z(9Fa)cx z{PV$V|63kR?tEPh4Db0Q;*!9n48F3NpY^~Ac_S!DsW9ik_AXgynqtg6B!x?-@F8#n zCkH7F_csx1|27z0m6}IC$597#Su8utKu7`4yjd4Ou% 
zyz>aiAKY?X(Kk?y2a!^9b!A)9>_(L+MsxN1W~4nPi(E4&Ro~43s$-Q>4S&ZCPe8|J z-w4Ti*f%Gr*Jcxj0=hglZlb(Y=7)+hYH3o-L^t;22>AW|@|s%mLua-gIcB2>JbeV6 zDtI8yPt1=gxtz?0bXd`Bm>F3KHkM(u-nk!qmHFTpw(5-W;MRKnJ(Ym`!^Z~{hXebF z<_l)pL-y=Pn(H?eE+IZ_8Smoh_`CA_rbmDOsVfg$$9X&KaK-SS63mZV`V6xGo|X$# zto9JP_K-JrpryYDoB!zSzaJ~LeE#!QwJJD0@=#mTZ^Qrlv6{ej-3qEEhm%_We(XCJ zARUu6h77hloT&ffv7Eql7A!XkfA^989HnX%VElSl?3#oQtNS01jUw(L=PwNX`-{W3 z%rpR;WXtWcsf5EJ!@u9QO98k}m*lPWzrQ%Vw!J)n(J9#*S#STnd-U)%#20_U0$i7S z;_eS=`}czWGO3m^fH)2BFYnC$=f?tKSF(s=`pE_Ge?;)t3?8ih{{|6cAqixV5Dn_| z>yi82$NO7TM2T4*GN_aC&x*KDhZ583i55SxZ5>CKKi1Z#6_DMBv(g;DtVpKFF*i0j zNqtAQI-1{MR6nBpL-EdmUP%d1G>hFvPgw5QdY<~j#z@Ih0-F0^+Jbnn$KSW0@~Nh} z&S;x4n@Roa!=EauN=4MUubp3G{_&b~+17wyE-el?{%nB%{$Zse-cz-c_!~@qXT^V1 ztc6#Ago|R|izxju${z#!@j4QKcdFJ2mqh(X8gwImboo&QsRdx;UK^G=H6 zuhuXp+k+Jxt>}y|`qN-O1k$ppw;0ua-u}5%JLSOw#*0zb9!e$r5a@G6TPYp;kmv#a znEQ`DQsX4yWaNmCJMkTopp?j0F%m#b$?`h{|Q=SX$1f*oG{iS)l_@z?dY z00D`@hW-<2SOy;~;J~Q=n(r?$`1K$gVz5YCm&);Ty_r57;f{SCPBP?cZ%EkoGQT;!t!wE~`=$=Re zd(1EetHv+?*T<46)b+)5ec#&H4%xq_?}$$@Gce65%{@H7#`=Wq#~!2H{($?}hd-C` z9V?Jz%kqc$gZ{CVx+>p+vw7wD?0}SzMeJbq<^|W!=b<)C-_QU zpVlm$G6YLEZ9f+@fX&fh>66BEWiw(4R30|_iscRa`<|J=WSe4j}IjK^}) zd{Xh(R_oJG^$d?i^|a~PR+#?Jc{)Gk=g_^K?*O`xgzHLV{bQ8BU3VZp7WEM5P@(C0 z!WCND27G*cd^eoOpz@i%b$2%xk1chn@W^A~70lD*tJqIYR!v*nHrPro&~iN(X@}mw z_Ihf3S5eZssZ@uKSaRBbmzG;jV1nMl8e20rsIGmsd~)e}ONeT{YcIWLE_ z@xYyq{LW|Gc-Cz`EoQo8z%YVcJ-xX&OWkz#6yMA~xnwACq9CF=1RU*)K_TjV15DQI z8VaAZJO<8I(OGC)}R({|wVn$+P>6?N3 z<~(a21_60Y=Z>)Zzuww?mz7qld2uC z)z5(>AxX4TO(8LG0Cws?I8F#HL%M6d&X;_6&*r9@rk+*oS;Jh|*(;)|yW|U;Ub~xN zlKUR=KuRu6Hh-mCnkKz+bBS1wlY42{Frq<|Q@PLwbOmf@d(&F$sL+&L!Sxu>o^wBG zUrzZ|Q&B*ojAAe?Km^QRck5Yonw2&^Bd=cdC9#UQ>n$<8QC#nJWJ_anLg}?jle^@wqU!NF8lI$hlqF1-&>_fwPIEL&v{ zqqz*3NolyE8%U$)a3xZ+opB&XGbmRP62{Hg5zEPkjAOBbOKOl9hy8DlMVw4s( zE-{a+)}&e2jLf22Yy_iyUhZ9V3Y@b&&6!d9!V((PA}L?elswq_alcGJz@W<^Dtr?dSnIdfmG*f9Y+gh4^1Ac38o6+1nOf<6L$VH<2x7%)IXO-L3{vb$VZg z_a;6R?0-K~3oEvr5;6l(so_uo>JXdcwnbm 
z{x9uRrlManaKtkQ3SyrO?BVK#om#G8gx%SdLTo9m9}F@O;e1XhCW?>dRRzOR^F9T` z>83Bf?-jci2Ge}gM%(m#BIaO?T{}u;(HvbW5iZr3f z`}2-8o(ZZaMySs02&*D&JRfx(xrUxsf$Dm@cX!TwJ`O)`&assg_+wKn?3s3AQtdgh zBw-EY?AEsnbcFFkCpBEm-!1Ro`8dsmX=+%VY<>C0A{tD{E1NXtq{1|!U949NTVnz& ziedW<4WrE0Mw*Hrk2f@7s|&>cP6S&`=WkMt+|T<84k!Y9kQ;5Uj7dngQg0pDP*u5i zd34NFY|4>T%HoEsa6z3~mkrd6B63`BcQJ`6YxU0Y33>L)(WSrS$^)*6cgRHX>3MFR zEj>+(yH;y!cYj&C=f%i1AM1-AMx;+v37!+JbT{yzs-}9rCxZFo4&Akd&w*o8TSSAm zNOp-eo`~jq5oJc&<#RH49wd$r^|HcY0Jaj7kiU~)4PZVTCa^RuTO)Ri+UOyRAC^5s zHhI=8^`&%RE6_3@{z&OU{9GxPQt;7m+!9MqnHZ`~Qcd}65 z0BZTE!n7;{%B7QH?;r79|LY~gDMX#dmzzVrMP!u8V>SV)sj_+(c_pTKcC4dw1Wzr? zK3d!EIDb}iRZa)`Ewo`Iy8bSKR9H>fg6SOx-a5JsSjiptu+0mL^rbyV%>tBcm`q$^8CU%7!zMj_cSS~Src}_62T?ncF`W)>n z#Fw0?S4nYF2EsD4ZE@F{7`Cr@@%OhcoW{PObH^RUGf)tWQto7&svM2KFXsMeU6L*B zD0u7I{>`zwplP&MAQf~YszMZm)%$p=E9_`@ ziaF!_cvXzk>Njbm#GrKLFpT@*d{lMmlfJa~32;@UqTW^G zsV+zd4FtUE*({C>j^fXi7djX3d_M1+!en?-hQ*yOutZGETS5PU4?*2O>ltQH=VF`y zr7g`={MmsFne6hiZSnv$7i{nF&a2`ts_8_I9(d-7h*NH@lgNJ916tLlv%FLd2C*OpP8)kXK=_YO-T-Z%8D z3+{gZa?sVTEcH%ZOoj)JbHKL9v$d9~?0%$k7{$B$Xq4AweaUkwJlQ|ZefEon5taVn zd9NEHw+8$S-bani7~S?zPO0UPSc=fQQq-M%^;;?0C|l7g#cQXB;8gyq?A=ZL3;jEI zlk@z%68u1Q;o~f00#$z1oC~(cZ7VpJbmkOz=Qi*!N9P!*WgSVZB_CH@UoywKW?uqQsmh>5sec!>TOQLx_BCQdXHK@2{k*VZYLwQ0Oj&;>b@fNG>7kpRw#R4!O7#m=Ec~S3h zBhxCgt+F72M%aTc?xPpUHg4*W-o~Gx4pS`b>}mB2GL~o|h!mMMA1@1z@W`){5kxLU zIDGR(Ap}RP)nx*tiT1BV`klI{Ot9qajf$y`k(r!D<>*atMX3rl{lfe{-2xQ}RSf*o zi27uRhNU|n>h-vFUW~Cb@7*tGHZjhq`LkDVu{LCVLl2q>{H%A9qX*7rSi+!&gl#=E zLb@aB_?DPdV_-!cidT~8+xAlVU@@qwYtt$Nc=;`fxl`zw6uchtl#=;Z-d#}^Gy!DH?O5xw_Wa{n__<|zVQ87#> zQ55J?*pYjf&L)gxxL6}z8UM6r2Ayl?rN~qL-E)2*A96u*iI^bjJ?o>4qQLg$D~A<8 zFLB0v17CAl-i;X2B98e8qc^iwLfFB@9T%%l$PP! 
zIj&vflqoh^pZsLf(1!&ui|uF93A}iHx^lTL@(7Bj()b(X({_cY(O-+og2}F^A-=yB?5oDAG zaI?;VuL{hR|M#fU(Tf|YN) zn9fB)GIP@^^mU^1?CP{17#Fssb5KZ1O8BgyWxxU!U1>u5+7V>Pgm<-e@wXlCeVyr^ z=_TyH^_%{9NyC?KKZ8y>fgq7$_Ni1;4d~qdsCeoAj_iH{UMv0x;MKguIfEt~+Q31G z=oWe{yyi;ftat^kTXEV$mhOT3Gs*+J(isw`D}K#i{i#g1C0t^P>~Ox;B~$6?n;+|$ zJ?=+9_TD?{-{e_}QB7--xKcTw?P_v8=0y4YW6`j5{<*$uN-Y7hOOLbdjWlDqYEpUZ z{S-@cUVn0Xvo)bYASL(idfK?7NSDdVqkn&n4K3@NqxfJtzk?x2LzHmeeVRoC8(K8& zbBeW@Y?t|N>G0K?#*Kj+h`iEa%Y!6_YW(N<3>Xh8*|nC=V@0X3Vap>MZ?{{kq~V_N z`gQ$N`(SM~$9-_`qM$D|zeipBi7^is6&?YCm2z&E%~?CpFwr0$_;)XHxj?j>=1&M! zgnEajRi=B=ciCuA>_HF5Gb}LI{K4)O_STS$> z?e{7D3`gVlw~{^%#lJJsV!HD_c5+YKaNFg9tA2lvG|aaCTysm<>JxC`(|Gsq&!XsY zWBT&$W=p31Vf^7Mt-@Cn9m9kIyvVTv@nP9vX%AYVsl08fR!GIr|aEqx@-W7lWpug!M-ltt71V%yHyT{%tO zRY(^{O(UI7@OuU(<+NK!!V3&Qm+_Q}pUy@#(Ta?+a8B6ags(q@`0p5NVKZk(L~~1d#@bkuK@(4y9XiL^`CqM!Fek1}5%- z&+mJm=iWc~dpPI3@7{Z@z4qEmY{YU@z-f4UPF(60+W%Bdl&t3N2>SmtMeNj?uYTihV6U)g6k>nIzK^Z- z)IZ%L?4Wx6qPDYGIhm7Xcz=tG5 zbWKqHON9&{i?Es&-WzE2UJROA7a^D3e)-S~)ofkp2<;g8dWYtwWpazD=|y0*3HCKN z*UqIKZET#43ydXCqm7G_M90{NJ+go3eq!^=$hRK?WhKRo7FVb8Z4d-Y+ey+?ltzA0 zw2}CCClLXxAgp!BHMJ1>-cOlzX`Brj(1c^)Srzc@RJ@ zrJ?@(7e9M{1i(wa+omBP3>Sb7+scJp;9}6?#Ox73hXUfAsd8zHi9*F%B>v)%Y64e^ zWY@#eP==KjqQ47nuU5(1n>`^FZ#b7e-v9F)kIMU7vINct^Yp3I8iP#-T%m+*jD7zb zeNiUR!U3)Fa6^IDUAa)rNh2RGA^lr4uHzLz>CBL6Y0!f|+M&|;K34iTl+g~F$A~fm zv!8Cf+4FZgG-?br31t02{ppDFmTH1U_X1^$?0bE*05x>$yH>)09)aoj%MY0I%^B{K zxuikOJ(C((7xKf4>F}7_)M$<vvzhlKTC{V2s{}PA5r8_588c15(Rq z!blZOQR#RNt9oMNB3Z^+BO9LM3>!l~T(33~TxM-Ab3KkcRn-#YFKm>ed>(pt%oI^{ z!hl1#5kvpDc3Snt6MFFJ={vkfa<*vO68LuJ)5_Rs?+ON8qw~-?o;z^BFKx1IgW@-`UCYNDn}`y<71rht}61 z+E;&r`a{uPQZO!XKfCoU0)EerX*WT~3r6|q`9|??WKyantNHa-9@L76x+=Mpgw{4F z{-Igei41!)-BX55e3U*5eH>3kdSDn{&Cyz}HP=RKHb^2%y`pHY|kTxzK@Oa=s!D*vf#)z@4b}{ONNEzc!!!f3Si1CJ3kR z4FTu%?;o3G3BQrB^tH(}zt^+qiVsuh^O|O?i_nqaDE)r2RrUd}fS?WJ>%IS@zX);q zK}I8cjHjarue#0c8%O$CW2F4NBLs<4DE{cSE)kA@t12v02|;~<_k5Q~_{fr7C8xmV zBOYuQP}T4C5cFemr+7-;Y&@Bpb1hBJd^I;{Cyvx%>_MGVc~t6<(tO4u)3Fq9J 
z0FCEM_Y`D|XwRQ}-cFVK=!-+uZCzjr6&&9K&#%*WLV`HdxSstBX@B)(TpZEB{gYn? zIM{UHSV#wI3ci&iQYorg47vI$diM8$Y#n>(-9ts+K2*oea+jrF6~Tr)1g^73IFLL; zQf!-K=jy7SBl9^G^HGRzNcX*^n0QtWGs&BJ5%F}6Vx5$pP{ZU&QdogB^Y`9-+gsQN z6hHvV`M#Yqq}HnQ<>hnwSwmmd)XS@KVGLONrSWx^Tr^c0&@#+5etfG$KS^_FO29>3 zx8PB=rSI=c*2R#%HDgNkbHq(3K5wn8Y94lqW4!^gE!kz^H3TdEV_y0qt=C$(D)GMa z&~N;SJ*4vg(s@ip!waxT7lO}v22W@eGLur0^F^a^&MXp9sPO#u4TchxDu(u#Hq6GG zkBY0f&nbg;>>^f; zrx{7~DfnWozUtdw&_Zwh$R@Jvr=#6$9vTk(Y;cE3JTBb)Lg1unVLQjE0K$?{KCWBY zW~TDI%jdQqwPS+8zfG7X?%PuS%Ipbzh(lKq-HAM#kB~g0?EBjrhGS8$c{Z0t{zELC zXl>B4pPFlZ)}noI$T8-Uw(Epxck5qu?ZrL$C%_VONv~OR)nebP49Sm_NbX(S5C(F& zk&xq*d+I!oGYG2}ZEnx(LMGS>EU1P*w}G8j>~dmPwwp9#uPuM4i6}dY1aMq64mPKL zy@KWRo--mC92eEJz%~r|k%NhZGH>&+gkol7K1D**$>#0HjRh_(ZN{S4CNegghWzun zcHn0P`6zQY@25arSx2*TD_M>qctG~8A37q8Zo57xa9({3N+zEyxnty7?j)0Ad6;%1 zbk1nx7wN#BSX zH2fzivb)Llp~Wu8!4QD@)FG}46E-s;ZF^soy^x^of8F=Z_*x|zS*^{laHkS*i|iY@ zE6|MEm~Jn`+H=`FhbT(I$9FYMz`%)?R?E8!2dmD~PxHCn{ppoLHCCfEdQM|Tyjb>! zrn$w@hVX^ij`WeJyC$LQGyim#uy52NI+Z`Sc!gk%Xm_h(&IK)UyvXWPOk}mtqyvTO zyDWy}ow&|pBAU+@PU-VibG5_boBAUbb+4Z+ygLnUm6Fm3iw>t#(+ImrB11F?d`O9i zESM460Y8*{wX@D(4^y%K%*JZob)`wqthr`o|Jq(;gWyEBPw{{{&!BsR=vv``zsw9J z;)I3{5_EWHQn^j3*XUhn|EG;9+u=CTeo;M$P36=bce0Yda`Gp~i13VnB;K!2ehR%V zvkzren#Vbv<+O6UI+u65D8c*Ldnnp*Wjs^0OM7Kqeute%6}Nma}WH5eZ6 zbFmnt@1nVs;(6iu@buR-0KTqhgl~^p=e%I>=1wfDq_ED)yo;Zq#HB)^{ohNn+11qi zWZrdb+~ABf_*dh~|1n9gEE32oc8{oNx~4D38Xi$+>So3fJ877EZCLtiL zmwgdPPkBYrwf5w{FxR*7)d4ty{OC-#gXi<^2}Tly4Q>l|zIb%l zm4mVlj0WgBNtAmBd^_z?-_T1kluR)|-iDJWvECA6Wyx><2+2fGaj0)!X0!EWUaK-$ zk}6!e!kQ`?=fMq3oX;nQ|Bm$;eAhG6)$P8Dr4U~}t41Wh)QVf+r`e=P+lp}WuIV;; zEc+TfVKZ!g?nQC?tN#H75h%iClYCYY7TjgA^r7+uhtJQ=_g%KA5K@ZLp z;4$CA(=&q7I>R4m7NK)ZD@R)wDqpDd#3(@)2}n!8FZa;u(3s!~Lo*zkvHj1BZ$XLE zQ|}qk32&C;bA7K^n3<5=hdAFP66lgF99|B~#OqYx_aR&CzbT z;%ztotvuoFx#Vi~({UI6>;kWHJo-MEuqMlJk_EWOO)i~JIea(-rI^uTTw9!(eXnGZSRO}T$$siFhmRfhS1U8RS*%kC$VwK4p->k&vwfgZrI<6 zxlGE{PfuiRG@zLjq~Ag!L|%(BGxn+si2qcHmkO>c*{;5fD#h*IKl@Qnx@d1#BX{@D 
zw;BvS!dmAhV(O>eCv0gRtB;WpBs+|QY}Kj>t3DZ#AXqv6x#YatezH_IdNcax(e1IY zV>AJf+;*YBMn4px4l(8-v|k+~jrGkQ90Env3W@HZld0^#dK!O6K})Zzk_j4uwICi# zd-Z=jF5))+nYgRmcF}h|tLZwghIC*_k8TT?c>pDs&~UXnw)fgBpaW^Uv|wDT9ae)*FSpWo--SZTfJKgU@}jW z66g16Taa<8%im3U^dAAHhJ|^;GC0brhI@GzMMS-zF1%Y)`T z=WbEaGt5%aL9UROo0=ki8sV}jK?cOaB8<54;i$R7(bigdyEO7O6kdktLH`qyD77{U zBUdR~J7>P0cz!CfH+8g3wvr5HtXc7Of(srq>frxtIsj2@1#UfI&OIa~@tgtHA;O=* z`B?jje-qSC1j$t(7p2q%fCxv-4xeATg|IIo{bU`!8r39*!MfBH?iQ@RLa8iuYd<0rLjg0iAJCF0t9{T6$j$=Q zydSeY7{C^@FM{nj)bfVz?qT2FK5SqpesbNXt7iM!>4&RI90mbR9L!qgJn|usZvD%> zjQ%$I8Fi^kPq$z2A2bO`gK4@owIByFz;IBIQZ$25BKEm`I06QiqQgaDSvpd z)9hzWLjRnBRmaesC}Y{uZR z<2-bn1hak|WQF{PCo1jpKJCWl8oeywnaqT>Bcx(ehrs>DzhfaS%U-B3UsPbJn#@yZBKlz2LzDc)Q_p;|e(%yE|DN4C}Yn zCK9|ksGEejoD8s(tlAOG6_7fd%5&NM%*&H(aX;JxFSodzuG>jG>HxVB&6yNLZI-bZ zzA!jC^J*eNET;WaX^gmr;LdY~o{#)YbvSJ{`exZwzBuCjseJavDGp_=tNFcR&0x6X zk=`L746v1@__xj($~4|F@JRXr80h*F-ZIs#lI(A^RdyUfOle`Vo z$-6Ca3wVAk?S6VUjtL>Y+G4D&C<{HUxgM%4SrkO@SezVv0~)APli0<(`ub~-_?PB*Z>d&LhIeTbRgYJNNa#R6>Yo1EUXP{K3iFb7*D%q{H%b4m#; zHa|%yoX2nM38}obn47*3|%0S`?v(t=qIpsL|mffSq#&YHlGf1fG zEf(8^)eDpiZKM?UXH>PloP_#;#8WTghST`fw*J6BY>gVc0Q61qJ&~kC@l47{uYc8CtNqJ)JfLGz2FBGa|Nipl^||^CcpC(jw;r|B&Rb;+=omOK?N>qk z`uz0tt8g$FT?IU)wPZUT{{T<;f6^Ei!KWE>vVp@4BKK(_kz@ck2rl$&2@ zx^vhZNy~DJM*?Mbo0eR8d-|Si)DOJUwAHeR@Y)ULZ|~j~GkN-m+oRlk&yrGV5Cf4@ z%UZ9nIP4o^8uVC2IXO6Ot*0}*q;K=zE7S>b*Ke=11Of)Ius*6obeA7~6Gbps0QQ!% zdUx}7M$f}x-libRqJm$3z^+R-0T6=Tit28u&5pSRU@`UXctPl#_UTDbt!<$gkdA_sXn~C5oX}A z>3@IMt6AMzY}Nt#@C2uxes8$P(96m2nk1FBa)om`}2kwg$C*nbCGVk zAsP&7nR|La+(O_QVoxHph%*5R3|;j zsSf=;lUS~16`8Ck*ZOQeKz;XQz5JpuF5pcfBOQNY2UQ$K2Ze%TwCX*b0>*B0hWHkMgL<(!^G!y%whGD?x$+~ zVwXz{x0XykCkZ)#R7+42UwG#Qq^rB4{>`JHmz2tIn#Cl8;riB(v56Y>7uj*k!*+gkH-?{SZ<( zpoy{P$b2r%Ww12G@$}ren(GslZT4JPv4t?lVTKveIM8b#!>i+Ic@FMml}43+f!~Z7 zsr9FhgOfvgE1mm0LkImI(d>A9wQ$ZM1d0p-%me&*fHu~?6~aIk_ko$4gesp+j`PoZ znFYi>qv8_F?k^e4q*6jiiRPO{*b4%JRwc6Zg7~s?AWZ`jfLsG3_n2l-012t} z@=2)$c&r2JO8|!PV@1EY27TzV0+=B!pGP@v$Jfe>{m>FvtMsRz|EwoeXTRc$ZM?7# 
z<;gpK?BBuw++|t`FI6F#VRLKUvz3drBwo*R|8O&lx_s1!RhkP6=K9APj#xE+tcEx8 z0Xuq&m`BO7LpKFE2c(_`GnZ>WLKX@d5II~MOx})z`zi|!yGOvo~>I1mcQcVoy zb0>qXXK*A842v0S*-HP2sL^+eDERcQHBUJ(^Cks6WLNO!9~*P_PY~7So_9XlAYh+S z(xC=x_hBo~V8y*@ZI#hWvR_Q(MlkLf^h*u23R+;XyMbniRtnGwEa;XE9d}9yuGCxG z%!V_+yc4D9xt&~gk;4Y(COcRh+l3XQk*fsN)N3+5G#GC0fZOKc@f8SnKtp)8>ZlzF z)gAnm_n?b}xMePrf~;8P2lKA3**?rFIyyELle0jAmt(0bmYgl!#5VblfY?9lkHJX? z?}fooY2q17Z>bcLbJdtTvXall)jr7IoDQkn`+QyPiP1(3IXn-v!*%gwpShzV6XAOW z8|@d8pkWX>s!l*$*MnT-6}oSg5ekoov&9VjZWf_~K$m`VudFH=&>3>8GK@82r_wF9 z%X>7NAix$^=^xAF;_VKZ-c4=MYHoE=k<_OojiK8sO_WpF6aS!@(l1#5C(C(7X{tR4 zwWe*;?2Q(ea$j+J)Ij~N$DLI`mWaS#m>IY_LSV#g$ahcvtGug`H5gscG3YI8S0ou^;_yBc4jYXqb6C@u8}tlCkKT zfVU_0wduKm;vK!!mk>p^_#7PW^A=U^4ENsu2^pA!8nTq2_FfD9{rW&EbmMm1S<MLfG^t`&xou zZ0V#L8+UGGB665m8AsTYHC>OxxI1z3bCS|X?09MY_gIbr<(q#^m9b~9V9K|B4>!!^ zScyX}FxvCzkMm*@I6;}aeV8`*>95nr{?|;m#-GJ3kgfDa!eO0=Uz2JQFZ-xHG9jOR zf#HUtT0-3}Okf0`wcYzNS@|f9J}@L^j8A=exOhto-DDocll1ZvMBO~Aq=1&Y`bI09mE zV5&Y7AymS1*B93Ri>{2N#B?Ee8IpHX3TBLuAG1m5Ye+@qB}9K`xrV~N)VE5g3^h3fT&UN+i}D*)w*rxV(LrCSw`3cM~m$|d+6p%lc6->Wny;vC6?)z$d?9WVc` z@AoIy7_lZJ8xsZR@pEj(Z}!-0@T;Z<0cBK!CZjLsuKvn&8?+F~Gz*DWxocsRIBdXx z%13`h@S48)B%;P}c{3oC02>pS`ADu}nVUB5qjLEMan16G;G}J}8xeC{5#5DCAcm-4 z-V@-zpWAt4&k2yadT6z0KY<2o`|*#>`2yG0*ETe*gR&oa1;W~|XQO?TBl@~rtImNw z&3`r=Bc2zz1Da2+u9fNEo{yz?9aKkc!uSZHQ-=JUO24(A;Mj+{2G9d2hm-rheds2W zjxQXjKTcLdX$NETq%9B}tN2720tM%*F1BWgj2GdcxXs<{Q7i3?kJI*wgYfBV<8Y?$ z&c&uN%6=^Wi3KK%O9!_d76_{GY+vK7!dNH#ikCG+KJ2`{dkKBLfiH39tFhzFWjA3r zlM;yx?l;W{LrbGKVj|&PEm`|${Wu}P;TP4oLHaXb-?akd*G~W6P%8VE zb2(=cvy>HGHnRfcj+Z11MWKN11&F`!! 
zrLdbF;{+2&C57etGTpnCC5`spKPjo$gsDN?i}ChFG)8W6PI*^V0rRiVz_-^aXF$vh zXr%tn3*f+7B0)BibO5l(jv*=gs-Z3Wi<Ux3``8<#iybr30yP8I|%Av@)D4pB`<;yi6$f>7Grz;NKMBkyR7 zrb_MeYtS|o*cyy>sojb-wiC(BK>iE5)hU)^0>>cf>ujg&Z5aw9tdX}`7J?JlO`WN` z{^&2|os?~VW?G|8a2R3Ut~B(icbalRGij{qEe;b^Gq7`7-QU4$vyY@GAT^dnCU`19 z^tNEg!6%?30U9i$KN27W%jboe9&3W|MG%_#!9zR+On!hSdI8LB5i4>+S4hVoS3TT= zb2a9`bOM24$M20wVT^J2D`e2Foe18mI#Z@|^v1J{T{H#kLf+g89K=@h>jyylR%Id; z(JmO@AoMqs@JR~z%?7e*U>YJMheFM^)=(3x{|2g%4_+8^_#C6v1$L!upWk8l7YdUl{FN4)o7)1wt4rVsNh|I*4L~%k8Mzx;beXO?=Hd;fe=|X9~ z*!)ferix=(c-;1;&OUdC5!t4`9m1?^Jrb^gq=5YNfLY~n=ljlDFYkMV;gNHi)1-3w zPSeFZRE(ql^qImnRG9_5LnA9y)<+%Pr*gl<7$qkqvyXwzJ}o%vq5c1p;VLri0wMRd zWah?SGK@xytiS@sQjR>nRTf-k*~DH%sU4F(7jngw``bZSu2dnXSC4ha6ET_7MdQ)F z7+?*5&?AR;B99e%|MAIXHu|Q-@U(Stqf!Kx47m5E$^0j0x4LtMx-dKE8x_^W2TbtZ zl%+)Ii1~ZInofOWtpL3r=yF{$!uF%TB;B?MkPVr(Q_dN^$V;7F$Ejee+xmz7Xcd#6 z{oVe=^9l2474#F5aX+btC_RZ<)KihzUGSNN?CrQqDyLgb7Cr$;#ij`1dsZniboHua zVA&(8&z+~{H()wb&vyGBdYT8>%fv#%AX-TF_QQ4vsKJsiU#zmEv+A$&6JC}0i?R10 z8N6LZrF&&w$h7(*9s@c4Pg37wJ6&D9yKM?8SM(#$v!d?P`2T}hrBv0jbc=>2dmYr9 zu**v!*^9+X4(F@YP=m-j1CoWzU(>pvmzPj5MD%=0)4^DP1UT4OKvYRlC`Pyb2A#?N z#;sgNSF(v`?aI3#p%Jo3UD`zh9<*&cAS*Z^ZlH)ydsVLq^zr;&P>nHuO)Sz-&h3O` zB5mt;z?~>FAlaT^NOxIoz;?PCQyxI7$r`1NH;4i2iG@U-)Is_k!+x!1Z57wW9v>``_mZkzbJM_72^KI)l-i61R^@ zCMUC65>*u7Pud$J$A@~R%AOoB{K~yQ=T|=e$z_sa)zI-x4i~|ag#R{LfBW{EF9d%T zBk&v*yG#V88**y@yjiSQrqo(MVGh&rYB_{LC0=V@UrdU~#7CwX)(NzC$2{!|-bJ*e zlAfWvg*47cbPdt7g9*4;_A@2EflmP7k&m7iycHfSD55%0Cw$IZm+mQ#36^3GP0XVJ zM$(21xZj5X4MrRlilJd*yhBhrvxtOU^%kaLS-~++KH-*~bbAE?e7qZ12aXLKdBsGM z$J)T|i$R{+7EbYvT6X*6=!*x#7`1oQ1yH{+g1N7IXe|bYkDM>`tRJm*0oCG*s5am?5D=0bsMDiCcmy z=S>m>?L8Ay)Q9lc%&hZEA?ppgKY0h)u4Pc&r%U(z%LB9Of$V8#;E$!u*OjdyotAApMSq+A;K{W zRVDkf)!`HE+`DsHaGzu#vtC7dr+xHN@O{#4piTM6{rWNZkO6STd(-%Yix$*lJ}L5U zNBWyYifn&QGLT0*sD4XA?#XKL5zc%*mIT#>GB?a-fgpHa4CnCuR>A>SoC&tJGa_| zY4bnwcSDe0O%dON{&koL62DsOJ5p)DR#ljgx(BaoJMq{{+;yJAXX)?mV^#tf@TcL! 
zK!V{x?p*IzIZhkVH@@)crwav2?CYx+jS-kze==A^4(k&{GG+jhAi_(r{2h?hey-(_ zPn1B;y|Tg_j4zB%JUK=F84Mj#ancPuHGWYV+KKTQ6!P$pwMCp0VuVlZ)>#qZh48KTYWlhVDz+054LLM<`V8o7j#Xk-Q!kJ>X6PCNu%=bI z8}CX4i64g&wocbJ3kmy96kcN%HXJ*?&dhIduo<3B$7o;|KdC}CR06h2Eqq51<@SJ? z(aZ18MDF>VbBP08VT$i~N8Ou1=PP5xpdM$`@J;M})~e(4bf=?Wpa>+8C+0Od z7ieE;#JPst$`xF^q|&A={(W6GiXT2pKM1ygo`HZb@9J4}&ArauPf14KPumxpQ1NF5 zlq`)}aoR*D2vtdNk!7PL^7O^;q68lhCEOn_g-p+8OIw6>NL!rHp2g{H5k3wHhW%FO zdtLl-S2gub(Y4jb;|}bP_v>7=*aW0yW7dRyqXf2o4C85X>*_QYdp zSAHO*tL6LQXS2tT-#xqS&^F$F;0u+-vR(?eusIPK;K7c1v2*;+lgNcGKn~&EE9269 zf@jA%?wS=NrLV*ZUh`XV8kEvEd=wqz=lw&nfAKrKlQ_Z#RXCn1f7$r|A|`-ZOCS(4 zxR`SX!DpH}NH95X_2T#al{~^fdnnxhp+jp33{{>ixr_pJ1$23j*P@&TVT_j48>Ug!TV4xztcV4Uc1Qu^>q z;-X~CV+TAVQ15bO=h@3l-hdx|oYMb1<;`OTO0_qYX9Vy9V&hGim&A&iIC@^qT*Pzq z?*pKkB>^tD4%TL)Z9y>=J+Q7IKJTKRvt7Cn_DkF}pSy=n{h^`l9&zQUa{TyX= z0lW%fWgOgrs3*SwCxcMYs2P0c4O38`d)Oo)pqa>B1{jQbk+nA;bAji*&^V_H4DAOY ziuSJiIkhPcBKp?;dj=~mfLn#)K| zQ7_kO)^mFI$;Ek3Xm9;0y$7I=bvQr z-vHbVppr8hiMObFh|viYGQFaLM%Nd##E!@hibznTfPlnnt`T%z+C`m&4TK5hfjp+t zYqK$(z$)m=i}XQN(<vT#p=jM1$z@u3PpVrJcMGCc=4op4>RD6lq^ zj<+=Cqxsenw#P@-BpE0_t=x~&Ie#xL9T-5mqGQXLryps;lYI*&tuVYoqD2IsWgiv@h$4H)4#w{5gHI7i_aQP5Z1 zg^83Ho=Rr=^q+Eqi@g<I+{hDE>*llU79cMtmer zX4}y0>T{rooCy};P`bIV_!r|2$CD}fyp{3N_$iMn#kFDDfTg@aWE=8d5Y)oAGROip z0YK0CA{KHR^>M~HFFPZ-J6La}7IQ{}_6_ncgk}7yfCT*D;l9+*Z0Q%#5MPRL<*hn#0qZMaSm?*I#v zjY3n}q&Ixpr2%q$ABbMM3F}ogHR>?JUEBQPdG|PBnUHVKwJpF5;OM!^oF$*hs5$&U zU+{>4xb98#%iV^%)&b<`AS`o+ALsWaRN=E}slTbN{XBgeCDUHvSUaUiTuR?Z8nH%> z*TZ20S`Lx8R#Dco4>%}K^$gPmo&@pMJ5v2$YSbbv2gZT>_8tuxd8{z)m)Au{egx(} z{ZBCM>8?U3Kg9+Th<|uUKw~J*z((>|C5>0MgU0Ah3 z7q7X=hlJwYUmn~?TRjg_Y5X|WrN_6DAdz#a3AmgnDk!mY)k~CXAnWoQxfdpO-n$Km z%F4D?%!QELtTln@feGw{0n%Ji@#c8{L%YsO!AlcUF;)lk2dVyNqmX8l zW0aQKP2T}8u;_wOoOftscBHg@6g>}KVIx@fbDM~5NO$NtB)g3JwCt^b?(8~yHQwbb z@wV%u#U;}Cek7X~GMrr0R|KCv7uuC}Z5q3to)buvI;Z97VDgFlDia}d{ 
zFXzfgf`^}1P##pXS&t3b~xwM3K95%N?{nUmn8W5C>;V!0S$+I@%(eJ94=h*9qOoYLDEdOy{dUDeZ}$$xDxD7J(1_LmcUfo1_&wU__3t_Qwc%!R1||cO~StnsrfES&~}gXDrVwW|$%upD1g6Db=Z~M;tM8`gf=QYlcqjPqEj} z6E7G|(#j2M=cNkMq&vKdNR=K`nq0;CrM{8@+8us`fEJOPlZKIj^&11N+BXR@8k~o4 z^L3{CL_HrDH1&bfmq0=R=yizenMd)*Yus6G)pPsbnmQYM9~SK!yjiT9rnuik4fs`X z9K`7%&)%^$koXKCGfB?Dx+k?%#p`os*U}zZ;nEui`Gf+cwxoN|BSp~8dM%XOUrnQ`OIrcQs01MgWxi`>Db$sb~2nTtOs4Xnf#rrrw z=S3Ls#g9E|ksZ*#mjd)zZRn;e{J7~ue4^z&A4K0NsKp}O$D_4l4nI%#A7H&zP7h$U z5A`;206}L?(xAl0A$41UxG%PfU97%y3p=Bpk|&YSeE;;pTmC66a_$#Im2w2iwRHBD z2cz~FkgK73gGdGUN@+&h#;>~xcIBdfQl>3?RB@y94tN%Apnfa>kvMSu&5hx1g0B_o7$_+Y$;{^7aefL!wX%`YXc3vlX%U%#c49TwgFuu6wCNj)ysqjO zy!DTH?Ad5n2B_CAKzX$Ls@h2EC3ZF@?N^dJV0SlgeW>~uh`_PHVg-^=1@FL5CD-c=b$1oqljm#tzQB<2 z@H%|H?Je5FG2Bmbxz#K08Xb6T9QMQ0B6*pv7B+h}M6yro)y`N0ejO9Mu!hh?O0-jy zw=8g@@#6+~?Ki$$mc82{0uzjr+*4}y<-3rB9T1>WxS9Iq~7#P;dm$X$bl}bt=9|gCf(V)egxHvAZ z+_r}kJ08FS1tslGk#qEd9}*dLjufc|)4F}v9qh+9e$czb_l)P60tHt6LACWL4ZBYH zhlziw0d?>E4T}iJTidpr-k+~gX&KejrehEPC$liK;T3e1M%!vC*v3k1sPd|vtjXsxEzx{rR76IRwH9< zHW#^tf8eLxw+#8!QJ{0Y(HZKJQ_>ZRA4IHYO;R>pILU3(eFc~Ed_`oNB|rh43;&U& zlToKfUSYLL*a?1N9@oPvM!10>f(|1^Pc$UI0elHH*ja5nr+@e!6@X>iodK0u^5}Ru;$kuw4ugC` zquRYoKL$Qy03d8>v+3>oP{Fa8HwA3cwDM)bE%JFSt>ZA7mFS3T3DcB%=A{@5>)tOF zY|))hmD?7u$Tn`9WQBc+z!E-4!113wi|#Rcb73XPOz_;Ya8}`J@UL~mR~l0F*S2^_ zl8@Mf_lB=DrJB9a!NEF0!`weSA<+rpeOxcE`eo zp)J|hO~!SNQ@;IYh3gVChof8wNv=9VEiue@aPDIyn*NbCXopCvUFY!nwGDt#$*SJ&S*-ecdyj)5HHO8{_A;`od}BdoCFlrO2G z)W`~J61;t(>Uv3Hueuh1z`_}?pZ2z#Dy=c|y&^tC4jmnPd`-%KA5AgeIX%8L@&4EQ z7vGkc9qGixY0{#c;T~C&sP%f zkCQDes|r@j@BKdWKONp>5>cBk$G*=OI$+>FkL~hE65CSW(|_u(bv}@@#U12(njPn&3BjL<1Ae5i zZl;4jEYlruB)*s#?rtl$vL!(i^k5g>>xjuIMMw}I(_TqDV-$j$WJTd?g%v8ESgbYa zf8XnJ8$PN^aPO|sP%m*VV@prNErH3eo{h#F23>-s{cyNmVg&|6iJ{*&DG!kchnZ)lPG-vb1O2Ns)YUq{9$+}4!G1Bok~0Q?9|a(P zl?2$|w|e2tnrr#Uen)^(s4jFXttVcYgkJM|Y9rxlkDRyjCLC18Z}DVe3sL@*@mY$V z(f|>$#?}37y0m+sFvph-@!y@xL={4UOo%N)Q#&1ZxXq@49mi-P=3op$sz&>HDc_q5RV8@wShvRCMMp@{@p|7R 
zun!}Ctwa}>A2{lBenXWL-BN}2II8+MDSAag;LE-zs2nd+<+wOY6lhSR7Ly?v?x+RJ zz<4PWA4Pi-DZvOq&Idyli&iA3rMc?bN@K4sMyb5GA8E&YyS=MUGYsMgZan#47mr6U z4o+Pt4W%!m)4Bq=d=QpoNXt~OZ{R?Qk1){ncmb8xvH=|iZk)3{o!JoR{_Xi@#U=W; z7i(NwjUxIAr71OKIgSEn#9uW!ivbogy%5JyuDz8pR+-=Z_FT?*SXLvMifZEULRNOb zGgmE%L-(EMvarXI3N0A%%E)sy_-vSOH1!_zaY?m@zrOoDf(TWiht6DB&RJ57!7pXD zP`|k9l?|Es~UO}m9?1_$>&LUSX;ufYFYer`2q zj!rl9pv?tk{y~1I{|!7}%M4@lF^E5X)o-a2(>ICf!Fx=*e|u%P?CI7hr8lnZek3&O z7FOlHR?t9?nxyDOVi;OkVu8P0(|ySwA~}1|S9csx14Bs_v1@e_Huizs$Dxi0O2iT+ z(LO;<45Xm#AK`Q9AQE~>D6J|QND#owJT&Hb{pBz3$Gvp1nQQqq2$%xmvs1f@T&%mH z-5alNn^z&(GjmCnX612Mc>U#iw`BL{dUS!)dhd8qig9{ky8k0PXonaQFig@ac}JZt z=tNKQaxmK+k1+{D-<=01t}tGRE_howSb_ZKaBaYkQ1yEg4+O+a6#Y+keO-ttPsaMJg^)J?CTp7AXG6p_}Vyy zo!;Q4==DlrK7Dsk?YgUR4RkaZcyEV$Tmf!JehJMD^*n0cwgHOaLq|R=Rmb0pC#;;U)6CP0TAF?>Jg020?DLy-=sF4 zd$=->FLKo-I~L{lybM;DLp;cia%6P}nGQTdS8cyZ+#E$0KGY8@WTJ+=ex}{94LxuG zyRX7}{S$qStepNHAK0N)9{)IUpyU0#M@-|cSAf3E@l%j)eSPdBa+K2+JM!}{FS8dR zI*lJe2PX?uO6id&3i|xlB@i?&SH*0xiW~}KEB)en4MSv@hZ!0t?UUdWOIu8TI^s*r z`=UPbbL2uJlrPek&-w!0VI=b1yTDn_{qsedki^!rR59;2Vki9!zon(wChBb`3ukQi zXDVC)vyaG+mAG6#zfMd{T#@L)JT-{Wp}4cPf$Cu!2+7#}XY^)tW^6KNuC%;c8v`Fc z>9;n$M#8iE-w-7<^LsI5Nngb&qsBMaz4K_zHAAg9&Rqg&Mantt`a3%?*n(q2P%01k zP=>?xUg#;^5GWDSKaMPfd-|jK{`A>8V5(ITS3_7Mr*IGoNYN*Og`#9}rcJbQ(7&+>M(R!A-4S`Fjy*Gq2Cby8hC_+EC+w`+TGQoswghe_{a3hI0RXU;>f{b#3=hl#}fsr+2JiAxkz}WsbH#OjDRL}=6oKdIUBv9 zHJPWi_W3ZXucBa;~hLl2?f{YYaTH%GsxZ@&zNG zLO;!}h+ho&CTLmqGbVMlLbvY2#tVZV+cvlZstlWIq5380F+}XWiH5X~)7Nh0s5fqm=~W*;9Y)uD}?QUNQ$eJikQNWVu&C@kg&iX1EwT!n3>iZ$xecA^pqZEN_}N)VSTsZ1J=|$^e7# zPSY72_DrTDv$|hX{-`wU1DVsM;_5@P9^O*o0&TY z-F#iGRbyC!bri>s?s-uQLEv11jqvBj&cd;}=Jt4;Y#gQw>*LOcm_MWobsRBv)KXMg zUK(kUB8k6WI-P{cMIdS&l`*mUDUy1n((q)(*T-9-{wC?i8V}3n>q2U$E|K@x9{Wk`oNHNfCHUgsPcvcP z5kG$XDGUeblpECW{Ce?UJ!hjmg~#8@goma%+TCX89c%!vJ_yd;Q;tp}-g@=31GnYm z{p1bw=eQfi+x-hs%5V%>Y>)w!N)uT1LWaK^htCPYA72E{pq%v7f>*<^nG_H!ET9^% z>dknn6X^MnrT;Zdu->_^g=IVUfr+C(dW6y}=!vQOC9mhZEKDv3iW5h~_5HZsl<_!0 
zQI&ohW4Kv1Ik#cN*!zi?J@XH=S#HEF0mC3fZtfl@oubkmyheE+iEkF1?FuzEdR%fE z_s=ms=Gqf?u5>|;(@{GylMr+;#ZDoJpH*HJI<`4F7aS@6>h5QxVHmlI6rE;|HZT4i zZC1dq&sT_E?hD5xFUwL$`+lHKP}!Chm1^zC^GzHn(GM-Z$vX47TkW}6eaH}TnNO;} zj5p7J6UOzX?1k576OCAx1BJ~p-FRALep>(JXhPtVlltZl*ru~?a;k{<)yH4!=0i|F z`-8xW^hy_{X|tz29Z}wk@12s1S|P zC)U3%50|s%)8&T3Ql2}XIEtMcS0kJ%(8b^;ZhhqSP-M^^3XnC>vvb5iC=S*iSdg(u zcKq@iBmW__J)7)U@^fkXQ6Scb#7%%nz=*JV13W11yA#^GFQ<{S|tM?o>Tasy_s|j zxy~g~((Im5q(oYHnG8AU#BhF~RuSIm@e|%zq#T$yrfqjGihDfE=Qh@!Rr#uk=vG!Z zi&ya$4^nF+5q$q>H?n*AF@EYgU=&}ZvGUn>dV#1YGCpFKm@+2*2Q4eODjVWsH?-}9 zm0QFtd!@UCFAvR^<2`kX#XiOF7O9L<;-j`f3KqP_&{7RU8ggj(!#6F7IHDgi@j*yI zA-%aGanIaJB8c7Ta%qINa3*Fw^l_eOx2X7Tl7G3v%i`lB8HI3~$>QhFKK{X6dXj?9 z4<$5A&(^VdGoHb=iet$}*u2EAmP29DQx$>FHIq^oC3FrBb7bq;15f7bh~c?lhcR4Y z@scI)r8hpof~-i+2dn$>a?)NNGr76CxY|Jd5~baNxi z0H@zdax2k+HSV8QI9uCpFk+bn(u5<;e}H#W(Rr0RJy3c1mW$0abCMe_181uQ|Gz)p zTcXAUr|=r1_zy#pnip8nd#ELrEP<%3nB@!8I@{OpCUOCy4pT}=y|M_z{V_-x8*Mkw zsVjo`q>TnVP1~xSzYvpea-vouZE}_0ZqHv%qAqi))O^}c`z_%1j3%&`SWj~#jX&)* z@5-bWWJB=)uhH+UP6(Cqa8XVO_f}X9miS0%9#B(-XJU{KMYRNCK_@Y}KT&g=3y;e5 zDo8#D>&==&+p^SiEb?gUl^OGq!N=Wr{Du3KE$ZwaCH7uR!+Wrb^jRDjLPobS7i^d# z2C3~bP@mF+>5Sfgvx4bYTr#Fli?SKzNX8zwP7&bYN zEj1;>gVMt;kC?UtMZZZM8*HQ+?>QLIge^}j$3V^zAFUnQA+;9iy~i#EXBF-neEf~R zp{{3-*UQna+GTpY%@VzAh&MUpD0Ais9j^C9^H|b>vF&5&*m$wKL*mzov z+JGttgevX=R|zjr@!IjiHq@_84KM>%6g1cSU%m^)_=%e4^n7=B7i5&}F(!!;ErUYI zAD%xUx&v92JddT(ZSj7;?EfwRth$XFxGhf=4?*}6KWCO(^xv;Y?B$QZcKx@+cS`I6 zXV3Q^@@{aI&u@Q%xX&|PhiipODS#0u(Rw2b(s}fb>Ewy8zeu{2S7~FoViqN@5gD)F z^!9%RT}ffoXE|;qPlcFBYwqV4wp{dztosigZ|<=XImaUpJ|e8p>VwXP?v40qS@Eg) zq67+-%Re@H4a~nZ;cW3z&j%T2tQa3u2oH*Ih<+50r?!%tsk2p;{+5dLFK(B&`SXv z)sb2Ey~h&s=QOcUGkWQXm&8FFrU+ku-#ej~lQu8(26OVRtVUMx02C}-CCyy3cpWk| z!^-d5cAy)B#Cx7EUc7{mw5!IpieCq0RvrgH4Wz^RkqH+)7Z0R9zY@j2mcFz(ZE&3= znZ8Z1+YKFYsa5d;2W>vbtzZp}Eo}($da}|n zeQT-5!G^8GoH%qfpn$A?*E}Du7D9D;yqt7yVjBW!LtQG$l6ha&Wh_tWTLXWWzJa-f zD#-QRUG6J;-=O&4OWu3zB^+n-tN;p%hrbOjF2$!(Ay0XIN0hqTu9vuR8LNcws`r~v 
z-Iq>-v!-qY`b{|i9dqKZ%ZH9@)GTGTSXyyDg*>32&0O`{d|w>?gm6gzR&*#vkLgc` zeb=m@yNlfmRLpn^6=p4W8LD~z_s0c_qz_;G+v+F$-RVyv6ISDD?Fb*K+GaNluZ3~4 zAYPX1CHBdk{Xh*GhvnVXA-9lT{{5ptJ$v!m^nYFJGz0E$?tfQ?COTe%<$(|$PuOSn z+nC2FsSIp^H^VB`>#`zD=^rsz*>l`pwzF3TyY*$_y?O)SD%zx^;<)j)c5Y{1h(jkD z7Q7*N%m8YR{3+;M@PtC8}EYz}r=gFQ_jt2NAdp;elX4l^}t;kr+ravBM8#Cv76jUV^Q4#-0x?cQKfYE@$nOQs09vGE{-F1S@oB z`MZ3SU9KCsZW|3H?E-RSdo0(E@k+0=yJD^rWr_@cEPf%`94FCy58;Ry?Br~{|3Vug zvvg*6fk1}wDl*vnRo3BN-J158=SP43v7tZ}_7l24uzHBRUJevlZ5OL|-fWSQ?~ZG@ zW>kLd()9O@nM9D?{b@Hr$2pCLxJ9LcWqa#aQ}gDcaaR?8=tDKw@W6`Mo!1y{v7k%~ z4La_~fvFBe{;`=Mpf3uNKZcO8(Eq0us8as`yFsCZ^z%6eX@*YBiGQvlrBIBOZ)`s<5r!}FIGlX4;c4b5@jgNN* zA5H+++_8#(_FR0?GwHQ$TLP|l^(m;;6oyM4e*NoXwJVXbVm{iJW~O^I|EmMeR$3nZ zPRedBOd0V#JcelkO_EoiMPAIzbaI;q-EOidhAe|#TNnbQB*oDJPx~mZFoh>b^h#Db zYEB?1dPjBarLF>cd@omy3V0Q|&4b&7-GbI+i~Qdx&gD3G$3@ zbx6xyI(5R9uM)p}Vx%4HEC5oHK?Z>Bh&xAWmCiP&KSDK1QWTdr(1C9@SOwsr+h+^rX+J6OQ)Q!+(YKqb&78H>`Xfzt8y*2 z99x*VJ&v|Pp>B+XFeE@N>G%K2gl|5*LBbmtHA^Iq`75!4%Zv!ujl4`-W}q#NbZxcH zPNEtgaRhaAn?Mf=zx6(`%A0MvAyQM}IZ$vtb)N)9(8NfP9fHlO11mClB3#f}l?4QKQ(+qR!; z_;(WgLQkTcd|_v1FDhC4c%`|bA{=>Lq+G}11Dq{2&B4?#P24=aUK$y%UgHd?1XQ3Y znjykVf6K)0Q`11H^o~w*#vTSI#?mGuyco-303DpID?#0yb~RtB&+4s4j_WtCb9%GL zEh}ySHyKA7qq)uc-xJ;*_E%w&Z~s^-Xt0xc&0oB9tWVc<2J+(Y2_V3+k7qjmi@1X! 
zZcb^JW&F;zQi7x_)O+ApEvLoLy7=mBKeUWCS0<4wvRXVy-!DHR?m3mD2yD{~#g_mq zgV2s^q&?;U_}k(8r?kYnS?ZrkhaWv@vZped6fC8s3ZoM3P_y;%*U&t3 zwsvTgZL`^4WBhI0QS9($`cK^z6=we6j?k%22hO-A+?malqG|vtcVCIKPw`VR^ca)_ zzts-=;T`l%6gzU3Dn^)5KRcv!G(0hI$rl z0{_A1bZzx$*+LTkTCX}aqI=+v_j?SWAG)WTi-Ka)4i<+SnktqV%`jI*ru>J_=y$;; zLT+nHT>6!ny-FTui`62LJOG(b9!zPH@*@^2@u=F_d%M6%Ix+?`+7V66u&Z`aZjBS= z7Z!kD$h;>twLH>+pYT=r?HAXMl&{q~;w3S9b8cYNVG-h?v0-?G^RSd|&ydP&`lWfe z8nH&Bu_oJbya^#2fT;Lg>bajbfmBE_J5dtrf0EPIeG>iha{s1#>IjcOy{mFK3@R|8 z&i2r>o&Cgst}_%Uou4^gU51C(Zs3$H@K#|?1f&e);i(R;Fja*;J!zcS8utxvwaW?o zp~B^*CcIt)dNW=QgEP7t^je71q0R7}9PVSBV!jzzLnL$RDB%F#_Bs%w<>7bZCv&@Y zY*0inkp&yli`akS0JQF{w__Edfu3?uPBe@OF}rqA3VSceQ@P^?8l@}ya|C1#)AN?F zFp_br2V0`cQ!%p^Xe_u1x-!Oyd*e#U;}x-Ix`xK+9QgkVuEE%(F`>*Q&}4E5Rv?U$ zP%ICogxPnMeO z(Ugho@tRlYd*iywQQmZIDfraAAT`DR5whKvu9n=Vnyyf2&51JE@H?VoMmNvjL1u;l zh=&gU(i?N{zqD*n*R6s~Qg<9R0;AMVz+1DuXX2CdHs>2tw*@HaU|a;z5pe5ag+J&4Djp!s&>|gkFeoY1WNEh0ejN6d|6&Yn*sb(9Y;t?iHJd# z4l0&%Fizx6|1yQ1HNiU%S^BvK_ExHP1-f_~H8Cw_y_GpI8!}=ZIaPAMla*0$XnmBl zRx~`t8>pgr7%{7zux7(@hZ1|kNbGFJ>J$URs(PC3Z)o@DObxxtW)*J~Z_Xynl>{xYp^$neQ35GEu*`+@$1&lX`QjnDQx_vF2^e6Y1i`d zBdX>3Kp4dwN+2<(iH^n(YA{d_{$kQ7-e=3*u+Qe?dW%v`WW;seu48XtP7nzJC@vOr zZ(j}a%mpO!1YctLnw+>*8+2i28{9DRig(QeWeR`c_EuHeH<(Q@N_mzv4!D1XZ4%tr zp>2RT3Kmv$;S%DdAsP{(7y-pe@zb@$V$a@^ej@GkvHV555w*)_gPttus*i_;jP>Oh zz#Ws?=UGi)_IO@52GR2xJ@ozlKi!YrBS#^el4;9OP^r2lvg4j1_{)#yO4%dfJV{dK zQTd>T1ohXy(`Vc&!TR`8*Jk&O`my zT5FkZVk`hk#+x4qrA={3vhw!JTdr*0;}UGRnjeCh9t${NS-hpQU7}g($3CiN;z8-p z-D1&LPY7Tb3j%#Th28U@VGM(pa+0||1C9Y>>ovOY+(1n^3!n6~mHd6`Dg`C}6)XKz z>1-U=nF?e5a)Oo5L*K&D+QuNr0D0Q+ZpmaR%_F%QjC~w-IVxB_Uf>Qzd%Xe($p3&X z4ZsyTOvOJn%*6e~?;TQFSX=T)1*#IQ zSUl_F_sO~WJVEZ(ZhWl6M*e_S`mqfKsQ54zSJkfomypO%=2XE(i4(j2|>>jgRye~_1 zb3w7@!`s@q;8UW~_TIY3nwX9!bcHuHglSXb%i-J;UVT;VS@*5Ss#wTx{W%{VpzW3b z?p^1TLeb&oo8g&hXE`qN?atHS+|!O|UQyfFpig@3kD-NL$jsSX>PvcQm6xaa5U%^x zg^Y^>#e`)igDw0mw5vCcS@bE0BrKm?^7FQRfrr?t68BgmlgL3tn(+eyg6uT~@YUU^ z^d0|1K9Hp!{ogDA?z{N5s+10ejpu}=44d~!K^NF9qg)4U6(;dP+w3pD8N`m?Di-*g 
zJz8ceeq(MpjkBIm*MlzvVl0KZ4w~nJiz=3S)`cXOA=fwO{zxloV?qi~XDA5Uu`N~mUI;3d7z#6q7euDRA|`lV?^bxX87*|V(%2gf}x zq*q67gb;=0_|CI6_nLeIhm7!iWCogHo=c$<27E(fm)YHZn{Dt+9~b{YO8Xo4K1FRw ze&?Q{bhBpr&-7fL5e22U#G4-;j4-O(dY}b@<2?|ZV;MK@4EOP2va5-W?7qC6B!RZf) z*0>U<(uDT~It5}io{|E?Irt36%k}6%fo21yi3XVZLv-|Tgp@B35M{@wXw3DW~)N`V3m$!}crv}Fxx zY{cX9isTDhA z+@EX#fgh)JVo^4Kv8}P(9L)kUU_3i34mm__t@mA6$Ct63W$XO2v4R+uiTa6%fl9tG z%9(8qMry4whi&n~5S=Bv+gC;B1<=_$6h%d>_>V^7c}7*BeHrq%Nae5jaAl9!^OfFM zLL&9t5$CPNP7`sbC3u8+Am7=#7wOh{w_)EsA()4(51CycxJk1NH(J+oKThBDj|omP^5tVLyAyYL zeW9IS8aRFB-1e6eV!ydBG?K3(^LYHVqiUo)60F-+uvZq0h#V=u3KCRWvcXF@V+RF6 z<>aOvW`Df)6gOUL1mGq(I2tgAUJ~V;rlnjtJ{(;WUBbN{2Ew4nM;+f(2y4Oe(%;Fk zZ@774dfsr6F}QvxTDP-6k59_+>K~G$mPgA_GFF!OWs{}=lITM3SwpAwXKJuVy~ASs z@{((q25*^UaZN0kcYJuzD62n1VWX?{-ZQr~FC6|E9(i#VSI!f-3AE0Q@?wrnInq_k zK7FNft^*-TlFi4f-ISF9@fg7xvo*NeD6&lS-LwS7@0*VQ6ADbT%E(y2CnL$WcnORs z0^%EsjEB$w+E^b@;JeCgj-~T0v90k{7H1T3AwY5H*P+&y_PIqd$kxounEc5C_Y{Y1 zSN7+Sp8X6I*5}fn)2evW^}%Wha{j2bJiZYCrQJmNqhlCO_fJnG{ZMvD-{;<2s;?&k zhLIf>JRtOoZt(|9E0tE-YEs;-KNbf71Cj8*YzMa8&p4RfLlr=5dtiI;xza3Y^k2AQ z-hKuL4DJCi|Ag_tq>fdNpBbFBom(btpD!GHPUmJdVON6&;y+};-wh*UQC}W%hGFyY zG2su;D55v!`c4c}HP(juwbHI60vfBsUJtO#TfBIG33Md0!>8T2)`8P)Ee^Y41Pf9T zJeZg}z_634UDHwwU5`35B|W|#<}=l8I^?mHp$`5U5<2X2U~fc+N1 ztJWNTvi7T(1%+!%0;~xK`q6lyF-PmiLS+M=b`#B+SGtP= zd=^G`?yZ>Tg8OYbf=#VtOV zb@r`?4t>#an73M-_82%meAL;E5_@L#6?xSWRM0+~%@XCPA_!`Dla%WG)Bqne37JzU z{7m-kp7AIn!n-*p=zdj=o$LN#D?5IYAd9j7F2ZGC5-A*U-!e)wI2>Xyu-p~+VRx2s zX0o-O_@XZo@1WT)OT?-{ySo3B20C`yao0BM5P%djQwE!qKO2ly>>-dp^cbC$Qj-PKN$S*)EZ7=F8=-b(wjm9~}xN zKk1IYo*kbr{k>Suw3fSfdeNa5vwIDtGnNgky&-VuI4>sUR-$P@5q;VcG&_$7MjKdB zl|nCWDZ|C%y$XBNcJ?UE6 z?D_z-I1GV#JFTHHwbN1j$_l>uU{Hz@OUvFZYgEEe*q7>zE0lkvPKOX_r4*1d1;qVl-Kv zQ!kEKuSByQxjh3(XP)3u@PJ|DWcmZZdJ-HVR%@a`TTBuSz<=b?n!}eJ5|0!GrQ@^D z4R5)~`G?vS{VGRCGBMe7gDrnwfJ$$z+bNvG?Od<{s$8U!>ISX@Rt`WiU18Vc*1*#c^FT_H^5W~s81moYYSI{z_6!!<}ugF-d|~VG7+Ew^V9mG z^x&OqSc0}fcqrYqh@J@Zm?AmC5b&Q;rdLvOc89V(lm}7N6>E!4T^y%wXevD$|MKuD 
z5!f!0-SXtAd-a0V&_?-|^=0QvbOe%vThdxu7*ez4I*4v`8FDd4nC~ZVp*K`66wbWn z!Cln>8+<>VWor9E0hczjsE)B$^wEF&!|43UbhDmHs!tF^R_TOznY1698x zYTs1Au|DRblpo1iG2pc+)BSmIV}7c}4&VB!o9M2Ppefo4{>s}n$)f`N z5u+pLVcmsU=wn!^kF`umx^5ipP`b%6QY$Wjybvru@gar@^3aU*IXcmTlUd^>y!jwc zr}03gZs^d_&!~jVy&8yyG8)4@U38g^7Y;3J7;zbUZSjC7WGl!T9Y#H->S^VZLlk$S zb^xxDpyZ&Hq3AEqQ7dQ4^!4T;V;Pg`xU*402)#O~LZL8%lbz46!o` zB-}q{8@)%jcw1mfzP~+2^~7o*j$Bd>@@oKPLk$*G^a@*rl%v-w#)U~wSOJ7>N5N^< zaPu$;G0a58{~cuaoiBshsDliOerDkLnV*8C&riLt9AA&5l;G94(9hv#uV$%7RhfL7 z`=|#!9!ioe$Uh|^FAB#UHu)+6zN5h`XT!p==y)AqzMId-Pcpiy9IS{(|EIiX923cA z-^dx|=*rfq3tvC}F)`K>SX}?I2X;%d~|4um@~?RYO_<1~BmC_B^KleM*t7j86YR97WQ|0mty9qi9eo zxAzk+1BXmta1$NeC{GERA1;`i^@w@J8- z+DK6mBSHHpN`7?|;Sd!ew&%e+9c+tP`y9}qz?u;SHr1&BXVk}#v(Qn|1v|V5nk+W( zBxA_A_1m=?H7>0><#x6z&rtg+JnE>&VDjM{sYi@@p~-SvC)->u;+fBUX^K}~xviOB z9&ulsO4nV0xZUwIbKIRx-vy~~Fu>oXSZMkJ=a+XA6SG75ur!SO`QrC*f40seht_5# z5h{*+5W33sMn>`F%fB2dL4||3k1h9&qcnYdb;E}8zlD9>{2tI7rf7a7p8#uY z<#I}JxxLxO5IMV6%xf_IvTMH92e#&<^~{Z*&`cVIl7=JC0ZwO^+eh|2pT%$49LAS4 zTUvmr zg2)ppmA;)thh%?!=2q;YdAUWW`;7Z$;L`EXfq9CUf<@c_HzD|%QU3Q|EQ~jHFzhw* zm&EoZ8ptv#7IvkJ*v8Q{Dwg2#3#iD$6rSuw{Plfg^UEAdC)bLs!{xNs6f6cY11z#U zKZwx@^az83XOx6pI1@KgCaTqBO(3C&5EThZO*;CzN7>g8akbqu1{@ zv9pcpL_mz2E11`xa!A)QMDyk`-hTc*{QQ%k4S9Zc>EAo%VVq5&8(!_O!qL_!ciauZ zi?k5S{+xe_DHYCSt4D-wqyj5L9%f|)SY~3^okIj&2@=Kx0^p9yc<)(UQ>?`?VZvxT zJ^xKZU~#H2tc$>XTU>}?;aXFG|8-%KN9ps4I@dwT*|}i(NH6g#>2x1{VRwU?7+UeT z1%+6O#!rlT1ap^P0eHimL|d|h3ZSixYJ=e4_~CK(TMKh;n4g&@m;)#V(C0nrLYYm{ zyV5*Ff6tymJE$A_X;iGyZgNJ}>Q@{VUn`{(9Dp;cI*6J(|mPd~kbE`zc4CsKnn z17Ui|*h=o+fso^_OxCpQjAK6qi{e*f)Eg!%hsWPzzz2^65gXJz^f7)<2v2!n4(a@0wJNk9$p6n> z?&hl_3uXvby(?C6%fP@R-iVt)bTqvGYKCeGdu>w1gG}p2f0)O2Fx{yQ0NPaU*ZVqr zh%Di(vkD*?pxAJ*3ozKHum3BG*ckbKuW~%WuU!Z?s0&461y@Ddh%N}cjq?m!as>sE zKd_e9h<46{QopZgv{5`J5C#a{*aW9Ob0z!(c#j6j!458B-!eTpZGO)&yQ@AMJ>JvL z<}@g8)=LjR!o6e#XncW!Um@w0KWSum?El3WRDiKl4j8qEB5xB{*L;3B?`aB zKywC#FZ8aVNd4?31Xhn#sg1&KQboJ|rxiSJ?kLh|NJFD{~)9 
zA|IcOMfnBwSh0jS3Of0DqjMb6O7u{feods1erJ!ujsupLBQ}lLe;+(E%fsY@tt3%+ zpqGmqH4q*~!8*)2^gB83ai}{U&hWb46N4B*i4g*@K9?${Zfv30S~T_LmN`yP9(u?O zfJo*9?XCPA5tCe35g14lMUXa0xP7$XUwt ztG>q3*Nh}|1C5H$owcxC=h}#{YgvL@c(K{gdbLBC&^rk^1X7LU62CkYEXJ?WbTe66 z*uNzK5{EzS9dX6Np1rLp!;ikNIhs+FyBB*uuX@kyl*emCrLaK8VUru<+Oj=dt^f)Q z%QC563U}Z_iW+^Ua>YMu-dOB7H|)f?|0e0fn>}|~j#-mE znvZByY~zedx_reyiQ-zwqXYefOssByzZxM-k~~scC9#=)82Olzq`{A1%#_};KS8dT zcOuW}bB@xceQj-ixr6xHZaO9djc$zLu9{vgxvZ=DFEe)z8RwgD0+D|rO>$g!p6!W- z8nY1`T`@`qe6lbNHNbL~`*fV@^X&O$WmC!-IY(#VONPgmTTUfZ%gSSn8tZ+<`jer! zoNw{6{7sAOWzG%ZCcmb^yYv(U^lf;@2#@%ZZ&ludy{{tHyU9alF4Oi}{J-|=8rR4h zK*PkIa+Pu}kNKvOzyH!*cnlI&$EK?3C!SchCsuUNHhO)4)S&2ifkIK;?GhX>MqZbz zN*+E$TzOpIM1OyjKZedvpy3hsg5Bg=Zw3};hkq%61eIt|F>3o`$P%}CnjxM#{?b1tO$&XAEI`6n52TF+#SWrF>%&-U@q3Tr9nml_ z_M(*UQOp50l)&#r+DDNwSim6Wrw%#vlNMJMjLj)jDCnuXA}T*)bDsEj4HU(he7PPE zdHFywVLIZtq7r7pyq_L#PsHQAB1;*-Hw+|-`SI536Q#srGp6B33Df0#y1~p(m-YK5 z??o%c0n7v&jJ(`y^Em4J;3@)*vHC5U{r@Eqtg}*w_?XkUl)#Fhq*oCWSREFn$^~7N zKWrijJ40#f51gjQl3JYD5=`N{HdwUI;!#6sZMLblC&`$+{R{T$7#Mai=N zKJO#P0bJSsKmr(sAHf*p-nC&>458@h8VuV7TE#8C0DHEgjaw)KOLV^_PE!N%lK~bd zSu48TxdEV`{VWnY;TC@l9(=f!n0ny=!gYz4te)WWl*_FjHN#T=dTaR!@QAy5Jo& z0h`D-;W|`Pp;4sfw&W}v=y;E)Wh5kd1fCUI1rgYbU?FCFU}h>qqxTfCpPb_afdY&7 zT1cvIrJT^HC#$_h6kdaGQUbf$) znL?}HCzf8$J&`w>#ydpsY*U8!c zBi04TNX>a?8f}Xqou9=GBOQaxQ@WcPdDO^0c_3B|b6kXwY2qTt`vI_n+M)q7i`QD~ zP9tTDoZIJeqw z-6tOM4PC#I2&;SQpzRSi`IVl-&+q}t1m z8N+&-xWtf$#B9d9elVvd{A=w959DD+Une1$n_DDx#^?ggGQ$35Qq!UA`VcGIJ2+YJl&r@A93Buc1;jd1>rFFjOhKPZg`r)LX2=&lz(9ORB zTB*hInYJ>&KxQ0V!?l55vO0C%p&dv6 zTERAB^a724JS7ZXK&J+|As0ltJWje?2fn+mD&#R9$>#wPR_sqiOS_K z=E7KJta)_aGkk}r2~#B+kx>RjLPTm+%c5P`*6f6I${0l*jCVib%Z8DY<_b&4VM5{B zs6Zwjdkb8RfoWy_=Z)|&V{-l7uea@xm$<-Z0qq@sR)x4x6$U;xMM_tAGmMl&nkiFa z@%ir2jEX?_$y5AJLTCeLqk80%S4G6ffR<Daup0mp z=e5rba{Y_ph%atYOwGfPtBV#K@?5}=kT>Y8z1~aPK4P z{iit9S&$n-U&RDaWTCBMMsKJK2Rzc@Cg6~=!9!J-!(4%77}yqOz)1Dk79W>9+GKuB z&)``&W(mamo&01002joI--Mwy0jjZR9XS0m*|DR2r2m@wl0d`yEV 
zx!s7Gy(4J@y1c43v}2WWzHX-Ab<=Q7c$Vi}N;>w00>P3OdQ&{`2Mpc|?gFCS@FT{t zzDP$qAe7%V-X)WC;}8pqXi7Mj%H*NOSCxskj2eCA*~~;;=(q;(n^98VHqyycCtKSnOeF3%!%2(y+=XmD3XRF z&2jGOL?ePZ`bF04K0Nr?b=-3;gHpTVd{67A$DVc(ji@B2l=5Tg+Ij)BK~2ZLBT}L8 z@zH7*mJsNtuCuHuZ~53MwU$?Jqg%^Efl(J>evQN{2OI9>fjN7kV(xxg{2Z$Ah=a7B zpNr>A&6X`Sydw{iJBV$!TEg$x&F>)FL20qn#mBm{($8P{b`3RgnKjeBzFFb-lV+J8 zxAIzJYMeh?>RhQLUVyzR^CIM);afY^$$96#khxYg;vp=bJb10q0NHue_yCmuo#R!& z0(#St%BwUz=qeKzhOoM?87c2`kq|1=`K!P>d=K5Qm+~qhbcd1m}NK-Ai>uY3h3^YJkaYHLGlve&izl1al3c-4FoNA3n{936|4M?=T^?2by z*X855k5%Y}zJYC^Z1oG5qWvxJD%M;kNEWYE9jspUkWSg96Dr=5FC&)EirzU=5cf%# zDNSPCOsOYQg8CCV+9&e-z5zL@3Cb`2)gSR`Q!IzV0t^8LcTNk9TGsd%7-W!1w1B7O z8N_Ksm*M!kMS}_-a+$g#*iKf_oL+WgmZK_`Is0J7vy7ZXbayl{2Kndm7Cs!E_^fIF zJNg?_lEg*ZsG~0{Ve>LAuL)uxsFdz-a<|h-MPIJ5Ror>LCKbkd#dg-Fg&W&iUt6rMV>OqEv^!8YDY1pEV19 zNyS?{)ZF8~49(4wACt+DyzU8W!ufhKpQWw&2mGT`&Ptz}qOWaBJBu+DsOc`(goY`@ zQ85S$=Pekp(3!x%KgG`Ah3QlgzDi4U!9EAzi}0Ol9;zoq!0y>M3Xe$kZ@m&<+`GQ z?5w|}3icdIdm_!U@9vAA&8MV}y%-@IBWbmW3qC0}p9|j;ZApnD;~$zz(?LWn1)%_} zAabrZKS&6&+fe|V`4i@g$n=khYmW?!Yn0Q=r}f)yC@1pj`0Ks#%vJr09A@ul`B7@| zULiz!j3=Dj)YZ|uoN3wZ*5R(W-03`_WBE?Op7yXc54z+H7r0L*r3b=!mJidE; z>izKV6$pg`#H$^!+@7qg;Z#0pL!0Tu_9tvGn$Q2soz;}q!imN}?QCCzgZm|UC7APfUu^y5J^E2d$QwCm$trW%5a$a-q@~$(lWaNUR!@4@W+9g9FST+xH40uf+^Z1 z#JcZTv!%#+Op^D^|E5+t`|zh!X@`CLCdEO6aK0SzH-g`+wx2|O**zkzD_mw$v`s}E zA#x=1tBOZ>421DN|4@o0jX%BS9dMzZ*%Z>4C+9SaifFr&OsOc}O=6RErL8~)*re|1{BZYb&i5i}Zhb?<#q1I}A~<%z7PEZ6G<;Q997|cktUZ!&p@LCfXV}VBSCZ*b7vMhH5w)ninPe7&3?vAp3 zL?FAx*w)W1I?-w{gZua?UR~OfHkySRvmUHO+MwOg7+5pn$pt7mwMUU#WbNCWk~kue z@1&e0!Y*Y{f~fy)QDKZ$Z$PHBPi5mw)bF*sj(Nt9P7SH_onfE7qLsA8PHi?{qKFe) z=91>Nb3iM>aS+*a&7Y0!$qJ>dGQVooG_sQ_J{q3w^Z3{H6L%6N#8ES5^tgSY7H=xb zHoE?2A5w(>y!qvV!T99ADF#U?$NVO?n`C)x?w((ykwj#{uA%hCnsIUxAAi5xK;|7b zeJ7ZEkAp{i<9>xVm!0y%A+Mi9saB!SDovkTiLF+l(D&mMn39Z?|KVBA`n;b8pQgE* zL}#kCWFPVWW9u!$qWq$NZ)v2vaVTj)LO^Qh?p6sUq`PzIP&x&qq@_Cq1_UIfLrPjg zx?|$G@%KOHIp@00HE-tKJ^P-$*ZQut*ZQo_)r+?^UfJ$Huy4b3A3Y{nd~`(grZ!sP 
zvTJg{YR<9#+)N*nTKm!PPT}Vbs#u^UHBB($nSca2{wUjT4H|#@HxnggwaHCnJHZ9U`NjrT zUpG|6rdBt|P5mlNTj=#Ou`e+SSbRPFvoPW(a_krn1X>~?e zqaBa_cNyI_jku1c-xNH0B?`XX#OH=Iap^5bjhL{j#giKYTW2Gk!vGe28>P;Hxl#IPV~Al6r*bp} zMXQ>z8khSQz9k4i#{tdP3ySBX=EG??#8!-`W>MdhPqg2A=SDq?oXCO-{8}Y9HVnom z4#Z^DH-20=eBBe^O6>coU~Dk>)AxD)1p6P!&ad9STjL6htj7yt{8SQC0F8Z-h$j2< zq0=<;SkoN(fWok5aaUu-AhJh8RpEA(OMXSD>vcdDY%kQht%3>oEq1wN>6NXFa&mx8 z;%G6iN+*ln@wN73wD9H(sq`oa+;=$=0c*X1ysE_=!cT&Mz$sr+VSMcg2{PeT4DAw< zCZ|b63LAaMmjh3!SFYdxe)aju*RSK$KBN<0jE4B<%sy~yh|Qha{=Bd8|6?FMn|&@& z5zp6*oIjqQ960Xc9?n;%8(|R$_F>M!)T~dX=(+iJ&QxZImn0MzIn7mgY*?RPL2`RW zuXr7wt!iyDm72#=g0yz&NS5c7$JCFC7+h;44A|0~ZSKV)oXCH}lMfV4|JK*_T)65g z-5Y-6SI$@UcjQ&?cEZVNR1fkxeYN~r|AqJnw*kMraq8ZcB5VC_H7Ho^Ooqd6$g!=I zbeEa)2O-O|$=U@ot)A}@s7nc-*>O~Y$1{Ff%YUbP-C~Y%xI&J@TBVw>ZNy)UDj~9s z_SLwM;^PV87^)Mk7SB$?Czs_dr{x(118v?6+=srjTn?1PNs4+|c88zyr3-1ae1ZE_ zGLoFhiZh3(2|0-&)&f-Nm*_JumYGK%Y~OzcGk+t2=iOcvA^}M<0cunM!p(ax(#n3f zsrQ@?9nw-xefP3-M5Ed+${$6%qthn8$m5Cv1p}EH`Mq^0ebKU&DMkevIidrNU3GKQ zMFM1lJOCUIMX~1Q`GP)S7@H`DTd7R=Gr13S5z#vDo`>}$V}|2@GONcI&NE7>a)K`J z2nNK_lYFv~)vd~MJLHH0Ss*6&w8C@dB(;6rP6IwsE@9fR*qIzYfEnJ1+y+JN3(RRT;1;N~2 zz+sm(i#Kq})F77T{+93ZsuN|mf6I-^-FR*Z;QnFvUi#R zE>M}(*zQ|Dq%`-51*gWyUyAW?{!u>fviNf%VvBk4*cQD1R3FXUIx~Ra!?2_LV(lqq z`kXZ6;0tmyig$0I#^9XlwLJLLb>tY#=&R;J@P_J=7OXjSw@z43)OHn*;(7`F2DgC! zW9V1_7VZ$PX>I61KXx5Yn?C#x3ZzB88ImA*vDy}>8yqqCq#!{bC!@#u@f~8p>MPBU z7366b1mLg8oBS=MT9Edefl=X5;aE}vMk!b%{Kpb7mP*3i=V6~%M3Zru2s)7TV+co< zLe!cl1}p6w#6FyoARd*c>c}0tCenR=Li&vNbY!ar{)Y2O_Xi=S$NA_|qL^fa-AzD| zT8%=41S=UD2XjBr!OHpvu8S(3KpKm44zKGf^s)HFFxdsg2bv85R?^S0z=@H6`=?p! 
z?fuo^Ozk^agrsdG@0$r4md9U$n~5?zJh;et%E_4ck(>r*@76KE7d0oyjujjUd@eWb zi*vOcN=?g99c_#{Nvbhq6i=~~FHiAc@#j-Q%cx19YVb(5#P#mxl+ejNgq<8c^KYr; z!UvD}lUE*`Z&+*=o~B=cC);9L9vGTA<3QnAGpKdthLB#g8+Xaxrk9dyfqxtSE{&W; zdg3BI*TWw<$@!R)wWC-7ZT0*kBWi?F+wkn}ORs0mqinnWnIG6Dm<0^A%IjVDlA93M zo|(lK(fPl%f2@91PMD@m0eLr1NY=Re(fe8cz!@HydnOdzeb)IsW}mYk!nL^RwECyX z<}V05;=ZGq?H@V$g8oQ#@dnEYRu>Xn*?@b*ewc8DI9#Ypn}eJZS6N>>fI83l|?|Qo|fVtDuX)yI0CJm-$sXr0cCE+b?0%Kt&u$PGW4sklPgvp78JP$G%m>I zk8n4qqK6b-AXk@!w0Qw2NqNQfpRb5(Kcwe9;lZk^$$pwpy=-5!fG{2g9!9b-gc8JHtCKROY3H66?{MV zl1W?fw9n^$Rk+O0k>KmtHkwTFdBp7D)nl{LhLJKwry5mS=o_xR74Wlm{4W$Mpmg-f&^a?A&Od;*?!_O&)Nw_r|yxK2_-=F?;ziN3^`NW zYmdfk$Gp~E(2GbV41?%L0bSrMgxD*_4UjB41=F0A$NKml+&O`(0v<&GQ_bPwu(iaZ zCi^q+Knwl$fUazP7pW?a_A=uE!;0E)J>_sEnMckfuS)*z2O)QC?dvlOT;_Kd6kaQ@ zll686!@z^+zn(p%V{v-E#^7FdiH91#JQ1<9Cu3EBom!Bx#*dB(zEh3~XUqzTjYPbM zWnL$kp$AAE9Dle)pa((BQlEGIr!ugk6dV^Ku>`}duv#?%ID+A;2Uf*g!(r$DV;F9;n$+2I_G z#COY_4YbM1$00AaBMwKlDYj3XwUjK0Tq*+*t<6^UD!`9P%#gX!_ToN-R0g7X**2+A zT7!;5BCxUa#~to1`XPdF?6_kHZs1)H9c|0G<#()XA9p&eg3AI(3noy+tqADRTb0=& zD6z|}wo|EDY`puv*)yEt4!Uo0!CW8!XGFDr%&*EN_nf^j4hn`K;(;=1M*8>mG{Ov5 zLu-xO9#gLVZLe%y@b%BEp6DYxz4JBC@mX?V{|q>KhN2IJi?e8%!Owbqz0j zgsB?RASiA)RzH&*ROYkwz)pBbFXx?q3~}EP9`JSgcZ0=LcK1d83Flg35PJ=HMlDA@Z@Vjck zXZBbm<V0!PPlRpTA&v>zyP0tBm=_ljNO@l?zER*o+OMJD+5&(#B3JgHeWs>5PWh%wGm4A6+yltxkJ2k zg@w5_*~P5F-o;{1KEF)@Zb%I&m^PXA%~ZMN(%rabQNfE9ekyEn2T1St@g*4wE2X62 z^j!Ki^0M!%N>C#{x@nIk&$RKVUn60S#JR3|IBtm3W4K1d2bnR21dIeQ_^wa_r{lC@ z=QYB>mH_li69TJ58~p3)m&&PMr5y2rJj1L19I@U>wOFo9mE(dU>}JC(I{Oh(BXM49 z#NH;5>p4U&xR&WHk|2w?riPdvf?J&lFKvut;^`!}c(UFyPJMbNCpeq9`Vm1*y*E>> z(Gv}8<-5ztV=vaxFG!&b+PQCW7sJwrhEBIjVdw*kGrid?fO3jnAq+V@BRZBqya>Fj zpL5k3D9mCu^;v}vvb6c3WFq?B()sHk#VDq72ic%L4sqv*{Sz#GUVvlVBlz}AubY(3 zY-^}CobGRddFc)H6F0IBygz@RXGT&ys|sI4DW(wJ64h&uV8JT|v<--c@xd022SAeH zKuV|%zf%I~_|>2)3&E28@I>I5IvlmGn+kTnJ6>YF(9q<3qU+-RPv)o88po{84(%tha73Z6P^I^Ua^OCqcok&RUFjj0`oT@K)@_H#fk+UsfH z4VS!zjxkR@*MHKenPxCXNPneTggVb_>@O76TQeO0c3{-WekLbF_WgUY9a6YrwUq6f 
z8M$`GzpMBkyjs5I-8B>!`g|5gj(fIGr_P5c_cplRxPBoDVuOFHj~Imdd&|#Y@X4+5 zj~uZ}%DP;$wHqJV28N*O8OH^AWmob1V8(O?>PM|(cmLArtuzxQnSc6{uP%k`H9RLZ zsX2)~3}vdeac67#3yvJosye5Ev?GbZMXQ&kb;hHib-2}kJ-|&JT;lIz8h(zQ=U*Yu zE@=RS+h1c)g-j@07#hTIv;AtVVuD>J4-AGm`61I{suzho-$v-Aq?;Vh)NO$MZWuhv z&E^`+N~;#tZ(p)^;;Tdx-|>Fp%X%NT?+l(|OZz%sMC{;6&J`pKH{|t8V_k++@bDb` z0xDvKLnD-=kYTSC7Xn}3=TkAcWRlAwxgwOBYDL$WAU!gIj^t!a>uU}T=i+6^mBWnR z@#paQCYKG5+<{Q&$}Pq#uTRdFSn6u-f)BP!6eIQ^(Sw8yPr6zsI-FasM8~z6=4`;Dg4L(h~qdIkFO%ayY2lV+#=#6GLuHo6dvzYp`B5~)QJ06upJ+W zag4(C23IlTq~YDGPBpe;hw2^!bKV=eJ;S|`4)35x(y|Q zhdt!T{IFADEq;>@rj=cBXQBz14}Lw7fw0YK(nk3rL3H)2rUex?B_fV(^Tk#}dKV<4{qIdFr1!_)mCl-RxN zT}3WwYwBnMk`Hc_xlJqu`%?03uX>%goKH*Ovj=eAZJ(Fq<|~*zq7@xH)^b7 zCo@RSMnoZ3RKuJ)qlFw{DAdeS{X~0xR_G8&D8N7hGM3!enW1Zl)wY!-D+ALY#c9cf zaj~o?BMnUGLke?<^cb9{)LI>K*tmVU%!x?|(Q%Z@ce4 zy$j&48@rzTc?-bx0ldC3P5@BxIMZZrC_^XqaKk8ySYtug=hap*;$Fd7pz9aU_z77L zIVt+!s1E~E{9(j+?jH$y-2LupYPL@B#!8XL@)r06$*owHoM@U8cNAy=5F)MRh4gW@ zfIA=H6($D35x5cEr%M)t=Xxy*juZx$kF35Z&~z+9EvAnzw3a>wnkZdr1jg;x%E;NV zW4un=mc2`2o@sMZ*36I!D}7e1J61tS4yHMBKg~sgxZo?DNb_8uhl2$e9soS5 zy(hSmhWDL!Ae@Tw1W$K^aYdeIwR&uT-??>Hqu`b^MuFFhL2q08ydppN-L!k>?-BaO|byLeZo{6+GxTFkDY zFN}(y>0K{>HmEJn2uj!aDftFM{-e?-TE8ryVk)D(#=uD)(;m95h9eGFE`_Bt_c0bf z4C6w?F9^RAb{rux+U0B^?@zp;joj+lXO&6x9imJB&9}gLFm7b^ti=pLos!N_tn4`Fi`6Y{UPXbpTe2)W;I{G4R=5(^~(%x(Xg+;PK3kBvU zxO?37Cv6Ws<95yozQ~)ZM8e2h1U948NA4iVo7~y3+$TvYfCTFba2rkd&u`Xqoa~k6 zO$DQ%*#Nq1NWp4Y6BXnCr_FbW&}_Vxs0~3zJr8+|+adYBhWiAmlitLo^5JK`cBeCr zRT94H^=sNd9UD2e8U6fKvuv&tDlqYw%M$#3N!v&(~=%Jki`cfh4LkN5nmOL--Y*G3)k%-r;8+ueVu*>huDBYPcr3-X}1GeE~?El0T{ za?(TIzL{);FI-rrlXP->mAR72|4&GBGANiSjig=ZIeO=A{$N%d zZ~(KnK&D}Q3lO1`zf&-|Lx8@s;sq;;X%s))CM(>^TGW>o3h}h1pDgB#2Y-V!@;|8- zZ7zSAOw_sgFfH|H1O2;bneOlCZq3SW=3fp4LM1LtspG{|FoxnUoIxm&h`F2R58Pas zP`5f@c8yRg13)u|OaiN`SCrSq_s8@~__RWDjF=B;4Ir_h5n?rxu5D+>2|o#IsYod@ z>}D7w!SK+!jBx<+JJ_u=gSSyHA%7Oa{~zf5u)Vl7h`zpjNjGdfgQ?_1`~P6)i{F@0 zizNc^PQ7S)sL@0aKnul7ZZXjOh+v%|263P%l`CC&-lG79&zH23b?a6 
zwL*=$4?92es%@Wi;S%_Ht_fjX1(RU1{nN(KsUPylSmr&tge{f3qiHK*EB%+Zcr;g> zZ~DnamM#)8zIZ6Q}_p z1B6};Tm@mtbRP?aG&_F5dY!wo$Af<<7n`Ok^5cU)OgSV!TOP@q4*jXpPGRT3T!>4g z)FHq%r}ImNIsG&*Hz=G{(nqYy^dhT*#q$+%acSYqGAnL%1rg zexFj|v9L=hTJE=BsNy=}t;3wYEKPSk4unr1hry#Y9rQTU?wXuwU{l00&mWowpA!Kf zKcJ%W@eSg)@Ag}}`SwC|5d*aUt?n{VABfHXAU96(b}MCziNBaF0=0RBJN}(^V{0)H zioN+camdMuU4fiR;otKfzpgkMa^u&C=Z5x^jb3klxXyu-G9695Ko=Q&T*esSm3 zF$4x>RQ;v0L&igciraLf>h$~YynH=>NuwX@0`&&9GJTkD5ZYFtWoX7xCqi$rDX<}@ z&sXQ@W`CxBXVc?!w(3?WRdxMBk=}s!v-)wDNMz8hQ4+n z3lSn34jHdnDJPKBd>H!INJM`88TH${1dt0Y)V@a4j##v6rS_#Kop_Q;k)9O6P(#Z7 z&0qav-D0~JdqoXAdNC#!gtI1b$lWuun|s~}%to{QFV6>Jk&|AO+B?n9;ttk|vR?{6 z#2}mxZd&h*weiHbl!zEN2_H!6T54zJ^Q}b2E<`k})6N||Q|FYJKeO=voo+qos9qI6 zM450w`f?u@&{u+4Wgx8~E_~A;tgkH_ZN6JGfJrKUnk%k#5lJPk%TPEXDjzz?mK^H* zcir}(s|PW%nuoQBR~ktvSPaaCF@ehcvH4FW>fwxjewhNqB#O8cKHo}}Io))9(mC>< z$+14Oh`{DO;C$rW&lS41@gnRg9_3MR6oz%pFa8lF<06c@E03bu`Z6~bd3N5_i!Ed* zw_H2nJKQ9-HO<3bSLO6E(o|D$TE+aAee2JfW(P+rubw2+@E~nwX}_ajTglB=G|6P8fa}@_U0+! zYSv5fUotEeo5z{=OH*GXYJBvQ#;U!a0(Ug4iU@3HWaSLX7phb4!S=@$%P;>fPZYNO+tqoJno?+eYQk*&#*@+wK^=9`i zEOqlVC2z&(5aC}jV|5P1wMnvAIm*3bY5Y~141i)zIO;t*OdrD^Ircp} zw*?zX?|NyqR{6;_@|pToCUSGcjCvX~qwF!8+N`Nw{GHJBrnZg`Ko<3}-#j%F3V`Sy zJ82lsf#(Maey=zjJ%pt!A*c#oN~qc?dY09?4+TzMnfmo2no#a6?3Vnp-t+3J%0#i& zf;QAZr>Dbjzuj~kJTiPqp1ENkL5XGxpHF6yNQvdtk6{e!+_W$iaOPpAm7b?%yGQXcv;cbcH*x)7& z0jZE8BU@7{x#$a<^MwA!m$#7Yp3Q9?)8f%y+_U=OK{bZ-9^yN?HuTDayD3RG-}1oM zE_g||SI+`8>32c z0+8t1!UurYv)RAShWg(G27a4>lfn`i#7MC9xL7i>@1at@>RcZUPB@8PM;d5Y4Q@t5m&H|D<_) zw)L_6!Qc(g8SapnFR-9iw^vbesUZe_dp=UqwmcjN#W~Gvs_7RW0_cb7_C zCB4KeqdJm9=?_LZ&&iL*VMRwL-7RNV#>>yAB03V+lt9te(oUs&%7eT&)&F@hI@XOU z_D$?$_Q=D~&~L@eu+y)`)Nq#YZ12~*rym-qnc6MWX(39hh>xCxNgoO3es1fw{wmp* z^p9cT_>BxS%w;hP`hxjG-Q}=xEYIuoGP@r+H4ELnCk$v3yfkw*4?84Gm_?3_Shjrm z`c>@al~wjhrJ4$Q^dsV5bm(Pb4g_}ODQx$6O!DoPb2A}26%6xR2Ba+HVuzcDHmiSp zq#v1051^|X+uZn#J~>^#YC;d^RV^7#x+^r(Vi)nFZ;I_35g;iD!1pS?ITlq~ee_s_ zR-`ZQGT|@*W{rD$EFemBzG;nH>|GPp-Mj}pl>wDnF(2U0x@{#X4m-B2q`SUm(YnI^ 
zg-1L~Ut8|y6hxoa-%)vt?oF$4yT`YV@05Yu%)AdN3%^4RU7zBPp#cQ zNy}MBcc)E&Riq(z)}Y1TlocLBxzAiNJZXo2YIbp;A%AOk>KvSX|c9;v&w{$Ml<7h12632%?Aftb4Y!v{h`f z__Igm*9=zGupj=r0`?dyQgGh6Od03S2}1xUf33rNnVM|FLZqtdd2ky_x4b_ZH>G$U zA`3Tb_bCu}QMb=LT~+3B!A-dI{;hvK<7;L&@3`g>`1Y%l#mzY43ewDXe?V>4hL`P% z?kv(ND|`#(I-aRtESKJzB`JPOn3Uc=K(HLS2u>eufBq9M>9<8!8dJ_>9U~%4$qYZ1 zBDNgay3yAtP{6G{#W+eNEuB4#Ox|vW zbpEhMo>8M~Sur|I<1gy?Nv$$`Gv6)VN87er8oaCKpx0(HC5*w$1i{M$>=`2N`+iJC z5>Ou~vBI8- zDB>Owt$cRTSu;4eUMQoJ8}o0=$mgqP3r!U&56!w6M8-}&_(Ph$>Fy3a%^r8KN zlm)t5aKB@ED$)ddux}eYKjauN#I5vrtat`2B!$c#2hT~Kaoz?k?r`WhieuB7B8&f` zwbSX)x3%=vaRA%tifGbY_yPv%kTf!#?j#F>yZ-p$%pm`PZsmUKlK@Y0bB1?@Ob&JO zaPb%Dq=X!I$`h3WWM{R1Wyf-hKj&2F{kNl}c@N*ODNLtF@h3m>=Bb&SOP2IHk^aSB z^70`HW$3(R_^mrNp>Q#swv}tJu@kM}@1xlx2j)xANCv+DnG^N>IGbuyjT&dxtowO! z1i?ujEJH)bdVn>nZx%TKey&EP z)U;8Ejs$WUWUW%8;aJqDkoQR%UATN~*uxa!!B|{uMso{p#@9fl_pal+%Zm?GH-%JY z$I_hoeOSh0LX``~PL)8Np&x!St(NvlA2aFsVPMe_W3u7567w%6`6`d~8#6q@s{9_` z#{-Ni%Eq%CLG#)CbvLSXR$~S!6G8Fc5xtbI>4W{OMT~(P<97pB)}JF`Y}2so(pTWkpv_f-37BE4be2snXNac*t_Kn;Sb{4+saKw7y^hC`Ue>}CB zv(lM<=>TtJ+Nh11mG+EH`CqiO$5K*Qs0JV)SDpF_cn8F) zwBr8ob8Y?KlsVoVbnQ!YI2rI&?_y^( zi^o~~IuV#QRK1MOEz8Y<8@ciESRzV?z=ibZXFRrI;X|;~Y^x>(x>%a6VyyU$2*LAWmWR}q+~uJ zE7g{Cq6%vvBOgetF6JcU_77X#W3DLsU#^_jOl@ENo{QTp?GQ75AHc`)?8m2a&(o@e zp*)RF24&MY-Fl8K3N9>k?n;c0KjChKl*~MTgJaY? 
zZ$BPa{?&!2*o@MMSFNs&D*7qU@3FZh#a$b^JyOUV*g75W*^nSZGn9lb9A-UzD7|q#jDm3?0jTdhS8$c`&JMBJAW;>n>ojXzMMuIpQb# z{NriO?zLI^#ZEcn#u6`hHm(WMw4F9*VM8)Yh4(1@6?>b9ziKzq3SV!7&KH+-<%<6n(ysY3VsByl z+~LI3P}pzPb!VmM_t-bfSjpVs!2R+km+~gBPV)NCgABS(LwMc@nHf|W^SrQ|n@W56 zp~!18zW%Qy))MPgX3_C)njdDv@knF94o<=U(3XkGapA%N>tczHvQI!A^1>qGeX+D< zkr~jat)d*|G{eQHVeWF4h~M{JZM~oQk^wuC2>pjScZg<0Yhv%^u-_rne@bKuOLB-H zpBPeM)#a2aK6Y1n$uu`y`Kk_{gmfT41{N(byn@@m*mE@-;^j$cBd+=9YuldXQtF#!ksN!k8 zt1soT1zmL3+!eYq1_#N$Qq{;diIidwNkhM=pEa&?`Kpk$Fh=x64EQf-DenwLz%K+6 z{BAb^rbymWzXR1pO2~>fwdww*zs{Zcs;?SS!u&smTs57$u10wkf((3lvlcdz`$(0W z5}gE(smb#(DCJao5gk$Fuf<`exb&7QM3oo$&4>-TU*Gu{4t>uv_nGz(skF-IpmmRS zY7MuzXGXdn^3sM`@?=3EooAaX3-uRF4{$S)nv@kqnP9qr|`SIL}0q zV59mN|7Q~j|NXtL&8QY?AKkdCHq^oM;?d+%ZEeKd7 z;Usz~;MNTH8l;j4jCW6q`=@bBJ8q8Ycj#FiH6_y9WsAUt8Q4T2_42{(Krj2NC&gC9 z(&(!17oITvHbTkousM7lb}5raU@|pLtM(ZMO_5Z}gwuOFED^($4xsQ?lae#1Zd*q) zXwYNw_6GaN3>9g@ULk)W1|j@EAge*Qfz+^scl>r+Hs!#rxy7LPKV3GA9ReT`RDj8f z+rRyf)6U1i&{!2KKJpN$fXHYXIlFfPjy0rxTM+L!|BpjwiO27#q+h38p7x~vh!m%A zmXBbd+<{_nltc0_)|6HStAdLmAgQPu|7U6ct#9=9xKDe0B$};#WLKNHOf38;+8Wrb z!+5~Cw8FG%qeun3zWhfDgRksI9|L6TbH=Qb+l8{Y9SYe_@YZ|1H&+d5U=va|v{tu$lB4^xP+v5%JCI zlBVsXtlI7LHe(o=@sf6#AoYO3N&&CF>Cnc7g>+E$%>pw)bQwWDC@zbys|!fR9Iw5YQr45r81Bm#GgJ z8R85BCSlw6U7#eJiH#pHzO*X~mcrkGww>XqiP(Oo7HE`>AlXQx-087~(NwWzKXEQ* z$l>~l&A=-}q}{KO@aOYM^?X+R!+FhnLDdZ2CNiLW6lKlqrHKjO$EoCNAofvO?c2=D5yrr>r`EhBw&WiATYz@73S0W=nry_K)U1<6vL#OkV$GL z&kKV_W*Mjh8&o{qQ$vHzg;+9fNlI%wuoUIfGuh5ooD#yKfgJ{EioS;S>amP>s3CVI z40~wU8^>QWH`XpR^$6y`_RI$00Qz_L({=@bKEraDjdbnSXbDGZJh{)pYrn>!7VsnU z#F9yCzMm>E?8DOxNEyk zuxj*z7oQDLclZ5{au%v>)fiL@&#c6kltHAZAQzt_JCQYP!x|B@#q(&_Z^Sl3uz}54 zTMP)0UpnYqJP<&DI7BLGC&hmL@U+1V|Aj{wQQGIXhZ+1n~!a;2bf?F}QDthmC9D^2DXW%gtB;?iYl>Rv`-cDZkbhhNWqPkOM*#TA)D74=y?3 z2bejob8nMAvV`ctDC82w& zSk=)sb>Ihr#W&(o-@$V#>&+H_8jNu0=TOW03oU3&dwICHx@|e}PQ${2S;-+XPDT_S z_Rp)*&@X|0;oNGn-W)@JB%}^FiE8HVU$b~(9VYBPq*lZqQY%lK(l`v$Y2<37qIrOJ zto)oOUvSV<>0ti6!Cp8CJkAly#fyc064=^&)?cx3r1=fy)N1M&QU(R}*_Yg2^PDqQ 
z1j6O-Tw@=75Zxe*$oOZE*-~KMM!;mevVjqPp9z8bom_v>KfTQV)|rZ}mMx^d*y79! zD9{$;z{}a<(SFMXpj3K$@-Yapv%uFxlwH*Fg?R6&6AZOTM zCwZ+qGQ^W|tl^B``xOe; z2C{OJJ{O>Svj!$aG0DToKbEz|t74yhzy}l#=vEWRmz4S=bR#eQb?>eMQ_cK+2}VhQ zmqM%f$jrM3d#cUdx@|LFOei;mqaG(@HRxX5ZmyCe@ORczj7ysRPFaMTxsjS^*jte0 zb>kUW$D&-9i;*}iQJ4bfmNJA#Q+zQZN%BKLNV3xv2e(K^ERR7^wU3f3{Xfsp6uTP&WYtQ) z+#FYk>K3i8@D4h*cFYDv zqEi~Q`TWuXcN_B4R0lp$JAoAA&zuGWd6*1`>`I6OL<7N!bhQ_FD~RRPy9svQQ%lNt zvd6mO{4K3rR=N2*5nx@DAM`1@Qo7muH0eGe{edS;)4(wf%__m~Ze4$Gznd*pgH?#x zA+60978%hxm^hf1Gt_6Tav<>xz-j>5<_qBBRzeDt1^;O~9`}4|h7#6uq z4xc)mxVwg^btqCJ!2QET$DhY9&!WM@hbzu4O`C_NKEO#1#`Zz4ofJwRxC1C~R=~jF zWdTdBYU=*}ew`*e{XGf0_8G?+y}TI?Y$}nyNA;Jw+P7w56H#Kfph*oZY@+@xqSJp*x&WN>A2rGkxhPwIsg$?(XuUBp!&~6(aR?IrTgi7nUUj>SIYDb> zU1*=+HT^^WQb`Wbc1%y+jOZHxC)<)jNT0AijMgIT`$mcGkJJP*=f~x4dQ%fm7BZI2qxh;Q52c#x^sj7=~r5Im#XhCN{g5uwy^KXs;@i6a3DP> zWiM|$Io{ASwP9TH4KSuKU60mR88D>Hk==_;_S?Yt_T&K&#F#*Imw{T`r}m(6T4RX@ zZ)y4b&(@aG_&hW@GR}Ko%Z;s1)`0KUq^9tl_AMw#?Qx5Th!|S5F5`tL*6Gk0G%^hP ztdFjUIRJI-i^?vj$oZFFzJ%SQ#D{kwQTsJmU&%C^Z zZtqth6jplN&2xdD6c1>is&AN<#nOF0iH4?yN!6H2lx>c8v5Goj9j5c&z9+xdk{!(r z86!0;DYI#9vD2F*@Pzy6>Jn8Up4ZYtZgWF1(aqFYVxp3|-qBxB{>uRY&Ljy2po{W` zNxj8+U-s*7FT0PWG&?SxRn+*jD1eB6y(j86H0h1RPbrX%T674(dg9Z4(<6k|EriHl za)fp+-)}&R)_MHJOnT&e(k7>df#y74Fwf;u1by-rMBr23C8ja!FqFnxUjCBs8MY|S zUg;z>`XO2Z{Zgm$d_HG^b(R-?num9?I%Hv%Qb4%EFl(9f+1p8MpWt*@5%>Wmz)`4m z>^M3QjNZjd63=1WP^0?#cI_i^8r9puTJ?_0%I28X1{d{0#v2)21Hv6|Ws-3GPO0CC z1RUnJ7Ejpn6D3_307>Hj*#a)(IXRFf-|TWa#5QDow}<%n-4_YnfB}>vK+!Tw6usA{ ztv~ywdLF?3cSytLe$%YRs-DFH=$mHv)q*u>NrYbDNAo<2Te~Ax$nvsy+xFh+l;!y9 zfR$e$>=TSIq5a|vOoo<$T}n4in!IfmHvuVy_(U-7f}&-JY<4wyf37?9NCsbosNJ6^t4trubWg1 zs*O2-IjWQa{Y^HLOqoDSGYr7w^9SB}+#Y>9E&?WwPL-(Bw(WlTxRoeNu(088fwB-I zC7r|6cD0^V{}&PUIsxyfic}D`mTAfkRx}Z~H7gGMThmk{Bnt&9!bgx!hSC3x zMX;!69oe`tEi@q%fpAPvajPk@)SgWwg!Q@wH+`?*J=TV~TU^`~+a9Pkx~$S69$1H8Y+(gur0|!H0gY2KU}y1C z2`m6X#BesClTXDmUtS7iN^EjvUY~Qn;y<>2)oxQ=Y1Axdz+=W)VSp%U?^SvA%`0x7 zU=8_j{PZvZU8fh>qV@8au>z0j=B}o;R!dodLyryY(o0QKniBY{8{K!j~1Q&M()>QeuyPY$a$Z 
zt+j<9?L<1(ciWnFewwCFivOs2Q80vjyUUZZ?a{`FWC*0WWh-I{oZlga7!H{0bLg?_ z@pu|&8X?t?hTOjpjT(taz1Ilbk=ra!JE|VS_Flh3{KkedROqGL+}FTwSC`a|%bjz- zCL?yY4Q3F~Pl5)K;Ghba|8VIObij9{GfSg^a$}Kf$;=WK0vo_T2A=Z*KF;P0<=)IY=E#Ex73&P+b#Oeyr1aJAjL(fq~UoR{tA!L6U^+%_(J4J8m}3PL)6m;o@Dm(=%pWuS%_N@flA_NlKOP zT)EX;p8oR{!O`VOZ7O>ykpVND`zA2S!^|?tg7WABPKC9s*xLL9R&>70B%-PXj3^$^Ye|Gc! zb4OcH+G#MCHuC&;Ey@|xm3Of3df*-r%myqaTB@HR5ykNKuO^p6WXSVvH3y6Zf3ma>nFD&TU6pW#z1{|a1_>4 zDOn)5wcPz^Ke7S876>AM7Z?a1PwsynZOnPTGoB*qx!;O8{$FI1%sBBUwmJFR-*;}q z@+O^;i5iHg_isDXBtzaDaK7KgmZ|>buWot2jU zkT(#oxr)tz&2!AWQAP^*x$wS1tgxNk+1IomB`f}08wO)yASN-r2X%pe^w!^!{YBlc z(`aK(1mEW6ChW~)xIC%qu2U3@gEgPD#4 z6K}#PY%oM7SBItY5e!IK;kwUC#{sz?zRd}01OoSI$M%s1Ir&rnWwN@Tt0U}9@ChDP zUv!OMKTR-FYzAek;@$d}v(VDUwk(&n4Bm>MW)l5Hr^~}2XV*gARTWauEAK~0vY7m8 ztd-bMM-bIj>AeQ8gX1&dL^_AKd{f>;Vn#C$>B{9669>p5d+a;FbrE%k z!VIxQn{yG?dn8cHxd5Kb2%f};!#5lwb$zsA^&JXL8#xME+`7K-etc_o@8CT6%lvkA z_{^-|s4fIq`VH-0;>Q6(^r?r7K|(AyBvJhdgaN7jXb7V1d;*#WSMnE&q?#e8q5l3a z|8&21n>X<;ZY9M0mkk;w45KeN8J+K}lspsW+Ie#JHPAs$N zBdb-@Hbi%v_J$1wK36|_yg5=i-ZN*Hw6T>A?q}b*>eI*SZ$#}UbxWgphlA=Oh<-^H z@INIEinplg94yF7OpnckcvUChA5D%}mPL0?`M1Wn|BtY*46CYJyH-FzX+*kPy1N^Z zMmnXtK|t7qbazU3he)?Hf;38(ba(Cjt&Pul&vVXmuIu~hA6$FQx#m6YF~&V+C*a7= zzj_(4QCPGzO(0JNDch)G3-|trbYlSYTfwc-)(68JK5gLlryZf~ZXw$ws0iC+tLwO` zsvL6#5&}EP`;b4V6d**~Hac`bkjb-Jb*Jx}IB$YciFAtw|9l?)=mUHKh%>KAR=M9P zb|SYGfm;e{v)hVs;G?n&H9y8_`*SG3yKAlg3X>{63yb!+FzmHX!+k9Qhl6y^@wi7V z_e8`yWpxx^0ia7#Ag?R*zQ0DRjKY(JIvmM$qa!o8SzQh9E~{Wp+yu?8^pK7L%08;8 z!vT!5An)G3^F~gC&kr3nyR;vWHEr0hjgaTw{6k_!jeW8Y1PP$W|DxuB*KPHK)CyTt zIIi_X_MDP9rKJG+J@ql>MklAjE)7JP6o$G}hS~TQr;_u@c$aMwSoEH|0PM=!RMW~~ z67t37CwcZL`yJ zmOcB);%yLOPcr~&Y@f3FZi`ZYbL_I2a>%tFi~#y*ve~zJmGDLpIkM9*K8E#^*xge% zd=c=TlDHs@(5s&i2CZ*+3S!Z;4f*yf zT>qKyXj%vW-GcAA@9wfp!!-tWynxddXC%41Tg>`W03FJq(ow+3yZw|L-CfW@wQ@{* zb4;+_p>Gj71Yv#J&sd;CB|X_Wlp{tuLOOSgqw)LWr2`(nwa4BxpV;lU<TPq=o3EK}n zW%{vYY4U3JB)0tm@{I(s8f@eXqPTUpun$V+%D!`Ej!<7081818GU~x~$s+~69 zEJ~Y5@0<_p)z11!|3s0x@Q3>;Ghrj(cjNL&D-K 
zZTuS&Hlc}L9uEm{f04Sw55zT84PN|!vD)`dnAac~xm$wSzetK3#fbPg$gO^__ilJQ zWy@_=QL_Pq7w1SKz z(lFrpMn{x1{#e9^E@=Rh*12@)?tGl6Wb-(s;a^~5 z2w`L!0sJ&52iTDWlwqLW{FF{pGDxE9o~dmOleki=LfGuC2NAJEvelO^2IOgpr4GS7 zVkNYXinYBD(Mf6%Kf`IKz-`OPSwFxcGqmGkw_Yl# zVVy!fWyZQ9;jT%jUo*cJI;AI5z7V4d@uNM&gUC8n~Ei~C1H zT=n|i6sU#wCT1up-z_}~f7w|Gb{Xb`ye zSZe~(Is+k2z3YX;gDf@j3>QT|?)k3AQz^alPEf*0?Y+!HV!3P1i;74Y{BMOZ7RR@9 zB2@mdqa}3P>Z|TzA^Mi;|34Z2z0KeWp`kQ*y_!}Ld^cHcu#wJeHaJoRis2O=jeq8h zJs9_F7%t!*APA`GH7et#%hZR{IIZi2J0HfO+XxTc*UzNo;A=ee(i|YYgi=@-FhEu^ zUhGcC2cZxQF}3WI-yOD85q?<(wH`95v^duGNy`y(X4 z>(fZp{n8-B$3bXkiQ51?+g$-c5R`sN1FUo*=npUk8jk{?L);AC44Uy8bRH96tk7vt z*nJ1(T!9k716Mmi>Q7Y%80HXBWAdgwrn~hlcoL%M_^9{RJ_90t{8!ya5x@7WV z@kCPKg2+!TYP2u`?)gWZZ$*+Gshn7dni!b>qR!rQs}9#ZG|qrjvBckJpi z^=f{Eft$d3vDwvs8Q@S%fj3lZHBHBfOQ z^8?CX>=(;PDL^TzlA|IbWK09w;`-%92}!+3C!*VO*)hWGxTEdbdD{1#Y}wfGg^KWf z48(}%q`Ptu0$vYFxkrkIVCUz9F`muuQRXmAGB{w+3kk8jg%WpA-!FhJhVQa++Uyeo zmwo77E@z=1An(vX>3cOE7%fx4AE8sEWFGQ%lB({7Rq5|+m0U0af^#5~S+2DGt;kl6 zHBH)ke+hJ2we;8Ofh{izzwkd$=tsO>lUYwlOtFH&FIjs8l!3p^4}yUIuKz?4H-a7k zqE{49w5|`4(f4qQ7|FN{Aaq|iaawy$Gh`G@L=9L7Yi{MBvb_!t3`@e>G!EG%Aa&j2 z?x4Xm%lk5R^fSL)i`#)s(BRC)myc=kqK2m%gHBUfU`i0^#@X^mqm~t(LBU`4ff6l& zxDv3D7a`yu5GtE^G!VdlJ4e%0ZLLGAw#i0%mYXoKO_G*yPUQb>c^BqUqe44pK3yJ~ zgWR9?AUAD}btH$rq&e-uTE04iV#`fYQh-oG!eL6_`)V@tWU%V9c~Ar)rz#FSp4?=-MsNJmW6I_@lS$1ZcB9iZev;Ce7NArt8<}kVJvLTD1fZI2N zvVs|o8DS23)%EcVXoEfg>baKNJOqV>&kil~e~HUOTQmSv)%<#6C{`ndNqp{C&K6=A zBx7Wh6Y&L){Duxsluk_g2`Bto>?HSyO3v)wyO_S;J4v0AI`~_GyZ#v%Ir9<3_*Sqf zIV==8L1^3y;V>DB!gu7*0&QLaMZ*0jQiA?l>MW4YS7#raj?(%!(;+_08V?6_ym$L` zwm<0s@e=s2=b2yt4p}M|&|MbV)d&IAg|V#ufV}oV=%&_mc%w4_HE9_Lsmk!^J@{q) z?1scs{0A1xF>WHH-d&tzOE^KyK3N-lBH4Jz0`H_GiH>aBEtGO>3TTS%w?#8ZfWLyw zf*C3W;NAwayS)LYzk3*~ra6ooTOy{*f^02+C9G+a`RgShLSMoha%?U+9`7qTvrYVn zZE`X?p5E@y3}R^8?DBwAA5;q2i2Q8EQ@9!oxM`ft6BE9g5D zjkp@fJM{Rz9JobKP)`sLUj#5M2h|yM-tQ7ZkwbpcLLQF4zn)l6 zquCgs0MjVA9s2SR@D=4qm`H>lV(W1 z+Iy|y_8h9Vcgz-HmMVOqwj_e~YZQ>sW7pGVr#(>n{fBaaxlQDo;{dcqHSA_*sE3Y@ 
z3g)LddJR<_{BE2UOGpaWio#(j+`u-BD<+@X3+EUjTXcNlNzsh#d<~PhP7(<{! zF=}2{*YTDff*a`a7eTe13|;(<4qVVsJJ@_?HfwO1*_tX^U`mIn?Of&H2Y!gg8 zPYQ#treAe`i>psuWg|fS1P$F(f0^_K86e)xTXJmW%jpvhM|t;{gEa=k083B%3`Vu^ zZERUp`!6ot4nRA96B{7=l?(2$dC$(H@yOWetn*~TaLWw+>hqgwvK!%n7S_nFJw(=< z***AX<<;Aad@!0NDc9a0GNXP0%fmAx;83ctvyiVIS2Aee7@5T{qpWPd9tt=wRxJkw z#aX|9fe5IeXgBr|%o6z0eYaT#J{e?Jx(m9^y55N{M?UGXfALgW7=QB+$dk}E)mzh1 z1f!>Pdl>!r)JJ!CSKioIyz|`whD48` zg+)^U6TU7-^OJ<-1=?RIb3@oL`8m_^s3z%+z2t>(U7-(kTNSU~sI7a_`OcZle&i4F zM*vd(*pdJyg4g8~J+`W@0E?9f&tROmUtP$|CHM_65%gi>WImb*3VnD2pJ_kO&`??k z9Hk$33VX??m5`ig+7sh{{>NbF6-x3bf5b!HkHxewfi!OgZ?MdRzNM3S1NGU4946(p zQ|~K{xxzgF1TB4Ac-ziN>aeBfKKOtpK&v5qoV)KlR}(A!CJFM+oAx!>5{PwMM2;WX zAp!n8#q^{Xv|-4&2YhQYg@}E5wZsrIht4Bj1|8DQBtA3a9H8e8Kmh&k?e^TTiT!b+xtOB=3aVf{-`Q(OjYx5F7(@ zIg+zqKwI^WAI%@*p-0wJ@ppjNU;t$NL34TIn06Rk)cE0Z>&%fp-osnwox6dai5zhR zia}VQ&`6LHpbxwWKuj9xF5InQuIij;wHD_WRK*h8N?t=DeHUeg-<)538QY%s<(|xO zTcCnQk~~@A6&gKuS7{?D{SLnjA=6)n7tQflr{c>o)9;`v;XVD5^m!Ri!5{Ro=v9x! zG(EKQV%C5exD+wYeaXX|UC!{Ujr5bcz)_(s-Je1oGJFdsUpOpkTXyjjNPLG@)DSN= zEwVJkAU_;XPbjp$_p#xU=FVQ9s@j!i* z-g`D@J`xKtD}~%#+~G|eUCNn!k2Bcp%@K!cr=d9tP)zpMuMSBkiz~W@kB3V@pnc&Z zGP8%a=h{Zku=C08$37lJ9|8VA%qJct|14T12@=_J)4 zKR*3Xp4cVHHcIvXR59BX>*EjTLiKNI!J@Ol#&>cv!h-;$8D|^%3el>u360o>xO;eQ z8#{rY;S7kkktDPVM0|(#J0ftkNPd6!5e%)I^kTv1$kVD*1ZKYDir`0@<-mtQ;^hl$ zEcIh4TGE7nG!huqd|{0q7QgNE`>VmAWu}z|x0qdMH*FX1ZLFO!CqnPM)#UzT|Em>S z3B=Jyzi$WMTL1L9I~Xq2Oo|UgH2f-4#56mEWAGGG_weKibGNL7sG1K|f0kGM7mIn( z7rRr6WDqe7M?Q(S&WN`Nx!K~y;l-FTcC)|z3@B9hNJ2!c+=psh22Z&|8rj~f$r$7u zv(fo#1jma{d}f?OKnEGZun=mpmLuaLV3v~YkvP1+lbZt7zuy)veN&zo3!%l*!GUPC zu4v#v-heniohL3o*fH;%9g8n?V7$wO(&*)qr2Hu4z~P{8Wap3Rhvbd>Qt7>Y4<#?6 z!wf$^2HFEBAI)7j$k@b#BUPUGM9@>yV0?$vryeBtglr%=Y;Dy+9?A#SE74@Y4BsLM zlvC9LnV#Cvb^YBLpt9Is3vEmvd3fipsv+Kc!@0w6p^yz~+X@h%jg90n=4ixexMhk{xQ@&9&x7$=1nDucMTL-HN8Q>g1Rm^uMt*%NYb@yj5%UBulm>A(xc8i(l~ zfj@X)1-KeJs|0c@($~SIB;>hA#k+P)k-z6UI|RhNpk9hSTG|J!pl9fR=>7ScGyx(h z&;5yKR5&T5y6OGCtS5g}sZzynMc9e?$?x(B9brT(=eSoOjaLE5< 
zE1b!2N9b6-@6#C@XP|^NOKVu8(IW4}PFk5M;Pw@Nmh(8sVDk!EF`=EWil!zlg=>(Eov8dl=eTHU zhIMcXTgq}%OQ9(k$M?8f4bf0RX^c}qiF!<9% zDS>C)!im)YP?lZgvD|$TvF=NF^uiy7<-&G3W29R4$pTT*-uR41sgzY`P7N?mwMkpa zMy7ex>RqJOKA>`%+#R^Zl+N4Jyer$Ovf>;ZH%QDNdnLN z(bT2Dibwu?Ty6B|oN^;%c$u~*0#*j|`+E$}9~+{OE+LETXykk}Uw~K3QioZ#+sCpN zXH{f)I-kW(83Qqifzb7E5u>z6j$^mcs)yg)uGN1{-udNfUGU)mU#r0`vRtoDYNlMD z%31?V4HS!*Wp#%5hHD_dG@UNbIQJXbiD6D~NshjYf8fGK@u z`LzmQ@TeAfoNnn<4-^61(AZ46w60;h10f}_1?r-itUfg5G)QE>$coXA%88VP&3v~f zG^kg^^PO9KMQTNP-uQ(l1s(P>CKYUw!pEvpo?_AC(ISl#*1+Z4H=;v6=$Sk=1wU25 zcgK;irr#G1^8397J1hShH5ShXoIMlyvM+Zgixm4Ts#hvt{hFM&hLR0@UQc$UR`$h` zn2cudp6)Gpu-Svfv}#Ri0DqMlLbm$A8=?s9I@F-hBCQj;)1~5dwpqUXHE4c%2dcDJA)wh`f}&v?w2({HdvbWGN8l z&AlIaJdr1IW*7TERofxpJLH5>@=#wZie@m=Ww};S zAI5b}^3!{V;~p|X62oU(LVZq-h)A=8TQNd6Hg{AO9z0uf3<=+A<8}hib23C|ZumY0 zJBKI=1^<}?Hp0f8H(0?~A=(fQy%#zjrj;7!3*dso8GTo#UN7`?>ln$ygBl7K(fuSI zy%bqwY$CHe-^1q^vMC$!h|ymkziPe<`X}(u0?*9v< zW*iNYK(3`zg}j^hoA)a5a=YI@&WS-E2U@%zUyek!0LFk&fD91;&aco10hfnSRyFMC zwbRfjV$TnT!tAoTR5G6VM56bDAJK(Is;g2*OmaOK`3v*jYn2<~p9EojH}1oje**i= z!i+u!uq}0d`eVYQ0v3h&I6ep&O(a*?A!LOs5WqB?vYG8$ukd(3stx=RL`m}yp@g32 z`8JvD{MYZaMA!;{J=fnKax*{#aHGS^=%!w(NRjPeUFExzBYX5Ex$@j^iFIH_*P*w4*BTm2<5 zJE!`gBq#BnMkwpW{nTuv%JH(fR~xx7*gWU_@B*C;sto}1<542Bm3o^46wwLub}fwS zUu6FNZ2x+jY|$b!coJbQK?BFo!SWoY)Pn2AHpGCz?x*m1h-MMnJxyJfM82P5za=$J zP!4RtuX21gcrlFp+9xR#<+l}iLR#L**HtpJaEp|5+_mA^4-*IpB11~EbypYwDuqf+ zn@F04)qtb%;d5q85NG;eyEuJ!j#D02LqJa zyXM~8Ege(^Xoc4q1=;fz1%Kh$hih8#N6KAD{v)mRp??6Jotwgto9GB)K|W&+O%^Ch z{;wC7Kmwx$kR5pBOQbNW&unl{ddts;U|hLEqSSUabyw?$(0J>JAd_N(%CO2oIkwr1Ib;Auh`b>z+qWUV)d3O&t2b?D$Q1y0IIGwH z@m-tX`(aQ>4bWG$lE+ia4{^Ls0;c$&oZNpr)21(U@fMH)f}+f=9kpXmjKQqJLB

A@^C1gNb-x*88KAm)g9Bfe(XCA^d+ltpw>ARQSl3Nghe#Q?h!sk+w)4;nVD>$gN6- z%XB}3ZP$r~W47nu6`EpD_3DW@-n>Ya2FPZU9z}BDmQHyX5$0kmjNDrUL12m(YnT|K zdf6zE#H_U%A`!)`utSgHR+;R0`w>zO$#K)UFuto&0=v(EL5cc;Uc+R;gqW-Fe_9c? z;LeS~_{}~7`-ChInAcfClkLCXEAWyN2%yAP7vQ*u-!0s2dcK|@G7|kR=?PlEg`FnO z>jgqgIRf}|qi2pS1^uT%v~cAb#&2H98~4SrJo_Ua6Ytmyfr}1(9Nz>CC~=1D&uOBN zV@Ti2aXg<9mh?TaKbt3^Q!-rxRk*iEzzc9HUC!GJm(Bli1h7SR0&FlDP&Atuw7<|! z=4%T5uSbM^B=_00A3@h2J(!pXMuT4R)gl9t^UD>MVR2ZNWmqI_4~o#NrWL0~m$U`5 z7*a*b1(Z^0afT+vjrT7nVtk}d6z$P>^5A2kFAs!Q0cyXfdmxZ2{{VIuTkjZ%owwhB@5eko#lPH z@KofbH5jh6Iaicr7dHtRMYk+wmq1>uxV}+F`nq{FwzYK^Jp)na&!otcii#_gPxn+9 zfX=Q+kk%oNq;bkA@E>q!HH;32rc(7!{Es7vv)-FeFfem`hrVZ3iR2AFVSVS*^n%Ff4qT!>F>_$_l~m0j^=aU?bud5&~!ksFUFD@ zmOSO$ep%AYF8ozBas7VQnGZjw$yhNXHBZ3~f7E_}r4sq~SLioF0X5srt9kn}r6yL@ zQL|`ui4rxlV4;5nl1W8l&4R|!g2u{u0(UO=tfA&N#A09 z3!uOVV(o3W9h$s1{**GUj6Fs|8v6OW2n?M_{n}2E4x9DBhID~Ze6wf3p#t~SLw_d! z2fCkU>tFx<+LJCulWL6nhobloakUx#cqSsqS#E|j#LjQOMEBX9Q zGIc4*pQ@v@L6lcMXMZ+j9&lGp^$nToiANj0#8L$oeC+%KGDbQ0(_Qxq!^@AJLsg|o7bgtZ|$Q^L|vlU0dE zRi<&^pNU_#h|Yy3=K{ca@e2n5kD&)nOW=QejwV=s&Cv{3)?{^kzJb4vp|3|?<3f@<d#*yPXH@ zD6CEfU`YJGe!kM^W6uh8@AM~);FI3epV^S|OC!|u*x?uvhp>}2);NXN`PtcjVp)e# z3PPBaaS;(~Y6)J-d;1!x0u_Gy~^Uo}A9S0L@nDs59*$&6rM)4B}u5Ck>fO5e^)J#5`6ZgixOi3K29J%eg= zn}JVI$TtHycK-9>9=8_4AJGg$kKboQjUhGB8nLT!^|xJTwGj^M#t61Q%R_GjMaTt# zG+-jYo)b&qj`%ynb_a29w~ltp%0Jc_QIEPMNbf#+)AP&0GPK>vJ_ zIdxjY&8FNdgcknQ33zz#=76fG7?gh$vwFAubypafoW$okXR1G=+Yc6Ub|9RTYil*)OQ2JBh}QNxq9-qU_z@c8W-LzC$e zHSqBNz4$gy{w+$Six?tS2gcjD1qYSB>Wm;rf`LcV(?CFA>HCWV>B_ujagr^PM! 
z6K%ruL3ifqYRYRe^L?B2(koXW-yz#IXxn%UnH}aEt+c@AG4PoQFUDz!7oNukMea!x z^Q|q0WiI0hEwBn;?;3clD&C5uU&96tDM4y22g~2$OPGyjyaw>92EX^KrsHm;iYPnz zU}y=Qt1&j6E`8_p(=^#uKDy6*Pz=rjeXu*ZPv@T)2UrNGI!?637wBrzXj@t-XI3H# z3oABQ8HO(~v&Zp#WOP8eP83MR!Z9PLXi=o`}4n-ee(C41dtDQlwapY31M78ScREYs8 zfOqNdNWlCh%|wS>>|-4t##L^NZWT}08-$0imhTyme6(me&#Yxcl^I``YsjVTx5w|h zU+b$>SWPo_LlPEM#?bNDB$G!bwiXhuI`rvb;OA7@AauOEwSWnxx3s%&spDPhi*itY z^rf~Va9U{~gBka-;yJxSOeRQAyOEd&pi*qd-Qv;4X0kz)o2X78=vxSe)1*B2;3)qQ zgMItWZ}pS6A4B#ffnBlh?cVDAk6j@`_uq09QC%OVChl>+&2v{UyV)GedpAsW3(b(A zBmw^T&>)JT3Y*QLlU}0^3{F2yjyO2?vQhNZ6tf^1?nSk>@*Tz0ILa$=L)`?iQJ1-r~Opv!6V2uzx?kmSw8#SGZ6R$vhh>DYG~smtB8UKsp?${jWjU? zfp>L3Z*#>M^~bS%KP2G`K*7WkrB5+m+y%34RMHbCxf?)e-9XA?tNp~ZfCAd(S3r87XI6_RY4U+yz6VTfK<#z(Y z_-uX$M_%+;_(|q>XM0)E+RlV%)3b=!)rlLDB7TJ##ynj|4*qa~BRx^Fx{-u*s1YDj zibpOz{5{&uMN5^CI}q&oIg{;o@AK1Id@-|+7v5nLZ*Zn|BZP+{(F|VmPtA#rE595t z6M%q(uCtI^Az7zcXdQG!yEU!1)RJkP4rU}*h~fjd;!UHb0f_7Eva;AtdN9c8f#MZjsO9Vp?N!(S14b=4@*im`0cX{QwP-F{u#n@IlAT zOG??#a#7D=DyllbP|F_~5Ym#Q=e`J7Zmx8J?^!Q*@a269MyRv{-?$%<{RlMCyN#&)`u$ zJw1((l8M0co7HuWRIAb#gaeWn*rI@Xh!>plZ_xdJMSJtn8oB@su0Eqnk!ZybN01vK z%}v&vfQNsEoJ~5ikN0&nQh(=3w71@Y^hN2$w&AX)j`%QqL(M``;!BTLnf^gM2EXK| z$7ZPNfLTw*f{xpUM5n7n?~MR%p~!{a-(K#^_Bc&fsg=zc>JRch;N&bq#%MQCp&Gj? 
zmrF2kRM1=rg5vjUuSN^{AX8u1C4k))8&@~(;I426hB5#L>@L%dMXBv zHSXb_s$o}0+v>W&zHW72Gf2D5dj4lY@g?k9Po(g8olKsCq4qg|#+4nCpT4`;qd^nC zcL)_g^i|H&sM3ed)KYy@%oNaKYTnNMRK0(0zSJNiFZRwHoD^_j&dxRtYhPn$c5yOxUIq*Q0TM($pDNd#KWj zJIUwhzUN?eRc;ae!37>;pKL2M6{rmdsqYibWb^*3~RLkbs(V$=| zy3MD`_ zht9%PG{N(j-UDE=ol6H6J{lO`eu4ZvrOi(Zj~_@X7Gz~`_a z74$q0DJW6S3)*j3_Nm!h>Doui@5!Xd4*p4-eYFi#M4d1K*58m{cjH`vRN&)YV9hWbrwtUW_SJV-?XWIVEg!u?Mq zEdGCDWhKG4z5$jDeEM@E8{dUzjV$$XAEHUFXXOY)W|6DyKiQ8kM#Il-V5A^U(Y*&c z$BFln7PwrrXmxTE1QJo>RCSfLYZ0AhKaSjI&)D*h95kfTjJ+3?J{IZW%LIJh-Tl4u zZgZAoYY>>hP2_DJziqDHo7li?x=(W6Tu`vx-{HQzXK%k1trTcaFV||LHud0r=WNp8 zohFl4h>|bD?P5@CSYmrBbZ~Yw@mo|B)P9Q9v|AbeCJ^uzF)?Rga&3WVc$$x;D8H*RbC(#Aro*h6o^vh%L+vk3;sfWh8V1Dy6k-=uke1> zw7!H7nf^1MYb~3RA@6o}0OmHpLdplBkubF!Hd_yAu>k8B2lOVN1R8x$Hi5*Wnu7Xj z5*Zh2L@qx-0-R?giC|IAQ=|>?3W1~iP>GEAgH#1?3Fhc=#!ck!N1>A2W5?Cy|4c=O5s07H=XtOWapb8or{zz(f3X0;9h&2KcaP;GrUXd!}Dydba@$2d$N~_Bm5>q;8 zq|qG2X@Q~uz)h6u|5LJ!E8Z0L++5*K_7{owXZBW2@VB0)(qILq?Fn^MYY&m3Ey&5d z!3M_XaaPL(^H98pcRr$($TwfAHjGp~8&L!xpOb|XoL-*N@xI&0cl*BCYCVd+-{D7| zd(C|B8hhx)rmcU-t*u@~XZf~^%?x&F@Adbolv=V|-`9)C7yzl2yz=G>4B<9ycCqwo zD8IK&6a+GCY+J0HmXWxJ$QWJnu22LzfcojeJ!{fB#VSmR*A7E(dtc zeFTQronTz$2Hy12D*d)!mvf!SVba7N5>bMea~83zAT2zJT&d5{F5z3PR?354Dt=0; zBQS9H^UNP)KVsxI00eR|H7q!VAB1AcceovB^WGo#m~IXwo{oXQlV;@->#Ieglz15W z8k7M3P@}xfm;d{UJT4Ed7Zif|S1H#_^~-!Fiz|b@W$~Uf6maM=tk+KGeuO zo@C#>Ipl)lX8tqITs`|@J_wvlO!7K<5p2U?Kjqw2Z@U?n{>%3G24~EG4&vUKSnGE2 z%R>Fkz~jy!fAL)fibfAo4n~Mr&~pmm*m>rI&l8Q4qUNQ#+3>GMT;|ZhMF(zWC~=VV6rhA<0LjD_Eow~Y?yZwkb%@r2?Fi7Ub?Vr+nGQ=B z!MdH>rvRDR(j;1BrncyQC$Q{dhQ*zj+JomRcoK0}PrGV0=H@rPy;MvHKoe^G#=hPs zy7faGt=_bTMGjuhg{lQ`Exr(P*j{XD$Z+U>J~+-1k|6Tw>8nrxIlKxLviu(;4e=vM zquZ#oi!~jKp^EUP!e4-hok@NDSyIg@#Xu0+z0s~rElE#E^LgTC*N-}#KoW{mJKyoA zgp4Q@lXf)>9gRuMr%B<5$)U#7Ey%HX>>>Bx(Q@taA6%_x1}(jG8R#_i$^!viz>!2^rD%v_pB!a!_}4CFqZ=3b+ce=if$15Nl+ zq%GPG^$O*OyDE1nVBE}#$GJ_=1@Cp{UB(6nE#_>dKkrn6*LG08pBzK?P{ZFY+#Ngu zftkZMN>%OZ3wm@dL;INonG@NWgt`bUfw<_yp{A*$k 
z!rTb>RwI?`9q4>qdb6gVIKJ6+Z|M4D9kwUrGq(wPmpoh%E2ghi8zc^S4nc94oT)J{`zMfs+EQ^GJp}*MfKw2qS_)# z0fLfCwW9PyS@~7Kf)HjAA+k7yiaA30)q22*pGww`3e(M^XBN~geLp_tB5`m7vsY$6 zAiq1<3&hLPF`wU-(JbG#?7ObmJs%@H?4^2rbAQyqDr;xGTqfZ$cV~PaQB>e*=jy#Q zH72}!Q{x0m_Mtk}_qO-C=@?e{dK@_(Y`Si~IJwa(UU>;0^1)H_tJMZKY=LLw0p}0> zjJn7pE`AA0#BlP=Qay+Z=TBQTuA89`PAQ= zuNuQM#mIj%KZ#LC-3%Usp~Dr>+K?j1=i=egI|J;p-A7pa2kvjqCh;Y@#Q`iSA#}Y0 zn?B8E)D4&Z)St&D&SyfFCeXyp#d5kdQP23YA1G~v;c}OxK7jE<5%o86EM^ExHaSMN zXg>KkjqN|1%%2qhF+cNzN@3c9fmRANonu^0OjzhTfW>a6ZQEN&UBDausWn zny6uGOWa~u4U@u9z?d0Ha>Giehc^S!DT6573x=XV z^HMTZzH?=I8VX$``#vJ~1)~xu)7{0UrmQgo9}57o#R$@DKdNRxTPULCeO)0+nFiRN zeB#2H+ckaePo#B-tpQU~jm3yvmy>oU%VL~n%{NPd46-_ePoVb5vPKCTvd!R=P80hqa-5jLJ`m$M8J=+U} zX>RdKuI&J?Gsf-UmzLpJrr_q3s-nEi4xYyTEGSp=TkC9RBjc&|qzyjqCJ4 za7jT)Qn(_>u?r;<*84)5*%7cc@XCb^ADoinoi2V{g#~g#fgUC*$qpbcxKfZO`XA1&~)_R{(;XvT|%eK+Y3**hZ58L@utQsgn3!K>&GumE~np-(zUlt87sK+9Pwv zoy%E~4ItUrCJ6qXW$AuL(Qbw~=~p+qgt?!gM_C8>qDqENxd-JEw&|pgz;pz+pPzMp zGdG;6)@Aiao0ogn$Xi^RG!9Elf7_-iIp#LC-y^BU@PrH`*sdR>ll9zEpSd0`XvJ_b zj9)xlEzf6C;^;c~?Kf>@j@z&bUur_zfPFrzV!s?jIdeM;yh=^7b96s8F`zMKgNf_? 
zCu>jl9O`d)J%LBwQv~R>fk1kmKVnN@%?eL2>pjqb?NU1{3c&T{ z$y&hr;MxOa!?e8_6r|_&vjFY@${)3p0{8#bH3G>= zvcTllCXj{tm8!8iLxW|lvk?|pSq4AlHDKKNoa?W(*>31CVJT=VwKv(Nw*)5J_acx4ctUS``f0i7i;eG4JC)lPDL9trv`4xCwI(@;7amsl-PEz>{@FOU|Z2zj?c-w?{%9RJ}-Rx=&CG%{}Bu8p6HVVp08l z(od4?arV16ZsYgwH|boq49vG@E~tHEBel=j&BU6;(FCWGkithbpF0Y?^m_}XJE)aR zQ~wDw!^BDVkjc5kzY$1uJGAJ$?KSv02ofZAm}Vh3zv@G#bQ~p+a=>8qnJXav7 zj2$UUg*zi}jn1osLhjAqcbdyk*JjbqLT2i`W9;BOGb=oF#qN_N>5Ma+dn0GEDsVql z)dA#GYy*!Tnu^|(qW7WrL}DGsj?m*ykP9j`q@tp+!} zZts=caVC_F8fAAm(Yb1Gr0sd4pnFf$BNF#Wk~R8VFfgsRjJ;Rk`aYL;!@7;u1E16whA-6F&w2-IYCB>iOV1$wOdL5Wt{XYb z1(n^f8iUWwQz8rHoP`3eFWm+8Dy$XopJR?(`x@naL1ouBba*PifpW*2NG%MXck=NA zNhATc0>4fXhbb!x^xhr|^q+)NDUbG-|Kv10Fs?yQo;-m-l62g5|8=w9z8-?%JINU# z2@yXy8JLh_LwAh_#_%#)^0^=HYN{)X!sIi(`bA}% zFA7$P3M`D+Y1Yxmi1$@n>nD*@QnYrz1%CJ4mgUd9rlSe?RJ)XN3(!p0>}-)BMIuTM zl+%2PumYfS$t8B)okYi2_}=w$i3!LmiOawod_6e6t!Z{<1tSvoa=0v7U7l9_%td%z z-ve?XTbVgm|FDdjv`5lB7bvlF!FugDOQJP?t5Km#Wi0JOY8<4P5Zw=Y9sZj+bo%Sz z60vwyTmYmNg1-#Oq`w^=pjp-{3d6^qNfx}Ef2U2{8ccZ7z!j&{k=bL!xCBZ0Jep7t zp8T#6+|78l*%jL`sQx{jl12K^_1iL?!r8;P2$#v7@xwQestxD>-#rfabmRiyQRv>j zOo@R)4kbJ|Y({NLAZs!^EQp#?y^!`dkW29o2GGh^BodlBR0{NYT`C<$f^|A@d%>!z za_2Qc3D;dm_=pkk-K?c3I>CjQRv*t()&DX_8MF^{o!PwLUF+P z!JuNwuoK&rJDGx?h~YESF6s9)`<-^lXUD>xTXi!-q!WjY>P~0b^mLKJF-%8ydy321 z2hkTVK@@rxh!)}C=JxI7AS=*8noHc(GROd*%f`Zf1PQAAl1?Hr0FA$PgPq=MJle^Q z*v)_P13*F;)Ch~H9`Y8C!Q-a+S7#)Ql)ZI;%eZQHfXQgN9~NJ?y;=O6FI_ol^Lt8t z;!{#i-B4{VD+_$~q{jVbv3#2VnJ;4BOdHiYc8263Vw;9a+_YXg| z-ObkSq=UW|W22ew^M23*O2{ zq)`NEhVD+066ugskQ|imQaYrhYiO8w&hWi||0h1rOI{6UpR@N~>snX*o8~|)XkCH6 z@F>`mMhAHK0swvgQ}lR@-rC^*YjfTGVN`=g4YAc60s}kZK1v$7)BV=}?186+JLUA7 zbGlCUO-C&skq3kaT95#0I^|I|CB~YKqYIQFYqEsD4UIV=RX@Z5)wyNV?A(YPPE)a@ z2H31kn>R(Y92QjXnWDnPfp9YZOEsYkyQ-g7t%85Gz+g?3`vD1$(D4(;jyj8&uZ@$< zjE2*g(jjwdSJ<$Tb{fOBGs7T@@+KA&H@@+1B^dx6U z7_79n_<3QJhgJEX+Q7BM4=JPRGl4qmI{Dh>Ew#UWXOXkWagE7l*u18r>8{A2`lF0j zBnvCO3rKhO#Zh5<+*J~*s|DwVks_ds`KReZ;SUVEKi@E;o91nRTMA%;CU(s~{rb|1 
zx&@Ywsb6<$45XO9UY+g2&metoMQyI@_%UH3+v+oR*vQ3~33om-BEEarX6s_={Q>RO zq%;T`QR;6c0ZPekNQr*AEMe~rKf zu4>e%yjt^^-N!91rj9-RAfLsZZs@Pqy0~DQ8*ZDGi6w)-#{!e|p*8&ahJc}zkLSh? z$*o#w$fElRxFt?RAukX9InLLiO?rO4+@ptJ5Mr-DGKWlDux~>->R%x!zP{=D2A)3Z zs93ek6`3A8kF8Nh$s1E0pt>Ple<4w>S?54!oMk2L()Wf5y%RsjkbMM0Isj^`U3}=6 zzA0bqkTCYYVePD|Sb+^-RFw|M9fpRpL~KXfENqu+plTB-Y`{z`nb!iD&5Jq^H?Fe| zhj+M#SH1eqJ!#9LD8C}L4N4Nz;|kW&jgTLlOtyNZm&O$!9GmbZYjk;b(f z{E`Aoc=yZnIwK<;_;5oYubM!s{A8uA@ptw2 zaA)nNgx_;t5`GVjOf?9e7Rhk_3R_438ym5P|cAD-FSSkW6WgrXI1%T+rL!xZz|2c2Bw)MUqbhfn|HaRl4|x8@N92Jo`rap`3>`{0n8Fk1gb)S0V?gNG zkPDr)+6MV*(tMub0&EgC4Co?ySaWcig{Dbs z#%{(OSfbvqe(Z^(-fCN649N0*?+NVTv5P+B8|zvPdC30P-$hzA2q{r`^P6h+KsG3{t6%JKDWx<9CT+D=({ zjv;gfLF}~1ZN#ewW`?7 zgZ5@RjDj!n%aNLJaYAII=yPMGL`FVzt#|h3Khcc>+H!8P+DwX8K{)E&6DkByxes9h zT5P~z%-BI|R_{W3n{&@sul#<8mJ1uNmlvN%ixwzbPf%7!L|cOmDNHlIAM z?yy0@y;m^|@2F?MIq=}$)o^-ex||}^fA3fMM*c<}nd$=f8u(?g**eBPmeoBcu6_#jb9X_q%`>#gk)k-(qU9As#MMJ&EG4h^~xLH zOO*?cAJ6x;cXBoMZNM$b#gzO9O1M3cAnNaHH1n@8$jiCEqLVN1*qGFS3PMT^PO+ zuP7htg{B-_empZ?6jv^J(GIbfm<}AgMpmr^#Kv4eV;aG0N1QXglgN273B_ffEe1e* zno|1IKj5}Br6yks$!+!k?v zVj>{f1~;)OJZ&0y^P9GE`4bKgt6dar%S~-y92?441rQlaLLXl9q3)%noXGRc0tguj z8`Bw5k{aHz;VJr;;+zfr!U*G1ktrxW3Z%(5`|7yBz@)b8Gb~u7*@u+p?S8722IUmO zrXRT!qKlyU1n$)3?N1;Ws0Dn;Z{vK)}@6lje)@fIz!sV?+ zi}8hHdp44iql697DAop<_`)Kwnz%Kvt6xka>BXafbUs(;xsrTNd)sX4qm#HTjy;x7 zGN*nRP}15#7Z{6Fq%p&XH`X6R3)BT@#Ou$vle0nXFXnT-KmSumE?zkSI;s?)qiV*t zp0P^3Rhx1-8!w-|M5&Q@LHENt?!hM-TD^@lvmRj)NyilO9{QSO~-A=Ya@Op|b{JXm| zW(JL<7L%HB19DYl{$*-I;F{Y($;SiDca|tt#65leFEYT%*FXkaSvlKJUJX6e=~A)S zDKc?*HhXz*U})$)45J3<^QtJ&`WE9_%V5gc4CBzpHL}(5eOudqpBRC1bXk9cxbVg) z&VJ&|GYfI7nX>S%kK4D;!8~|+9y{@jaLX*y_Zgi#5=Uelmadpw6zm!?uf>+zBl=LQ zrp9fS(KXEP{N8rE)b}Aua{5veJuy7bF3{-@x<4e%DNpAulb!M&|TqO2D1!w~z1e{tOA|dWJR3 z1_h(G3wB%N9-kvt0(`zKepp7_40O$?=|!jWi<7jO&U*$aGC*CIps4u{s1xa{Qmk;` zNNR+7ZbOt^7mV`){(IE)-3m$pz=E^r1$VTraw(*}GQ2(lp&LgX2 z4HW0}CreS;=4#978Y5EFXH-3+4wZ~dLWW2H^3kv@=D!ci*DW@{#$yl&VpEyMduUEa z)Gb0hhT_axt5~ 
z#~w9VOuaek52m953!Yw5nrEVvV{fAjws4TS&G*NS%^*q=ILWxJEHkhEKn_=zY%p(aG zgn4%Qw&hn=n+Ef4k1E^@0}_hvAG5j_A5H}v~>vHCppXhwFx}_o>J4%~!G6Ch_opEfz#h;OhyTVmN*rt29 zH)X=#b}V;W-$n|AWZm&^x(%Wz9}Y&j)!G_7^V22u`iM_&EQ~r>at1A2!bIA+*vE% z<6eG6&8rrf;=Td4?Q9ft<9&IxlOVpA>Ra2=^|Dj8%2r1Vd*O{k*+b9(M0Q+3_;h{r z28&k~h1vL)dWZ!n!#bfvNNu?;H_`=E^U60^as+P_LeDbcZwl9-76DI{7=PTkpsb)$VgjG{h zv-w6BI`d?&yP8jlj9SDl?rdiYC>OI*CVY*qA{75Fo{nZ_MkJ>3+$JQscund@ng5H3 zu>8V~{auvU|6%AU`*CC|gjW|O!pk?bu!fn59Av!l(osX`UI0qnG2VkVy@ayNLplbs6n2dMbUGCBGoH;HLn`TqId zSs#ynnMkUa#!5bAd>EV?hpLmxHSPuS>}>&I%kJVO3}goSbt&-bn;yU94Nlhc^Gu_! z4~`gb54hX&q1sPo{_wNEsOq`Y>;a!6W-e|U0ulV(E3^$xW7TGvijXINF1#kR#Rq9} zN9(Z?D4u1ZtEwoOf27pU{eMUUa~IfZB*?CsJflsP?8j;ZT`{V&ujuym>*5K6k(bq` ztSBN_Nvie1CGIr^NwRYVE>*@xSz1)kU+Kcc4YyP8YyByvPv<6xI(lc`URY`FnOWPjC4JE!f5NKD$&%L} z_Ev#OkQ@qlJHJ`vTerw}9y?LLg);EYfVUfm>rI`sA(xOv)+6W>E_2fq==JD`%}o1; zG`j_{jnf%W9 z`bxA8igW+m)1-p^$IF;r(xqup=(k&lwU(*J%64760`{4wsoAE7@YcTF@4#W#YP9*;el&Vu0|*% zZ(esE(cGRBc(dbhD}r^$gVvAlq|lUR&i98!x83Yk>YIrURpJ1bQ$OqELH>;DWpJTQ zb}nkn1B>IMd>C}H)-%1gH<=NINWUXF0u-}|j2fVEocN5a9OIvVwv z$`fn&UUB>*!6*;N)fdkS(3=mYldE>d24~r~&(hl-J{b}B|2L9jQBkxIY0iqfL&MT5 z_)ERC4N6&)Atex?u*XNSgj9(IGwjS}N!Te{ ziRuQKA8-|KI#(A-lRSwy)NN(t*jhQe{C~*wk@DCt@RJJ)k{9#4*@#(uS-=sE3n_G@l<~&zI5s6$D zNH4f%@o$|%9Xx`YNR!oM3FOdCG-Rg(eQ|8{f>j`b!}2$=6O=zH=uj*u-KU;y6_Tg< zHSCFh4jK}YwF{MjUgvAo{bM;`N6w+~@@``NJ|L#`j4(OEGoBWZ-q{W9sfX;o)?!%x zT=w^8cu_l`rWs2ce)jcgj`$aLQl6hNa_@Fs$EjG3GVA0vB%jIKnC+pQf6chZ=LH0Z zPVlVbHw|Yl3oMpxMH8lJ_GUhZcDZB~^C%44=v~~_OqVIUA({yNyuED$HPRmOsJZS! 
z%D*9p^2K;RRt;98wuPI9cX?iVE>y<^qP#3}n4a-_cg7h{Bu4vGw{4k9R1dAb+bLf> zBALJhHGer*ju#Qtfml!=ZNG!30jQ?u;?lcj%eI$;ph&ak*1A3OZk`RCJwJeNcryr# zIEl`!P3V1k?Z$b63_#A(~<;kyrAyUbK7XepajklY3cc_Jp+ zB^gYU)|lw$hdR$rV?ypE~eCR35?BiA$4JN<$*4VlC6PTJx(HOp48zCzYxpWtE#RyvkJ& z7B4;|e@7;#MBQS0f- zk`1csV)9p&4C>P%LKg}ZdyqfLW&s7pR2;qhQIMW_Fs+B;wx*{m>b0^zQ{vJ|Tb#G; zWAjMBE(|aPc!!+;MtbkeHI$+el6B9zkn_k+a7zwXDim&BeQ=qvd!Kt2Hyzs%}ds#KISUDt>M9V&_2nEa|r<&fVL*ED$+r7*+YoFWj%Z+Abi*&Ahq~(!L zMZJ+s`;F&jC#%VzP$>h;?YHG;6{yD3!=|p0*Qz56@=e2zSfTx|+Q1hien;mdn_z^} zU}kEm!S?ZmshBBZ=kwBL)t=PA2PwBg&l(0K$AM39o@zbQ&elwYnYuZAO?$vat8b8C zrHEw(RjtmLF_TY|s%P$PUlsA`4LDE*R5wA}@%~qFAf&A?0yviPuiRn|jxrq@ir-fb z+5WnEkdjTn_`fvnetJyxO#bhGn9)|F3?F8h-&56XoX2$D<31|!kZ5329!VE((zBW91BG*$xhp(tc<&@mu_lflQFb07siMrwO6c0zqY z!PiP`?3}_MqFR>IpRnhIzeZwX^KiV9fU2iFcHY5)MR=%?6~{|!$e381KjJgx25DaD z*de41A0k>5fQ);#$}^1fkY1w4P}L66xfYoV;4G;({`zX~01>O7oP9Y|0~`7#0UOeM zZ4B`8r>=``CQUt93le^bUVdxZ0l=Hh)xm%oi>c2E8vPJu-d@0 zx${3LmRVMkla-_^uldr-#pAK;4S?3|iJfD3R|Q3oQWwWOHi!fazn{sA)y@(HcLv^q zm`n~P3^ZCcKBS(!B>e-@X@u8cVzBF$ck$sSQirHaf<@&}Qiv`BLRpjyWytSq>Qg7d zJzR1OIffHlO6^LOZ&X;l!QFSgLujBJu%r|qlhu*tp2N$1{a+Xxd917)Y6NA6h>zB(W^K%KZ4+(nk~{q@>=c zu*?c#5f)rK+j37GgMDB9`_uSdkv$INbVZd|GmdpUF5iezs>ov2yQl+_vM@1ooFp8h zOKa3rW1*;0bet5^)OW^7kfja_i78mYV6>d;+0m}UNW`PNB`G2O0XWwd0i2h~CbyLr z`aj)gL@))~jze>*hs(asMoJ|$! zC2W4iP5O3n5TYR}daZog`jf75_$qq`1bpBP|5b!3C{DJ=sF&L)CjkhrMWaHs1d>F! 
z48^*5p>IOLK&BFjt~by=2*3d7Lz!=YSk!j4PRWapYay|G&%CfkqBG-(S}P1U3B>J5 zNOGV*2^6t+lO)2j#bL7z&vx#s5$<_C#eNY5)y)eaa6jm$O$J@e;Gx0dLZKq?14+8` zw3vw%F4V0CzZBAd=KRo}k#ehUJsv&5RYq&M8_8W3f4rLnEUPPOg4kS1v5B9S#fz?7 z3QQIZeb+{84x-pM*mY!jrxYj6QChiJ5hP~yyX+qJk0-mbzOyPKyD%P&J_{n)`JK(V z6q(HwjlElE!g3#YMb40Y2#{vpo_ZDubd*5A)n=HAv(^AJKP1k3-!<#`wHS~%@U(jz zfcKI@{)0X_udlC#aF*f`iDz%jc8=~Vdt?$HXr}R- zwS*<~N&#)G!e=!yYGIqGHXt=X*kQ7Wn=dom1ahUCuK*xy$w+RbK8C}GuxgQ&rvi~?7M=l``4rclBhBO zP`pjKd1>(m_5K^@ggw21FB>8l2p$1IMTgTn+rpv5c@6d#)B=E~xTXSV8}PpEyqjm= zCwQnLYZXk0va9{=_OX(;+9;fvlg&J&(JKVZ%bAadQ(;=KzJl98ealQL?80>vka9(bS(U$vm_|_KFyN}`9&Pj|Cme2ME{FB^7E9j1k}Ctn;9kp`Xupb zxfjmGU@qgBOEqlD2l21euPq{7PTb;QBtf;sTgSD5;)4Xdza`ib{@dbDKasZfmfJ3p zNbdear6OrHPyYTjP($J50si{>Y)xO{b5So4-XUj)zH!5O&vbXEzwQbB1@_;)yZ)~m zKUp@Vlt+PFxbSsI9+gs+kpEgIrN^_#c?J6QuwZN_X>$H7!4^Hi>TPRvvA!9>RfVZA zHPMK3+^qB~dAtWNS)=R}iTbb~L0@(r(+{Hk_EB@m)ac&)k6F6vdXx-Me0g zDK}A|INJ|frB$DA|GAm%~B5&nlVb=kj` z{l~FNrZT))K)4PDE}O!X$pi76RPgWkzhgC>`(rm^22Tyt#cJ zz3^M=1^=51g2bzAt=kzNFrhSvV#$^q5H7OtSFE@V5a!ubpTmETf%PY9=;pzH_nJh($1~hrS+-sa z6$q6K3^XM^0NG5vR2r=H<4?~o(d1FbuzE^1`SG(@3OG8M*zIt1{m9C$ zzn`&_bFG&(k@-G!N)p52eT`@^1%Hypdu1nAA?Z>FcpCj{2HN;<5|VzE-5_b}*k@IQ z?VtVQ-ynh}3u1WaPc9Ea4-yfYeprpsEQov0MhOuNsDF=}587}yCXiS~09zsf?|)PZ zBXw+G`pOH45IZcL*yzhNnogJm0Rl7HI`fR~newg}rwZ#`1Yb5EG6-ISj^LAye6zVp zYw-n=46j+9gw=End*B~K0z}0Wk1`g&eDBn`!Ts-gOSnh^W)UeUOL#oIiF-4#IQHLA z_`#>aDr1pz>F<2GwZG7aDR&Cp_W|O^`-Ei9S(_yKnm2!v7|$R@z90VuV|c%n_m6oM z!dV*nqbj3Mr#05l|Cm&cWCE;@%8`Z0Z=Jf4yJKU7OOwnnJA1jbsu1(8tn_43+`+^>fkAbBRqBx z^NBa1y4NIm>CC~;?l)<`pbwB8yp#so*J#2YP&hzk{ADD^8R=zVdOiKB*XtzDZPOl& zajFk-8KRgF%m`V(_ai;uX`~wv#+MVZ%mqo#govXUR+IjQuDmpPVu{BWKX=$Lv!C1< z5mKRz2r=(}$Txbi^Pokiiu6ssA?U?(PQ~FQI<7y8m|>9LCY)H(%nGk{3(4LUEbiLBG2RlQ!eqp1*8{e1~8Ox>AiSzMDZZ!y~^%e|U3liV>*+KHIII1sM z91Y#XVHiYo;<;aMW)AT+E!O`de39&lM@TIwb3+VoGF?D1pca@RmIxed5rl|RVPewE zSqK#)d%<7h@IZx7j^@7Nyy5HpRiPFqpZ}+b?M{1h8j@Gh*J~kt?@ymSJp62iINsPMsUO_lUn}^Y zFWNZ;U%wKj8vPB*{~0c#qV~&5FJq8g?($vFjXUY4niaw>n;lslA9i8o)n0p7Put`m 
zJ5c{@nr~AA-DN7VHveV2X55_b;qH#y6?L7YMs418E{S6A))Z)jZSckj=lU0VB)0Nc z8w@swxH6ZE>py0m=l-q!lC@CTW(q`9ypKtZ{;_H-^0V_^I5tI?R>TgTwzTlJ{9{yk z*Vqp$g zUQ0*QWiXUlf=HuFA217RqQnaqqRcfXNxo5pOe1hmcPQx22oYt`=U1qRj#}k_(aS;t;G{g=iI|os3Vcf7S!{j)^B2 zP1OOesSrrx@T$IfghH|;E^-;KT)K4;{4IMan8;iP^8g5o7G!}=t9P@ z7QUq~ksR>4Xi87n0j(|a(jJ&riY;D2-pQffvdE_7Y5O2T&*^|CRYX|>r)t*~?vAD? z00+bZ@y=M2A1$-f=~3#GY0}7#3};?vLx}@3N77tJ@eBtRVdz8V|7_JcfkMV3T&3DI zYm<;-94vFtHA!@-EhNx=FhNTYiG%&JJLe{}S)t3B$C8L^qP|xDVUGIF<$Wb8%c_Tk zfgm@p>6)PKw+7Dto0^IYDd~ltDL;^Rz>jeu6j4Q&uzZE)SxjT#n-=)hh^G71s&c=! zG9w${43q!6vXqAi?Pfj8Nyf>|okH^bN(Lxu_-+A5ilzO(9#zzpW~~=W*Q5UBXL$R~ zm(M2;kCtqptQs+FTB@G$aAk=fu){MLG}~iSG(@3poWNbrr1M+V85~?>3vk_E!btuD z%-y;2S3^3M~>*iYV}K`h~4ZdscL54rM5 zHzs(Pk3aQbdR>w-k|i(^c4vVfe*+khYjQsT4LSHoro9SuRU{diT<7YI^|yPa6Ys52 zwyr-qQ&9Vz#j|a_A3orDO}sZKT~iFaoy2ksdv1I7;+(EB-ka0&zLf#GuHODD@#iVc zfW^hb1+h%P7sR-6siaxd^|uk4n9ieO$vI~o%V%>sBHk3N>)aG>ym>V=pskya%wjg* zBe~Q8hu;IrQGmK}l54suz&}k>bJGMcm~Xop4T0#nG%6m0en1{$3n;BZsF;itrM(VK zfh)zrX7_hF+5tqR!ER&&P)cP&;On4zK)6^vb!1htuWWhC;FXbD-KKfr7*G^VrGfN{ zE0TlZZcvyJ<#a>Z%B%+!l1yIMX}5L8$oUJwl)l#n_Jm)_Bj4PHGCevc+!N@S`iu%A z#=g!64tR9S+_xR1gOl_?lN*SO^iL+0q;HNpR!s&!4?q$5G|t$<)-P8=YSBi@pV7{f z0Gx)wd*!m23peq00 zAbc*?W}-&jW+%nAzG_?C7`cFN+-zM8N8WOqVMHdX0fxIWo-!CX1Ay~U)z=`OngTFQ|D~%qWgux3oWreO+u#ssN zB|04mjk9p9cp<<%cM=A!zkhAfbntQw^@`uS7l_9MRLOnddq69Iu8`h|2A?%mop7L2 zLp@W$9eF^=@sdZxaL&p5oRdjS>?aP`^9r9e*InF~L{-O}a$CD`3EfKnTOiL^pD2xFd4(EPKR zM@)?JY>yFaK9)T-m+bg{eUb39{HZT@01rZfB+T__s|Uk zwQ4`6^c^i&`R27k`kB}3O>pcZ-gM&D9JW*a+R?GG%bdG9ElTEG?sK(ksCU=v^`dKj zzg?13X|5F1l1SjY$3r;RAcvRJk--|Tq>;Tff z|2-*|J1PPFErBH-yvr}t4#a%ukucj$YX7hVV)-Q9>#iJgc%xSOy6`;^$$OAJ?=y>{ zzt4=QYm~XkUV^{znXq2!xO$y+f-n@`+B&kGXtS_6flJhTOw_B6j6 z7jSk2g8BoI4IWm>^sT3Jrp%bh!RXKz$5;%zmZjUlnflE}sa>}-|2fw{Kl0Za3GA)j z`MnHGWw4GZjNy!X=`!jMN=Sr}&6W!=jDPag3zdc?jsjPk5 z*SLCRQfv}cW7<54%n-2PAIjiM{qlvy7^qc;v&F}d!2FvdOVYE#Qm6|EIdM%7~GAU)K)wSur!e%9vQM=>YeAGlVyy!&zcqPKcLTZeL%i4QyNwg 
z;&Bt12>O4RYlI-zACJaHOQ$-F&{#=>>o0)gvuPjR;Oc3TTwI(72o+B|*3eJMhl!H0l;E`uDMtIaOEhU8nLc}Ri z*N&6aKDRp^u!)b!^)t^QDX)?4MQ9_A$uEioxB7QPFrZ6!nHS9xvsgeEfYhCq#|I6>nrhM&PXH1GF{!8RKROi8<(_s<83Em$JRDLo7b+}#B~`1C+LAO8lP2xr16GWE9p($ z$_l7Luy~7m^W)OJhu6k?l7GLgsxBvnzZvXX-z@j5PBNY<9kh`8kfy8o?9S3Y@da?h zcCrU`0`#LzzyKoy@M72gu;gio_NglZe4GBPjWmogS)14qP4hKak_x&0bv-PnEK3se zBoj8Okcd;9XQGt;+b|gRlka8&}jm3R~-FuChHlnad#l0vs~Wf^!f*MlQN>9{MOn2GSSf2`oqFj%)P zGp$J_&VWcuAWms0vE&^(W)YZ=zD&D8#14+HmX)doKI-m(OGJ=(gaF0d47K)Ojs!1f zLipC{1Gk}|?3RAiL0$Hu|2om+HwWEXB{cOUDhS~C=^p-AC%%Ra32a7lfMD^dOrC#0)Kp z7hdPmzEkoA)|d(N9d>QJ0bnn-F>T_O4kYcdGk#x&qc7xw-TT+~VKEF9)h2f3<54bE zIt&+QJq53ga)Y{N$%nhb=VbRr&#F^0@V_Lfty-H$SxV3v5-zd1a|$RAMKCL3-GRih zzRJh{YuR0C;iY&ccJIPfm;&yel86iq7>L@BYcVnw<0Rw6cTFa$4Wd4^f4eEL3P!a$ z3YVg%#}+LOXoi=>b@o}`!OP$Y`X3cQj3J#Q=)es_nYx}LL+wkk!kS_>P}b=XF(}VS&;0zmv{IMUo`hr|!Aci(pBC$cL* zr?lC2I+~iQAvr*7YVkgyLOnSh2S7!`<^L4ciEPsveJ@apgSRtcrh*(@t6Ho2+j z-lU=!0+HVemJTo_tCz+kQtjOrx3NVKlSdN#SM@(Q6B(b{klH!w?STNK;YhP{!ek2v zQdQR-e->cS*TNL^XH#(Yxv+2JF7bV>M>(c$u=bl1dXm^7)bVT8A3G0#IK!E8(%m6h z{|b2JY8mzefRcrwD`Tq11z-gn9S=?}zv2*~kvGkSV6pp)x%V>8aJ4uYi=cmEP;vB= z>gIlMMxWgk&2eh-lR)$*UB-zi7-N~P(&W0dG4E9K)xJ@!bVw~U$^;Oq3V>dam#A`j zonh6R2l7rJs%wiM{w8gB6XdftKuJY(X%J64sHLv<;-&1jo32h*Obd%p9Bj~_H3G*7 zNB}Z1A-Ey-V_Cfgy<9-SYD*GpjJFVgWyZ{~-i2oHbMovBD;jV_*%Z_UsL1=Cs-K{NF^MgWS zCEtCyn^aMx+|8c%tG|YHQ7C_*6TY4Iipn&wSxeW{^zACNMx!b>Pe5CxBovF6jlgy zfZ-kB$H~WpSlYN6TMKt2ozrg`Uc)EjcySEKBxSzKcjki2cocp!>C^-U%9YGm&<>!E z%XmzXk=B37S2JGw$^mba>}F*LWNw9yDI^V(Jktb?th)Yu9(0bCjhBvXfD?xmefL8J z`ANvfdn;<7Gw_ap(a;ULb%Ap)X)dd`n7`#G(GnjWMOSs{vKY8uNdQFo`R%@l!=lj` za;SDlBQz?HIVIadKsCBEv?J~T9+_P)33rMeHnL0?c1WtzXl>E2>S=Up?*_B6D zf9o3;plm^;YqLZtpZ@Qf2;+R}9q;`^FbQ8I2R&49eGJD1%Ckxy1@8!z?X$bXYKatY zi{t_XOf5v;;*hSIg0iQTu^^96~%eUO*9>3z~Flv9?U zf&TD+hL}U3UAguu3ubM4O!n>-uCb2qpHIrVQdeHRY)6v=iohMLpZ#neAbKu1Dh+Eg z^wrMP4R-R0bEs;16LH6mkASDEC_)v!c&}uZYe>+@(N2x}Y4-XHCEaHvATb&&zcuGx zfuIZ)8y}s9p^D 
zIdzu&gUSrvvG|mgKRr%u?(c#;G{=E7?&OOxYW$y}mi&^H>0_5eEh1X?QrecCDyW6bc`T2~6FDeoac}F&I38*3={Z zQi#T*dD2fFV+tk#19A1nS!;rF-O_}O%}rIn>cZJrj1qRxxS;W2CBSRW@2K525+G$I zfzv$=v9N#HR3M+qD>MzOr9gijP06fF1f%Sqwygq0iC^6BQ?HX%P{VHUL1D#594Off z9(g1I7jSXRe4rcvR~suDHiA9)J*PGOEp(u(JwTBV+8D%+=}QlA=BOe4yr|~l_dF8& zp941Q)dk|2HSa#q&oI2Rg9StOm!BR35einD1U#i-=3 z0Uvpt)(2pVS7%^qK$+?DAq%t3z)-x*d_)*nZ{t(vS)e^qHPFqqC` z+Y>{D_QEx8@hImtZ!;2fumg-Cg0YfK2b%AJK=S{*hCfZwd#aE36|Q^`~#&| zI>hH)#kA&DCWFkhp-H3PQj?I~=m9Q+%Pa}c~|HkwV9d{Xvo}~Exc=t@g7|Ja3Ut>`LLhzYix3=BF6np|Z_Zn$8 zj=K_*SQkKCXp|eUA;Os)MUQppXfR2Oz<}eT4}j^q44k8#+0mV#$L{X|gOAQOHA6F8 zSog%UDzq4B#FT+tSlf)3Y)Wf3T+2LU{EV1pF|nlyx7rEW4GGe;>#Xk@D3sT3a0hJ= zGO}jEG2IpmB=fy3+xZnl>!5}FwXJL_*L?`rI%xYt5m-hx*NyW~7Bq*N&Ol1P{i6!& zbD>pha-G4U^tnqg(&NHJ8-1e_YJizU4>zBT>rH6 zQ`jRSx#Up}t?a1S@c(!nLM?jKi&I(Ak_=T5T*{x=^}YyItqhRdOT!=nuHW}DS$k%{ z5QvKvVT?NS+V3YRTH(`F(>Sqn-{mS*?Mq;|&m)5#`_6tN?Zi)cWbQ4S=kt%o63{jqSW%6#9Thl(^|PA>FbpmWFysvYXcd_n4TWei@YwB*48S%`Q=ad z(1sG9T4epXz?xeFY7SX7^O%MmBz@<6&8 zBP|1(+(8=5t!R03wu4Ts&H`f5zuweL{tB@A|7CpWss4cNHG zy(0;>d#WFBAt2_FT3cH)4wQr*NqdSJTK|Y!qWuQkf?%pkf|A$WT;_?Mfp%i=K(=t{ zTJ_}a@+EdTgu!NP$>Ynqc&9RgxLX1J*or~y%RiWkJ$0L^&y@)6F}OneA8@Si=PP^` zjVt2KHi#R{ZAMY-X*4kiDkd3nvarE`isUitWMb03b$EXT!FDVz+jnCSz(8 z5yHaxX(rdv!8;{xO|+pr4KXJgeMJ zf!lvMsH+{w*4J_-Y^@E+@2=mA%exKn5J0&jP7&zKZ434Tg%guNUGROYFE|kcs^y#Z zVwdqW@Q$aLv4zR6p6KT#6HX*DGft}Fae{t;xj178VCM-yDnf0#VFtD~`0+vFptiW-AU zJ&##Fs;onIoRa&3hyBu1Cw6)*;%`S!1NkDeDLI;&)?Q^lE8z7GZ&usS=VZa|+11ie zI|+`HrnZ=I|6>wB5Us%8YbSptKM>2i7X_MFQ*b?FYA14~SOeFLn&sVSTAX&F#lL_^ z+CM-<>Cyhf$7@sKWv}$3FFk$-J^H*okKyY=Zg;2e<^G~}vyrE|C=T_eMi=%g^-|_O zw%D-kD1-{A#O*=5H|?Mpe!&UhP^`^gfzyH?UwB{%2lNz>U`g`&bsKUTe&l-F!5UJe zy=M(0%B)o&=U!mJr9RO%!XfV)xpewNqZ`-I%fviU?6So^x?r%6r}yG^axg)X@)z-~ zzp`j6=}Lb@tv6p#S7t?)2j`Stp~1e5Uuj(5^E&4_e0Ht-X4@?DY^U7tUs1Gdmh=0n z)gcL7i#}IQ%}BBHV4eZ*|7Pgp?0WPi;pmU)?-f7ZdsJ@u$nW}VMjT}*9aAiTJ=iEn z%?I8e2cUC02y%0);+mIlAHr<~5Jmy*Ih#iZB=F-$CkyBD78BJxfDx&E)q|MdD~lr| zptyqf5{}pEETc@DKz0fnXxjw2j% 
zM&hG<##HGU$yWfP11{Y=cu)(o1FsNBQr8TY4gzg7U=bv4N@-k4ueQXc|Bj9N1|j9~ zo2zc{sC0YVI}T)1nfVMpO}R+~6oWU(9hyt}A{j~U@N3ho9&Eih!o3AYoaHcDSlIok zRl=Ew+u^>kC2Wv|t>y*QSWaULrFzXOOjB+=?{|0Bm`E!x!nGEs*9Xa&e$dEX-DzB? zaMT>OW`%h{)wH;CgGZY2ofCl%+Gc5*OTye{nD%7)NBmrv%Tp_Crfm~*)T1yYyz>|R zOxq{Ss6v?wfG%6mX>jo*&s%wI0XC!7H|1)cg5->^Gn5Uy?kGhbD(KdaV^@&pYBmGu zQD@C>DzK??FQWPJ+Zwo}(c}d&GNZ1;JZffc8n^Y!+?O-wL%X;1wuK!XJR{#&e& zzR6_{B4ZaN;cI;4|M7Qf(i%vh)y67&pt9lDqZoKzKLc!XPUCO>ywFi9;>LMGFOuRA zFReyE#sFuP^)GY0Gu)a(r2#Y(4i4Iod4^!Hk&ZUV7*#L|s1mY)z34a5Jm)gduR%jp z%Arj|n+C=O%#{r~nc1f}&Tm`eDNgoyYckFGjP2fywJd9SJRh{#>a+jW1;81sm-I9h zpqK)8AbJNxaSnbUF%}R|ZXC!8nwi8}SNPoj?N*?hehu6I>jh5EsA>zg34DHk2^a!-@$I!E$c8j> z0oafV87g8wI@BV1NA=y8a*1k*Aw3F+c5&nY$u1auXF0g@19>-{&f9sXq~mlf!>(i9 z!(HN^ybB9o;<%U>w2DN&wqx!7{cO}=)B%i6ys%*E$c3PL8D#H&sxnG18yB0^@?0VL z+ut07nbzNZv1ofFyW6B90{{-EwVLds1jUy+Q=W0JIdCF0xRLyN21@eJbcR;i7Q336 zW}_D^uL<ce|&FJ;^iI1X2T;`8gnjYWjA@jXibXe<-QOI?{Bv*?~h0$5A- zzf3C$eTB6>84Wb$ZvUXicadCrW>H|cXZ&3JMJld3jY0Nr8_<3;@I`V zj)7aw>iqqY`iQC0=4byW38gi>ar@3oqH@zus0=&jGQ$XMP8H(tq>o|-s26*(9F|!U zr7tSSin#KXh2=PIT)gBJ>fAPCsn29yZ7DDfqYFn7jss&XwqT4|RYY`t{%z&QTFr{* z3xgl=-#&4;%0UA?(;aR!L3E2I^l*z&0Sm$-?iRbT5&A%UQKhP zeB1r{{SuCN8^s&xlLcmV!;rP+A&hxb))lOZkyGqzpaA=Mz<$bj!5Dbfe^+q%k;OZ5 z{@v`;ox)7RCNB1i^PZ+;3$UQ^;#<6CtwDh=ZYN&!vz=T}ieBL?1%D-%a(pL8#mwk` zkTEp9s#`OB%gY`qSaGFDW`mXs(p(}@wcU$9k`+x(ynJ!c^A#9#+kRv2SDnc>rX0R2 zE##8_FKvo!_nWce{g8e=wKe#Ua}fbu`W-w_X@Ul5TaL+hziC)~ZOhm-nTvH5w zZ4{qKb3i2@kH#uFQ_tO=BC4AP0UZ?#wfEow`th81tbWQM4OuRJ*qlFRZFU#Pj-e)S zeG*qh>V5rg|8~DQeJxRiBK9-ejf*&?0}mCd^+GLxLFZKixgkqRA}N5BgGQdk<|+`3 zy2qD0?{So!=cK@`&^#5Q1rsk*UyUxK9IF3vhjbu2c~RnW_Sw1<^v zk2ssC6PsbX^jgB(9l37(Z5A^cJNg9T_UkV>z6Q#5^rU2*|J0*exemalCeZ&B#JTD` zocL>JE?tS6m#{XWnJn~uW_ zB>VFK$o`N*vOm@=@Z85wtv^UFS!GFzTs(|*qjjxTp=0@P_Z~W7Zuu(SrX@{y`V+)e zq4P{>j|Sg+es6U$L=7Nss-1ZHi?{3z3_|~a?3;d-5#$#)Vk9YeJ%R}DOAcmM+VHww zc6V66rTNdjxr7Om1DU|j-2-uLfAL}2hy_zb&=la>lKrmv?H=O3OgpIQWu z?V1n@d(t5Bk#q`Q`bcD8JtyZ{C^O0Zo;6o#hTW+9^7T3wSdGhj)Vatg!DU#sGGiXV^G`wYcg{!p#oKb(# 
z$jc~_n^pdep)g;%WIjn5@p$++PUcdVhgXEC$V<+Tl&;Y}%TB4TGL8J*JvAZODrYMm z+;z+i3aa?JxK{iOyx0_f*cJ5|eE*QmlY`kUGlMBEP|5m(pSHgrv@x+IpREQ`+Y6Q2 z+cv@V5KQ5-^7Oj%J7E+3BO=9sDwU}zxV;Oih{f~lc9ew0KGiA)KJ-~HbPtE{>$Ybx zNQ34c@0&6W<ATe5q-#=Dzw`*~s7_Bnf6gElS5D zoLqxj~SgGE*5ET zT{`@oRe1E=A9+s%jE|eSzW%G4|DP}hV?}I_3l4T;^E<`Ur?0n6P}$TX>*?I=G^Y`9 z3E7f^7}yENsX9{xQq9?~!c;dKk~7I)XBQq{J483dhry>SM_&{o`7|zgospuKdD7YH zaJEerGY+uhQErZ|Epx5TSa_rV6^RLk2E}gf|2_tm<4FXCjD}aF8ki51)HQ&^-Y#y0 zy5OPT6ZpNSenvnkLZ+zmoatVOiL&MBp>tDcVs_V*=(9-9<=FW;GVX@i2efB{)}~0D zx1rfBG{}UQ23kNTa8^_-8ZZ;pnF1Okt3JPtzM)MC?LGJjYAP=DJ-Enj9O=S<+rSWz zt`cDn+Ea2Cu-{-*f}3-I!)=R2co0LQ!nuJL9-L@3^c<7KWMO4aJkTb&`c#<%!!f$1 ziD(f8@J(`@G&D8|(?~==zm#?WT4WI_lp?w!uN0qxV$@EFzgV%^V%#SMX@%aK^&gDo70$tj2_ZLcmol@*2P&_w<~QW+O-m+ju7FBpkX$5x z{FqTfuv;}6f!@{Ok-sJWyN*`M6QK(?d2^|tpgpWUoB9G;sxlvx!T*!RdpTdh6}B1Y zy1X2Fe=yi%kql(D9_P z`q~x2d(uw%LHS|$#Xd$8V?x1_X}5JcO*0E92T69lm|}fPju8%lgH3|{J*Qq5U)Vhx ze&<2EjDatdsuYu*-|#Q2bsP=tfaHl09*_T(x5sBcMuG_y!j*Lzq_iHG0or+pQv{k5Fa2Ys$b$P z=4yfc@r9}*#YdA^@V%*)KyO}jX!s7Lu2PaM=4!YyU};)fk4I{HNTGJ3D{SBLF;wnN zQ{q5Ah)_b{3LCr8MM6T-T@D*CK8(=pmLPr+FQ;;Na6fi8AWEpdoqPB-_7bt)n|M`Qt&|qcHc88j?Qg0I@ zOU!6^c3u@){7~eA9wzjmQDcoeoQ*otg7rU(f6S4q6R*WDR2EoUs&|?9h?M zhK_W&x@ZJ#RtFiS?__t{t08XTnO)s&Xj% zna4R)pI`ju*bT3~G3zbER+JF!PhCH#c2i9Dp<{XoyfIXP8Y^dx0;Mga#DO({Cr9GH6g*(o>H$`dA!h7~;6vgo zo1YJ|02aC0=?^&AGicgliUf`+%hmB@oKOoTBJJ9ssWxWGXjFt|>!EE@@2MQk*A-7oUnJkouiU0(>g z)c|%$h@AhDS!oE{-Z;kb*5F4kh?RQQYska0We@)m2vsf+fsk<2;TXbfKTVjyf`Y%e zGyK?~jAN8Iu_Ke;HR5BAv?9Ac^1);oQ#NwidePfEh2YK+BhV%C_S$<%c+5Zem~5Yr zpA}+72irG&q?6g#_9yM@OOw9;sJk zJCTKOn-=%@w7XirZ6-ctUzVVeLPpSsKaj|d^%A)9S0mx5CL~6s9q-ZGS5(BdYVpVF z&etWB@Sp6A;D%ib?_FU9ck#%pbw=D)YTTA- z!33aqbi_1`&rG&lf{FWjHWUNK+vg%kWExtmK8(2TSBfd51M)0R$TI0a7{j>Gu> zwWWEUP=MhfkihcuE^0QR8?eb)hW`Yc!ApD5ZkC!vrNg@4)H%RPT6V+y)g!RF$01W=uZJ^XTzs+u#%rRH| ztkRX(1m$j4{t*^w=(G^fHP|nHuVDX()vf zP*I|U9*Inpp-Z-S&#L zBxI1Uu-_#smgd4w5ghP1=^8CO61ZimcAQb$Jz&-nT zDJBNq;^0HZGtiC_a~gbp`0`X7uO`*0 
z*K#bc`NK(Fs2?v5oprfRK@pac*M%`9vIhL21)Nb@)RJ275_h;VKj(O(CT|^UogF)N zx-Qc+zmI!PlLr%;c)g_Qzb~^AQuQ}b1D-$QcQ^L+Iq!_aLalWXGEVS0UVm8Bd@~F| zfS@fzfK67!?mMakY&;0Q#-(oZ+`2T#mG`Yk3Ggd5#fL@LfZ~Jl8CapYmQS3iG$fdS z$upm(q@AQfdXQ0&M5vs*GH>OAnI0n!bacbYLYtqDw~dZyAj86&!QziLy+|Ty$tOn6 z4tRKDXUZUWk_`jyz*8K?(At-vjLEJ;DzD1OPYp0}Hm{ zqVRVQ*d*^hcLz9$!E0Ms2OavxqhAtT1Y}}fQVRzaExXiZ5|E{Kfmy9H1Qn?U(u-IN zf|BzYFn7=fTLo+5+9EH2%g!f6+t>GxhJLhRgs;zgMKJ2*>KwwbIc=evlv&w6svkZu zafy>SZH$k@`jP=PyR$`8To~WxLyG#?CewgveEV>1XENbS1Oa~4eDUzokNhTaP~hK*qZiU5buH(@HS2 zpA4^43LxM)&o5)Rdh;)xOWv!^6n)eJ7GRQ5UrL)kE!<4ri@0wBiTt=DV|Ur0Ap_K# zF}5gcI4hK4MnEnR7gm8hIKj}8JEK&U%qnnwChl1KY7=~9{Px=`H$gzZ%L{;=_eam= z84wUWUT$9h?1jMXK>+SK>_Z|y+s#`Q;w~Rqq*oJFh7-ffTf%{H*4*}Wci^pd03PF~ zU)CHH7Ov-8HW+bS$m?AQ*fEh9Kz*@n=rN`%nf{Wu9Dxl?s7z(b=Y>BXFqC0oA24Eg z)kk-G)7|;n?k~sGD`}KIK+1wc=cl>9aat9SvcPpah^GDE&`UKJN`>!w2dw)BNMX@j zBt9{(L783GF2W=(hZTzsQ^sq&c9=?#6v_kv7G)y9;mOQK!{yCuFSM-!ONn)x9SO!@ zB{KF0L*&m4&9vvFxNYHQlwCx1SpArXW_f1`h*Kbjg>QEAePC8)&kZ-GY0AipZBrXK zb-qIU-DhYM1$1qlYpbP~SMpuIXbn>iC|8$WiMv|H95n>w01Nc8uX$hdyXx1Ptk_ekv+Q+rpI0uFwcG=zx9*FFrm#waF!iQepSCODrF0!7=r`;JE?q z%ZE3|>IcS5AvJfW`2S?)Z093_;5g}`;LSfy$P)c0h%v{xrhJrOJ6$Cvwa!hqabs^T z#q48KlD?q?j<<&MO?G!rq3h4S`}ED+c+jS18zbx}SvXRrAl8VFo7s)Qd}^V@g@afVtO-}QLf{>YU75$yTxx%92kt8%dQE8EDDv-@XC{dzE# zqrM2DMD!J&jl;&gT`;_JdhFrQ+n;+;qpmW6%+%+p*S)PqR!(xDrGm$jW_f_q@ZOl5f( zGIHa$#-Cq16XT3Zcp*d3sYM1eAhaeAPS1RAh?l-*ODouadEHmL!S=D&$DoqoN&RqV zW)VM}>1nn*1bvo?dfMtP)=D?V!ae%qVcV8spn z9-8>$Kj;p0-+o~pb{`@M9m;n5HQL`@BLbCI!;C%wpuz04uRGFjvlKja6HKFIv}qmuyD{t8BH5A!VDsagaw$csw51T+{zbJGE>pF zr`0MfYii_pLPYe^tK-Mj^6B7x{D;`6TQ2q+{X|RiTYM%a5z{(FUHqzM4L}IgrYyAz z!$;O(>WV_FsH$`%G&(OI18|W2R67FoR4{P6x^ERkyyS)R-2farq)L+|69QCrTu`zn z;qloBV)n`Rk7jwOP=6mE75CN-b~MlRiU9IUA`Jd0vI$ns`nxl?y6`GAY|_tuM5Q#H zu@g((B{LAve2Vh{S0IKYOf+I~(=T%PWSN8EeZ4&RSf4Ob=xI63A~Dg1o;@1qhUj^D zn4)Wqe!w;+TJ1gtY$Y&=DMOD&s&x}e&3C`FsJlx{)yfkjd3`!{PWRyPe1DYUUfn}y zP%mn}Xy~`iIWSE@Wfad<>*YHQAsx;PwGbMb$vLTQVj>u2A*aDzcmiuq51~huPNa 
zEJ`C9V0SWtrzSWdX(^VtwSL_O+!J!Y6yg;UUy6d19W3&^#4PrgeU<*ofsQF8Ls}?W||B83b*R)=-FH0A>ui^*)=ie)ORe*ftkr z_zdzHq_fKSRt?R7EIqKLh?N-RMm2|;$vmGrJ^ER|c4hraqdcIFi?)GR?7d!A#A5%_ z>l=^MND);(F#HW9 zH9yz8@q%1fOO)%!ACP>wyAXxXZ-K!gu? z1aG0i6&T`jftHV`5>mG_z|1Vowy(Fzdu97s2XKUcm7Q?vZS9%Nqsx6nkp%uJp8DTC z^fnsa-qpk>`j%dA?Mk}vqq)@M>s*6c2ago||pac+c3L!CMxZ?#j%> z50})PWi7Q73}J+M%+TunAKdzQB?QWlRp%0TC}fG&Fq#k{gj7IuG)6A`4GBu%wF{9g2;cJ9M6i-fj7z1-1$eYk8uoACp+L z-D2h8Af#l=VDhkYS8oG*#rUabPCj6W@vM&>DJMisKoNw|A)s1Sf2%`5Psh0f#@@PG zujPZA8RMUE-oiO9!<>vD@uIVu9szwF^2!I6dwDm?)cV$9eG#_7e{apL2y34-TNrUQ zL)P1CarxkZ1fDD)PbE%lR&+5}CJJtk%48>-KEK65-7`-?ou7wF&>I&D%6KoBtnH_l z)&J|(A&?F}E=3Q4Tnfl2)IB?-=N5|{F-pM*72|+G0Lx({*beV}$?BU0Jlbc^(%7VK zZUIq}I#u&oKdOKw3MxS<&>#7}tO@NAbWkMf$)9jP?z7^YmB#7n1`&zBX#&=w3hl86 ziUXZq#drGabKVZa9fe}bo1M_MOdkB%i7*5swkJ9R!Q=|czssq5A zdqEDo@P9rqgU0$*wE)!QG*@l7{o-!5mq3DwXBbs?<`~nHMvtf~`1F&o55@Bx@i=N_ zJ`^l3@W1<<-Vf{0Z#NaXY|V6Vo!Vi?mF*%zWSEIFNwOfd7{>XCHb$B!7UG{auVpIX zDIe%w|Key-RE^I>Axl%yhJJW*IH9*wkpLW5`@w`)h@4XNz7CFk43Wv@r65b9_Su_H zGkM$4r=^znKS(zG;ie^Nk%RS`c(UB6oied7o8zEtNdD0d(Z$r9B?10jKH6 z1of{~J;9HrB|3|WYW^wy4K@SJub*^$?_u|^iDSDVV`u~()!upbaOAJ5O_Jz1s7FMb z2&7Fu1+)tT%7}qPYXGpN?oqVzz<>SAr5d7-r(f=m+{p9VQ#^qLz@5Hl1~gd7K>8<; zf2!9VPBJ|yW(HZyJ?V9nqAJxzzH&)b_y)2>iF>ubz^>aj#>>$JT!!Y>^e61%@&~HV zbaWnhCC3->?!x%>J+Ob1sTDk1%}Pw)skB0K{8u|rUA}yNWZ5HhgfCKTj>e79SYTmK zyClW!%PklgUXiR#$Vwbdxqh`{FYFimr(HFj?-NTY1_X#7mm#%>9mIetb6GNxyC4ys zRU82o(s9}p=}3nc=^I0=UK!c`MDCY=7z`ozjmM5i4B79>7$lq@^`MlRD&$M-!%y;i zntl{qxU65Xnso6>^-B#<(J{xh81J)_YM5FyIZQ#TQ?UcZ0Z2Lnl>_e>A~G7_jhxD= zjlV*`BFqBf$=k)30+z31R?@!){nXX&T6ru`A)1F{B6#EH@KsG&C9fp7@Lbn551nO) zT+u0~EsCUL#^$9*-E$-YIbPc{ptxky6UQ9_F4fPyV>AqVO|eP~!9Ie6*lc*bHtE&3 z?p7seaNi99t4 zA%7_-h)bcVeen*tV6cCjnEYN=!4nWbBAC`bEK9^uK#ksOA$Z}!Y7sqb3rcE0y@xLqTiiTMe$e+99p5jT zpSDvJ7hx0%qFRKaUDz&ow-%(c+gO>uEhL$Vw4K#}3~$o^=JxyFUkR||%R6@oGDznD zv(%!1Co3M_^`3n;EiA7D$b+uKJUr)P#z5QCB8_(Q=3sW1J0qg1hgluD^f4jbvsHRy zRxo3UG=hkQM+`82Ha}ws>%qKTz9P6lja%h$NL0P5nCySYCKrgN!KkQp?g4+Ev 
zJ_#x_9|^+QB(#*^v>H-YLPpv%a?dM910 zLKgdlEU!^^>EC_y=^85gJuwZ>tbv&;Wpeg$$ZT4~b#l&qn*{V@$dX-kbnu=vOL9!! zIncX5KB{1-%SMQ3u!?)P(oRpaa)yW{YyL{q7ThMMPdu4=xwMM(WYN46c)o@#pBJIU znsNDql@8w8z!S=qIPf&V^sVxQ(jRpQs`F7Qpj?~Y$;t?xW+>4rL6|KpOef3ZbKZ>8 zx{i{hx7`c+_~9B^1YGJw??*QuMpc4MOPxG3*>B@xXjq}9g?xHc5bfjkCoXll0-B;K zR{Ae3g;%>*S?36;DP(P3DcF`r{>Hpj@kfV^4s4+N0jeT0ye6GsD;8m8h0UV8@fwp! z_yI`0UV7t3<4BdY_GVsg%3zUIvenU$L@i1Q&dY}Xx$cEgFR@5gS}m?u`8Xh)n}3}kGohA8g9#DH!ACDdG_9OX9yi7cb2ZRi(OqqNQN zUC}xta?YNY?fZ~Ekrji-@s(SbhDF2Q@oYJCJbx1{j{5n8Iahq|xE{W7N_dwtekc9S zrK&@qP!wAw5#&wJ0oAi1*yrghB24gWpz2;(uNx=Vk!9G2p9{I5@v-o4OGEwlgolnM zDohBRj37uQ=rYu&u_J&TZB3<$O&s-?HP=5s;Ukq0UKek-GDWuta!47z`xph2+}qGR;a(~1GWi2?_1}*_mX2I=Uhr%X~`vW9D*B;`CsonW8S4m zqGmLaxqX*zBgyQZKyHT{X|{Xrlx|OD`dOuF5-Wvx_Ngwq`syyJ1j<~JT1MU7P9q;# z6&yi9xm+0>d9Ejm)0dUUxh0NSbd4wHg!b0>zbOo*oN>ROlT5t+^+6Y%iJ8J`#087* zfLGO23~C<#Wdx`lYCfCS&vS@7_j>V_+?w0wz~@IOWk=t)k38kV?jWAKIf~P#k^fd_ zwl_QQ&}nX^oNh(XBwZ4VgPa~!WS^&Z|K@|KsuU;a!BgT=s0{qZ$2%7lch?zHisU(# z_`U=R@?wVb9aB}%&Z82&(*Sy;77HH_O&7Ro^?!rTbe{Vizqji4PIpDvzz@;ihCULx z8lHUy+OPHjA>CX2BXDR>Yh$jl(kb}-Xr{m*Pi^1c-#h4yECPN0139EhZ{-*W_5uY! 
zDWk!k3c3yQb0^=kJUyW9*PYS2D5maY9?9(3^nSWXU48j3G zPS!`O(#Cg|L)?8?uGlzV-I;O4Pnl%BDjCC(A(hr$;3n0Nak{(Oo-e8OkdYwA_Ea%M zkdU}0du^hRX^nl-L$@k;nQ{ZDLVzozFFK4$ZG0hUO+i7#jQe|?P!cFKHG^+Fe)d;w z=n~Q5E^J<<8OTxa@$(PfO`Zbl=B%7j-yF(70S$LICI`r%fwdX7T$PdQgL)=$ufQB9 zuK|rCX^s%iEHr*WUmP3L6H%6DQ_@{A>AR}4C%&KP=dOEU(SVolzJp2@3lb~rkeBeV zk>4K`_g}Xm-MuDOw+^`8uv*D}D6dq)yJYvh4}<=#&_R&|YF_#B-FnqUopYc>qINND zPzZU)=s74r6W`hy$+e49Y^)t{`yqYXs+?a;|BMZi)v6}mihlNpDgXknTXvfTb_|d9 zJ|)usPNt=&SML!$nJrFkSq;*a{o5v3eWtx_|1jfLT;BV^`v51T+g1(}5E6O;<`f9C zv9C~D12lAne!yb&a|(08bzo@FJvfqe7dBA9!5#G27RG2^LXc$7fCztJ^ZKlE|0@>G zz{hxx#~Td(VbwNQj9bsPN)@9e&Lehk=~OuK(5j#hGoKBeK92LJBMN-7-pPh<+&qcU zwLzc6rMKE;?i@5E`O05RfTfkO)NO$sf^~|Z87COHn|TzNGa__~`S_BmC%ha}aB(`R zO`~Yaz6ikaUfan$ zuJr%9F-bzX|GQV(m*STK&{taD_j|z1bCI%KnN~*L_keYDyMVL1qkP|Y2$b@3s$V4C zrV*k8#=W4qXkbfE!t-;bH#+UA%OK>04N|xlU{KFf&xiv=h#J16g^9*ppH(Iy)7in$ z&MsA5R3>jn$GT51ql@VtQ0{mEK-O36>K02a@r+k{JCP?tHo4k!Dxdaxy)7X9M*a=@QOykVAfpM|mieY+Cx<9U6*6GOp@ zcON_Ln-l*}?2;@n?)?5VO-wlfLNV*8e*5G|2*Tagd5o@aJZ_|`)--((M<}g|Uv_0( za#L5z$;i}+B#)G*GR{S{*nNTT8HJ7Q%S{x}JM7D1W@li&=-TWs`zQ0=dZW=W5>Mt! 
zI!q_aAeu%I!&Pgm-(M$L3%UqY(HD*6aZ&HsZUR ze}8@C1bhqPUUgAWGEm#fVSfV>1&vcbK{#UK0n0x(=QzSg2pkA@1(hBF!O2QSL)$Ch zujjm{wwlLs`1m8_cT3Ux#g5+!N0YbkdnuYgF=X~g^m)6+5f}lg24h91-22k}Df6-` zWfs+gR$!LtKk(3%vPHYP)CuIjlppqV@urEvv}c<5jo;v5KUV^gf9Tq>YuSOkB6NRt z$`zYgE-;H_%RJ}BsQCCD+;9xx#klG4aNNFcelP#T{@1S#AW!EOw$r8WIEsHpn@Xfn zd7lcoo&|N9uSt!wca@C#F7+BZae3XZ;Ew}x~ffeqp@0&AJIGt1VrD*WJ@#f*jh@146*D+5YIMU zm>6$z`gPCOwlTm2;eE>Uci=YtH7WL?4|u<;mPdvy{+y!cf%r)CH4 z)$i#<&ttI2IoCLD-B|vzdvGf5+X#}EJ0hKc^O=SE&-_Srz9Lg^q$)~9XIHuUX|o} z*w=VYmzmY{&{@oWBx0~1yuVf-p_zJT;!XL!xC6tLDj8gQw!CsbZ&9LPFz{$l;r6@C z=wDklv^MZEuJ?CWrZCWq^yvK}vs4R7SWL6|w_pu5SV0<#GSj-+}{GJ+)FG+8=vm8q-g9?CYk4yxBECz5A0P3UsMK!T8qS#8wq2M zd9U-^UUy{QlB!%`Kj}g~+^LOvm#zNY{`a*D`W&p7x7!Z^qnDRT+;cm|0w7y8uC%Fa zmGew3*-A1Sl&I#sHnpT5Q`P+$Bno zp1hsy*OeH4<__F^Fg*mQKxwoF(nl33drd~VP~ExUms9k`5)c>y{E%NHDwo|mo#Nv`Q^y`!y z#F*02-;N^&OuVrr@)f*yB)QIaW{c9HjkV3`9)6n()?IOzs@*;VbbCkXx>j2y8xlS6 zKuILns)Aq+Tj~^6)mv+0iY8lGMOL{0sj!ht0~jZM%w{KFGBSu$*@U;VXogLmu+ubh zO&jEcBj~~vY!yyn%-xq4h2;#Z)a!(^!hjZngLc%P0^?E>nFkQ<2giA6;C9rUW&89-qYcD!lH? 
zs?k)dc7G@$`c3f^7v-d&YO6V1UJC7Ql!(q2a7Dt;C8M9oVRD;RUt$hr>x$>S=RVYc z`bf&~126cV2zVdSqV>48@LL2ZQIn5SN(z*^cVHdT8y(y@ON`;qvJ$7^$K1jpok=(K zRfb=OX+*{uxW0pc-RS0r^~HQ9Ps=}xlV1*N^i<*sr}V21#ucT43AH5}|dh(N9c8vR*$uYYKS7HLT++TV3n_v?cda!7cG z>ub=3o`6qWduJ4>?tWvBg!H{9Q!eb-40W3VBkdGmwVZ>RX6?ONwuds6{_WN?%AR(o z&Qh!niIdwj7jUisfODyHN2AeqMp?en?QqS?SH;;y6zaBQeXG?>oIedJ4#8-amUM&6a;XsnSU^PuZni9w6X1c?g znHn54GOpRZo-A7-<6InUR_fAYyQ}Czwg2bUS><@^zT4WA^ZLxxETB-n@Vh3*=g7J` z_0FwiwOFDn1IJkYYbV0-ydWZF3Wb@U|1B!rdf%dDan{dP{^HhqtGWmzDytkO0CY7~ zcHO9)jMKCrCRP-%E@cEs;$Or_3c6|O<=nU4&YdSI4o*^#Z!%*f*^StK!f6Hr303>Y zVGUZzawcDMkHY*vwRUlU!<%_fX2talc3_bn+z7;26|pY+vi*<(w=QU~RjW7xFZuB* zyRK1>W1fp96m1w}p9}I^SJ(jG;M>x9-xz|+;$&bDIsGLHXna}RKIw{_kBqc8%uP#LLh2!4@V5I4^MF|Mn?vMHm_#CF-hNg`vB%`pL_c%Bm zSUC1fh?n-IOUBgS)fvx}c&mlBuS61I#94a_J3^Q9g^#$`v?rZBHbHnmZjnwWcmAC< z_od;{NYFU?1w$a?N?$@M0IXuLd#_&{wL+proVEx``IiJq8MzrPgI)F|cA$eKWLZ+7 zt9r&OQJU27b8hIQe`WP8e2POYbM5Clo%1&$u5`&5nkY-_V3`xblc6OfN8!}gR>Z;} zXVQ)=DikHt6?>eHE;X&f+{s!5rDBIERxHsfb)7QnhRET__RlPXMjIXr;d8Z$_s-c% z!~ae_8H<$uD@ivV0FK|EPKTbu+%IcGn_eA&3;4MS#ERUq)1M-%y&Fo?8O9u4&Q%;z zRymeeSw?oyVxg$dQ}&KnGV(J#xTULxy7bKI=t>vVX=U$YU)RZID3;*83HCDiwUOS% zr=SVKsryO-yBH{TY8!7bEQ3ih;yIW+0W=|!R!~8xam;<6%D20cY{;=y43BN(!KYP0Yxu-73Eh<#X zOCsb0kHiAKR?qFK4*tDkE}4EQ84ov$A+x};##`XH+r5}!cyIJxL&P{{6mMxv`cj63 zzC%5IO)#gm|FQ`65naPnNQk(d#6DvecDvGgjaTk3gh1@IPa!y@sH%%;C+Lq%eqtE| zBQL!a`rcMs0X!VtZ|N6}DQ&#g(@kL4v7YBNRbu?sJ=*U(5q9It5=8Vnc|CH@hmI*i zm(?5w4Bye)Tgl*krllWY{hfYR`k4q_{sNP8v*AaQu3FNu8%Altl>I#Uq8Af$NH6yx z+4$Y~w>HJV+IFP;BhTFV;Qd%}rvvUujlT2EI`@D73u3=F23i$lQLD0s?XgqxpQA{(fMPn>-D`7MMveiK1_lVJ?w zqc+z$)$W=TqPd-ThdTE`lb;sePAYWtI$XORx~7?}RuXrU%=^C&el)5I_Sb3(rCtrz#IwCZBAo_rECcJ)eNwK`IP&nrmL z0+3`zlK4gtsRiUxmhzuV*<)^=aR=oR#Xy>2CZ6RLJNm@qIJ%9i7T$8OVq_x5ASo(o z;Y2W<^m9^Mg%Fsw29JzZbOFC+l0G0(vf&wWkmaW5Ch#SGLlBZ8`R*Ka!Jhj>v=Dztha?|t4#P{77O@px9ivOs?+F=q`UPF)HLmcz3YBoY zT8J%cG4#Q)WxRr{h9l2xM-rRY_pQ_uXQ(Brryll{MPYlsecT{CtaJZA%d#!{({Keg 
zL^jLmqvhS;wdsz=A;6eC35QnsVPP3ud(adggFasaVRalNxW&a<*Y%G%f3wsdQFO3!haxE%QOaJZXw}IDH(g*h`;VRgyE{)* zjo>jCLgJzX>31k1%J2b-dRd}j zC+CTai(g)@$!$p!Ps9QSfad%DDL-Cj8M|k_~v6`7H0$u3m!?1h9 zAQ4ocDt}>HKnLu^g;wo+2$4bQR`tFO04pF&Qs61ytX;zHR%wfkCE@27hU%;QE%|UU zZ%*aT66yjkS`_N$*o>TH+Wt-2GAjwgO8SD`smGNc=ySbt3XIq@Q^1YphW=0VK{#JB zbc2r)O}vVLXS|(4+MM3S19gP<(Ct-PhZO}zYwSmF!8pieme6(t8)Ym`pJYScJdQuy zpBGTJ1|Glu6VAJThbElMx##UKR>Wnx`n3Bx4D@BTrU8AFMiXIEKxpPOsYWXYiw}#q zlY(`3Q8TA9sG{-f77ei0D8e{|b3!QntYjO2t%LH{?K7^5YJ1PWzyj2HoI!HQCDFwt z9^m$(Te(9YOVz$doc4&mh-PiIkDSH*@z0z`3Hp7o0umu-~d|3-Oawd@x?*a7WtX|NqrTOlL;*8 zTXR)|)DoEWCKHHWsX_2SBfic?*I;OFbhCCk@p-buKGllcu7tS4cjmNW9uu!V7<+uo zKtVY$e;n6m?!~jPRY@6(Fw&2d@hvhzjGqwHI@S}x7RPFOp`XE;8H=Z@j~M#UsgE*4 z{#p*Aht(y1ji6asBMr4@T@vjRoBeg^rroneH3Bf5BNFtByY-S3>3uGhUhB*LDMMFVOyXh zM&bopLQrno2j87!EKv>9 zyX$LyE^|MoA=|2ElDPi(+s6E|wk+h#FbwN@NDvcPM{bmEIjscq|Dt>@+Y$Y{b#MkX zi~k+{V62wRXFt;iG)yit_)kEjrMPEX{#JXjz=<=d({K4GSL`vH=Ny8!XhATCEU z?fVD(!*@t5OTj^V_4{UsxVuNlDg)4LAEO@dtAK6vM-s~7`8a)#u)bp=@6U}6-%*3Y zpOuGoy+|jUo(m;db5g`(bu{B#lJH0{%(0S_z)Spezu*}733XU3tXL%zwfk<4ozN#T zSiPAVM3NnLl`=fX;ui^0BZIlE2|dkh4CQZSdY&T!$8>Jj4-GO)5NssPp;5lkWB22B zfgTK>&*nJS>tc=i4lP$-+ktC`yO(-jF4*A{fxH#iprdPsLd7lP1CNy4Nrd zRf)4DIG*x8%Z61TvL~VCXDmE40HkDlo{x8r7y2?#JpJuu{p(Ark=vNGN8WcJBYaPH z3s5NHK>^2526ex#rbEN}z4MFh^7RidqvJww$rM0YkiV@`ngz{bYELV@!{l}azaBTS z(CNM^2sL;#XYu4iGmcMV>pYFsIvS1QPg0h$U~?p@J{>1(7HY>p4UMQ*r>RCZ8n?11 zyI}J?=to%8{spykZ5ZArR2?*YW;}~w^i#WYAGg$|^N9teVFJyCaEUv@Aq4cNq_I2aaUDc|} z)2A9o3=G2}eBcXG4IASQM?HBOFJ(UD2}6Kc;Pe2((84zu-DFUYVRlXU1~TIP8O`SQ zKDPR-J2HPgS!OKc1>T)}-^+j7x|{bEz-&!U&8FuMe%uVGlb$MxA=EN6HqzAi&04W6 zCwf-Qk&&5s3gKq#V*)}rdM|ZsjU!&x#Exa+MlrSN5#_V18>~*Xpb&RXAJD<~qL#?n zfe_S;&u@hRt9Q!z>X{Vi`cz;|yY4MUUenqTVrMbs?8eHqhJkE!@7+8d2;v%0M*YIO zf%T9omDIR6`n9&EUda_Q1QC9udVempo|m9ngPx(*hI`qR{vr$3Y~2 z7x0b@AY)PzCW(Nt_8Nd}B>^>&jPvx77SN>v2LjX>%)w91Z>7wu+(E23? 
z2LuvHUIghQqn9Ybz0OgqTowvke!S-oh=<~P#L!$q+rX}a9U|>x|9)m0OuBIwl+2HJ z0D3laKvOF==HUEE`8#%+YGry=KDa?)@^x=|n|o^2_-q)N5RLD0?$$aAP(-(} z^-0IV!doTUfDQxG;DyDL`_u(W32=e?<~$6)(WxGxz9vdyyw-lc`rFh<_Fe2}B!t&a zEnLHOy3XbMV!4h#iNo2!GD;8_fe6zBNRjHp(VLb+5A&vzxv^?V4GaGr$|FX_#IIqv zfT+oNXLe{{C)LHSO#{j-(&=J0CqBv)ZKT(h341uW;ez5^9w=|G9;ykZZ3;MlC*6GOuRHbyk>>8<~827=1=h|r&FZKrx98Fcb`5UH}pu09uQ z(oz_7eAd=U?~9=RTy; zewLd9sS@2!G>_6K@a6v#<)@Dat8cEzFXF5HIGd`;e;hL<%VTqeXgB~N&cJ2zW%x~( z$%sb%W(nDA_v_>uI0e6Jy#MlAk`@VEdt3ODOv5Q1Gv46L zIA2s9F7&Zd>MYH?Do?r$F_?Mx>s=4Hc0GOD*Q9P{j{uH7%Dasx;Sr>Y6IEnQS_J$&7qlHwW~NX)+4@>w&B& zSFKBX-DPIP|AL_%&0XBwr9lnd?@8@`o07!6rQ12?I1NL}bYbgiM#e*eg@g2F&VSO= zZa@EjK-4hVZckd-pibwbl{XEdjP3y@~}0wZqXW!1{LiT5U> zQ1t%w>0*qmWw$AqLQFAfwq^he8q}tg=Eqmx#Fiy~wNrXF9In18oAoRpMvT zTnu&&|Fzv5W8T;EV%CLUq;j6k^d9L;ty|2 zf947$Z6pD`vy?;qa%djz#MAbG)kvWCW_@ZoJd|kkx!wjq7)zRahRE5a-EGoD^29>0 zuqK@Rz9*5prFCp#`W(^Ai_<6zY)7j9;3|nXTOVVA$gXalAYt*8Sy?b*FrZmCz}>S@(iQ& zT8j%LjRs0{pPMH}2uKQeX#%P?q(J4wbSA{~epLkYa=+KyP*Tdw4;(!GzYScfpmG|9 zdK2Lk5UlmOfHdkqiWMl$6&0f`1kl3i>K`H013&^9 zCst=LQC$Q2+t>^5FS|2yV8zOuV&oFh78`G`3%zi38s#P;6t+TA@BSM{hW&5p_zocA zy!nblTrWJ&U2ieC+Ds^_&dubOIwtwj9_6XlYvm%9wIQ~mwi|&C17GDp?#2>u5S$l z%RuN2A57lREb1`4mhfCiYEP3vdtWr?k`4Jr-!6`Gz95l**)O;{rm- za-;=2!-SSFoIg(M*wIy2&Z*rY<{fVRk6Ok}&dtl{T}kNfll%TW@FfR5VU2db0!c5HTS$V}O2Ui@yKMRT0)^~w2rcCc^LJ6}W-5NN!60I%mT)hZ zcG&P#YpwA0L_$?3zEC~JlLH@6EA3GGLoFv@qH;hug**U`11^|qoQaMv;N8;#1p*kp zL7@jrZ~}5^A&(uaQ|>)r7bbDqQV`ky8(OgnNS0b8_euBe`v+(z$8vKG%H}`b~iSrrNhqPVg2c2K{>+ ze9VBf!Xgu{a(?>O<@+Pl>%K}n+X;z z<7NJ+C&rC#l_P4)9k!3uz$d>9W_*NW4AlVKr)uaMGA(lxv?g#XDA7`kcpY9Rg$Pfi zuyED>@I2@->%c?e@1$F`E+%H4(mbt>JZs|Sia{O!o$YU`mi^gxJw+H&*Kb;T9R!3~ zfSSamD@7H0=xu`ZoW2)skPT`luocjJOJ6>EU4oBQKfiwR>QC^wLm1X`9t*ed-2-o! 
zQIG6|!8+`_#YJd|wBi{jPG>o+;K3j1kP=*tvsOim%eMZP;#ZO|7vHdJ0$)j?u&EbF!gkBPf|3jjBB1;)GRb~0r;eSmTF(MIdPTqXqAL6vvB@Yr}O0QVb-g|4A`YEQXfOo_@5L z$Td1o0e7`00DUL5eo1_4KsIo8xSH?0+7qy>z4-ux=!*_P$z&(azlti6=8|K{N-p|} zI567L3*nCCfI?=FE{yVpkfvEwKO}yM>y9K-{Inv-eNlaxR}TiBMWcj% zUj$eLG>KBEUwC4%zf@%5U>{~T@Kkh6??CyCH~d77J?m1ihMG6HmvZaLXrsBDxF0m@ zUW_;)zI`TlBw?99Khd9>TzD zxi7RLS1?ziX!36TfFI>sqw05OJT^Jx#R|j*o9BTyOJbutBM`u)xOu5^_`CLq+6H zvSteNvFFj?e*cmvK@uNKK&-4~%LaKQ>t;jNMby?%3w)oY%|K>+)P_oMFdQo&@E}!m z6`$xj`h!3hn#;$l{jRq^+2#W3Mv=G&Ao+%s7yt5_m}K9U)F{sKvg-$aqTA7ZFKVci z5#kr8+Z*U_cEcTCyjl@`lFXJ|P4I;yKR@4NBn&R~-;urlrTk2tqvL7PQh1qNBdEIC zp9OxkolM?M3)&;?M+x1 z(Fd~LHhJW9@a@&vfA;=&r}GEXO12TmsPA7inGGZQ-)}53Jbiv)Ff6+7^9-0OcbW%} zO@q0hiYA$`?feHQ2lMbKxTT=0$LpXSv)TjypCfkf*0C`p`k+qL)-obUYx+b8CBhL! z_ja$t&MFz_j?oGU+P1Z@PDp7*wBeI)cmkv1g4}``_1GUNBg`%%7K6_&O@i>dpbPoB ztr2Bm7)kL0mE|tM_~PRY(ZLk<`0CkY2K83smvP*K2Gv$DT2iH|ljpf!d@fdj9v&cSR zl?xXeozqaz&NAou;Z89~!})_EQV_AG2|yxJps($GE%{VJ*8FlE*p?v&C&3y9iy6;D z%1zbrE8I=T7jS7I`LgqoD#pk>&nDtjVXrFVob*2_&~XID7h!-V31T1yK)5FhST^t(JSLTX1n;uWhxhuyx7cm9 zXIB-S`5-JcewQsJI`=>x=vJQ7RsdtL7dYx)f%VtFy6v*2neNRguH;?Z^P}a zWBT2Rg-aDZ>`{n?3b@M>!i z5r^*T7rXn>kr{y2bM0o`gsi8svbpr;0E4_g+0fyUp2cKl?ZJ5&}?FfxO$YMa&nUZQlP+aNA2ECHBodC^S>}mw*4&()Xo_`e%gd zu?RLxCAeoD&i!*)CV}rSn&(62NV>VVXp*U0x-b|jFEay8rjsmhXFY?l03 zYxf1Gs$XqPieWgOhe3w5Ra4}u#xSqu4Z#t5h|D)asT^$kuC%OsUaWR>zG4(FF7d@(_(ca<&Ay=qw}IJ%?ukH!6#j3=f(S6_Wj&iT1F($0@yuCRC- zm>w$tPbIvHg#C*6MOx+3tpGW3cF#zBZX#tt!vqQv@W5WzfO@-NiB;b&(*e|$6k?EO z17ae9yi!erTDyj2QBKiz3~8hd{1hIRICBB+5)YA2j>WVHE;|19?5Gl3%3~zM1*Fcf zjK)Z^ar&0mIth2-1gN;Pp4Z6r-YH_b41_eu*0c|%#3dy)IA0UNlAir{$XWvNI*(!F zO}-3(lcVq}ru1-!wuHlH7$hOdK(TUAtyg8>>0Z180Aulh0g<+R0updjs71Wj)_XIb zxfW1*&N#opZG*R&MfT=3XaM4o^uB4ufj)7 z972aEKZp}cFtJ}glK+qVrrR{OjA+@kp!19;t?p^5EJOSeA=xWbiB0QWbGPCI8t7p%hUx`hd*ZqFXnUq?!Q(F^6(kA|9bkkA}n{Sr<^rdec zwQs)lo*V|t&*-4wB-!jxwFl$#=K2l_Lh2$D4{BrjaaLlUlX>F-k&kKm@2+cTAN_y~ zk$S$#y6L$lCRByhoN!|eigm;aW~dHL9!{F+K2a{NV_avd6HoReiHcoa88-sVyI;iD 
zuP^vcfTmQoBwJ&>wch9t&@3X+K++z{qb|{Gd9&|2J^}PA!q*wuXOx zcRs=mD(*TU?J)KT2X4EE+>}Y~&`f`&JJNnq8jv1bn2MLDW)4?-2SMy!9&ieg+H3&$ z1Z3_RjBXX#NO#Ti`um&o@yO5v4nVQ$@|*Q$JXz~ePZ?l;>)`kpJ=k(X%V}Kf{ZEs= z&=zwgRGY8@I`x=YgWU~gl7!ml3`*Mv7}UF(jd>OmXDeR)A$$fl%Ku9ip%p@gm_pnYT`WCVIa7=i<=T;h`3DLvsvC&~ zQNp$*V98r?`o6_r1?p`&(&8_$P|~os;%K-0Mgl=N&9g(+fA4% z&AD&dVz-41Gn%)$n!b_V?m7-ew%n;}k#bjlV-tQXu@U!6w+7Az{Q_Z)ZY9lV9#tzT zO=9jRvtedLS&(;Kh0FD2- z*~H@xd?GoP^?rmLmqY2I_&RKGJAamT%|_w;s8y4M63Kx-{KvzqwJ%?XpuwhTpd1n| z9=pKp)b(ew8|xY~B3rx*irOk_1gR3hMAaVaSZ9&8vONwS=%b?w|NG09rmy0OO#cuEoXk(pM@7xu=pgn=sYJoh6}rh+VVD`9F6WbBq43|4M!xb(wu9%@p3 zrnS@QPm>|M1hbgzH}%&D5p;!__C7fR7%}52wF}BbSW0oZ!B+(k>8jsObyxK2AP90{ zLMt^PZ)EasMW{GC<=+yz{f-PIov`6(vEM(?QSyf!{D zLp=aw?O{^-Kzoy~06Jw4igOUlM^Ob0c-DawEWD#VXe0vn*Pv^CFdR(6W1+C+Mk*P|%esW<=M(x}GqLtlc0a%_QYTddATyL8**utwW`0oLx5)fBab1 zJ}?CnOa|JV6ggORCoGxuLY+fQ!xK^4m&hOT1v&9%pJWpP^;!2*Ht59;T=&<75B+o$qu zolXu!S>g{v_r-T-71CBc6BypQaVKH?EpX_+r$hwu2t)esE4bOM<_}X&&oJ~7e0!Xy z5c8cyoGZ{f&x_XIhMI&bU6(A-`d#aru6vh;R2O2`EdAh7U&Niz{u8BbJKWer1!A1? 
zXbj4Pua!rcl?1KqnzAs~1cgPXu9%zQmskcJAy|ci^BuVB6XV$tC2QrvvwScy_@pQ^ zA}EvLOO4SFu2iCa1k=SdLjkOw!%-0ZPb-G9@@U;)gd3Q8eC5wdur7DHV1`7r(BH;@ zdk#2d>~OCAJ;L+JRyder%`UbXt%Gko;bn}-FZScmWh84SC@RlvLvqeMrxh6OYCBPz zo|<8`IcSgzR#c-jJud@P%{*g#mKbkflWjHBhB!)4$^;5(4=60rng}?jpIh`1M#j`( zBEYta9?+0@7;rsA+spz!kxT^~0uZkPz>`}5nywHHyJa9xlWmpHAeUWMMc^=5Squj^M+^50qCiH=)Il#Hg-}U$BD>& zAAnH}DYoaw@i1;P?;RM;mb7Q?12rwl`m|m3_n{>c={E!(Ls?2rj?ITM!RQ{2i5rlb zV(L9=!xzZ)9Uz2fRzFHbR-L;5lFJA2)D8s$#nGInx&wu$lxsjM2+0BMCshjI#N|Qo zVp-^-_}xqG`NghUw=&&lZ>&=9B=D_REnab$HMPoAO;j=@iHk`PHy#VFeIZSoE`Asg z89zb7yYA9pd3STR0>y5T4ikwq^8O79Fb$vpT|4n|`#Mp|pWR+j{CAJ7)8%^Qx@_iH zoJUxyY|ckWPNU+c_c`KW?>#`aCBvBN%?FQ(mcvlZG@Q^u??@;g-H*=B-(mhm?Re^S1;XK7(0&mD{Gq`6>;9 zoZ_)CV-gTrYkl^JWqK3ae}O0U533#Xmm;f2O;mP3v}eLe`-T6Vo| zA+;S3xMk-XV|nALn682IRVgyq966JJ&&g;K9Lz6z&AV5FXnImAs`K*lq%yp>9!=Yn zl%feqLm_w#U@-GtjPR(iB@WYT@+WOzqxl;!d6n1FZbO%$q|^*qtls`{D6WE;TYjow4PgOB{0(+!=>&y!rT;s<(@OCSIn^yU~d zPJNe15ycv%CB9@Eetv1EZ{XT;Og%bpAzN|TW&YSCOKTm~C7%qP_$W?B>aUCrv&$Q7 z9^x!kx{qmJDaCci`1w$St*=KhW(mR?SOhR|4heiay%R9LynQ6R0~-t1w6b$MM-FR2 zo$ygJMV6EbTkZ!k1j&nSAoits5Rbt7_M7YT*XDpZ*y6SMpa0cx9r$s#TtWq|HG-W* zf3x4~1XAJ)qnJ9ohJ@y?1Y;$|T<4X^A}CwJOL|=%4%pV`(4Ooc0V5}Ja=W|t{LzT^1TD(yohVUfe#_zX zqIpQe3aH(bs(dhQFzvj22{s*7#=@YOZU8_ChE3P57~D>WOiBD_4;2V7;*>CWcJMn? 
z=<1D!KS08lA>pfmhw%O0np*hLw&kl}g-AUk6aP z?P*beCtBAgNMtB4)SIR)k?~S{G*IQc5(9tn4S*rcu@ZiU%JG=++`W2Z%XIU-EpCm# zFisyZUGBWeDpjcB8o4z#j1U67dFUp7oWa-a&*l>U9|@wBO=ya-Qt}vP^mwVd{}nsU z5N*9T6`d=)U@);`X$(}&w|?yx^+SS&P*%E}+z%do0DVq8g7v46)J2UW3kdg!6LjBt z_+}R4$UFrl`d!B!5+zlVjRngH;2$swFyUel%#k58NEdg;=}dif2H2Qc_QcreOKz2C z>vw090tI&R{GCyH0+c+>gI#xZ}2@PEvQ4=m3+nI?)Nh}CAWfpaD2CvfKz+Dm~KJGR{Xru?s59( z#T8bWYC{1BipnUf9&Ou`#-Xx%wT`x{;jP;Y*{~@?n*VMSGuZ9$t~NSW%M+d#e&U#F zkiYdH$U>!zyl<1%w+cCq$J&f-W2UFp$_~b9S2SzS*j~^(VA)D@I*HM>qtH#*3Q8nK zqudW)6FmVUp#8=Gcd>t>N>1g{2>jIupt8q&$ajo+GnNG-4NvM_LTp(EUaQfzy)n+5 z`)>icB;fGj4-3C9R%_kSvANNLk8wKxf=YMVSy5mdyVnD~4zS~=Lbg6csk%e_6Ufr) z0%R)#pq@?n*!;_hGu&-|!I<;2sd@^4oa~9T<3~XoL-1v6)WoQhVh~YULUG)-56DVD zh4RljlVJy`V~=Qu9@{|~BM#8OL}%%7hQA;yce8Q%a}2A^E5B<&8b2oRTFTm);4CO{HV+BZUK;OwDw)W5W0ei#3L)w^ z#2k20Lr+LClr^Os7!V%|N3-(B+H`~;Ws;lrNMt zcdSos)DbQ8Y`_(?9&R)0MsyrZ*38NuEqZO1xt)vz@WWOs%>=@mz#3Vp<@%pt(-~!x z!Q(A(I}4FtobH>Bt$2*cN$(N%yII`3-#vWaNX!pptoBn_rpNtzR@_0$Z9h}Tloy19zcl5zKIy>a=mX7`4fcNX-{^^A?I({k0X2H!8!((sHA`>B>B*C7(H?x~ zU;I$H3WU}vkgwMxwkt9d7)`jnjx2;sa6SFCCf0+**FZdQGFS;uKED8eoE6YbEoLT* zZ~%;nn{%kGdtVcq2BXR2;BqA%sAF4A%#LxQ0i|5(#QO(r{Lx!p`x&WWnmNrG%EpvB zO_tft(y+KjfRnt@NP+H@9z4j42oTS6Bn5V|7hPBsAh&k@PM~6$vyv9mDQG<9>Kx`-AN?59 znIa0v&GDFi1u{Z~O)x$DmNYz+l3$JsKiC7f^qRqW!x}4dvclf&t6ot!oxla`hcZ9u zoNpEAB)bwOx5{mTmk*GQCgMwL1Jq ziM;3U>cTyvqm0c{0L`1+`NO@YdYulA3-vG0{q9R?_&qUlsj(Y8ciH0=(bOPBdwT$4 zR+sSpgX=5cBC>?Xj&YwE$>ARAFw$Q&lAL%qVRLt<559cg@!)<NT;0_c>LyL0$ zPHYQ5F8FGfT>fEdxjfvTGN&mZ;VgzsTj@hsduQI;=9eZ6{VWf@5+@+0pObq9;~m!d zy|~7@X6oyX1!CO0D9~Lzs=`G0hy>CGTRCNKMJE((7zgzmJ;o0W&Ot(}@QC`Ge?hHlXyBBTj7R(u7vFz)?F0 z^%D8*sQPQb>>CSK(}Tc{N;-Qz1O4n$zf(e~n$}wW|^7=2ibSLUFnL@(8`ltw{!xd zf#O{`G5E#F_N;}-9#%V%4ht%(6!@iC*#iv-+9^qA`@vYVm+P#%|0jkbg3MLWdqf#@ zN8cr9H6wh)!V(Kg(|*t#vYGmw=TqRt63N^l$A#84MMehz;W^_Wx+KTEb2os<5~98z z6_GfD)})H(*EVus6A*Es3sIX{iBTKT#_%myu-JoTD4u@8LLW_Q3EJuBWev~zGsZP2 zNTUYi7d(!ndWK9Qg+k_&0dyc*01ynxAX6zQz=VV(DdMgFEC`5Yg3?E%9qZ;jGzwLy 
z&VkN%s3>Hf>tyR^d!X1%Ho`i$i|&a2Q%Tv%$G)IY##IOx|iPo zz3*kyY95e20gy^x&84XoA+q<+kuh+@rm-0}YSqqoKWRAcAGDbC^-z$P9@Q&qvDa_N zPm{=jK!);t8(O4AYuFYiG@ob)gfHYCuCsS!hU>HOo}{ zhA>^|`;#2X7MdemeFIiUPBEYznDB4W%w{2F_HB=Zp-)dacAHI(&2G66EjJ2bm3X{V z0aF~?65>6wr>R%i4Raj}PROaI?XYKz9(nd!42 z6ek5Nm~l!NSfoeUgwU;bk*L;u!uT*0(*(T0gQax~HYuMw#Rw^x)+-gW0X4c7^0CMQrFvciRKOnlbNiJJUt#oL!)MJ(Tc7Jaq{W>+yQmDRvL z8XeKG?&UN$1>6C#iFl|)c!B~0xI$`62SPhvCr$3rEVh&2;FEKTL9=eEjZ>{`8s zU+yc$P>T%7oq(vL?u0*!ZJ=R|U&a(z9eQd3f2Z$qxVxcOtjlJkB1c;6Hldal{8Uc> z6(genso_TMjAkO7OKL11h`klq_S=Q7a9CFwBQEdbG-QRkH3IQ^R308?wv*u{^zfz) z9$bb6k&fSN;E(PFbm0?$Os&bEBb(M$WOJV^_AM^uk_nDPk3N1WZ{T#M8qjOhCe1PW zXo{NzG|kKAmj%}7tk?U*Y=8tF#87>q)lNmfgn5&%q~!SZrw&$G zLT9ClK(aO1v6LOmlYZzz02b@6LyWsUi4cVvh71Wgt_Fv@>5s4;i{~CWW+<&7O11Xy z3+3~Y$7}$TDc1-n)b@3N{ODs|&to%DBti2M4Em11FnV?$L`)3Ku4`s1JhkX4yIT_^ z{TJFrx6#VfIYWex&_$Aa&3yK7T2!mXVc;#GS>n%2&yF~2GFgjyRTkt$^;8*Qf#^Ko?me_LzF}@8?#07@ ze*}G`;?I%++7;PnrrJ?r#o~GI(kxx2Xc<7aY!NKI=I?>ow@39+{#3Ln@@+af1zKNL zq19Bdd{RbM@SEj+uZk$sgcFEhNVOJ(iGB8ilbuk#^*Izt-1*TcYg5g4X{x0c#pwA(EjhXAn>}~ z#^+EU>IS=(h&WcJh*A#*KV?p6I3`Ct`XSFpuB@h!+x~?RN7Jz{W?fC3timVx`((h} z=L^g?4`y}msa}18M?`mnzj72KltyR|MHQ+LVztLjVeb!=dW>13d0H<1urB|;s@GiS5KQiu2q$iX#+;GBSJ+Azc zD71RO$Q}JPq4VMJLH1yaopx#oda;@on*D!%-#2p{FucSRE`Ey8uU)g;Y(SX4ni{># zx1B5-^UMjmaSD`Up#}AgYJe}ODN^{;oAQyUqt>2FY`M{G6m5)(In3d{XJ?J3EFn&> z&lh#g(Z3Xl7_pU6zpc8)Qv5NXgwPlbhfSFNx4so8jQV6}1`ksFdAW9`;m+XaW5)^Z z`)RY$>SbQq1F)PJ09br0&+Cbj4U2PLi*mL2;p?zz1WwXm+R$a)w5i91B+0XFN9Lwf zCMVo|J9c7WEd>24*0|EBM<{`38_`M)ZPtMdzAgoKC*AiO(eVrH=YkI#h1sPet+*gB zE5eLYe8UV;9e~ow(3|8N+Bt& zX#R7_5VvqR5#ml@i!TMh*xAM+cq4boW7eHt(I>SFPZ_T~`^IEQS4(gnbEEl>gOXan zrphrYVvPf#-*_gw>8mCQDTF3UBZV6N@bRI6PLBiVty7P8rhl+gFHW`4w5py+fs{|$ zAC_tVwB82wr8%}dq7w9o4P#n-Y-m5rxl0!MB|j}TyX-@!QH^p(2dao{hzrYOGZDXx znT=zPR}<_pzn{W{DcrM&bw#)2+;2L#hQol$_M5p~q`W{cPcaE1_3}H)ru&jKx4Fb; zXrJ}@OZ1HbTGN-0-s@mL}^gPxT{VcS`n*db6^Zlo_Dhk(Q>BgasRt?!mXSZxO zQQK@`2>t{tIe3BqpF;}m=JLarxG0RYK|BEXTF*$yK~pGJ26hh?Y*?o|KQ*79ub?)Z 
zG_-E&UkPkaDmUkR(0p_J=82t+ZnTX|OyULo>p;5!rxow_(y3Af4Y8q(rOip)nS(KC zx*x!=Z**h|k#*n-nnv(@vxI9FeA&%e#q8e`+Git(_TdZ(;+DIaADev_$#s{{1H1ZaBnP?X!30!$H6d^d_(R>LYe@46>(fVwJKfQHDkyL#q26tf&uv=rsws}pE! z6G*|%BSTTDtf6nxwVi31$COQP7uPQz-jmv{f-RA8muyQ+zqWj->!&mtDXknE*oG3w z3~>4^VZZDRHhWk<@rL*c68~3b1VzU?WfR1~U%y8PyIi^+Z52H_8Vk#m0}f9V+uI~W zuV}Fv$*w&xDWiET{f3JtUS(B61~AH8R*ObxQ@dK;ba_go>sUQ?EmNYWfTB)5-S2{L zU{d!k3jPtj*<)vb?JK}^)-rMfzu9N78#8+Y@qK~j?Hz1CpYeOM|6_p*8|0LSb*vGA{00g4RZ$uxxYk9cPmt0_0eOQuhff204KcqJp!8&5j5Bn?Tn>xCxuiKZ@DF9QapS>n@fm-dWxk z*p2$6`h_F^tCB-Gh2I&;BpdT3*j@2)i~>r#;tx93k_}7C9<|u4MWVSsv4BuzuP;Av z>lh*3IgC+s+=0z7LR)n?7(XRYjZE_&rLd#EI#^1BJgkEBw+ykLxB*c@QpZosRz~++ z6_P}G@hBGhX$V!R?%*#9#v=x`;JT=CbL=nP^79U-e>*sMHk5{SqcFXJl$9Z@{|71i z&s8W zr$TO&cS`1hLHM><)jZ~4rEzq^+pu}t^caGhpT`}#9)N5p#X#CS3~S8=${*&?h09{} z{fi5B^gFHS*gEmD%b6KO^p%hGAKge;;e{003>DWdPy{jLe)UGZ%Xyy|KYQv5bu%V+ zGr%MP{yP!jUw1NR>FNdG)j-4GF~#V?uuX3J8z8=hiJ*bnae=iI$jtqLG_r>beSi_aV2hu&?H zGF~jOYuMsNKD1kGm%SRxePP^xDz~T)6$v%2v2X!F@e9L~tBuajnFC3M^N+8GcbUP0 z+jr^3?EzEFhyvQj+=+5s)e6ZxHP3L1#MU;Ki6jn05Lc7Z8&dL2l_UhOI6~-rv5t5R z_K*(8{!b>x_htLB=YhMZX;?{UIv7!?5jS`Awdh-2WGm^P!C`woJXnuCt^?VG)$BUj z#1hr4#j(ZJ)Wy}Y(Xr9-#Kw`NrL7;eqmkgMHHtTSUH$z&UG2I5t?6|Bw0@sxSlweJ z^camT*UXb&`p$G`Sg1e1z^WS*<@G$kUM$}!E zc1L0@PLtB$&3pggT&;)3Sz<`*-_r(adSm`MV&-$;aQn}rb({VoE9dT!oXp&zoQHYC z|C|qQv>%?rtzD%3?jzbS-2j_d0JDDKE;4vXfISF6%@Uw*&IPu7#`IBnHbP_rQ`j}{ z&sK!VV~xk6?$lH76f!l^VGc(ePVj8T_6E9WnjD#!AeyyFa*g@$D`N&#pZ1uuyT5<0 zI{Yvj)cR0g^c&-9#&Zgeub;npKeJ?)(kwNL@m~c&<+b7IQ@{Lhu@o?%S%lY z;cU~QH;UCi2{%qfpVzEl>QIn*VYgN7u1J%fU((<`&nHNf5OC%RE=$1wvqM1FZF(>5 z8drQWb5B$Rc6`NqRrvBR+&Uv&iXrry^hV~$J-yrCLM%2Gt4Z?*a1P6iUZ}VUY@*X6 z*#Sh@#P2x1W6)!d8Gm=k2BJrCucHmVmZ^h5Vnn*AS^DDq5v!@oG}>iKv?2Z%!>eHa zpJSnH9r77w0~B#`Mfh9Gj`Zhe;+s$4dyU+pM->%r*If77r2S5uAKGOG|09lfM{J(w zZj?{cPvK`1US3zfJvF)i14ge*E||L4vO#^H$BB@XRgY84_$7E%var&Zu)@V2Vxz>p zPb5?M=_S0k?SCLz8JhN?w zfA@c(uz1zu9`HhKjlX<_Ly@f}(d{~w7ez{-Ptf@vaj*36qD$2gnlI?AL`}vPm4D9D 
z)b}%M`O_t?FaKo&+GLlwf#c1OVZjPH67s-6ZVb%bNL2oa(h*h`K~UF;+WFiXi)m9g z-AKbCouKW{YGYUFRR8Sbn^g}@c##*^#Ppx%tp~6SIaZc9xsrVjEe)uvyzAt*`*zhT z(ooX)>Lgfo;7lJpuz!ObCU}kTbYo=J3Q@P4$K?28QFOL$FHb(#a_o#Q^G7P_NDlHf z<|xoDJJhC|kV3DuKm=h0Q+4RjwAeZBy(Cl8vDVhZS*j?Vza?{ci+?XA>reVD?vaq; zTwd=htTMn5ZxQTbhS8g+#jt-^mtU|KQ8hedmAhO}B0R;~mWyO&6Cc zFYPUDaK79Pz>_fpNoEsGo{m5Rt>5wJx)6>2-Zuc`2{$qC|98r2-PqzlT0FlbD7epE z8x&p_>$|z6iQ2`0@@`7Y(Fjy$aqHoK85aAylv(j}+5Vw;cE!a62Q#UNXZMCAwIJ&q zPM#EAa;zM;Kg!*R$!LA8|7KF6c|X&%^*XX3DpmRi^HAg=FUZKIJdW-#BGPyz)onDHrO`<@h0jzKmnyP~Jt1 zatOB3!KAh>eq3Ar3fzes?QSw`cHUvgeG2~S^hfvt^_%B|ww;5p4i|;C%Z=2F6pihI zJDWhHkIkm`{t1y=?dLCKEKSIp@2_oR_7W^9B003}>*UcB_5fd`M( z^KCYMc%@`WqvLF>i`vu9nZ5FaQ?f>KTArV5T`h<`$X>K#8hbsR`Ty|s)?rb$?boQ{ zASI0;-Jo{J;-QC^YjWp67GL&>P%zQUK@BV%7_dR>>zebOP zLx=mmt~1uT&b1igB0OJJi=h3KlrKBs^(7<4k;Y(1P6tcr8?Dt-{M)`=BIp)`xehE9w0}UMe~5kXClSqi&6~5vt$1rijTJcJt{lz zV3-Z;roXQ{n15+eXBne&sQHQ+fgBUSD}Jofol5)mCAN+`beZ9tRYT74I9!t;qzb(a zHt6qxAI<)@MEmW+CRxIYee$M!VJOD1lik3NE->54>+`aYig}}`U1^=I(~1`i40@n84=>R#}Hxy0Q zzhr_p!T*UGqIC5bpqcs8GI*W`ys&SgeKZeR{kS}BfPX-;tvD0T ziH~DO{m$c#h0HdJp`=g8hl=#wzGb!5B9qRTCd48;X6~rXSnn6Bl0$`go8K=y3+_cjeCkjtX3*5@kb zXwPm1FVRG;2@$>xmGqTMx)L~4*Iw@tctT0q$eM{BU$y!=8*MPAE~Wg(`#@~B+@mZ) zw6@0zC;j63EI@j&%6Z5M!jO?dSC1CR%AeUp`=ifNW8m2l=(Ab6ztt9Aq$n`kED#Ss z{{`$=`16$^MS;Y>mAg{E5E#}yp|M7EK7O--+Lzcw{mn&PogSAJo6WCzM4G&vYIG^p zK6ue2=AzF^haEH7_-IAwlg92Qw7zq4W2@EOPJBsZQIHV)sXw$*q1xFd&8mN5&6*GPKLMfP*#Bjh{wWK#;`c#}_WkN)Pjmq;hndZ|U&#+bOM zlGj$9h_mJuWmf{Rw}gaRYkZB-VI?KwbDKN_LA#?RmwbXN!b|8?WYadrI?3LSpmN)u z9SR3mS-!@1=@ai^uAJV}BfdO3kg7~aM50yZLp1L{yEEBFk=UIjHrPpBw^Ax!UC4klK4_#)3qIB!u&HeuKo&NWSZ3(Weg7L_*%ndp}60Qtm*fW3P$?q<< zzOlv`-KUm#+G2s--B*Osl|JsTLKJ1|+0q1Ly!=0cG~ccIq;{&pzTUg?#We@Hm>sKE zF8*9LpVN5V5YmJIy&`%e3^5D=>c*cml}G7d!qHqY@}oTDGw8fdf%89=HG$9Gbnz6h zu8VdhiSj&{jzrl?>EX*iB;Wt}h!8+hmoOY4BMc^B**HfT7PlDswpntaFHqhm6mC$4 zIyfvR{yL$mZ%VoVq$Sq$DKY)H7d%OyA(8}zu&t_dy%*obu#WS=knxU3kw_%$)t#j* z$2j0#hHQIO=7&`qy&xv77D(~WczBpCJb3r;`F3}r+Jet1BKPO`ocacY$^TRX11~`8 
zB_G8eC?uUAwqbup!2(gbiC}a0|NSf<0ypqC+hN=a5*i6j2|61%4NHP|=og>{#E*Ky zUyAp&A&4g57OzuYoQvX*be;A(s+??I(Dhe}6v~TpY}(oU9EsVO^iB)5a20iv}1!Du06TAdLSQ5Mhg?^>X6j|L$F- z$Y8y;ILNKx>-_|6BlI1Y^6K{HD0YgM|4(B()Hi&il5vw7zzG;k`%dn|n*E>49U82u-R7x0dru| zZEF#3J1_ei#KmzxHrmrqmuN;4vT4fz3ICnxVjbu6;E#0w{J=xy{P!h4d@y1nMTtoI zH*Rxev<^eRqdwtO`Yz5etDP!j{Sw2Wuq2ZuoA*ZmGS#by@+aNbR}G9&KogYc{;gdT z!=tu%q=fb8Th0En0LObV^PIae+BZ{Y(qpRz*YF$5o}WAh^18-qE8>9BtP>V<>p!K@ zN08gao>VlW5>TwYDGHSNk~ux|f8Lgdm~Hr$YS=JtUyYpBxLRmfjX;CSfouGO7yx9sAWc7RFB`_^~Kz z;xs6vy~4;}wV6~vRk*ohR2>@r$STK0+-tWSv&iW5PV~+<-3W3OdcLyDla@f7- zKgwi>j$}%O5-I~?ay!*yBH1dr?*j5v{%O?6kdc6InGKLJrXH3B+9ZBNM@!L`0w+Ow zq6qTgdGPgSR^h)dIT9dSBcn2r#p80wFcjY&jLNh$W|2sV(m$f77#ilsF8wj#@gK~D z3fyh&p1I5ks+*`5p;tkY4a(TA9kEVg(jfX2`txTB0|qf|o#H3`3*$f&@i%)%9$k$Q zQ9jGnH3`dg4s7oYBAfsy{BQ3r|6~J;{t9UqSr0d)V}fQJaBv)oX2fmm3$75&(bK2E;vvW=q*lfacu}n|9QIn4x{qhb zS9T57eu!&_pUqwG%uDj|vWhVJIZJtwc0H`7fmm#V`3V+l$ zf_3afe#iZpa3-|^QGnkns}#+-55#>2p`^n99S8pN@EK{4-ctRLlz>`h2q6RgQmu5C zpMG5p?am_!yb@X35>#1{_b<7Wm`($pa36N`O12W5oP3X^oM2)U58l8HqhggB?#%ds z_AZ?hPfV4rZ+m`?G_ik<8chsnRQfH%uF})MZt$-D58Ew}r{Ib2OdsU40DIiIlx`0l zKu6^o7jR7G*8#*Nt()Hd4GJ0-w#NkBh0f91q_PeR`yuw0>r=o^?9HsYILd^|b?i75dg2qp&Bq zUOZ$jQMtARN^uuk6$SzdOg|fNqfK!0yb1FYBy+K;2q2o+5AP$y^`!tA0AxwO97TbK zjh_)=l|(dODV4|ZsVpb|DE!>&cG^}Q4=C*CpWK$g=0l>=+osB%g@e&~{TK3R2{|9Thw&ym!rxD2r{B+*Hx-Hq$*jyaYS z9y2bG&~&UhNRm07(}5g%4GuLAC)IR&`P~RMoBRhy*lvfUwIcA$muqmBJZsvPX0xb{8ey8d) zpxl`s`xh_ypOWn%3Aspimh5Y9;H?tS) zzVGI%(mXAkSQwABv4ZK%+K2;YJ{s^bqd}AM?M50DJ3FRCs0#KjS(**Si7$ z!VRNVyH_ip0JR-ZpMu_6{f!d;XMM^DlBD{f785wUV#p&oE)evXNl57Dv)^w@PCm9# zd}{sjHeEj~t7XJNuEDG>P&IH0*A{=|JFVp}dglHVN2R<>BlLI3a!+L6rBe%_!;%?^ zvRj+$FqT&2+Q;HlDkq;Cl6-~xaAt3^=<7h(2$~QsnsfK{Zi8RkHqt^{7iBi2Heq>yOaMV11j38)!QF8HJ-y_61@n)=G=P+J3(#100qQQgL&hOWFR+5#n?p!e{Qq0z+VV>F1j*!Ymg7x~S%X`q#DzKjapk!1C2>$r9cbxtHNu;Jd1sumUI%&PD!(|@JISvxeFCZPj zjr$ysNL2h&welx-w^c)9>sE-}}gRYikB(1p zbT)JqvI(>CkY&3K^Ym9Mcu#Gv0HW`}a{YHt5%fh&W&k*Iz2r`E(dEpSbVaR 
zpjDvn{F53cPH=~zdLp{CFybTv5!9GIwG=7+LP$u@6^igF^DB5C^~2bc!}_G8-DS5q zbTknm{HA^?c{%Ci8k4;J!g;W^UL{sA{7V_)e6s#tV19PB@-reN2OBer(ZDQNuiat% z40r9{obrzXkjMQAgVOdOr_-W*n`Fj82IAwu0A$X34|Woy@pWe@5#PyJ40r z=$BYl1L@;OZLqYv30t@Q#_fKOn8G)+^B4_2No%icrO~w$*tFXXt9~5PKuEXmv zj(s`?67ac1$ti)SiGdR=|HVf}(<3qLW{Xr0e}~1ZftEk<@fqdOwe*Fx(o`QNm%>x+ zBiAkDqOi>m55cuGeJZ8or<}$(m&DtR%nV?An!>bF4rDv{w z75PFKha+{X_$!AGCd#`$t8{TH@A)6%4h#_)^-Yv0NKv0RcTP!$_%TK!*|fjcJ>?MAg5Z4A}7Re>@mRr#j`6w9#wH}EJ=!SR~PN@N=P zp;Se7$Sum)XLS<_a(&e8XhcBu)$na-yw?^C?0|S)1G|!1GPf<5-{B+-K94~P$wTOB zLKvsupYV4ia-Lx(RQbkl{PV8x8xfIC2=cl1$Ruq|Q4e2H+o<42=}5p*5Gjz-!_><^ zvp$;!{xlBI&e2@;8oF#cfCXlMVvMJy4?PtBYgEx|)J!^mJRCJ~J+w|EAW8NTq54w* zayi-Go-Q?wEf?$3K=%4`7tY+OKi>2q zvWwS;?Tulya+Aws1nA*0XSkDGUe>XTjtfq0pGDGk>SZF<)lwCVsrBZN_ z&4V-8bpqvcG0F-$J?-6}YZZ&^vkPCB!}d+widN{Uh`2vu;_A_k(*=tJH(3cW;3@2KN0c?d*><^|7eUW(?s*TqxGB5{+H#}aYBrtG z0$nl#UzhbizW*p9`K?-%uDXvha*##TsaN6ZeR5j10O9Jb95y`rb70+&==V@!Nr@Z@ zrHKS|EyQjnLRahW(P=*C+r!(EFkG-_#y~4A+|)y)%U|EMLL#Q&i`DLLY73Gi&$dUR z%R42a%l^rmTv$H9KBo#W+DcewdY>UGjs}2^nYQNtw6Xt#ngH+LR*1?Qob}@y{lxo( z;UQiBn-LrWTU+a3aU3MOV30<{n&cKClaGpC9{Y{i;}L`<3;4v3q^y>)fG4gmBcRekC)f2F@ zYTK6L`1OPk$0nGrvm1Q4gK~>=v&8?w*$&TtQUpjp5H%Y@@th3_&{UkTv?2+I74q|! 
zYVzAZ<#W7;$0qjKKi?XkhG11UCcfs2$=6w|_tqZPmsdMCb4}|ksv%`0PvIHsb&3lerLnM@sYs;(U87kPbqRJr^4@;-Ltk$>ok1( zIEHh+4o8JwMnK<>w)hPuD#E#4%-!&3#1k#3MN7r4IGfHpKAgTHZJ_{o>7eUJ~Y_$-wQM(=CYP z+T~t`-y^+rkfj-8IASeJ$n^^YCUeqemF{E%oZAAPc`{v4)xJ5+w{4v#ge>&zIdMEU zMUQKNF*JEUgx^cPWc4PR0;a)!ADb4ON&O@4j9vJg8yz8SQzDA6*1{<0>XO@pWZ6F- z+vG`wJb7ju{$$nZ=r&$UojDT;*Dv@1!KI4E@utdo7s~|dM1{rDaRFU?l&Sfr|Bgm~ zkEpFSm7anzW@5F^DG|$%5V`)K-KuT9u9x2la%R9!x9B-s&bVG(%}<}e9(=mn`N!n) z`eoEhqHJunjU@!fb%q-m7*mX%i_4uzQL+U%gj6_Y(*_OaLxyuCA-Rx}hll3p8O)OT z#YoX0nA-#%9w@VCT*Lv*RT2($gVfB)gRj^D`){TFh4VAtLdC^O8+8e~sPjLjTo&+1 zGFdbNpsA`4sO?4PLQQN(a9hAKqgh}uS?v31Ij?NSL$qURE~7?)+YKSaqy#t$S|0CZ z=`+xXwhYN&&MUuWW8$?3pUkYT=qwhv@=oJ~c>0=`0cVMhX3Yfy)wDN|041&qML()TqJ&}XA^b7i2YzV{dt^z z`z1~=$P~bH=(2Yy_fJd+JrlnAxW*5RNAgO33sJo|*5r}bP?S4{MTSio#{Ge+b{kI$ z#1_ptu5D#nx$gz;un{Zpi`TZn<9-2YI6oA%&`25j4C_tz=3hs zWT0$*(6R=NP_4py#q|I_OAQ3!RrC>;;V}@?5tyJ7QIOWE z)VE`s8_?5f99gP_g_wU~oMe=!4Gh)W))_6x8PrG;TzZA`YewbNLABt7SX7fqeP1l; z6(23?v_{z)&6T*011??UZ2BO9K{HMT*tXd*W96AEYX^_MHJ{OnsP8$ zUf;Sz&n0v(>7sBNBm0`uZvQBoPd5jx9Bd(*<3^Of_eXc=`$f7Y;J#h@HrC-fVj5Ml zWUwSG72@#2{}N&x4&HezFIIh^LY z&mV(9bvCPtK%LB62ozG}|1P9}JBaky<3nKiU|>g~TW42SOSP2V=@74vkwZeU$z(^M zpD8HJ%)`R!gBF|3?d8DayFfX<(C|FitpO5R-#wZxy+~T~Fc#>DfArTX3xBlcA0_owrDQ(^C=Tg$)><)@Z z4pm`C;k(|O5Wt(At)5fTFn$Xh0Nk?+q$4RDM+Kn4dLZax2pNN7ij0!*Ma;8xz0Zslyk3`kgc%RR+2e42CR;qn!czTmDyO`0_?o@Sc&5<;uP(2H3erw? 
z=~{;?BicQoC3I!1ut|doFI-}yA6ztMz+<$cB12WTX}3+f z!q8BMd<2HLyw`Rsytz>3v2M81zjZ&ksyT?a$vzDWlo|6M&u3ZR2UpKfI+=f0{mP6J zLc)L9CtC%7c#dn>J_FG}UtGnbw_r~uSEX2LO-?0Gcb~0^sGQOLF1nR_GLGYe9hJQb zsoY5q-nW^Ju==$J;!6Blg(eB~2@-&pM-Jc)pb8LR3*{5JCVtAO68J>p06{IqKe|Xl z@m_&!*2g+d*8k}I%{L$J*Bz02KL8Pg;MiC+y6+dT~f$t+XXoQL&rb z^Qh9#rpBs<aTtbZpl<}6x+=D{QC&OTvGTBP|tTUjlH~4nm;kVPa zj&+_wn>Yc-4+Ud|GtR{6OhkW;?Tlm+w_QWwR^!J0`RCyng`QjY*8L|nYt@`-GYaPc zSA`XEAw3MSJ zzV99z47A$iTf%kU>PmF<4`CkH$J9(}IiTkooJ8-K#D0%8B@7*FMVj2xZ@}j3jGnOG z+dZ~gZcsm7tu&%F8jFhl7R#p8JCw$7o$fXx730@W5rQZ(3f&!ShNTpdV&eKW2|&H; z!Ui=FQPA|5Z~PE!3K98a;^I=Q&;03do%TQ@G|l1po`mPy)$8XNY(%9D0!m^8e^7bf z$Ht{L&mL#Wf-4}DO7&BuL2!ewqj6`_Os;HW;rD?TFm+M! zC2--BMSH5{d5chC`8e-=1|a55Un~AydDQ%qIdVKK83t?_A~OHk_{x83gHE(U;{!NUQrYn~>EdHv8I@Vn?AQ&W z-iEd(N~!jpXQ#KNe0kxq(j;;M3;3~fy1zg9p?*mYs;v^P*(WW%8i5#-Iy2iyW*;xt z9Xrk>NsCDzk2$b1Hw;O|?b2sQhvL1VK|M5Fk6jVYK z+jD;<;|WQJolw#%PG|E<3O-AaK@N?}vBoSrwYo~gb=UhX!p~)|kcD4?Ht<&)10o-&bye3d@d+y#^Y>^V5msb3L1#-I~`( z**~nyHys|v#b`JCBs`PDxZnN|I=Qm^#$!>zVk2XhS<_ZotEl%4+|%7@aw&2(v(Nyg z!WF12X*AQ2aWs}a%3*R$p6?1GnuDey!RCkh+ZQPzv^(yEEvz@Y%yB4dWawCP&ePWJ z-p~x>*q6>t1U=WYJ@!Q+Mp8T8#{!T8IA6d4$?vx}u;o$lM0ODuKVvTr|4mZgH``l= z>)a>OanXMZ_Pl*&a^iLB_um@h8H!t0Qwa%`N3MYf}I~)w!@}@?cdkDg#;h9fR zOgp#iJP5Imc~zm(^=dcVJMTVIelTB=&K*i8CMl}7O*y|mjyKx=vTerGcf07|EATthx7QcJ@SlP(6ZkobM5BrWE0xjoC*H zS%RC<-?=p?noi@~96wDl7Pur%Z7x7YiV5X1L=B%mH!Ssc>1$8rO<{;Ukgf)kY%_bG z5r0<0gZ1%v8o$)h*>CedKy<$fGMn+QH|yFTn;f-D&fdw5`%Z76USy}?Fz>8cy`8~Z zLcMV$;SYpbozao`Xb!h~=`=_N=19fZifh0&L?jN2d@eGFCkUxT;bh?F(L^IlO?Z%3 zd^6j-4ACu8!eBKgY4T9G^=nREt6|ZkPVsc|Pkecst``+$mH{v z5+?Kn!xV0=N81Ok#?r_SB)0f@62B(YhCNK-0o0MuYQ8FfNxeh{&=<%6^pkS;BtQ{E zfZUNR1a^Y5zU;c4XaY3jTpvocc+K0MVv^8N5_UlPkJr1+01(^}kPTJ_#L#-e08}j7 zbiT^S5m@_wAb`FChWpl}fU;uNDg>be_aPj5&ZHKs_~s9fU4++OVSbZQE3O5@&*VTC z0D(3DvM8{MFQ@zW?ei^&@6fW(oBVJ_s&+ zh>*=~osRr~4r|RoE{1lE0xf+&c1v3S+eQ>JtqLK_t8Ki_lu(bVeU5@PH}YOO{^>v2 
z?$FEI+IFD%63wO(QY)PH))E233U<30d-s)Ie{qnf48oUz;t$u=xL-&xss9K-LZRD=c;UtO?#@9*xlrm)bq65G8@^xe{I-aBr{<+Zejk~u3Ej$@GC5bT;dNXd zIA(F&#LviInHS7)bJk?j6|El8n}NDHCI6mNV10j3b@c(reLWU>00;t9OD*eP)Km6vXMWWUH$|=1ScYJ5Qnh~J$OeM3N`~&z(-}xYoa2jNd zP%k_M#5w83F|SE(0LfuA-|qv*0FeJ4ApOup?*d|f8Gvp-9TPbku-2asC_5+vpr*$e zuQKicDhNPYa0|?n|DD1oX{bnOWTkow-P(|(61xXVY_|f>Kr95WLH=Z8acK+J)*GU%rPv!KG&{rnb*{|@%uJhZk1j7%eFAK~TpAMzki3o$Y zNr$4i;|Ew(y1{X(7j&1@KNetPMwrQbX;xnbgO>HxOZF=?)h=t!R8kSjp1=VE{>P3L z;q!s;-<|<{;myx7_V;Vw&E9^hJ=btMO?#(*-zRagNSVXHV%Tx*2coG>;Dp$?a~w9{ zAXyidKsXvuoXV8I)n;{)L4m2a6X93H+sT&9iLH6tqbv>Y~U5WT+9h(;_yvxs}UdH+dH32NZR0V^ycAY&2;GUVjPW&p8 zJbYTxK5$0qa6EjkktZj?IAdGhaAIvf$oVGV%Vk82JCoJP(89gc6^XS-AR&0qPf zmtKAy6~5A*V0xvJgX#$gtevm8A58$f4T+mGXg+YjY2=GRv6-orw(H{c68%r9KbKQc{) z3WEawtP68(1`~UBoqu`y<1WCfpd?&2`BLnV0ojVU@?C-C9z*rBa*UCjSa|b&ddAq> z78YUDrViJyvc~odmBtcv2Le5hc|0gO3OUKZC;{#iu0DH0=M6Sarj88m(?4`~6Q624 zP3rbROHdtc`u;9PI5icOh>>3_sM?t$(I9cWu~F<(6QXkGL~0Vnx9}Dx>Yc~nP_=)E zJyI4JBA_Xse6t?KtR&+0WrwH8W&L;EH+_|6rQigL%u>!0YOd+oLO~449~Kz*QNoK= znq{-yS{+Y3o;C(HpFBFKE!9^p7w9rrob4O*5VA!pd8nGz=BgnMuv;Q6! 
z+nYESJW<7+{wOJ~QmDEU!_97pctvkGdh%R2<$M3B&0}MSVxqU-bd!U(w{T6JTkc!@ zJww33ybfzHAye3K&}1##$iEE*`Jj|5Kb4k5G9WZQ^WNQQqK_9542k-f7rnOAGS=TN z*GhF8#ls1|Sc*RkFUlM84w@MuCeo1wew?A_>TpHz6JJSGBL?1K2Tw|R+!GKDN1~B> zbG-%w^8RWXB-!Oa=cx6U#<7Wo_!_$O**I$V?ObotE1*9m8g}BQ6?sWu3}G*qzm3lL zU?k3SAFs6}+V1&=xgdlmH|PduJt=Av?6LD5^?TesUTZxIbpk!J15;$JJTg<){^oe6 zUyT{-Oh5SisE@ZU;Yf!^Z4Uc8X4JV=WXtuUgyIm51dYdu0OEVVT0NWtyFYr6C1I#( zuc%m(<8dc4=*7x7VE=(@D~T?7j0+m-cKiTIOVrUDsNKp@T zpocv!!Hzb4rVm`cCJ6_EcL&vINGPbdKy5+KP}FjN5;~T^=~wa|+4sd_nyzLJCh6<2 z&%kD612PRxUY2 z(M zN-`+BTOc-iSN(F(wsV@VK^Minzdee$=!nCB0;}R4308idd`|QmxBpz5#<$HG_TxH2?z^nmU+xFxzzDewz1l-jTb$%<~?yVJGfTh~qQNsL**s z!X0t%t^5%m5hv2d4&1hc*zHm4XSXN`jSH83=*q)(s5GCi^`hPJm>PN@~JZF6|>PG#ymUx&%8Xx24`b4%eq-=CJuqw8oe3dQ~qUhYjrYgdsREa?;#0 zv|?O&Y!;!qpZ(0ib0MRRC4xp{aos$f8ez{K`wpzOrz@nlEQiANC^=62fuU=Ks9^xX z#UbSAwjR1#{O4%PVQdwIJsOw1SD#6`SI=Oo8-Pp!K~H|^PnnKZ?u_vZsR=qk{oxQwgcpsuV%26nCEs>EzYm(M+2H9)&GgI?+BAc21 zQ2#4mP(koa@XaE?Hshdv+wLXox8k~stJQ9ACOI~|d845%t@Bg~Gv_F%J=LTH9Cs$i z@k@S5ZZ(I0Hu&U7a1=x{-UpC9A{+^m$T^IDV~QzGuM=$oveMtPZ~{eNZeSiWPiBZc zk@Yo312LfjugpQ^Sk>1Se#7xwh6iAEP4slwU|mxV4jnZE@>No&Uvyf;Q$;*)uaibj zXOxOXAFKw-I(qA#_@IC&apE#oS00I>ffzV8k)w5wiG3?g--9CP0S>o7+zzwVzeiB( zIH=r-zXd<^bIduSIt2ARM@>s`KvDgG8t4j|Vd~fa;vcOO_#+zovPPEs>wE0SN}2#J zTF7eF<%7idO(G*u#;QLEH6B2)*AIm5Fr2df5a|U%+pwaZX(LYjDlwnd>o%0mM>|5h zmvbRvo2#9QDVA!XsZz6w7Y;A059p1VF z<;zYv<+j7m^~_sx%Z)wa*LJ_@3_J=abotL{_f}8Rjsvd?*Eh>2!-pMTucKN7tK}Mr zkN~0jL;A?;-fB1`<%;-jqG8#&OcXklyr-Pa=2LxypG1~52wS4>&3I9 zW@3EPfhp`8bmZ1>UY!))O_|OtrBSuyW%!Xn!qyB!tqOH=uv~CIaJQ_ICOE zEd1^kCzlJ_CjLf-e9&g(z2|pc?5AHF*~f`~Mca6;+X0B#@1XXp`%Tuwv@1#wO=$#e zwB}3;e!>;7Qf@%7DI!C43UrQ>fWF>KTTuzxNm+ifj88$E$DCh0U9=s-PpE$YMUp>d z#?{#vIFtdchV>Y?8<xHg>VBe?)Rh97wiCsfP8+C`!i6|}IW&38qnm*1gYfTV;bUZj9KEr#4e0bWvckdjNiYw@ z4zxdHtUvYciZie}d{wZof<_qJQP?z%Bc@uXYc~$|xPDYypq>IFd#>v;9sbL1oN;)3 zj3I36S6}*%A6*ORv1TJxtK%URk5Km6Tdsul9GZy);JvHEIccmmSj9m>p+!sbsZT47 zpUsA&Ag*rjAOVoW-09sxJ`>O+QCFDmchw88=r}x&V98O$YvVp=2$iOMkkr>SdX1su`h}Eqm&isLISMJmuXNXPsOHQk>5c& 
zW9$wHTB2;4S>3}jm}(D+<_=)0J@Bgt$gYyEYJHy&VfuK+6tm;hePmw``s`^y;CXbj zPmZAFi@ACp+`{u1DaxTy;ZO$TN!Po87%y|pl*L?yFQ2_kCihsgb4K)bQEsj_xHY0` z!2Exf5o(aEf>%!R->%xgnw^1BkU4z>Y5v}W*=9mB0N}g;h$D>XS5!U&Jt6qJh52be zP61#B?@T=^c0GHO%3|^B?qtB49{IW{hz8Jsqc^p%oU0I9@SQ=11$WpC<|!oiRJt3Z zNtPtCfiZC%v9LBW>5)3oJQC&}DG*=l6e@Q^!WZq`G(dRa-T(_Fyr3@s|H2D_RTf?( zeWWPyfJDBhe=tF|%kuIRzq?$A|5Qc|78zdJY6P<}M+y`SpQpsmu5YehqoU#QAd9H{)GtMO-JLHRv|xfxv69yzN&Y~+ z8j9v)fLEpzbz8SP!}YZ=!I`x5kkg)g2voW$*Y34dgo>Eql6 zG!D|~%k3$TT2+dJTfymuVomLH#BxjA!m@`T_bk?8-fyq3BX8))slIpy z+m3wPM|3F?Q+-&d#Q{N8&&RUUC}A;s@W>^I7>U7niE9INC*gV%<_hf%GTTOh zC+UwgwJS_pU!dA-^Z;f18JJQ|BF&tPS3UWW&p?M6BF7DWVXYJDrO7d(?r=1&<$C`6 zYRTka@Ym>SOgBe>6K?4}6kACczjSefxG8&@n{!N;9Ui&zf(i#0GwRInD*)VhK@rgG z+sOv6)a={1_V>zeRx>0+arDjAd&i?Pp7-H~Q<~f>lz7uCnz2RiXZ0N>Vepd(FvV>Y zFrxA#t!>J?wxh-IVUKL{y!$3`3b>^(pyyNh1V2a3=z`sxWqW)P#X<5x?E)N9UPdh7 zDwT_Z!~+1){vav>pAc4lpZ1-s(516V0A;B$`bjAG8AS7f87XC|JBJV!Cc0K*vnnEc z^@Rov5NhCh@c{gBbS$*|8~D=(2=G68oT^a9mL+daiEzaN^g)~w=+CT;@qBSnE(zGK ztH>6faAeXCH=^=WJ^c=o`5YG`@kJdTH1-W3t{{`+##9WJTsng=BtO4MW4m<#dj+i= z%&57owRVtUl1Ya0_h({fhCS1Mx(|j6TVfUbuFY*xXgrDUL&)T(bn51ygdrSWH}aMp zZCbmMnq5r3Y`V?6HS200>#psYXtH{8<$;W$s_|M*t-@5+rV1Z>{b3BvF0q_2%ry3U z1V$;;5P4g%n*|h@n^P1MBd}QbJ-M#k*`y!>;6}XpQ2x9Q-q*U2C<4hZm;3ov@l8FK zFy9Qvok}(~B@X2?w*~dDM;yN$6?4t?uTca(1PzwB&qJln_Yu2yZ27<25o?nbxcvKG zC-+hSFzw`Ha^GJKuV^<`I43IUw7bkmES{rwp_UtUz#ak5m{bG~FuYDg#>7QjP+B0a z^g8F$MMPffp&za|vD@i>IYJDaKtz&=cRaI+ck)x6(Y{ii)e3;S-o6lotfjOrvp)B{ z|I>l}PU*7oM|eO2*C@{cWh(aqKISEIqPD32+V)g1GB zzU`+|PH-&uHu@2`?Z*OI#gccWcOQRR(}9uG+dso7TPuCb15xFTV`I6oXd;ilFThD* zh6Y+3uG%j2UQK&(?z`(ma|GIC&~hS#O4TUxPexBiL6zoj;t_Re9G)LXXj5G{==cI1`CPx~vg_ zQDo>R;q8O50Un;HLc*4>Q*|2OtzEY#V<9Ln#J5uBN)?*Mm#xyE`PVy?tniWI+czJA zMCn3!B@G2#@j5MG#<$l6aGJ*vhsj{aCQ* z1a0rjbt}UJSB%LpoBdN{gkNKVP-YLcgPx;KK)1X5j@Wb*0eAtYR{w2NdU-_uQ~8|G z;u$=aB|+Rf_5kGMP9)q*Y9ya`&u+!`k4m9Qq}y#0bCd0@ni_(s3aUOtz^U~sxPsj{ zEx3@(B_aplW`f=v9^MV6`v9>uu`i;pvJEDl{Mlgl8|e{eO&7ZV)KwYy4dL3Ce#Byz z!1j}$Jc*Kj6KVv=^c`xw8Ec^3u;@v 
zd2IKy}WId%U&!w57`7;ta`N9RRd=WM)Zsb97sc*Cr_)zLXo$Y100 zD|Ds2jnJn zZUctY`1_sj-FNd}BmCyIr;uJR5)fP4ToNqL7kwJOM+y9zEeC67;ZvN(Fxt7rSU^N= z>R$ew{ne^mLm36k)lH96E?Q(c=HXB!M1=Fym9Bi4f6&@K^rmuEdB(EE*r+sN3InP{ zckeRVKI*J3V-b%amg>L8(PV24y@JVM-w&LO-6h-yJ6hmPr&oV`wDk3w><}AUa1C}b zi9LzqLr9iV)USmQUxLE8k%F&x^6TJYkp0`~FYnmRD^fTP_ESNtq6y2xT@b!EAI405 zP+z1!STFB4(?H-b9`{R2U>h@%@|jw_wazWj80`Y?OWDpVuF@&q*=x;5ZE|B^Y-%AX zG|BJAiFW}QM0pQ-iSnA%9xpaJ zm?Uu-p(hbp#rK7i$_ect=}$aJ$uqP9HVgP_@;g3ku(%ql#n&Y%yu{m3i5!6VRO9qV zYwuTKw4OP08DI-%>jQ}b$e9y66D21(Ko3Uu(1Xc}G86y12NS?~Sg(g#=d>KW*J{vS z!{e$()7=Pl#SYp0m4q3Nu%(1*6mx{eb{!HV=G?GG=9f{+({ z662dcfu%oKH%3-Lhhj6Y`IG>5;VbnfBu~K>qE>K)#n$oG&B4%z-IH07{1jJv$_pT_ z;!99<>DEY>wI7*uL4FD=-w&mhKf+9^{07Lf5H-L9MecDt1IK=N_-h|(J-Nw&jcZFl?B2LP{wx{{6 ze8F!kMPG4s*f~LG83f!%A&5#}0CU(1E@2vzfC7sJj|qdx{O&0Q;H(xJ@7*tB@rh5j z*(D{~mk1i*RTVw?)3v#tyr#hBhg5=u-plAi0SL{mbQw7MC`D%!nB3pL#K6*xJfIV?2_gS@tG{ufHrDBayiOUDQzAtfOtg0z&h zG^2DQ-3;B`Onx^$Z#>WM{r+psTCf%@m^IhA&vni|d+!4uM?60F_#*KdSq$Lq6VB^_ zTi?C9N!(HXJ2?0X!G#`0*+70Qx$h(#jc{k57(2Bgjrl$m=PO3X;e1X#NMma&9$&Hy%0efTmBYFOh8rSQA0X zv9~P>)5=x2Pdc3GXS=O-4WI~}Rm}%jLMqFySUG?aMZ}<1^>#zk6*Wd$EuI1j1$RC1 zs`fcD-Pw9vl*zo=3M>SB95*%Ud$`c(04x%FyYhjmsa7Z#*IMY-z`#DNF6jS8Y%xhC zMgo(kKBFcH2C0QgI@iN+b6C}#XKhmy_m~C8Xz$|AL|Ka9stIeo_lwCqnpjDz#Bk2S z)oj=&6)|B}|Msr*+mL|0-15&nroyH2mE7Tf?(ihWv;m>{Dt^DTMlQ!COE9My;1qQu6B8VzEKz(s)`{B_!5XxzhTwv#Tt+y)jN)sm%pSdA|(8 zkj=AgGJ1ddPVG!OGq}wv|7sAcwAaqpArDk_ z-*)Sz*QZWaAppGO=!ty%+IF}luJk~&_6%CiB&mbGnCFxbZUv0ROphleF?MG!plzG! 
z``5wZi5=}|0KB;JIRxVW^~|5G$UB2nv*1knB{__>M>6`%@A>auHrHb6N`=-&wMH$S z0<5gG8TOja2;AwRb>z_{h4kh{n;+5xBPHSPo@;N*P{`KEZULl*ed>9_FQTbq^ER34Od0=}8Gry08lc_sgHFk`UYiWMcdm2EUd4VF4*nTC#-sgpuLFGUxO zy6}PPZt9k`nX2V=XvZ2fZxPHk=m3U zuyPm+2ZxCE(>*5IVt~J&L;L}+%0XJ=m^|{AILso8LyRH991|lA$f08tKW27GM5K`C zv1T{`pYK_B4iM?|wKVKhO&`I>URcC!3n5ESw^%QKMIn(xX zd~}b(x3ztDQ#*`h|5k)Egqo1@|Doep6|Njl`toBzdpYYnnK^xc7*(4!wDbn;Lw;as zH#lGKl1oP$QL_dp{rFv{#)M4XF?fP-$KikhcqCv+m3I z+@I+$@-_Ik&0Ft+lDAV`I^WNWQ{*yHC-nq>NE{HIcWkSD%v`qwh=J}4V!Kr?G!cRGdVUspgJBC(n$3|Vy9Q`c4j>2_6eCF^Y=#j z5T7zJ-x{KwlpXY55I_Bf3BUC9PxTwcc>ZzRNpLzm7P{E9Kxoj{BKtltvW4FjsG)Jr z{2nrIJ`_IIupRnop7nLT_XMXLY+n85YF4;?rNZ_jy; zV}AewhmmEb-4f?Jy}1%my!eG$M^oId&hASu87WifS4K1?Y!P=mZ2=WkG<8d}eBmK? z-|fueaulyVUS#pPz)M)b48b@w8j^SLMYQ(@EA!ZO3-V=yD5k}N7auKBZg8lc*s#cO zGUghf^^Ta7Ki!MIKB{hI;bECVCOU$ME`Os%Mx@-xrklg=_#Qn+1x4J^Jh`Q;)hn@7 zy{9+Wqu+rfUY^Tj3Ae}ZltlS%nVGwFL%7J*Y-inNJy`(VUapG8xMp*OQGJ|<>yp}^ zxd{knN(73+_T;dMvFA_j~*C;@4|MX zJeH$#Vl+OD{cLOX6wj}DgxQ!!)AsP?N|D|?l(dYg{pF-OAx~iPIk6!5$$UD)sE?q` z1NHc?^J$G4@6558e-?i`fh@L)4`oH3Aq3yqGBJHm_XWxkT2{LXWtL?moA}b+ z^&*-t-*o z-U!_t-=W}53;&Fy&OK&IJ3|Gw3X4>g%1~efE)W z66j%HLHO+PdA~J8PMu*xy!D#&B2}9{z|jFQ5hf$M|tR?E(9grm3H#*bD0Dj+k_Hnf-=$0u2P>-i?t#jxBp6T>@pb| zP^}c!Gv14{lE9M$aGmjo;MT0j>kVr|*xB^!XZ)@K4pAM}y7ZSrtaf>CJDF(Z7@T1T zXx}YpGfJQ%0vA^L*TU*9FL0rr(*fv#HaM( zHoLFuqu}(cL5|kvbb)LkT%|@h)2=qQ2|-N2v3Xu~pCL|s=eLXk%`1G$$eF1zRrGRO ztB`}fYlb+A@TSR7BI%cEbK4t%J;Ss?kM6z889(oCKd}wI*7*v2b-Hora@w%WdVf-U z$&`z1eWgZN;(j)>DPl$5`sl}jyH+bAy!pHOk^7CQ z@J}2}UlPGA4)edGpl+^CjuSVNe2Dk7v(wI#v4{4T)-$lAd+J-909gkSUF2WcBPQy6gmYhkmVsGw{5NxL7Pg^?PM43`%NQgC zb_BI=Uv@dibPBw2prs=u01hiPC=%;WwuCKXY;dFegD8v-(W84cQ`lJ`&EM3XEo-1( zb}y1j@f1g^WfTZ3SFRtCsWGi0-cz^5hhKh!wtb;y16=xU6ce`!Hd;e z^WUMIo|>=eCjMppLKb!>a7(*b^LgOGgU6PGQ1{wdbRYIm<*JKncjn7D6K(^uyGM!+d9 zyRYmX@Kvn7tKh@ixgGRPN9wumm2GYChNgxY*(2ux_8K0WBm)a3Az6-e_+bpdiT2#- zU>f1aeCw_MxmM<#+(Ofqebc8P?T6Mo}Iu#Qihcgxn{;rgB5^o#Q zJK_HRnZe64d9Z!t#3qMb+rh)k8}yE+8?vw>T2x9jEcSKQOa_&FQ9vKKArD3ty+- zBB7OPp 
z0ZVn;4^5Qq3=Ry0L8g~FtV83&1Lod7a*=vSVpU7QDcs&^M$4L1@ zMZWaS2kw(|OWXD!dnEjFayINW7SgvTJLN{{%ZyI*OpE1!W0wQq#iZT9w^T&)*RiJ8 z_f}#B*&z?Q1Vk@uTN;o5PYlSivzP;qzL53sy?(uKoGsRpY8nw-;V9o+bA+{xEe+iI zGK|IeX(`|7FJ(o)zy>;M^SFC2i(XS$KWit?uiAM!yf+>4SF^)U!@N^~?#NU*cZ_Hb z9O8a{C5RtVl?A}$TV`Y%wSzW1F;zj5LC>s({laC}6P;XC&#r-)24;}-MiAw_pl5qC zsVe>$Y9Q(Ja}sZElzGWHaP3dnDWaPzyiaWUZ3j}m(m%R>3}0xFu{dJ@^m}&z_R(wv zZs-V1GEnRTeW~ZqinY_t5yGN%3Bvqci%XT`ZS^8W2TYSeqIpSz>xcVhL^&5*Cg?>P z*q6bQ$p*|xCp#==&99Q%CTOGSibC)DOxtSaA2;}HCvzVs*kyS$xJtNLU*Yijyhm*tm#e4KrJZ68ITE01NGO1Skpoz1t`Bvni_X#Fj8TH9Cq z{_k#g3!ERH#tHVs=^VBwdQ{50aeHCmLMew~Gg1o1x4s$lVcz8oO&DU=4ddxKHE2J>?lEHh&%rV$XZT_5XMpUt2w{8Op z5PBXv{mkU=f#&Uatp;GppWHiP(tMB2yh95*ekZ@q1_~OQyN@`Uc6GR60Fga}pM*)> z;Ei!*BUJ$PVSE(mVM8ztGg8K=e{lb5r}eG|EKwdlLXa(QGpnJ*ya|3F>2`I+bjt4( zIT6!{Vs8aVTA=Nn@4@Yme!-5-{%fjW{sb7v+ee#vZ4aA6d&KM6D>`oEp6~9>x0nK$fw;ft34tz!<0@8&O7%jHSGoEg?JYA4RpV`fAhY0O% z*e4k<=coOY%DHx3N5b?E1Q*+8mGfn>igE@Y1 z3ZGOlR&Y8AcS(T3rhKum+Hh|jUy_i>HTll5dfx%T;Q%|3IkLXBxCK!8W$nx7t96}W zPJ>KI~7AhkY)|DGwoueHVCi%P z%pHS9Zc)w{&hf}%9Omzb_s%{DT>l7~pCsf<_X2$xr1{M@_7dWR9B{zKr$9W*u`Jk5 zFg+SG_+0w%9z#a_-)4VFk8vzZSm8-kAGdvTz8xa?dJX>+LInnF_pT{|B(0t;>+fH8 z#&@qFt;T@+NtRK-;V}i5QI>ji-8b>OFQG{RnBhoL!5zxqP?9QB+R?y2U51M(1ayd$%ZQ8aw0=rCKOg86p%~&h>b1qOwYSRO~BBtNJJRUez4BY6Z*KgbVMbx1FWfB_$uATqP#Ex&RE zLUdf_o7~r<=n^M#mojMhuJkE})Ax`aR}QSkmkV}CWP|yWR$5jP>PE(xwwyH(!2Dtc zX^K-uy2*!J!@q(7Ww{caV>X4o;=%q4o$VDWC9P3So1yD{H_S`WHiOJt0L>aS1L8gu z{8-fKva205b^Wx{c6W6i4EH|zW~-BA5BPd(*K#@;JO0f(jN~LUOe&H#E4_dHueu+w~2>2bj%9~nKDZAE78u^$2Q=?Xq zz^6A~T<-xfcJW|5k*MlJ?-VBcYrE?wG-ep@2~$HQebs#}6<10R0D|bgSWDL#OMcwE z$aM9aC$;9!#E@o`YDG)s=~aj1x572Y5wm@c&zwDt;lV9y;2Z}+vye#l5>utQq@@CP z#K|AD)o7|E+4gjW-F3{zv~=g8`}`7j&n>o=H01MnogyFb20Q`ay>~Dv%`cSTvk$zn zcw5G5CeWn%*QXhxVfN~v1$cWo7*qkW=Rk-iJV6r@($=yf|o#%rpJI8PpK zetc~^wktJ}e(0AUQC84T7-2evm^!hSQPaatbx-Jrf6V;Yhg ztau1|EAxu+bc3@0OTBr<723&!1}me#1SzXzv0vlNamhm8f@bM~I1IgTWZ)#3rENen zVeJiMPzA5B0h7>igW!K5@}M*@&4BGUX88m%8~b0$6oIu^*fU!O1_rvR+IrPpf7=Cq_Q>aIkEAF}Z 
zs>*WIb|Mg`BI>WgfR+v8i>PbitywOB-Q1dQD$N@DdQn7%t>tL^Ukw5of% zLJFrRsq@RvL;5}}3p8h9@s%9gG+KB!a`Re<@z)PetUQuw>(*;j`o`ZrV!oap`FMBg z?N|W;uIDz@cc!s-hE-Y@wPWDDSzDp2HPW%4SDLlf;=9NdOWW-&VH3~oHCsi?!^C}7 z-=28hc8iZ(0<}RlQYHY)e+&Qwb{K3Gm%sJwjbFVX>*sBeagyHPO?(gujMj2lZM=2{x@SwP zN4C-E54}X~=mEkDqj4qqEaHJs#?1{4)Hkw?ms>KP_};O2Og=U*zD@`!yn3Dz&P`CG zdA5t#4Kf3Hv*(%#N{zLVl$_^`{t1T6P<*C|7&mX>gm(- z6@b*og?H_^w-5-HY^tzgxQqX)k+X*%MaY%%s%X9y_2d;47rC??j3*>>?P}N)nw&wQ zz7n}7Kj;g&94;Dhvo;V}J(hq5>caBU-uAc2p)nmTYPnVXci*Mt{(&Ii%)o*)YK z=3bGm;-<90XQfh2+Q+vV{3sxbRG^{9SCZ&knhHG3fMWJ()9SxE%f!QTwbhel2pCH5 z=+i@jvHgsP2Xs~&_-sbw2ovLfl8{biawYo;hs);M#9J6kIG@L| z_ekZewJywGJ}pQb1?|g8UYn4}cVQ1)1|?B| z62Cw`g2*k{9sx*6zp+yOVabAL|F;)F{s)=;c?K!BPw8z%+24EjH8sj-b!BQkrxJs{KW zYZ;TR=h$=!O?ZH-;E{9j{f?acVc>^B8ATV`bI~8D2#E=IM2N59VE=F%jv6uouPR+Y z)cn2Q6u9gJjY87;SIu6y=sXJfWs3_Zi4sOdI3?!*R)*C&3vjkK08k4hzNV%Vmq1+Y zFZ3HK7mc?`H|IY+>Vk2d9D@6Ny3yl-K;C3GOv(eOnf_jyesOja zaWxXI!_C(`Y4)A<<>Xn_y+^!!W&EP~7D$z^Ld)P|#`v?-Kq1dfwI@0y$*P(xfAfzu z>tByrV8oJjc?8t}QkWX&W57}5!Aobge-upDpn$Er<~r`JCmn{I0~pQtk+}~7Cem^A_sF{sc5rc(7m?XV zgJ0o?0dVix5B2+)Z7P>3>5YO{sHH(;ap+fZ)=xxOm4|LF2gcN)Y&9i&bEso@2uT&A z9Bv3LS@%4Xz;?)irTC}y*qsJNG!r&q$A0#T4X1Lf8lSBpwZFY0m#~|3;5@bpw}!XzDv3B3x__6_1v_Mc7e_!UqQXBdQxq-v7?__< zL`0^fUn9(d#a_d_#3fJ2ZZ7)D^Lv)7hJWcR0t5o@s6eUWZSDjYq-hHIh@&1i8X%vw z&<&E@U1A)ZOy?u^me9gZBb0gKLJWDhwVkL8>%G)VfkJ$Xc0VGo?QRAY|8TJizT+V z9=&8Jhni7vnOKF|#=mbg3*<)Aqr(+LTj0Oll54>74qmf_&*vD%Lf`ysE>SUeQwn4y zxRJGs{LZ*4?|xy_`uhxgvVygfo$ov?^DRLNaha)kh3?#9xav3ebNKZXrQ06Vz-`Qr z7)8@J9$4O3kAXUz5O)rY3-Fd!_y1Y!-f_?~95m z#&`<^qcnju%U<1xsZdf;l=2?6~O$}pM zd`$2_K@&IWdBUChpZ@KYj{~e@_F#(S{_N;h@}nO6%Y!#s7P!Mh;o(;5ZhbQ3mO}b6 zg73vKITJ64GjIvwen0PZxXZOiwf&s$`bOtc)li^x;C+WNoxhjE-qBojR+-5M-oCYt z?Ck6(5$Mfz=8H9v`U^yoqBPgY#SR&Ow%ZCSoa4U@*bLi3qvmz@*~UEA8#P&Xvcl}tA6qm?A=-C_--uH=^y8>e`%G*%Yb-{oBcoh z@-@mde=mhQ>9pCuwzl~t8zYm$3CuZRBM51n9zT*8V&yKQa4#Nh>NpV>xJa~V_!6i; z5jBNz>bo_0rtjr#eu2VB`4zPj7>%?~JU>|I zFJOD40m-j<+YoR}bYj-*Sa9BQt#8n3sq$>+aAJhyu?r%=2FkCkQ>MUfMgu zZ~5RR;yU$ 
zm6mD`*u36z{fxQ^OZ!=a-hd>cLF%dEz;Bj0_HKwT1GDG(z)g1s#2%TKJzlZGpn(!b z$iM;eRd`Hl2RIjv6twJBEkHTx}l_{fyY~>If7YP)3vL6N;HQYvu711 zb;d55q?`suO@|{lW2fzVKWEVr+ha0)KOh=*q_qBJJoUQIkk`^c&?4)zOJ}C@du!TN zEH^nsx$j-oTpOHxxD6shNEh0*0@*&~6KRrwYU|awm6* zpexs{+z7hv7XpEQoKKe`dsguYhShGJZ!~|R=jUj7(sq&`y3mM?bxv&`9qsYDAk3O! z96NKlE|#yoQJrB7`HC(kd+G%n@zDLG$5tDo^E@vk@bR%hd4dWh=Hbfxh{d4?P}I)| zP!^;kD*d#3uc9v}f_}GTIR#7k9aew(FJMbh{_Rds3;EOZ7D-KM>;$@;9w(J|w?F=$ z*I44a`^*yZ>%}UnIwYfPHE*8axJ*)ab^s-&{Msj%dv$zWM7fQ5J)Ax1Z@;eeQAJB_ zx?|ZCQ#sc0W-RWm7%@f0#PJ%Bw6oRp>!Yn;6i6kNwlP=lC7rPiRL-zHZ80Q&3))~& z9#qw~{AJ|hLUw$^^PotfQQ#vpq*~2}=rtE6?!#a>=ohdSM8dvS7u+kdXgj8jhRQGa zT4@^J{CJ46nuRHdrG^1}} z1Z|Sv>-nQeod-f8s9?$Tg3CV;`kAB8rv?;fLl9^F39>sJv2m{I`sqc0g9h##xHfp2 zB@~}|1mZQI`?cJ&01?m*jZTi<_(nfwGqRl2{o(qOY6HA=14-<{Z+oiQEiB==daf$r z5Liax8ja@Nm0yEty+tUK9s1jb(s~8rtG^bwOS_+V0Y)RxNZ;!7BmbA090%W-jRo*I zXmSVhdh|8-Pb7JL(#d`vPmT(LGiiNiMr~HjD(hrT#|nP1vl^s4G5m0!c^6dBaN@d@ z%23+iuX#=;*)5=ni7pn5d|RmdkQoAYUptx*jp+LWF=jAL^d;d**Xh56Ucq!vHof-VZyNv*T>oAUW1>es z_ox4j!P{W`mU}-;6)@spo~-7R$Aac{;W;t+p~a^*yO{^#tD+n$k38)% z>jGY&5L|C+NJK3B0SsiC5=(E-~EE_iN z{@}1?I8vMSP$=%Yc3o^z2@M~DUS`jIZ~M`+Lm)^6s0#Uf9(*FG;G!p&Uy*Q8Bl@h@*Rjfv}Ki3&3u^DKk3mBs=ScQ1Z5)0H`U8vaEu<0&i zCHSF;)|5lhbHZF3;goPT=SS)p*&OvU0xzN$L;#b_^D5pwfnbxQ;p>wt3W&At>51qZ zDytPzqlW77M4h@3+Y-m#7^dnE&a$KJ;mud^9Q`fmV49RQV#moPv-$Zg&F9D|g*~60 z=h^6aH2hUo3;cxvYAw)AVXSqC5W^7X^0x6Gy z4{Sh*Vui#;p>tZF_^j&cFZfz*S!_t*Ohafr_7!xOi0DaUe5%Qr|BWg{XH~ z;$|oT#rs5no}g7NjXPV-FWbfS7I)vVPaHAh>yQ+)?5Sol<%&n&Ai)OBFZcphd;HAu znwVFPp;={SOAs!7MXr;ZASCK4qGKN@ClDeP>IqxC#mH7P?2~1xeiMp!EnU&11s6D`b@lHu+@2942Z@CXQZmogyk^JW^wVa=u?j21r%!6pxwObqa|g zW|o(dg${>Y$ix+F)dP%BH|LoB2o|c>?NhwzIy5M^7Nq`_cNb52nB#K&+TcL5${I_2 z(k}bv&e!L2Xkt&>f!a~t-Y~th`km5lsQ1s0#__G9pldpky=*E-Y(Q|NO`mNi(^XVC z&t%(zhQ}`5cfD0ZLjPj4N_`3D5x5mu-EyO5_H{?Fry#Wwr~<2Ci$jgkWsM3y7~t*l z0;dVNGA}(&W`PSd@q3=r4nBF?i@6bG)luHXf(_t;2j1AE_f z*Sa7nW+hsm7iqK`@mpxvIPp5eoQy44jDJvT?6)98nRWsjR|D}ncRz63YLr2DKOXb` 
zKtuhONIlFdB-wZ?)~TJ?7A433CR%PRKxM{`d5_=|;Np|AQLVXAz^M5|ZBXGhSO4#; zl8dyGnz_!C4J?o{SYo3%yn19t_+m4uojN7+?htQOXlba`%>4Oyz1=n z35UMy-rQWZ%f*WuhuQd_g=j-*9v<3JRtd&JUTsR_+f4H!`i#lL<=H53T&{FzIJ(-U zPD0OB+nSp0;Yjwwis6Y;>yn)*>CoF0o%8hO)hPimU5l!KE+%ET2s52rF%%Ei^pubv zBx2~x@%j;Pj}R5HvTM`y1TF<`T!iYu}mAev+hJtlO%9V!J@+dVQ8cU8A5zNZrLhRjsuyi%s^VF-*5@mIveUu;fuM z;DBW_Rc7#*_iLzfe?qnFzjax+zx9!Q`D+QNKv01(^4D7`BhP!A7Q*XtwXfFd)KTx$ zmC*uktp@JQHU>Yg*g0GX8X#zH{A|V6P&83SncFRwW=ZNg>t!~UXkY>-sA(E7M2CuJ zt7a1o*}SI_)gm?L?6RBV)8@EmB%m4;nS74;(9}6Jeq{nt=U9%iaowX8B%}|#&?+uB zOHS1WUHgntGk3vAilKjyy%`nxQ1ln{;HS2)MilTaMC0@VQ)+S5L&(!-)ll?v69KLp zf&}oc9(*fVA6$JYs`k|lg6oN*I+SF0L z1MdXtzzYCQ?_F6CyXwM&%@J_W8oyAYT8Au5r!lWHF$6!cK?j^>Ia!DE)N7~Q**e!- zLe=MwfE<+>ZX!?xPiSF#l)cFye?<2+;Cw3XIHfmVv%fq(lWc^EA zT#18#?XvJfKaZ;}$WzUC*-V<6a3y#^^%lix#g!?S2l7&p)?@I#irwrr@btz)Mla)E z-_-T^_ibBX_P^!gct)~={zz#P%l8AVL32^Yb1+$;w(&XnMw#Lr1oqZ&jC(|Fni{AD zS9g`VtEIpyYi89aI7nCX>6&%o@{IKNYx$zx%D0ED%pU@X!F7OLc|7Fo+c^A>iGZ7& zAi3)wkdC%gh+AJ-u_~L^bhr_}iiDB<0Z=({PLyP8neSy5zYpYi6swz8l!-7 zV;ysVZEM2u>%8<1WuKxXi%;N6r()ZW3ehT3-2~w-K@kHh5_uLm64z~eiPoXYL4Ny- zH&u5_(k|=0_7wfjFI8=(CaR()lW$toBMw71hL^WEfIocN{>L}QH!O7jiB{CeTtCy|V-$4X}O@KxxWeNXD2;AoqfWjk!tr$lVe5>uVj z!BfKghbT5_ut5)l_J@<>`dH6=;GXGoiFUue_&#}HyvHG?3|tnn?2JrKRhMJkP&Q!v z4`}d{>~r#CL50(z?~J*E1jg^G)Sk8%dTjF(*hf~=sVV3evPbF~(Ii^nwkPTsS1n42;9zdrcNvYJ zF0*fhQ@k*txKDW+bcbPULz)5T_xn3P>(R(ElnQ;@0rSme4fyk}2%r3Vd_A#8$20zs zcmr%#>3NOROhc^_fxPtPT6lat;ipaTHsb)*aFWqHEv5R;wGse^*df*?d0ZGA^Qx;3 z-PQD9X@=@vvBsy=k0wi{${*ygus9zgD)vt*0fm_J1+RA1H+{MFRzLp&NYd&&L(1-Z zSs3b(M(Xy%)&_XHfEnGP-wXJx!R9gDs;ea~aQgREy3JD8=OByq-k%8fru&n8>hu_R z3&`KR36I2MMinPXgEH$Nm)FuZhNlBikxnRn^AbmSMbmSJ9uVo{JH6GXVV88#j&O7MGCoMu^V>@;K}-1QRWLwA5MvV1oi~%fFKK|7mB~QGvg19Lf-niV0DEerH#XAA%mgGA+8d zNwp|^e?AjQlpF!Qc`5KmpQK%&D<(HDd+%tKWqzJMNGo+vGtVWhejS!K0=mc^84zrh zG<29=kk%h=$oDLsH>*WbaSV$WUDO*uW)d!mA=F0%U$eo9>%tT9zq3M(8Sj1(Hba)FA!c3;k@(9=1HJ4*S7=o z;UIq9flmsONTTyvKA~yl9Cmv4#6;#Playf4XP)%W2JEOVX$YYLTMT&g#ID^*qi;Wo 
zW&MMNJo#Z{(QWQ~C4(j5z0Y|tU!Mx_5(--|@R%Pf^<(@4Eqs#kZiu9Bh`(qz-4VCv zEg{@cH`WUGQoWoeG}r9UE-3DNfqRXI{IuPTZr@zgLs67M=?yI6Ba@>z!M;_ngP+}> zXHBzBb%oznzv_Te-rwww7gq<41`2;Kf0egqE9~f!NVCh@pz9(n97xZpL_4jvoM&Mt z4YpX(HK=-%!wpACdVP}_L5aA+o|jlpy<%DZ4fFZFUqqB5I62yc`EcJ`4prEwy?m7Y z@`4v>(S~rn?5XzeA~UFQOeeon%QE>IXRq;sM)?31d)|Bbo^SqpgK6mYzhUjM~SjumYcRIq*@r}%o*m$8W*RRzHHP% zdv0@M^aT?Ve-1hiChh+M ziA^%X*RDjy59*D25B1xeo@G!}r+y@a?;AbuDMaRn3pgQ4**6i3bym&7qWdXl*UfTW zdAxN&-^wlT{b_^i)?L14X^s<;DT&e^Dl^zTVK@NC!B`DjEvup)VK=oHcbf*xiV^f2 zT%7yJ&Pqmu!r4kyqjJkrY!K**YQJ1Kn}C(S2bBGXwae}CGJ0WUh`RE)ww#2UXYb0& zu}&%&(EsDo`#=sLLE0qg)tvy+Wnyvw%8?uJ(~Ms%)ObeFNfi0emK)EGv-1?j!Zn^f zv()vCo1?VTqcZr^4n7`dK_@3lJ*WETb?7hD)}K2#Jq&S>oom$Z!Bn21z5eY{*I6== zwj+;!L4|VW0QNri)7i+N(WAiH#-h;N5feLM;?&yVe>>B^U8;jD<7h-=)T5BQB94Jx ziO+@D8~ma@ie2{Byt*SK_Ac;k#94gUC}i=@!D&9L1n8gZvGi-SAs1Zur#{Yd^=&2% zKf5leSS%wm^+R7iIIsP-qpxI%Uc2r%nT}$4!UC|TshDT3KaWw9N8f<+A1C*mOgWPY z<2mMBnD{T<0!*}EREPKPDhI0H{Mw|-3=)qzSxKaY7Fm_!)Ng)0kDOR0uHyYesPLJQ z2B$0=F^j4-t3>a(k;D-E#E;{5x%Q0%xvZ~Wf6+b4&Q3vb)@OQF#?T2W(g@kVyEpLn zTpn{&n*YYQ9eiNgm|#sG)U8I4|+zx@JJNH7xCx89ee zPNHH_<^KFNNmR#g@_)R!#gIE#2@9FDevZ2WePvnB&prS^x14UQx|=b|EY#yt&QJt-{z_g3Z$9pnx+v@(?Yeo>a z(-xT9Yv!fM2-$L)7lzYiJXlRBe{Gc?{gIkIflBek7mD0s6pf&9)8LA*7;f#S7ftmx zb%7=s5Agq<=M4I-!XNj}A?c1~QX*bja0_4OiKsIa1c2s=t`iZxg@k(r1~s-GfUBs% zqs{+%U&~cJ^htonscN0$)Cid&F06#*$+fn1(JETA?5DbIX1c4!fwwLqHtLvxhglz-4br!A(tFA z-$l$pJTiVQp1g>OWJj#;L=@0t+Ghq+9v7!wX#nl_diB{zVh53=nuOObt5r|jXTV4S z+g0T7_PH?c!3D&Vy@3$~bQ0QaD3}v9;AlMvOu~Bbo^CnkN!%ud2>>nw}8^j1^GhxPG4JQ_> z_7r@Z#}n(Gkdcd12Kpt7`iaO8Ci)4Q{_qDBD$|a_!6Bmsi=pO9*-Vx|kTSSIlSl^SKg<;Yad^lzE3y}#pn6NW75 z<{9AGgwnU4^r7Ps;NJ)=4kcIWjqV7C>17S3@pm912_A{4#Ytbz@MYorEp{j|04tgF zx7&Z@=)nkg@tgIe4pm^w;D47t;qJu}-xw30Byvam=ah=k85h7aNaD|F}?k>a{Yev-I{AtE7;VpAwt^({eA(R3SV7{9Fc*S$&ygZ1U3jcPH^(Xa@@Y@ z%BL8&OWJOA#8?S{Xqbm50BA3qquPL_e%)O~t6`zaPdKXy{v%;9hSV~fhbp_ot!tQW z+D0xPVqga5rrn*!4iPs@?H_yNLKC@h{WE7UVN*6CuzHvGYEEJ2cYoQv8qxAG 
zd)z~p$tAkr_P%vE_seav1+36E((STxILWX(tN>Jw?7l3)4qz9N84TOG&CSeyD|z;l zB|4`7ZJ$bIeiRpvS@OlF>3=+@`E(&};gL5a#RXS5EC%LMXQxVoh2QUIM0(bri0{*y zaVgmaF{WbcLsvs7%~QlhahOwLSb!Vi4cOh18J-=hA_AygfH%2$lpSJX*oSSmp#m7= zuP*tEg+wj_4&~H>wlPvbBpAz-;xZ`$QO!#BOTd6cf4Xf&G&~TLvRu8t`gVuPc)u{XG8k8p-hcfQa0kzz#ZYdD zNh;-sco%gEGDq2P9+0Wa{@c%)L6@^DbnNuzH{7_)J`3(RbmbiFE8_P*$tvF=n!q8X z0pOKY0RszmX5X_BklCN-X?dv0v9dih!2QoOA}c|vEFrJpO49z}=1f_e_ng)e z2N9I!ZK0{SAEkEqhu|~b6O|B}i#%vmx*o|x80nh`b+xxAHHzHn7fFz)XLB^UyVSsw z$Z&hnIUrDtH3td*nm+InS5#YK(w0NLrcFEs`FnbST>>nCvK?IImwp@9cBJ|(zE$Tt z<4U(HMR(@wT{AH+tXcnUO2MwWlp_WNMOZx5KZAnhE^nk1aOK!c_UbYz9E~a9*GW7p z{pBY#{Fu6Tos2K~H&K-jM&uAeHZa^Y(tsu7_Yl6EB2u1BjNttjrd5uJ%3w@-Hy!15 zS7)w!ScEu)xCg3N-jUMt46iEDIgdH-$ALtqJP?bGN7-M7rC!ko*?$vqo_m0)6Lt3p zibL-!%dU~d4Iq$p%lOBGgM(xh7R*38JXF2C%TE_PNBRJB7T%|ZwVzB|{Z(GZ4KFPz zUIUKg`2kml7))%FA^$Ia5OxCM@j$*|3XJ6UK2c3c3AH-m#}6M|=p)DZbHmy6cQB2D z7PP|H4N@KNIf1{{m{INe^Xg+#3n<^c)J@Jxf0escW>!A8K62TE>N$y6t7x@3|7u=C z`Bj3p#c%8kIWEo%?7DFIP8)PS>h9W|__A`4@OB^Z=?5xP`vH4l`#<0ERnruK(|JUy zUuhm=}N6vOKD5umu$ayuxv7aXGvc zV$|iZ!te&K+_Kl3%f|%<4Q)m+)LA-Ci+_diUS80KbNiV4tGY>(8MkbV%46-94Vt@- z%3=Mjh~M6uHfV3Nr0eh^``69H|5A4WzSRWPOBFQ*s}o6|zd9(j!Dk3MPq?|8CghMv z3p^2HOmE-1+t&In^W%0K5gPPc@4T$L}-q)wVnE?w3)X->DIOTUxZ+ zqFcWtCqcR0C;M^eP&Q)||7l(Ccl0#AoBBiU#TQkalF#VZCm$lJf;aAv;Ql?Jh}Vje zKz;a#*XxyVPWqbbTr2p8i=jbx9CeL6bN5(*stvH#_!I>{8UXytc!Zs%ghH^}l>sk` zd|*2+WyD5ZiB2iIPKkCx|7WfN8TdX+z)@@ce%-X`Wkh-!HTABCDLPP=7MOegx+JYc zPZ9Dw<>VwmFuCkam0#8aZho^Zel>3v8b&*#F7~De=IWemhW=Tp9c|z;H!szRLlMP7-mtUm3t&ob8p71uTX%UO9c-e8ch(>UX+- z-NIFFD@wiP*{m{Epjp`ArC|GqXEf7`&@hD_n(G5(;ROq=Uey%p+{ja3B??VC#sj!S z^6A*7qIkY+w&J&^)0atf%}~a}?{=#0-021r8SZXlU)RE!Q~C0OQ6rRiFeF^@{A52E z&#&8Si)9PsA{n)x!A!e0?B~EqBW~h_4%VoN%R8M#V8Vq@Vs4se3DS)c5+o=UjS`p? 
z-OQ*ZT@~V@_d=U}7X@i#xfK~VNd`w)Hb&%INzyU){RWr(I=hKJNPG!G1|}xp6r|!U z;hD~FpF*PFWugzPnLFET$f}^bt;Xfr@X!(Lh$?;zPA`18dsfi&Yk;Pe+Qu{KslSR2;a#Ams7kqYpln5oD_UE>lPUU_;sb{MUAd zn4?~mJEN!8yU&b2&(2%J3ob`n)}~{UD-OB|gH69)<$j%wDR0+v?edJ#Z_E*egiKg; zzZE;+bt#}BF-8Drw3w0oo|J6rK#f|jt2IV$Qs&ou*ZO)Q*{CDvAf=d`@K7Of1ho{c z7m%i{JSSd|wqXF$Tr`tq7LgIf^v8WAr?7!icri7Bxa#6MwegkTyNW$!M zQ(lD+W~}-7ELDPw&(j7!-99O_QH+x?dEWVIOLzO0OLqnLRU!61)*#}5>{pI`e7@V? z$nW;3)*xNLG2<9(>us7ZJmxT~Assm%H#OaIAJ0inP9A;!Z>#h|yF6iI!=X1^bD!*` zR!js6n>P^$Y(;Zie0^zaQ&>e4Do1TCMAIpZAVo@M9)JJoCLVFToI}@R3Hkfv9`SJpr(!Tx zkog_v@lq;NMXn|LjLUk`+qzGaiH?*dZ)D$ZKasIR$bk0bZVrn?)?)!oB+BKiEp1Ki zT1-oqG`ZaEw*iyqP4^qK?gN=^d*%~3kh!9aAl(CZcY?~3BLqg&7tGEz`hJ`4v5*o%B zlhfIJ55?r;2ubji+h;U6RHc13n?M8-q|N@ug1os2*8GR8#_{!`d*e9?z+sS+M*FyUD7YrN~w`>s!3U)p4`tb~&vRt6=M8284abwCpAr~6NV<72&PgXYo z0X>f}eA5hjFctl{_T2W3eyVwcH3(}cuxG18C~U(o5NQQTuFOy* z>~0eX&l_9azm?VoL;rjYzdHEi(9k|DIz>cDVc%m6CZ9;=)EF_^xZ5qC>Xm{TG?z>t;-(CI%aT*JdIaWi)c^-Hlh1@}BL(5$a*q#E zPzXVzukwZFQP=pM#rR?VmDFU*w!)=z0K(DdVO4+ck~3~DNKh@7K8=Xi5j6qC^SVX> z7M3Rjb8kqJRxFZraVDvp-_r#+GT1OnnVh=cr*)BZu(*mEI;XN3O#zcdufd>HwOi_w z6^6=Eoh^48m|!165K!Vjeh&X^{`)-&f4LM$kHuvN8m^P%6?Cnpg}@3Fs#G{89E17T zOA_vvo=NLDO)9|ATqI+(p)F@+r!|IVt;tHZo_R97IpZ?phGrs$rBhN7+*2wo-WUnI z&A4UfQ1A>IPz=;(Ekpx-ZI7-K685}3 z{FM1LK@S@vNhL5yq1P`>={Ggh)y?1vSxOne%zjSygk!8wj^7<6h?l?7tdtGHWTfTh zBs1<4YeT$>?e^)``qBq}uY4z$l>FCN=Bo00X3?Se_{6#r;1pC}zUB^I#_gUrTs%jX zDzqnKPbWH5nnVCs;x$!EhB z|F_`A9e|)3Xub9E+$v`4e>K7=cL zOL~V!RRl2-%DDoviGtkrhKq?8GODgWv4&?Q#plwZ8q>VfmkwD~ZOF^vRFYL%_}x^u zTek%%!QuATrlKvLyaSxvzfz9ey)N~6tvEMDp6U^wkM~1t?j`4d(@c2!WSS+>aPsYi zqd<4G;3wvQ%hP#FFn&+G_h5_ZVCe~`5-5z8mT3GwfI(9{jrVdvCs+!Y|mQ7)JlnU>qiv3QnNd;j7iezY1vwG?*}Rr<>-vVoG|xzz1}Fr4hs92RCUq{km?Hb4_{Pa@l{LWszA62w9Am9 zLGl-C)eg$dRk^h3tw4b`P<1+sv;<667%>>?kI%3}@JN18d4I+e-sz!RDJh6a0CNvB z-g5&yC0!z*Ia@7*kA;xYPi;R4eF*GJR=C?~me03RM$`{({p>-cl)!24S@LFO+eM7% z_c(BZyJgWMh(J460yx>{{C!r>n!t!_#z@)9jwq~%8x6w+vEs8<)h08~L4`hH z6=-2)l~DpqXBeq^8`P6~d1g)ELp}7trQ@30!HjkP&PsoIvyPGXI|%s)rW`Rcu3!kS 
zee>}b@;7Rn4Sq+9;XEd-2$}oa#S-U=A`iY(yj5u>S;ChF25V7yQ8im6)HY3cv`mk;Z2)^9so`kpNk?4*4jl0S=teDDLoBG8X*f;~ z&}omHZ$yX!q?(PV)&iiEUb(tDnKHBKS`2X4*9HWL|8>_(l4ATr zgcwDWVy2avhWmUOQg?u^)M1i*iASY->vVGwDIl8k`^Wu1co61W*#Gb#lJ8wZKp-}L zswBbZ=rkMNdf# zs)SN46nA1#>RMsdiakYKY6q!_nX>WX$tdLLIM5k-v=&(60#cZFhV^1W=vG}>zzt6u zM>ew4vZc{u%}m)p+iTW#=V&>$d*Z<$|5OhMHx|66Tw`=(PY>Y+CjB7l%yE&L4Yk6M z_gkuO_QGMaR3jf-Jazhkfnc!|_(?t}UJd^HB;z77MSSjyV^J=&OW{Z_y8L<|H8=`; zzi1Ji<~qtsl`)odt*{gO0b^7il(0xIUfaa}gY{Pyr3r-yW;|1fD@r{D@j99aWM@ji zOqzTycl~yAZ1v^Lg4wdN*<4kO`FZ$qta;n^gH@1ymul{RRD`w-M1yKPec44w0GO;K z4-TY*(UAMqHJG$6TK>SPofuEHD(brIb#!E|lMd2GT&M|X@$jamH?b}6M3DcYD~=h{ zm&&H6@mXAxA;RQ3`dO7pJ?2@qEM>*=Ct#M~R4c#01W~p>4oqiwC@$gqS+M0eS`G8? z)hC}9AAOhJ0xDv>la}pTSYS~jbwleadY?)bH~#gWV6jN}5`6vUi(vQG!KLFDg@E2-WTeJKLi|n;Gf%T5S)xEl8t3c_pnMSttU>_EeR%^}LWSTTv2EAtaEU^m zhT?8@Bjqv|VYp1k=n0G<#G~~)njT<(+d&+f<0e?e4LAypUGm3ekd(3j^OugN{{jK# zWW>OBeH3 z_&X{Yub9uV=KOYmyvN{+h`5|#bSuZS_)jMJc5CgOqNz^2*+2zTtzIoBD04WS*SY`W z1rQHLQ}qJA(f)!xM)*sbivW_9qDj5XO%%MNmB48(V0>*q552QB&y+?Qgb@B2#N8V6 z7MPIBjAd$i!H)6yHywdfNdKKOaIYG~Q~!r)lWK;d%pcS1_o0@+&WNJ!`A#+18mScxWEt9KAr|9JL1uxQ~yO zt)#G9@l(OI!fBFp^C1l#AlY_yb|T+?*`G2_{q)vXUj+L{+GFUo<}58tT7ShdkVK9` zVDFv~XS#30(>WAk1CxIB+dmMy!EPcND3J^-*a0Jir>p0a!l{wIgUS$OJF}0>>Qw*- zQv~)DTz?_F9L4Lm@9N9ud@oBPu_3DHRvZ5PhFCB+L=HcDJhcYhYAzZbtVy)Lia6$< zYa)3Jmq>=GOa_ck^iKgK}ToCRJCQa-DH`qXORzb2U92Z4`rk-mw}~Lwou1Vz)l!9pC8N#;UO$D zXBEqwZGj9E!)+rfx+$(h(`1Z9)U4f3ztj4w^(2v&kuf4~08B&BzhJFskj4=|GNBCnsFDpQ{I}39 z86G35G&w^fxOKhyIX6+L=+q?6>kNF>ioS?4_-y7$1%C#{74_*{%yZ!&FI2B+x$rp# z7dewI2lg4sfdPPH4nEB1sHy&0+f`9QKojp5nZlHmJnaHm^ZNtOp4q~g#p?1}?$W! 
zhQOAcBiboVTwH%-Xtf0RPcqa3{$IZ$2%RF-oP~KHP_i{aPqSDxxvY%s@F9c4c@&Zb zEH(ygGvkF@l+x`VAIwu49rts#q}l9%O`Wz##jEV*?(TRUDJ7z>w-@i|3{2|I5f(XI zLcyFil7b)&gMy2lSwasC0T0{ToYMjLDjuUu-fNH(-ysZaaB0V2nie}>H@@VVr@0WX z{!9!(LuVBaB$dU02$?$1)|L8!G+0|A2TR$Dfk2kJJ`2qM*#VDp`gG|42vrzM;FX2z zwh(M#ITS1DT0ENSI*mrY!I3>oVHr6DpN%+drIWgGK~NObs*vs7)!|nTN9WuHXotg9 ztN5U@TfI2&bNj2Z+oT5Vf(E^y7oMlq?Q9&&nk(oYnUtga1@ucjHa{a#C+Q5PXO~>q zBSYI`zB`wjM)W4aY(j6}6{RiW@fwNLYBBs|OG!3v?e)-H71(fGiTu&q!r@cv&EXZ! zh=pG6H;ce9n;#uY4eH|<<~XxhFgwPf%ZE+Pl6Qm88Wa&KCT(RAHfg-pF(#-z#KwMj zax%KD1M!-w!neUpUgt3Hf$OCJEJ$0;u&;;*6M{=RNok>X7cS|zB?;5TA2k}r`o0qb z)30^JFVEb8ekb@b(iMrHw~ZJx@0$CmS9pmg#N;}v?Bf>|jeQV;fejl_`?d@m0KvIH zK7`i-%Jl*b31U?ONQaxH)IokwGpnI27O0U+F8chV!|ad64?K)M)L1}e?>cK8wQbxu zqJl{#eyv1lmgZ8n{?Oo>zwGtmt{^mUA>xK|8u!KwggIhkrbQw*=TBfSFA9<{1Rdd_ zHb|3*mED&frtxZ>54R=0=}i3um+^s3m=a2Ip=9)lx?yLNbnE+A6H$jC3NI4TUqv*c zA?@=U--LgXylck*?X)Tao%+VFhoEii1c=Ivrs+%re9ZQN+l10cR&w+rv}$ZvVZH+j zCB`lOSzCzJNH}j}g_a^ZmHYf+W;meNtHud)Sh!nBkA+vW%xg8MbtR!n*lEJSr0i+? zL8LcEw!nMqjhDYc8jTsteTu|Jt;FZ_VOlnoi+izkp6NUHOpR`uUhoW^ata$a_ZQIr zuEb2^6?u6e{T7;tBeW2RzrL>{MWF{6OySIK_Y^DbxR#XDkC|{7>hy4B>~FX_Es$6? 
zXOy9~#?Ime^eIuAO};cr9cGsNa#jLNg#)pcWCQBU6gWMAo4J`!YzMhb7;w&`yg4uumP)YOBVay{D*tXl%S!ZbZEm4B%Evc2&H`Y{ZO&UrvYjy8Gk zx*Q-|`GcfN?gps;-7j!VA^o2BWLxp|rVJls!ySWxg3EH~%i~?Vq6e7GK$Ih=IR8h}u85qDxK$j3$*n%> zPdg2?dMz5*E_0u%Ab3Cdtf2)|EQfM?-ENm`c*Qs$>38{13gXYTkQtos0a@rkZoYf5 zn|m2TtW{w!L(%O^MX6u|1dw|_<|PJdYjFYpS*)5p@F1d+{1C&P`z-P(kdz+kvhXz< z2Ju1#`h^9$Vy##)KooRw_K3jwvEM#kH?K;S&qW$9pApEK*o2H3WtIlM89_b^o_~~G zHZ;bLSu4+0+$iL-cB0tw%=94K%H=?@)-YoM=H^%B!TQc@LqYIatPS>m!R^4_?FXse zmt~s8-wajD&12d$9h3Gdhd+AaT7TBXUkBF?(Ew&!>i4wAh)m1Te9O>D3{kz)Zqm5mpaO!kwuRjGlkn1O1?%P!42CqpQWbq*A0sdG4bg6 zMF)FkTg7^=;ssIS(1b|hRGWN5i3viUd*>$m4%?u>IK?(SL;ry`(hgmkI-Tz9Z2~wO zJioo26&QJ?hA}&*rBK*IIEMsDy_b&r{EiQ`ogRRiWPu*%V>} z13=2wci*ugpfrCwqRo74G%td zn(2x3cT{>7`I0P7+-v-PZ$Z*eh##V3Z`2RHa$5OSc$gBxEP}TY+OG{26rc_X1Fg}W zSBJwu3HrG)(H^+@YdT;QEm-#clt8n>R{B$w0mNeou;2uc6^u6i8^iXm)U86dt8~Pd zm>Z%U3@RBgVP1vKp<6L|Z2Lj_=;aX0nu|b(Teq1+Y$>j8bCgH)^-m5`zH)a{aRSQd zw*FQne_i~}!%K(RqbCN3r(`_dI~Kc-@a*$%(Zb*M4jn>^C=lF0kCwnV4m%~*r_>1= zS!_{%oGJxsO7!3CCPq;PZi~wfK2aE%p%Mu0WUt4^q=db_rt{9>fMb;DwsO$fV)J&r zH{FnsrRx=Z&o>D`&U%MW^pyUoECfudNg$TJYvhi-;<+g>;G;Z-gqE>q z8e9uDK-uRO?u8UZZ|0R9QM#ozetO6l>iba!qVAoUlQl+V6v)501f z48ilWUk`zW^IMnVUNNrEJSx(TMzNNA4@~c-yj#%V-+Qy3sO=aJ)~!4r;yCz{RqzGIwUEtWU1r*!^!H}b;wN^|?Q?g} zu|6KJ0TSEkDpMEa5Ad)qd5oV1z+rGex*+jdQ?mckoUo-jkqg>up|S%}Bo*E$yU&Nz zN(!HbV$zUu)k!D5Z^8aZh>A0Y<=H14cH_;i2_F04`WY+_Lfi!7xEWKj4kb`!dt*6f z0B3XE{)40bsMx)DebD*sA7>r1rsD>Y-IIJ4f__ zH%;KQU08R7I<|AmhT}$a@%cb;_^Y$xd#J4+vJdgQoT&#=1Q@dJILgkw!)Xc-SzYPd z9ln+f2fH&!1mR)g?k9-Va1J{t)9}v(A3cW)J_szm{|X>Ps;$EyLjdhc(Hjf?CL|h9 zpaapL0!d~gspCy-Y58f|v@7fRHcXQ|ayUL9$@SnFEcI1WRNM29a?~uT-?7#L>F_s_&=NDb2mpvq3 z>|SbU_euJv;xalkT)az1A>JG|^JS0ysSLi-BX0k|Y*O=zJ{-<=#7#Z*`N-n!?OpwV z{GUUrk|}Tp6`O?GeRb%LH~YU?fn?GoWSXl^@YcF%A*gE>&gLw1A-6Sm>&R$%poU*8 zD+~vdQuKs0!DdbGhf7R7p>}DFyQkQ)gS&_K<34?Z>e%395kCqMnrPC%Niu?8EeQeL zQx%|Xh&@;HMMljq>E7vHyTD%zK7El&^c2atdJH+}fo_CD)R;B*yGnlM0~D9Yn^ zXs8}y${aQm0~8bmxjb<6OVXR* 
z;r@h<<;mU}oRCV=x&1GljS71-W=nD=fiYyc=J=>Fv~4`EwO(Btt;Zl0s%Dmh^Jwf*SR(@Oru7v-2ei+y{Zs~5#?*c$a!Ta{VyhK<7L#=f2w+2@Jmggd- z6_PH``cP8rqyra;^fL*cPWsBe5wec+2-9P$4_&CJ?pR=x(Zu{X!9j+JkQCR@dIH23 zvFF3l)K^;bn?G{naS!8et~aNOA0Ec|7Ht22wSB_-a?X)vWy9`#Y!zIC^6|C*r!l8+ z-2s{?8AqfjRii&le@n`$IOnJY$$)&)Z0ZX7ai?V2Ae}T0QY}z4MKiy2N5+K9tqyKH zKRO%^nF{M6CNN4N4!d?+9n z?Fi)8hS~({@F9g(U!O@zn0II4p0+$E<)D0K;I~-WUw<@1hQ=*B^Orr@?RKzGz91 zPpkk$54xdA39^X{=KjBgU?na{P+2#3x6h@e{q(gy^%z8|5qB4nPzBhmSDMATR59-j zv`(Gsws%GCVyGdIc8bQGk-GsTmms!+`X3fqJyw9QWqs>Z_SZDlJ4IWcy>KyFvrEji%dQFG%Ei(Amc8Xob z zOOQeZb~^ADv|3Bh^A}@$%fhd`E*yfxWZxO(4jaf1D}8F~mG4Wh{6Ow4lK!$p$7A-p zs?<+qL-2a20rx}5;RgE|IgLlS!Bh!6e#00NhF6DIFClc-^s;u1 z&xr&c65td*7(1GjsZT{~-W?SW!0jU)szdm77pH-Qb)79vz;PD+0FA;$QmOwSR2{la z-@pV8#&4f#NA!nM^Z}8WwEWZ&LXNbAj3yr1$aRC0=vI*p`*lr=gY2#=mAq)*+W772 z1z0+Z(`odTZ;q$a_$yMAa6Q@zZ;08KkGg;>4ea8`7z|p_AqZKPHciH(IOk#KYAh~i z)!gKph#GuIF~S%3ac69QWi&8K`*}f-i{qQ2@ zw@(QdI12;6GY)|al zhL|O1j`J9ctnFtc16vOWQI5`Q-PU5=?AL>f0QkgXJ4)X>FtArCarKU=86Y4xWCG*m zk^oTQ5~iROQ3L(sd8lDxVgJ!X)5%A>sBUm|f4Zra_Ry3(7NUKV$v~2+`ZXO?HjYWL z>cfRk%amWl)a^WJ7Rbc)ucYe_sC(xzEGI2!H@jYvMBiPE+Qj8cB7h@If)0eFFyRZ- zAB@O&zKcnRr`8+0Lw~fW5gj{{NWt?iQRzQ>)GXvBcmCFph{Xb@!`H$I65i>VG)NE_ zqR2?vGb+c-K$wN$zEX7D5Lfh>IQsxzm$$N392)WR{qx#%n$X4)<**wl+wrnGsr@#@ zM_H?5MMc3M2U8`@&YYA1EzAS^845$s>acRKi-s@-S23{9> zZqugWiU0wy*qE-?s5BDued{W7_0UQtKt1wB)Z**={2jpM^#5us6}|5;y8hZiTZ1i% z{{S1uN~CT%Sd3xuZfd8YuIJ$0wNT<{DC2 z@#S}|9W17!P~OOnZ7uqnwYurLoJ1*echFfidDJNU2j}gX=g&vSbR_xN_sJCz24HrP zw*k#351-$fD9HBXa*RBe=F%_%x*{N*Cw_gl)8AnnjRubfRbmh$?-ion;o(%lc|XmX z+S>96pspq5w@c!4{^aKy$+k^~Nz1SV@8S4^<*QJjUc<(F1VtzQGK?Jds>COM6)L*$ zqoeLg1L+Meoj1RlW1I>aBfLWc^QqdrrXj)`TwX))bEbBR+z2@}RuQ{*1D-cL+dZ{z z2R29tA$U9ORrwFc^wPM^!;bMt1su{Sd^f7aGk$7+319$0R)Kf<#!6f?Wcls%F21A` z^-4O}oKv2vFzjg1q6D=Xkz&f#X8Lb#A1455qTWdYP5<}k=CnI^`X8rhBL=fwsPqIr zgb{=+kow8$waDs|MK|-7xNq|XPn%Bbv&uM@Aj79?d74%Id+Ub3hk(FraBA(Vwe5Qe# z(%`l$N$oRT7JYZxr9C8-#SGVyc%9`S!!AteRkOs-LLJny;Km)!)=!b6yaY-B*RYrQ 
zN5lMp*zfX|=OK~Xhqjk1fY@tg<>c5~pHgE~&0-$DwHx(YhuvKY#0VNtGbN0vNf`PZ zZ0OOG`w;?iK*`Aec!kzhluSy>LLOts=dbWrM;90%x$>2)^C#ShMB4G4mrzutimM#h zEpQ2>5MTF;ASJ8nZ)N$6Ge9!pj?VJEH5rdH1>$|T8g;`!(Fydpgxu>mK~}=k{v0b8 z_NC*Xm}L5r%M0GFrq2y;SJVHeUCluy(uw-y@W;F{n8r*8EDY&Wha=k;xj|B{fuzfb zg=7zuEAWxb2U6;0U7%YXGv!Mrm#MVbC{C^>bjCD20t_nN==~kfe~6=FaZzd)LGS)H{+&E-Di0{5 zilXG8h`Z|6ki!HmQ%2wN?4)_S=j(hg_b!&VN6VM*!8KNB$PY&-Q-*`6-MT3sydUvV zzXf;mLPu|fodD`i1*&Mk$cnj^{oqse$`Rn~iK10|xlzD02Me12?(Zup=9;Vtn3S5J zFRBw}(p&az#?Cq9^&Oe24jSxgE_r<}dy3xx#ml&$YnssyRKKG4^V_$0lUIkhFk-C7 zx_Zg~s`Z%!IHvCoY)=_c*dAT1eL6sYT{)GVkeW&ywb1+EDB~3D-vsj*VL8A)-yJ z*fu~apuPIL!LYkNF+pKzO@zPYBb?ro?u%j)v-gCNSnz<8(tW!6g%>wLo{EeajE>m9 zQ$8(bWh1`TiVTO&UIRfRigBQ|mDTmkTnweeI(MOX3|)UU(D)C3{9NSH{^S1 z;A`gTy}Up58SvuQPOWWm#ZSJ{?ILr5TuT^Gl7(Qw-9{Ce;A(eeeaJ4jJ2z+{Uxsrz zd98;Ck1hk5XMrui>zzK9oNIa#Efq{{dt4csn_eLKHjE#S{Lvk;z5SzB8xi)THbDk(~kWi#nzomS1l<53t1 zz-kog?BGK&g}94lL6;~3^RT0<6<85Y(A*jxDvz1-4p1jzrF_i&nsasb8u6kQCShf2 zK+T;W3QwXvd@oD%v?}a0^nl`q3AEA0HVxKaLd8*<&vumEQ_kT0y_Dy>NTM2_eLE>K z?24nCeq+z>Aepma*TKgV5Xm4Q(aZKeI{Y{lqJ*9T<*=W4ZrKd(Jh;Tyt%oMOM_ryW znI~837hs!C9ptNE3AXb8?A@5gj-P#ba`Z+W&g}NG~0F0gjO=aBxg=>-oNC0}*lOOSFyC|j?x;WyV0ztjy zVZb!@J4~O;h5jx3fd63K3{mr>O(TmE6#Vbx z1DoMa165|LCq*^SfDJAC8v?PW-chmVg8TCC@cjBCAEpu zFUGC=8_NLos)MrrVszKlJ+gAg1n7oFpWepGcJsn0EEkI=N z?gJIzuVbZZ{{`h%owG$S!<)X%g{rooci8fWQI252ZjS)r(T0Q-8WNyy-5P&5w zz&IICK(Sf92I_XcUE+mL*6=1LI7;8T^9n$!UrNMSSy_vc0l>tfdL48RU#;=4pJ0Xy z+-E~Xrttp00}~Q$xrG25hQH4sb_{S{fZX>6Ec%Z=8Ou+at z9yyx8Bid>CM8%Da`K}uWQbkWnd$5j7-o4LRDW;I3G{9eKPJo$@RrT2G( zS`St;P+h8il0im4a`$fnFL>&uaC>-x?}ZQK1ELswFaEZg5ksdxfj%A@iufPc?F?tq zidD3h24W%g7C$go!aGFpqFL_-ZD(<3=3T(P=&SU|^~KZl?<0?ca<21HmN39AdW0@e zHHjZg0#!~%ZlUsSBaeAkR1&8?!QR842rfFH!dp`nyy_3pbm`~3Zo7o_c$qHW{%bMT zRH&VL%ifoYeAgYUwA13(p%&6CV1;A2$N$A}4OU`kLK+(0%i8NK{)BL^TmK2+vRtJW zm_Mk{X$@H-I^XtsOJB{1`EQRIOTc6qxSxK{mrVqzuli6>uli76&ZEeGxGA0R*DF%{ z*fc7}IfTw6p&5iIT)TjtB7Kz9!g%U>h4!hPhI$oNS?i)<_+Xqw|KQ#TaQeIe5UiYi@DeQltm^!e8jTdzneldu~0h*=)NJBTnk7NLdEhv-<(tkp< 
zu3~MCGVhr{c?xir&2RTcU*4wVNGGzdgP*2p47i*K(o+`n*Lob%fHnp}Ci$Fx~@(naE*6{5{`rVlrJF5>TIN{e#Q8V=z+(>-s!pAKszCcN#apFG!%Q{F`xdhzK?i?Nt1M7G&j^vyqexUV#1j3ck$Oc* z4^fiI0$qtvAaY3$b4<e8+Cqb2CWSa;V%cK_ z1rdU(9vIR?ZRXDW+ZRK14^cFvEwTpF#6So>X0Po#uR1I6)}38V{;FUbZRNsKEPRXW zaLKdUH0bd@OUVqu;n5G3-s_gCNwTH%a!IIbkLO%%3;108&PoHa0yv$_Z35itBMuI4 zKT15Le13y$Qu?xXfa+K*eNFOV^#lF%1ow$FUwy zUep-gk`!|;pa|EXu{4CZJ!+n+#AbN~>3dP2aa-$sJBgZ_0P0`Y0L?|Q05bn?ocS=VDW9Xy1J_`AMHMen?MmL|Q= zLi7~3($o9%ipX07MLV0shL7q+QT6ZNPbaE<{f2KSlMpr?vAnEyzI;}2u3oEfx}F{4 z6D3y}Up!%6XLMPcq;Iu`+X2=+m<_y!xHe&m-(N%79MTGAJZbHhxvKBg!jJQ}G?lbu zNuHBEiToVOxg}{xFQ%pH%95-%n3rZ$_5>q$L@4%*QNdE6f@E#`b&~Aj(UIn567%P? zU4fv3;+ea0PZUq*o}c3mLmz+7CWb&sED-T&gRzGe|GVv>k|rOoM}+h=R0}+s4Dtw# z5Ld;KJW#Vbik(yz8gfo9l9k6CDD~5uVjj`e=An5Vbx`hjZfrn0x-K~JZLhScsfLG= zl_=_3A9r2EvM9!d(c>n6?@(sX;FHwhS78{d##+#q!m8@C>EmT3-``)nznGi?n_%PB zKn73S_oiJ{a(+8459=oQPDEx0E0PyFBk}u+)Fn$UW5z!W>#@0Xz{LNuV5%6vXa2mo zUB^!RZ4y3r<$SG9<;mSr62eCtyhk`Y!_Ap$)ao$8XP=LbiXrk9564?P>^fQ6kL6Db zBB9Y-o6g_)KaXIZ597C3PFfc`NK4D!OF{$(^A0$edFt-ezaLB+G*}Xp)OyT%xE9aQ zyuXN;#WcU94mGwdMG+-OL@a$|jH!2IUB|(riwH|ym3ywqc2>$~VzX-Ry{F@I)>|39 ziAmSj-gm8y;>J{ls7qCit-?L2>ePJFe{yLrYkQF{r> zofhsn@Ey3jD^)DZ@B4K(E1*uIuBCkDlFhj3y>t3Y7M+NaSZi?Fq~L+jW}E{3k3cAu zM=v|Ht8eA$8T(k&EO}1vc7-&e-uV;Rums|ak+@<{A{iCCEPQI$6?ucrMa%Q+`pMnq z#hYZWK1Nc$<4L|r9L^V$jN7(-n=k;9+bS`UVIW)r0#lvDC${$&^&l91a8BP(M{Kav zR#S@=JKq}<07Ucn+3`AnC9Jcvv+GCX$oW%Pm}e&@i2BK~(!qn)6I0WS9t4<)Q~$m< z##!#-(~enlRxKJ{VB;Bn%tt7gPb@0XeAS>d1!G>r({U`^2q9L09pgW9ZZ4jlw&p*0 ziC41wC}n3h_Mm+EsOCnrxsG@$75>n6Hcr4ds#Pj@ocbm|8Gm6-edBqv$C}0*fP^-C znR4rbaYTAlMb_xwtfc5~fI>Z@vs=$P+cJ?!tsV%8-@(DqIndp?87^^_A59@Vx=8{~ z7BhMrloT_-Qk?FO$7bBxs2!dKv&B?&BBHifW!=c=4(1iSWZP-o7;@Te*c?;tq001P zQ}KNpFB_$7H0xYVR!}6crFxXe@Hz@-BUsK>&6eWHZV>PR9abV@J-gSd^{6CZsP(wG zKKLN~;K9~uZqbu7Qrb5Yr+4?sof_IeS7n3Eya#_zrT;!_rikZrL*LDp)egWXty)t} zY{Khr@@?$2k%32L%ICsbu zg+wGkeQcZ!O7qQ+Ln=OP>Ev#Q7kASdis3UnQoR2|V(rk=s3K!xYfT~2M#3^;zdBVH zU-JUro4^>~ci%fEw{5HV!u4VVLDlvPjexKoYx`!)$@WmJO=PO0Kz6+v`Yxc3 
z^`VzZA{=q)BDJ1A{d6cUdU3S8d9q&l>ic&mfIqK0?Af#|C4yrd7J`5gPqS_PMTYi2 zR~vPpq#+V!eH6LGkk!`|J-Y6PO^Rz6R67cZN+HaY_`xaN7C8~>wX*!_`Hmt>3GsXV z=UrB_>K39)wsh4kUCHzm5^39N*y{IwZsI|&{E@g5$foliLs9tsx$|L3F&PU`{2%nt z#ZK4HInCLSfPQQ8F(4OnVDtT&AHKm~>3t`6HIh%82Zn&;$SNsa=y+M@$MyS3CqaiP zL_}#yTQpyrl9@QC?o;f1N3{~@-E44-$K9Yy950+Zn!p!1SNCvB!#j?-GA;XACV5fK z#q;8bJ=qdL4W?>hqGfpSCo5LWGIHHdQWcbt$|%NPHP%Roz^&O1liS1ud;7=i*MJm` z5hB{mS%AaZ0>>w%{RAEGASQv}U1M``y^;U@GCsg(260;4Hz#`I#yaMir!bRJlU!bM6e2tBCslEcU!EK?0tB;3fBhf&Jhw^-lq-^l^* z^S67-h6*gv5fSa`N1k~C+6yXmSC(*bV@w4Baa(#ZKfMogH+d+c@0}|jeyj=MbH}Hz z9B0lEdU}4Mndh6#Iuaw)Oz8LZMOuPtGj>36{zZ|X@bO7w?3VUHP7# zZ#PPv>f2w|`t{Zxq(4xr0>>uU`XOH?)3d83Qj(UPUt9P0hhoGdUMyGp{{DC*ey}d~ zTBmI5jyE2m3Ljja?Z`b}ymxLkS9uY^sxUfO(O3C^E;IkBIG_0e3wowZb+eKM=Yr~CPe401_B3b2Mf z1n6d4YNs@=(mUOLrE%4Dzx$$*$@g_wQmmGz`YDV#FoXL0(26@GCW!tCimdS0c1lE> z*gd!TYToG7(Ov_sl@&y_Ei*(zQaIK;_TZ#3*SM32mh-?0S8@F>M8 zoqu+DQ+TU+c6eo-OpSGNpHj&+Nq@&C_*S`J_+`ZFY9^HJM~26az8|7=S! zDXIu0w~~WMCmb!nPH`4!n#*6jcrp1l5n7s@m`(G0a9~AG3V)uvsuRIXgKt{cC8k4Y zv@0tY&cuGpd!$q{(+&Tka?E&%`w91t7Olg5Tk&~W)~7ESEHcGQiKFu~C`LAG>TV^E z+vP4FVa(k7Pe7|6KOgQR>ffw-5M2-O={dmvPULf6r8yj1-x0Qn@y|!RL)nx zXKlhq2%q+)TMh3YI}Zis7TU8nlLKA5kvN}#By$5#qoEoCC9kF}QrOvyBz$qGn~`Ii zpJ(ASdiz5igVN_aqyA52-Sv%0$lrbUOlykOy1`7;OYSUh6JQfi z;H>`0ruxEqVwZJO^9PBk#O3j@<5K72Qqg51`166z5BD$AH+(ltL3?`LHNg*6lLhhr z^9_S53&f5N`Izm_kQ?eR!V6hHe-7rM zV(7CQ=PzE9j4ts^QJh}L#MWYc8B52RSh?_?CwXutCc-$`dp;hY0@K3t;TQT<`6WCWb@9Gq<<7=i zjtRH}+snhy*=^s4XPVjAcr$k|g)VIZG&+<|=j_gh+nncntSn&96 z1wgmF1WqEN__Ur>k3$qbKHb3mJ>E_>N;XP&642JJzGJUFVd5K29M5#-btCcC?!{r? 
zriG0(NEhnWtGpBMSBVl?UVVf9kv;OGhsUYY%;)YiJdore5*(fX2yh0*i+Sa`)lU!1M!vS?zNUuz5La^Xd(xR$-_b2?kR)m#s-LYeVoxU=&wCocW0rd+*xZxc@g zdyiB#4LSH*Y{k5Fv=HnCT1_+kKWiO=d)rC%M z#_m9<)4f6|nEczW*aXif?8aFsMS%tw!_O>jGp9R^`KH#2e8(njT|a2Q*vQvQfG6t8 ztJqhm=a0sbON!dnl!KSGs;^$PoNpHKnn!`}!!BtB{s87oSuR0?*`luwmjCy{1cXNL zTyIn=;N|!~?R|+i)b0C!i4v6*DYBCgl5ANi~B`QB5{)5G)oe7}Fd?|hu&9CPN)-0u5YU)Srp?$J(%Zp!EDM7%9MkyXt- zEg@Dl#%CuPTQxLt))!nLd5vKtvTqvq=nDYId-(qbAyPtS|G`Oxr59hp&T(*+E zigp~aEvU&3r|Q8U+mq72%eX(1 zGdDrinTRIE7$-ho6B8PxS|}@{)-7P;+g$V={lfON7$g3qo1xGsvBs)XQcR|1mvDTr z;ANUObAw527ligZu4C;hQxfzk-hM@Z_`A$}AI>Mf@3)`0t^D%TRqv@byt_G5{wY@5 z$Qlf;3>a+5unu|G#aKDUzT}b{b!mlllXgzZp^kpP>%bvK7fBs9>pCL@tch z*HYz4eZkg7AH%$#9_yM~KJ_=@v#g5i0^&2)6dAJX;-&4g-k6qGPKwFZcUsb;D(vLL z{sQ#I=sZ-fm9hvQPp?omHhPprkw=OT<@J_fbd?SW->Dt4k+<$YE@7zuKG!$58vItD z(Z6nVC1ZguTr!6p(aL_JnDfDb+1`>Y?-(Z`#ae(|cOrBx)V2o7aO)KF6s@h^v;LXL z_{B{p^KgOSkx`A7`fJ!I!Qj(|hLf@_IVQ}%9d!g+wv7q3a%TXStAnV=`n6nhYvYFm z>kr2GMeu^~@8)>|d@C&qXdvS8gyiDFLN@07f0VKoN+v#wwz7w#rp_0nDnepD1Wijb zKTC1Xxo)Un0PU8ViJ?+^(|1O#($2WiNVyerO*LF2$ZvCW%9%a*3*L6It6k2#H&ZOKcL*vs~~fl_-AKVJZt2yI`Hyd~iJ zh@QzGK0PZkd@e>~+hfTkryVqBI&;hpXZB)QCi-inUk)|vUG)bFGp&^}!Hq>l(%fmj zw)!|K?H8MSN~L#vId}?k?=>k9nLcwm!!^01{1iqglLg!T5faS?ckqi!-w4>?p(ZOk z9r2?*-J-!BAL5)!#=Y_Ub_HD};%A1e-JBHS{4Hh*WIB5E_mjM89Jk7l6h|D7hwY&p@>aMVI zPkPc}imAj4%mUwSlUIbwncqhGri)Zd$f^Wxd8pPUPz7g7Uhgbml=1q;YEmTJ@yj>v zCf`p@5Zm-@hu}A>e+5QYZ5j90d&$qOwGC1RrnY?;FvZsqY%b~`?B3MOimepd=2tYn zzxPIc#BYBk=(!w>{3nQT{+Rj)Z>a#xy>SqyYdC5;`BG=bO)D}-RYD8L6qzy|U~zle z+OXk#G<7DS9LWE*y*+dd`6r3GT^Tv%aMWcr!GBOT%Kfdrs}MUr7nxR}9m`AG$=HUh z=H1Nf@vW3-JP?FJL)c(;mLsMEICHqj-dy((uMB>j|I;otKBBo5frS`Jj97nolp>cln?$u7M^fKb5$1WY^K?_lpwhbZH5La>0?!nwz_u zg@rs_I>+VdZQ+r9a>X^n?neD-j!P|y(A4LDW8oN|WGjMUoS zjQJ&Hwj7}ED#^pG8LyjF;9i!5<)4)HvXax3ylO|#$$nOUOSo@8n4fG~DQWiHm@%s7 z@)|kyx0iA{P@}Is)ZrOzMIqyyZzi6}yiL#~fz}OfdyZl`l+t~7auoJ(J?q!gq{XNC zdGo{?O_Z#LErtA~GOPpYICOi-e}}5W@}N>M4|}g9)+}(a(c?B&;sB*-AjPM;@g!GW zd({A1NmvEwI=F7bnk6&~)F>|KaJtLzG5_>`K**BGrY}}{eKq!Nq&K1o-CP{KKCjsA 
zRhQ0k;ZmVth>ENX^98zLl`AE>gdh1+#@|wH#5YG+4THMw1NYPPf3iw1)AHplh~g#g zkI~}m0wU`xWMu%Dyq@3!@VaCE{FLQ@5Q{q4=qNMB?F|U{9qjOrwsDTVeBR9YK9%9{ zDswsU?SUnZQ%yr>MsL|pUycx*xhErUwR1|12|EAkGLv^omMD3eMX=2+sgWI+9QzB( z-H5?wEw(|q@}ek-Q&vXuU)G{k94}Df)wJ%XXP1?n&r>wpaCf(4=zErEYnXl}Znw^T z%*t3~`B>HTWT@Rfs!YZ84IXV8*CY`I3^g}EeHjEZ0PMjiirJhWUTq=K=XLo65l0F_ zmwpu|0C>I zx#0Qi4=2gAbt4?opVYnAgyMv1MJV*{3X@=sjLn!x=~>@B1;6zC;ucRkYZ);Q+vI_B zs6~dnYXe~20i#?DjPm>yw^3Nqqj=xh5yV!%zTg7mMSS4IN$xQnDZ4cOj0^$B^bH5I zz1vX`$jUf#I#TcQ&=KIHBiK2ifGLY~jKDMLpccDBsJneTUi>?&@)U{0ehS z&G0tHNK4||E6`rwgH9pyZEgp{7vi~$EkWVO)uxE&h{ZnbFCBVm$Vw&1XX9r@KfjFu zmc4XLuMVyu+x;~@Hqdv|d%Lf@Z=RyO*P+l^i0==g_6m=viBDzzNSLdNk8xQuzlyY$ zRFDf#$<}<^XzcI&OOaGcvML-qgU|ybFYYeGyfA(i@ozn=M_otV?e_#T9rtr($QlU= zh^u3sfC&I#M;rSXfnvKIjL&7XoU3=bz;q&|Mf`}-%p-wHnr&K%ti-X|PtTne_Q-v# zdnJ3q^&gN;hT6aXsza|A76irebs7$#y)j0DutWnpsYz#AW62gsFoO~*-6LWLYC!ND-X#a+r!BN}b zHWsU3kC1!a$e~2sY~m8U)*}k!%@$eVvj7HpQT^#hg)=JVzH%Y*;HMxO7 z<8gs0=l;VhN7TNDAoBb?`gwCWtVnID8-R5ff6?po9fb11VKKAU%UHU#JoR?Z;mDoO zO~`Imdf7j7i*#ZRS)QF`wd&^KQhT+ZaS8+;pfd5gv#8raZDN9?b)wdi*Vn*4PK1z|50-sqA564#|y$HaR z{^C&X%4Y7GP4y8#Bm~1Xt??D1Qj)upW_jz*j}OZ%0LsCv(e@W)^p9r8cFM!AK)T2# zB2yd13Mh;=B@&*>&`B?9B&!LZ#tqE#)I?tGs>Z30X7>2h2FJxKb)4v;>rTEL^TOtF zJ3Vcb$$ihO&Q~FfRagIn8t7%1pF8fY+t8J!p&zm^%Tr5rbx4`TG(?}4epVGwFMmdc z)j`vUAxdq7huVg&u{9HC9d3$onypWN<68SBA5>Bg_ZI{(|3Be}E6D+vu8Ig37;dh0 z`^qff7TxAM(BiRky@A13d80PjjL&8DwF)WI3VEygnL53Dp*?wpPX3Lx8XTGOll{5? 
z(z{9zTQC+!9T?yv+$@3Ik~$;$3vyLkkUC6Xp4-%nahbPeM@d<={i*%MnlZ~2pIH(6 z?sWMD=oL`J2`8lU-BharAm|bx$bSU;E0|9VhOe30m5sXDW~s1dkF$RfhCMrnAVz^+HB5f7i*_kK`w^yV_0r~tET zo5*Mc1|K||>ln~?5bLQsXIL{dbOqNPs8!dsWZinJ5qRqs!bycWgObe5rcIUBO+!7L zk;ep)H+FZ{9*?Q#?5bdM7#B$Af z^ZGi4X9|zDoivrF$8GM|t+QTYirrnhTIuI&0lH|yYsIu8&Lhuk{8yQz3p;(Xs7otb zY|Ff)uHvhjCGYJ$IHh9zAT!yh1I%kjPyqK0GF-d?FJUE}9RdwH&CacHgCVnVLoJjY zLmM!o`Oky17+I2qFmUz{?1`@*&#F()uci~Q(_!JNs;aUF(}WP;t|bm$(zkn9;ES?F zDLHsWqd-R6-`7_vF!nR&&b8B#+6~eV4=bIHHle`a;}iQWvD+g*^&J+;uaQZ6Eh%Z+ zTLmF|y%@OTGNfyjH+D(RaoyJ?T3(lPta`m29y6}WpdueHm$YNIR7JRV`(mLuL|yzb z-3*5P^Ov=+EYRsdJYd4ZFY2crgMNQo#C%MJdYHZ`8AGd%QR&F0)9>4OYW<_9s>KVV zy*OrCkDD}@m-l^iuLpyGF#L055@*5h&ceDj+z9Pptkj5uNV{(wbftPt}U| z7*&nFWUokh@`{X=JAK}QSpI%2BS}W-Jb%H)1NA`J`Ihja1KF0^i#YzbNPW)HMRC9K zTxs8z7e6b#qz~E=G)>BSq$#Z+rnse08nhSRo}uB1hk>nN^RQly_qB1@?U_25=)pJ- zDk*InVR=cM_7UJ+Jb{sIw3ciPI0+rzsbl_V^1<&b);@D}eoYA9{rp(6nXm)Ux0kyxBi#+M&Ni6Dly|y7&DLE+>oi-}UFkWpH9cK7 zqAuE}2Z+bWhkGAL-wm(Uxrn!wx7yW5LoKQrm1!biN^9DNQnl|I1k@I^r%*OUH&hj}n6Hsq-yTU2|G@f8=05%X?hFzf_z&7b3HY41JRQwLoJ~-nAeETbQF|(H>?TA|@E{Xg!0mqiK zVVui+r(eT1Mhrbm*va-V)`3jLYjDY<2`k5@9u|NgeuO*$K z(=)u2JC6H>pFTr088yQUWIq7r-LY9?1Lf!hasXTy@6qoe`7v0>&k%Z(sz;D-wFfic za$L?Z^2_UZjq{;A_76e<^q)cqcd&u_u~BEw=C}BB=Ly{-bjk%6^f4C$t|(Y|;&P@Y zfx}F8WKxgnM?HTV!{d|QY8)&pTtr1j*m!TyU)RJ9Q0K}`}agsXnLy1~RIp+267 z&itXSfXaJhEVr+SlfqS5AR)>kKJNQJG9eGaRCx~{wh2&#?45CHy|HloZ)^E$w^BZF zmHjZG-<@k(TB4!b!mZ2u574pRrg+$+NN7Uo*Gq6VwpZFHH!#=TtD;CIu^#o3sc#>O@qzz$K7kz8O#*!dEC=;p9^vgzYuP29%^vy z6gPher<51wz1~~#*!WYA^}elr6-u6TP!DT(_Ny@4@w~aQ8ykc0M+oeuUfVv_8sc_xp51T=u9>`GG+iuFr0pMKIPgWclShMgv4k4KD>x1 zrxKMs)gERg4}YM}_kMbj;yHy>b$Yt=hwIhdBeU5NC$-Lfx&5=>ciwD*kZtpTL}$p);RA+rTlv7fXsWGp^Yy6P}MOzOeTxRz841|6#*Y&1Dzkk3 zhPM!HXc6g2rgyVl5bueNO2u$IU`eXt@Imeh)93`z9j`mNP0`eS)pDYg(BN}tdAk+) z=GFX4ujGgoeO?=th4w8)cKk&{1=XMpIb0fd>Y2^DtYt^kOsEufle#0mYQWQ;C*(f)TyFNB#+mlI z?~~y~s0h6Qml@h$o8{m$;K7Q7a392fT`HaGFDB(Fmk8%PK|rhlh{75-UqcV+oj>|w zIk-l_Zq^-TVqAqKrx_CKzayI5J(B1bn6!Smul_}9udo&}FqJ$?(U$pvK4N3NwsK&_ 
zy(c9{PPBXkk4?@psZDMalJWral!?vQkedyMi?-D>&B7CC**{cRo$K=0$!)BSLpQN2 z)W}~gsoR8X*3Bn%ZbxKvh?+MoZ|Utf7`hMZBlr3(?th^mAO_4cat?wccBV93lsNvV+>NgI8y1cyIYikd-ekgl{goz&vtz zxX9!N&g$JMm5JOrm$%^*E>}achx64W8L%wSq4y3Z;?20lkth%xh-mWDc={XI4b^%M zP(MWn4dzb8-*&ta3DLS?Y{KUASikCVqbCA~-EZ0^#}f(*fY8^{eLup!NhDd?YzzNjCHQqEiP`GdOE$onNk6=p<}Z18y-;$^aFEhQ`g-I4gE zc%EOH)i0t;o*+Uv?xV!ZwaoIW*18BLbXDNl$-eOBD+$O>2b+!s5GR}Zv%T9 z6)IHwfFug)L!4NHjUB~@Wp<_b8tbc&`7p=b8F!BRqYARWsN?u_q1rsG zF%qmM5W5}ggPTL?nrHgDsfJe1v1kQMb@Mz zXYz!4+dBH3)(p(na;1i`w*Q8}y59e6yvsan>_9UIdtrw2-Xh8zF0e2%AroDNgS1r> zrSL}Pn7p@#!26xV8~@xd3j4Sh>sQtyqRQ7dS&98Q6Q1+vkYktSb)HsP`B zkrwStTsLy|PA>HH(Oats3sLQB&uOu?s*e7ul;L^&_^c%bm&a1s8h-{iyy}F6&{fWA ziA*)uA#{r$6#_10g-_Q5P`~D3l#rZeeV~IcW&bgxD9)RxL}W=8ZZ!hkKx#P>H)2Nx zxusmj+LbO@Omc6&tdafv!?k1QJ$IsX<|0F6l+&PGCGDA|X*yZ!u_o36el;WIL?QH? zCy_BzY@ENEJn#_kCZwxjApk1~%jk4Jvwhw8(&VC4pcppr)KagMdl?L}r6Dre%-rox zVni-6UPsBmxQ<-^c;QbC3Ru%&&2g|jscq$}7ewY$_3aPp^QeQvf&@K!88PabaB&sR zOB0aEcJ?shdm06(PN{xVb5VQ4I&}ZZVf$etZ#~Sox1DI7p?N6{h=VH`V>r}wKHO0Q z9#DZf713ZTQvteA*r?lo#C2n{M>}!zu;D74;p+6%yEC3;v~JC8sNRAkb-hz~u;vyx zJ)@IBegEtluwk_XfwWlbsy#M7RJk>zl*<@rfn&@Lyv!^-QOJU$w8&J++&w5EAiJ3{ z>wlf%&5J08tdne*E=G32s3zWl^4WkdBXrG6tI2OyAbtngNkO1#8<JI5C_h*@%$r8hU8XgOb6d4{-}1K5j# zkkBtjw(}(ca?>&dj3IGEtW2;C)dnhWMGRgkR0}9}QKUBPiZpWF55FMGecxOuZY`Wi zd?AO%u}3FkosPVRPT?5;N@~S6MKo$lC)=bLpGV`_@+!pv-<2$EWYYrDF`AfHif1+< z_MISnSDE+!Z~{%6(`0Qc~Q!sy=%2qFaM` z{E)K9O7FqD&%k;A=M8Ho0Hdc1(;~1*hK>!C=-LS1cC_#&DPnCkU(V~gL*zA4A=-`; zYY)OKxt;vdMfIQ+7kDyrUpUdyA=Hn8PQ)=i18)m(i9uczUlWb)J?fmDe~iTTuc_UE!76&bVqn0qrlsU-q6aWmLZOPG!q3EJs^V!8nj$Hl+n zm)cJg@JA0U`nzAr&`!ZQhPr;h`R@2v*`p(PibV2jzT?{*M6l(i_BhOThcN+2#3g8f z*;!CEqd%f2{RH4%8@Mlq2KjNtcV%C|>i|09H$XP%wleq?;khAV_uN=GRVNhkVzen% z@eYN*8cTM^cv-```P%gh#$$_-BBLCNQXELl$}^yH9Px; z*jbt(bDG@^SphnM9{|yPomVz-5;5_DX2feT&w+L^k7^3sP&j!PMN7>ooC+6^jj9)( z@?_c@Ekhn-f10Q?`4jadhM}uicKr^knvb4*e9X1AT@5CZdaR~+)XmuN>M$%EvBu>C znZNwk|9P9Z>qWX&Y6<9uEUA;q#K~jc?`dyEyz}WRcaPoL+@ZAD+x|idII^U?e%Reu z^uY!UL8T>b$qCng)UoR

;#wq?HO{8v5>ygChK$CF!iFjWgYTjTk>H-+oOSX6{`* zD~1@@M>^nd1vyJEUhP=R3viw-kji8sW5f>InNR1*Ukf_C69b%1G`O^C#56+8!ujX- z!d=pLh=7h8B@LnRuj#sLO&XlYf^thJIki2WOnZ8Ne#T-h#~w_a6)VmA@^RWN8gj?3 z)C*6-`arVyev04xE(a+`bgeZXyxRd{Z3+pIb>onVncU%4C-{u{;UnOr&~OIl$3WEB z^hg>LnhZDi_6a`FO68c2w*nj>L(@?^nBd@p6Z)`cXYJF%1=i`Ic6MI_!vQf- zR!>{IESyH9QDDfd+Frmh)xNY(3(+Ppg6FBuwhClv9S79e|Jc28oN$kHZXlhfzCd1k zmq$T+hEw3idqrIgb?E^Uzu}Nt|LrC{qm~#)_MF{Yqdz1a7mXH|mX?a&w#>uFv|SUi z`?<@qJskSri6Ns6^UBJ%an3#mioWbyP}cyUaN72K3ba|=ISAI@e?@;mv$XnsX%dKJ zGkfFJRWuxo!TMy5T#zg<}Gz7BKX&;{4Y^}PPL~nim@Z@wf|aLFTM%LNCka_@c*w>b6p3| z5aF&@v;K)b{`FblzJ + +# Package Overview +This package generally consists of three main concepts *benchmarks*, *tasks* and *model implementations*. + +## Benchmarks + +A benchmark is a tool to evaluate an embedding model for a given use case. For instance, `mteb(eng)` is intended +to evaluate the quality of text embedding models for broad range of English use-cases such retrieval, classification, and reranking. +A benchmark consist of a collection of tasks. When a model is run on a benchmark it is run on each task individually. + + +

+ ![](images/visualizations/benchmark_explainer.png){ width="80%" } +
An overview of the benchmark within `mteb`
+
+ +## Task + +A task is an implementation of a dataset for evaluation. It could for instance be the MIRACL dataset consisting of queries, a corpus of documents +as well as the correct documents to retrieve for a given query. In addition to the dataset, a task includes a specification for how a model should be run on the dataset and how its output should be evaluated. We implement a variety of different tasks, e.g. for evaluating classification, retrieval etc. We denote these [task categories](api/task.md#metadata). Each task also comes with extensive [metadata](api/task.md#metadata), including the license, who annotated the data and so on. + +
+ ![](images/visualizations/task_explainer.png){ width="80%" } +
An overview of the tasks within `mteb`
+
+ +## Model Implementation + +A model implementation is simply an implementation of an embedding model or API to ensure that others can reproduce the *exact* results on a given task. +For instance, when running the OpenAI embedding API on a document larger than the maximum number of tokens, a user will have to decide how they want to +deal with this limitation (e.g. by truncating the sequence). Having a shared implementation allows us to examine these implementation assumptions and allows +for reproducible workflows. To ensure consistency we define a [standard interface](api/model.md#the-encoder-interface) that models should follow to be implemented. These implementations additionally come with [metadata](api/model.md#metadata), which for example includes the license, compatible frameworks, and whether the weights are public or not. + +
+ ![](images/visualizations/modelmeta_explainer.png){ width="80%" } +
An overview of the model and its metadata within `mteb`
+
+ diff --git a/docs/references.bib b/docs/references.bib new file mode 100644 index 0000000000..cbb059b493 --- /dev/null +++ b/docs/references.bib @@ -0,0 +1,17 @@ +@inproceedings{mteb_2023, + title = "{MTEB}: Massive Text Embedding Benchmark", + author = "Muennighoff, Niklas and + Tazi, Nouamane and + Magne, Loic and + Reimers, Nils", + editor = "Vlachos, Andreas and + Augenstein, Isabelle", + booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics", + month = may, + year = "2023", + address = "Dubrovnik, Croatia", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2023.eacl-main.148", + doi = "10.18653/v1/2023.eacl-main.148", + pages = "2014--2037", +} \ No newline at end of file diff --git a/docs/tasks.md b/docs/tasks.md index fd61d8af39..b7be0d58cb 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -1,6 +1,7 @@ -## Available tasks -The following tables give you an overview of the tasks in MTEB. +# Task Overview +The following tables give you an overview of the tasks within MTEB. +
diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000000..d8af026671 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,89 @@ +site_name: Massive Text Embedding Benchmark +docs_dir: "docs/" +repo_url: https://github.com/embeddings-benchmark/mteb +watch: [mteb/, docs/] +edit_uri: "blob/main/docs/" +theme: + name: material + favicon: images/logos/mteb_logo/mteb_logo_transparent.png # TODO: probably not very good logo + logo: images/logos/mteb_logo/mteb_logo_transparent.png + features: + - navigation.tracking + - navigation.tabs + - navigation.sections + - toc.integrate + - navigation.top + - search.suggest + - search.highlight + - content.tabs.link + - content.code.annotation + - content.code.copy + - content.action.edit + - content.action.view + - content.code.annotate + - content.tooltips + - navigation.footer + - navigation.indexes + - toc.follow + palette: + primary: white + accent: light blue + +markdown_extensions: + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + - toc: + permalink: true + - footnotes + - attr_list + - md_in_html + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - tables + - pymdownx.details + +copyright: Copyright © 2024 The Embedding Benchmark Team + +nav: + # - Home: + # - Overview: index.md + # - Getting Started: getting_started.md # TODO: Move usage documentation to docs (Keeping a minimal example in readme.md) + # - Command Line Interface: cli.md # TODO: Add CLI documentation + # - Overview: # <-- TODO: find a better word + # - Benchmarks: benchmarks.md + # - Tasks: tasks.md + # - Models: models.md + # - Contribute: # TODO: Add contributions + # - Adding a Model: adding_a_model.md + # - Adding a Benchmark: adding_a_leaderboard_tab.md + # - Adding a Task: adding_a_dataset.md + # - Development Setup: CONTRIBUTING.md + - API: + - Overview: index.md + - Benchmark: api/benchmark.md + - Task: api/task.md + - 
Model: api/model.md + - Leaderboard: https://huggingface.co/spaces/mteb/leaderboard + +plugins: + - search + - mkdocstrings: + handlers: + python: + paths: [mteb] + type: python + root_package: mteb + docstring_style: google + options: + heading_level: 3 + show_source: true + show_root_heading: true + - bibtex: + bib_file: "docs/references.bib" + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/embeddings-benchmark/mteb diff --git a/mteb/__init__.py b/mteb/__init__.py index ff4e065efb..80d1650fde 100644 --- a/mteb/__init__.py +++ b/mteb/__init__.py @@ -2,14 +2,7 @@ from importlib.metadata import version -from mteb.benchmarks.benchmarks import ( - MTEB_ENG_CLASSIC, - MTEB_MAIN_RU, - MTEB_RETRIEVAL_LAW, - MTEB_RETRIEVAL_MEDICAL, - MTEB_RETRIEVAL_WITH_INSTRUCTIONS, - CoIR, -) +from mteb.abstasks import AbsTask, TaskMetadata from mteb.encoder_interface import Encoder from mteb.evaluation import MTEB from mteb.load_results import BenchmarkResults, load_results @@ -28,12 +21,6 @@ __version__ = version("mteb") # fetch version from install metadata __all__ = [ - "MTEB_ENG_CLASSIC", - "MTEB_MAIN_RU", - "MTEB_RETRIEVAL_LAW", - "MTEB_RETRIEVAL_MEDICAL", - "MTEB_RETRIEVAL_WITH_INSTRUCTIONS", - "CoIR", "TASKS_REGISTRY", "get_tasks", "get_task", @@ -48,6 +35,8 @@ "BENCHMARK_REGISTRY", "MTEB", "TaskResult", - "SentenceTransformerWrapper", + "TaskMetadata", "Encoder", + "AbsTask", + "SentenceTransformerWrapper", ] diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index c65f25d55d..127b251b7e 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -55,6 +55,18 @@ def _multilabel_subsampling( class AbsTask(ABC): + """The abstract class for the tasks + + Attributes: + metadata: The metadata describing the task + dataset: The dataset represented as a dictionary on the form {"hf subset": {"split": Dataset}} where "split" is the dataset split (e.g. "test") + and Dataset is a datasets.Dataset objedct. 
"hf subset" is the data subset on Huggingface typically used to denote the language e.g. + datasets.load_dataset("data", "en"). If the dataset does not have a subset this is simply "default". + abstask_prompt: The potential prompt of the abstask + superseeded_by: Denotes the task that this task is superseeded by. Used to issue warning to users of outdated datasets, while maintaining + reproducibility of existing benchmarks. + """ + metadata: TaskMetadata abstask_prompt: str | None = None _eval_splits: list[str] | None = None @@ -64,6 +76,12 @@ class AbsTask(ABC): is_multilingual: bool = False def __init__(self, seed: int = 42, **kwargs: Any): + """The init function. This is called primarily to set the seed. + + Args: + seed: An integer seed. + kwargs: arguments passed to subclasses. + """ self.save_suffix = kwargs.get("save_suffix", "") self.seed = seed @@ -80,8 +98,10 @@ def check_if_dataset_is_superseded(self): ) def dataset_transform(self): - """Transform operations applied to the dataset after loading. - Override this method if your dataset requires any transformation. + """A transform operations applied to the dataset after loading. + + This method is useful when the dataset from Huggingface is not in an `mteb` compatible format. + Override this method if your dataset requires additional transformation. """ pass @@ -182,7 +202,11 @@ def stratified_subsampling( return dataset_dict def load_data(self, **kwargs): - """Load dataset from HuggingFace hub""" + """Loads dataset from HuggingFace hub + + This is the main loading function for Task. Do not overwrite this, instead we recommend using `dataset_transform`, which is called after the + dataset is loaded using `datasets.load_dataset`. 
+ """ if self.data_loaded: return self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) # type: ignore @@ -192,6 +216,7 @@ def load_data(self, **kwargs): def calculate_metadata_metrics( self, overwrite_results: bool = False ) -> dict[str, DescriptiveStatistics | dict[str, DescriptiveStatistics]]: + """Calculates descriptive statistics from the dataset by calling `_calculate_metrics_from_split`.""" if self.metadata.descriptive_stat_path.exists() and not overwrite_results: logger.info("Loading metadata descriptive statistics from cache.") return self.metadata.descriptive_stats diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index e3038b6348..35f59fab13 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -220,8 +220,8 @@ class MetadataDatasetDict(TypedDict, total=False): class TaskMetadata(BaseModel): """Metadata for a task. - Args: - dataset: All arguments to pass to datasets.load_dataset to load the dataset for the task. Refer to https://huggingface.co/docs/datasets/v2.18.0/en/package_reference/loading_methods#datasets.load_dataset + Attributes: + dataset: All arguments to pass to [datasets.load_dataset](https://huggingface.co/docs/datasets/v2.18.0/en/package_reference/loading_methods#datasets.load_dataset) to load the dataset for the task. name: The name of the task. description: A description of the task. type: The type of the task. 
These includes "Classification", "Summarization", "STS", "Retrieval", "Reranking", "Clustering", diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 9aaefda3cb..8ff56acb4e 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -6,10 +6,10 @@ from pydantic import AnyUrl, BeforeValidator, TypeAdapter +from mteb import get_tasks from mteb.abstasks.AbsTask import AbsTask from mteb.load_results.benchmark_results import BenchmarkResults from mteb.load_results.load_results import load_results -from mteb.overview import get_tasks http_url_adapter = TypeAdapter(AnyUrl) UrlString = Annotated[ @@ -21,7 +21,7 @@ class Benchmark: """A benchmark object intended to run a certain benchmark within MTEB. - Args: + Attributes: name: The name of the benchmark tasks: The tasks within the benchmark. description: A description of the benchmark, should include its intended goal and potentially a description of its construction diff --git a/mteb/cli.py b/mteb/cli.py index f9dc1a352f..3c6c821f52 100644 --- a/mteb/cli.py +++ b/mteb/cli.py @@ -1,10 +1,17 @@ -"""Command line interface for various MTEB. +"""This is the command line interface for `mteb`. -MTEB is a benchmark for evaluating the quality of embeddings in various tasks. It supports the following commands: +`mteb` is a toolkit for evaluating the quality of embedding models on various benchmarks. 
It supports the following commands: -- mteb run: Runs a model on a set of tasks -- mteb available_tasks: Lists the available tasks within MTEB -- mteb create_meta: Creates the metadata for a model card from a folder of results +- `mteb run`: Runs a model on a set of tasks +- `mteb available_tasks`: Lists the available tasks within MTEB +- `mteb available_benchmarks`: Lists the available benchmarks +- `mteb create_meta`: Creates the metadata for a model card from a folder of results + +In the following we outline some sample use cases, but if you want to learn more about the arguments for each command you can run: + +``` +mteb {command} --help +``` ## Running Models on Tasks diff --git a/mteb/encoder_interface.py b/mteb/encoder_interface.py index 1fac3a9405..fb4b71ddf7 100644 --- a/mteb/encoder_interface.py +++ b/mteb/encoder_interface.py @@ -44,15 +44,15 @@ def encode( sentences: The sentences to encode. task_name: The name of the task. Sentence-transformers uses this to determine which prompt to use from a specified dictionary. + The order of priorities for prompt selection are: + 1. Composed prompt of task name + prompt type (query or passage) + 2. Specific task prompt + 3. Composed prompt of task type + prompt type (query or passage) + 4. Specific task type prompt + 5. Specific prompt type (query or passage) prompt_type: The name type of prompt. (query or passage) **kwargs: Additional arguments to pass to the encoder. - The order of priorities for prompt selection are: - 1. Composed prompt of task name + prompt type (query or passage) - 2. Specific task prompt - 3. Composed prompt of task type + prompt type (query or passage) - 4. Specific task type prompt - 5. 
Specific prompt type (query or passage) Returns: diff --git a/mteb/models/__init__.py b/mteb/models/__init__.py index ce63e85798..1c70b528ce 100644 --- a/mteb/models/__init__.py +++ b/mteb/models/__init__.py @@ -8,8 +8,7 @@ get_model_metas, model_meta_from_sentence_transformers, ) - -from .sentence_transformer_wrapper import SentenceTransformerWrapper +from mteb.models.sentence_transformer_wrapper import SentenceTransformerWrapper __all__ = [ "MODEL_REGISTRY", diff --git a/mteb/models/misc_models.py b/mteb/models/misc_models.py index 2429cce39b..61dc549b15 100644 --- a/mteb/models/misc_models.py +++ b/mteb/models/misc_models.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from mteb.model_meta import ModelMeta Haon_Chen__speed_embedding_7b_instruct = ModelMeta( diff --git a/mteb/overview.py b/mteb/overview.py index b3a61e73ec..90b29661b8 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -250,10 +250,10 @@ def get_tasks( Args: languages: A list of languages either specified as 3 letter languages codes (ISO 639-3, e.g. "eng") or as script languages codes e.g. "eng-Latn". For multilingual tasks this will also remove languages that are not in the specified list. - script: A list of script codes (ISO 15924 codes). If None, all scripts are included. For multilingual tasks this will also remove scripts + script: A list of script codes (ISO 15924 codes, e.g. "Latn"). If None, all scripts are included. For multilingual tasks this will also remove scripts that are not in the specified list. - domains: A list of task domains. - task_types: A string specifying the type of task. If None, all tasks are included. + domains: A list of task domains, e.g. "Legal", "Medical", "Fiction". + task_types: A string specifying the type of task e.g. "Classification" or "Retrieval". If None, all tasks are included. categories: A list of task categories these include "s2s" (sentence to sentence), "s2p" (sentence to paragraph) and "p2p" (paragraph to paragraph). 
tasks: A list of task names to include. If None, all tasks which pass the filters are included. diff --git a/pyproject.toml b/pyproject.toml index 45b0694ab3..4ab7f1f88e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,13 @@ mteb = "mteb.cli:main" [project.optional-dependencies] dev = ["ruff==0.6.4", # locked so we don't get PRs which fail only due to a lint update "pytest", "pytest-xdist", "pytest-coverage"] +docs = [ + "mkdocs>=1.6.1", + "mkdocs-material>=9.5.47", + "mkdocstrings[python]>=0.18", + "mkdocs-bibtex>=2.16.2", +] + speedtask = ["GPUtil>=1.4.0", "psutil>=5.9.8"] peft = ["peft>=0.11.0"] leaderboard = ["gradio>=5.7.1", "gradio_rangeslider>=0.0.8"] diff --git a/tests/test_benchmark/mock_models.py b/tests/test_benchmark/mock_models.py index 7024d00113..ef393df4c2 100644 --- a/tests/test_benchmark/mock_models.py +++ b/tests/test_benchmark/mock_models.py @@ -12,8 +12,8 @@ from torch import Tensor import mteb -from mteb import SentenceTransformerWrapper from mteb.encoder_interface import PromptType +from mteb.models import SentenceTransformerWrapper from tests.test_benchmark.task_grid import MOCK_TASK_TEST_GRID diff --git a/tests/test_evaluators/test_InstructionRetrievalEvaluator.py b/tests/test_evaluators/test_InstructionRetrievalEvaluator.py index 42bc23b48f..240f66b7db 100644 --- a/tests/test_evaluators/test_InstructionRetrievalEvaluator.py +++ b/tests/test_evaluators/test_InstructionRetrievalEvaluator.py @@ -1,7 +1,7 @@ from __future__ import annotations -from mteb import SentenceTransformerWrapper from mteb.evaluation.evaluators import RetrievalEvaluator, utils +from mteb.models import SentenceTransformerWrapper from tests.test_benchmark.mock_models import MockNumpyEncoder diff --git a/tests/test_evaluators/test_RetrievalEvaluator.py b/tests/test_evaluators/test_RetrievalEvaluator.py index 1d4714aca4..7f3f937839 100644 --- a/tests/test_evaluators/test_RetrievalEvaluator.py +++ b/tests/test_evaluators/test_RetrievalEvaluator.py @@ -2,8 +2,8 
@@ import pytest -from mteb import SentenceTransformerWrapper from mteb.evaluation.evaluators import RetrievalEvaluator +from mteb.models import SentenceTransformerWrapper from tests.test_benchmark.mock_models import MockNumpyEncoder TOL = 0.0001 From 6a8e188e8d748962509d03441277e13fc5d627d3 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Wed, 18 Dec 2024 19:15:05 +0100 Subject: [PATCH 21/40] fix: reorder argument for mteb.get_tasks (#1597) * fix: reorder argument for mteb.get_tasks This should make the function more intuitive to use * typo --------- Co-authored-by: Isaac Chung --- mteb/overview.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mteb/overview.py b/mteb/overview.py index 90b29661b8..77bc06b3fa 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -236,18 +236,20 @@ def to_latex( def get_tasks( + tasks: list[str] | None = None, + *, languages: list[str] | None = None, script: list[str] | None = None, domains: list[TASK_DOMAIN] | None = None, task_types: list[TASK_TYPE] | None = None, categories: list[TASK_CATEGORY] | None = None, - tasks: list[str] | None = None, exclude_superseded: bool = True, eval_splits: list[str] | None = None, ) -> MTEBTasks: """Get a list of tasks based on the specified filters. Args: + tasks: A list of task names to include. If None, all tasks which pass the filters are included. languages: A list of languages either specified as 3 letter languages codes (ISO 639-3, e.g. "eng") or as script languages codes e.g. "eng-Latn". For multilingual tasks this will also remove languages that are not in the specified list. script: A list of script codes (ISO 15924 codes, e.g. "Latn"). If None, all scripts are included. For multilingual tasks this will also remove scripts @@ -256,7 +258,6 @@ def get_tasks( task_types: A string specifying the type of task e.g. "Classification" or "Retrieval". If None, all tasks are included. 
categories: A list of task categories these include "s2s" (sentence to sentence), "s2p" (sentence to paragraph) and "p2p" (paragraph to paragraph). - tasks: A list of task names to include. If None, all tasks which pass the filters are included. exclude_superseded: A boolean flag to exclude datasets which are superseded by another. eval_splits: A list of evaluation splits to include. If None, all splits are included. From d6130ad964dba93560269940f7641b17b63ecc7c Mon Sep 17 00:00:00 2001 From: Christos Tsirigotis Date: Thu, 19 Dec 2024 03:25:05 -0500 Subject: [PATCH 22/40] fix: Make deduplication in PairClassificationEvaluator stable (#1315) * fix: Make deduplication in PairClassificationEvaluator stable * remove prompt type * remove prompt type missed one --------- Co-authored-by: isaac-chung --- .../evaluators/PairClassificationEvaluator.py | 46 +++++++++++++------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/mteb/evaluation/evaluators/PairClassificationEvaluator.py b/mteb/evaluation/evaluators/PairClassificationEvaluator.py index 7a53b7bdf5..ace76bd60c 100644 --- a/mteb/evaluation/evaluators/PairClassificationEvaluator.py +++ b/mteb/evaluation/evaluators/PairClassificationEvaluator.py @@ -72,6 +72,32 @@ def __call__( scores["main_score"] = main_score return scores + @staticmethod + def _encode_unique_texts( + all_texts: list[str], + model: Encoder, + task_name: str | None, + **encode_kwargs: Any, + ): + index_map, all_unique_texts, all_texts_indexes = {}, [], [] + for text in all_texts: + text_hash = hash(text) + if text_hash not in index_map: + index_map[text_hash] = len(all_unique_texts) + all_unique_texts.append(text) + all_texts_indexes.append(index_map[text_hash]) + logger.warning( + f"A total on {len(all_texts) - len(all_unique_texts)}/{len(all_texts)} duplicate texts were found during encoding. Only encoding unique text and duplicating embeddings across." 
+ ) + all_unique_texts_embs = np.asarray( + model.encode( + all_unique_texts, + task_name=task_name, + **encode_kwargs, + ) + ) + return all_unique_texts_embs[all_texts_indexes] + def compute_metrics( self, model: Encoder | EncoderWithSimilarity, @@ -81,22 +107,16 @@ def compute_metrics( if "batch_size" not in encode_kwargs: encode_kwargs["batch_size"] = 32 - sentences = list(set(self.sentences1 + self.sentences2)) - - total_sents = len(self.sentences1) + len(self.sentences2) - n_duplicates = total_sents - len(sentences) - if n_duplicates: - logger.warning( - f"Found {n_duplicates}/{total_sents} duplicates in the input data. Only encoding unique sentences." - ) - embeddings = model.encode( - sentences, + all_sentences = self.sentences1 + self.sentences2 + len_sentences1 = len(self.sentences1) + embeddings = self._encode_unique_texts( + all_sentences, + model, task_name=self.task_name, **encode_kwargs, ) - emb_dict = dict(zip(sentences, embeddings)) - embeddings1 = [emb_dict[sent] for sent in self.sentences1] - embeddings2 = [emb_dict[sent] for sent in self.sentences2] + embeddings1 = embeddings[:len_sentences1] + embeddings2 = embeddings[len_sentences1:] logger.info("Computing similarity distances.") cosine_scores = 1 - paired_cosine_distances(embeddings1, embeddings2) From c9b00ace108ca41170b86e51dd3af559bb2a8283 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Sun, 22 Dec 2024 16:29:33 +0500 Subject: [PATCH 23/40] [V2] Update v2 (#1618) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add new arctic v2.0 models (#1574) * feat: add new arctic v2.0 models * chore: make lint * 1.24.0 Automatically generated by python-semantic-release * fix: Add namaa MrTydi reranking dataset (#1573) * Add dataset class and file requirements * pass tests * make lint changes * adjust meta data and remove load_data --------- Co-authored-by: Omar Elshehy * Update tasks table * 1.24.1 Automatically generated by python-semantic-release 
* fix: Eval langs not correctly passed to monolingual tasks (#1587) * fix SouthAfricanLangClassification.py * add check for langs * lint * 1.24.2 Automatically generated by python-semantic-release * feat: Add ColBert (#1563) * feat: add max_sim operator for IR tasks to support multi-vector models * docs: add doc for Model2VecWrapper.__init__(...) * feat: add ColBERTWrapper to models & add ColBERTv2 * fix: resolve issues * fix: resolve issues * Update README.md Co-authored-by: Roman Solomatin * Update README.md Co-authored-by: Isaac Chung * Update README.md Co-authored-by: Isaac Chung * Update mteb/evaluation/evaluators/RetrievalEvaluator.py Co-authored-by: Isaac Chung * Update README.md Co-authored-by: Isaac Chung * README.md: rm subset * doc: update example for Late Interaction * get colbert running without errors * fix: pass is_query to pylate * fix: max_sim add pad_sequence * feat: integrate Jinja templates for ColBERTv2 and add model prompt handling * feat: add revision & prompt_name * doc: pad_sequence * rm TODO jina colbert v2 * doc: warning: higher resource usage for MaxSim --------- Co-authored-by: sam021313 <40773225+sam021313@users.noreply.github.com> Co-authored-by: Roman Solomatin Co-authored-by: Isaac Chung * 1.25.0 Automatically generated by python-semantic-release * doc: colbert add score_function & doc section (#1592) * doc: colbert add score_function & doc section * doc: Update README.md Co-authored-by: Kenneth Enevoldsen * doc: Update README.md Co-authored-by: Isaac Chung --------- Co-authored-by: sam021313 <40773225+sam021313@users.noreply.github.com> Co-authored-by: Kenneth Enevoldsen Co-authored-by: Isaac Chung * Feat: add support for scoring function (#1594) * add support for scoring function * lint * move similarity to wrapper * remove score function * lint * remove from InstructionRetrievalEvaluator * Update mteb/evaluation/evaluators/RetrievalEvaluator.py Co-authored-by: Kenneth Enevoldsen * remove score function from README.md --------- 
Co-authored-by: Kenneth Enevoldsen * Add new models nvidia, gte, linq (#1436) * Add new models nvidia, gte, linq * add warning for gte-Qwen and nvidia models re: instruction used in docs as well --------- Co-authored-by: isaac-chung * Leaderboard: Refined plots (#1601) * Added embedding size guide to performance-size plot, removed shading on radar chart * Changed plot names to something more descriptive * Made plots failsafe * fix: Leaderboard refinements (#1603) * Added explanation of aggregate measures * Added download button to result tables * Task info gets sorted by task name * Added custom, shareable links for each benchmark * Moved explanation of aggregate metrics to the summary tab * 1.25.1 Automatically generated by python-semantic-release * Feat: Use similarity scores if available (#1602) * Use similarity scores if available * lint * Add NanoBEIR Datasets (#1588) * add NanoClimateFeverRetrieval task, still requires some debugging * move task to correct place in init file * add all Nano datasets and results * format code * Update mteb/tasks/Retrieval/eng/tempCodeRunnerFile.py Co-authored-by: Roman Solomatin * pin revision to commit and add datasets to benchmark.py * create new benchmark for NanoBEIR * add revision when loading datasets * lint --------- Co-authored-by: Roman Solomatin Co-authored-by: isaac-chung * Update tasks table * Feat: Evaluate missing languages (#1584) * init * fix tests * update mock retrieval * update tests * use subsets instead of langs * Apply suggestions from code review Co-authored-by: Isaac Chung * fix tests * add to readme * rename subset in readme --------- Co-authored-by: Isaac Chung * Add IBM Granite Embedding Models (#1613) * add IBM granite embedding models * lint formatting * add adapted_from and superseded_by to ModelMeta * fix: disable co2_tracker for API models (#1614) * 1.25.2 Automatically generated by python-semantic-release * fix: set `use_instructions` to True in models using prompts (#1616) feat: set 
`use_instructions` to True in models using prompts * 1.25.3 Automatically generated by python-semantic-release * update RetrievalEvaluator.py * update imports * update imports and metadata * fix tests * fix tests * fix output path for retrieval * fix similarity function --------- Co-authored-by: Daniel Buades Marcos Co-authored-by: github-actions Co-authored-by: Omar Elshehy <41394057+omarelshehy@users.noreply.github.com> Co-authored-by: Omar Elshehy Co-authored-by: github-actions[bot] Co-authored-by: Sam <40773225+sam-hey@users.noreply.github.com> Co-authored-by: sam021313 <40773225+sam021313@users.noreply.github.com> Co-authored-by: Isaac Chung Co-authored-by: Kenneth Enevoldsen Co-authored-by: Alexey Vatolin Co-authored-by: Márton Kardos Co-authored-by: KGupta10 <92774828+KGupta10@users.noreply.github.com> Co-authored-by: Aashka Trivedi --- README.md | 43 + docs/tasks.md | 2144 +++++++++-------- mteb/abstasks/AbsTask.py | 6 +- mteb/abstasks/AbsTaskBitextMining.py | 7 +- mteb/abstasks/AbsTaskClassification.py | 3 + .../AbsTaskMultilabelClassification.py | 4 + mteb/abstasks/AbsTaskReranking.py | 1 + mteb/abstasks/AbsTaskRetrieval.py | 3 + mteb/benchmarks/benchmarks.py | 24 + .../Reranking/NamaaMrTydiReranking.json | 31 + .../Retrieval/NanoArguAnaRetrieval.json | 31 + .../Retrieval/NanoClimateFeverRetrieval.json | 31 + .../Retrieval/NanoDBPediaRetrieval.json | 31 + .../Retrieval/NanoFEVERRetrieval.json | 31 + .../Retrieval/NanoFiQA2018Retrieval.json | 31 + .../Retrieval/NanoHotpotQARetrieval.json | 31 + .../Retrieval/NanoMSMARCORetrieval.json | 31 + .../Retrieval/NanoNFCorpusRetrieval.json | 31 + .../Retrieval/NanoNQRetrieval.json | 31 + .../Retrieval/NanoQuoraRetrieval.json | 31 + .../Retrieval/NanoSCIDOCSRetrieval.json | 31 + .../Retrieval/NanoSciFactRetrieval.json | 31 + .../Retrieval/NanoTouche2020Retrieval.json | 31 + mteb/evaluation/MTEB.py | 139 +- .../evaluators/BitextMiningEvaluator.py | 29 +- .../evaluators/RetrievalEvaluator.py | 17 +- 
mteb/evaluation/evaluators/model_classes.py | 67 +- mteb/leaderboard/app.py | 58 +- mteb/leaderboard/figures.py | 95 +- mteb/model_meta.py | 4 +- mteb/models/arctic_models.py | 172 +- mteb/models/bge_models.py | 12 +- mteb/models/cohere_models.py | 12 +- mteb/models/colbert_models.py | 218 ++ mteb/models/e5_instruct.py | 11 +- mteb/models/e5_models.py | 14 +- mteb/models/gritlm_models.py | 4 +- mteb/models/gte_models.py | 66 +- mteb/models/ibm_granite_models.py | 114 + mteb/models/instruct_wrapper.py | 5 + mteb/models/jina_models.py | 4 +- mteb/models/linq_models.py | 40 + mteb/models/mxbai_models.py | 2 +- mteb/models/nvidia_models.py | 115 + mteb/models/overview.py | 24 +- mteb/models/rerankers_custom.py | 6 +- mteb/models/rerankers_monot5_based.py | 34 +- mteb/models/ru_sentence_models.py | 9 +- mteb/models/salesforce_models.py | 43 +- mteb/models/sentence_transformer_wrapper.py | 3 + mteb/models/stella_models.py | 4 +- mteb/models/uae_models.py | 4 +- mteb/models/voyage_models.py | 30 +- .../multilingual/HinDialectClassification.py | 46 +- .../SouthAfricanLangClassification.py | 26 +- mteb/tasks/Reranking/__init__.py | 2 + .../Reranking/ara/NamaaMrTydiReranking.py | 39 + mteb/tasks/Reranking/ara/__init__.py | 5 + mteb/tasks/Retrieval/__init__.py | 26 + .../Retrieval/eng/NanoArguAnaRetrieval.py | 85 + .../eng/NanoClimateFeverRetrieval.py | 85 + .../Retrieval/eng/NanoDBPediaRetrieval.py | 75 + .../tasks/Retrieval/eng/NanoFEVERRetrieval.py | 99 + .../Retrieval/eng/NanoFiQA2018Retrieval.py | 85 + .../Retrieval/eng/NanoHotpotQARetrieval.py | 102 + .../Retrieval/eng/NanoMSMARCORetrieval.py | 97 + .../Retrieval/eng/NanoNFCorpusRetrieval.py | 87 + mteb/tasks/Retrieval/eng/NanoNQRetrieval.py | 83 + .../tasks/Retrieval/eng/NanoQuoraRetrieval.py | 86 + .../Retrieval/eng/NanoSCIDOCSRetrieval.py | 85 + .../Retrieval/eng/NanoSciFactRetrieval.py | 83 + .../Retrieval/eng/NanoTouche2020Retrieval.py | 94 + mteb/tasks/Retrieval/eng/__init__.py | 26 + mteb/tasks/__init__.py | 28 + 
pyproject.toml | 3 +- scripts/generate_imports.py | 3 +- tests/test_TaskMetadata.py | 12 + tests/test_benchmark/mock_tasks.py | 109 +- .../test_evaluation/test_split_evaluation.py | 220 +- 79 files changed, 4401 insertions(+), 1314 deletions(-) create mode 100644 mteb/descriptive_stats/Reranking/NamaaMrTydiReranking.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoArguAnaRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoClimateFeverRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoDBPediaRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoFEVERRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoFiQA2018Retrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoHotpotQARetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoMSMARCORetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoNFCorpusRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoQuoraRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoSCIDOCSRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoSciFactRetrieval.json create mode 100644 mteb/descriptive_stats/Retrieval/NanoTouche2020Retrieval.json create mode 100644 mteb/models/colbert_models.py create mode 100644 mteb/models/ibm_granite_models.py create mode 100644 mteb/models/linq_models.py create mode 100644 mteb/models/nvidia_models.py create mode 100644 mteb/tasks/Reranking/ara/NamaaMrTydiReranking.py create mode 100644 mteb/tasks/Reranking/ara/__init__.py create mode 100644 mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py create mode 100644 
mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoNQRetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py diff --git a/README.md b/README.md index 06b9d817a7..3d859ce8c3 100644 --- a/README.md +++ b/README.md @@ -210,6 +210,21 @@ Note that the public leaderboard uses the test splits for all datasets except MS
+ +
+ Selecting evaluation subset + +### Selecting evaluation subset +You can evaluate only on selected subsets. For example, if you want to evaluate only the `subset_name_to_run` subset of all tasks, do the following: + +```python +evaluation.run(model, eval_subsets=["subset_name_to_run"]) +``` + +Monolingual tasks have a `default` subset; other tasks have subsets that are specific to the dataset. + +
+
Using a custom model @@ -315,6 +330,34 @@ evaluation.run( ) ``` +
+ +
+ Late Interaction (ColBERT) + +### Using Late Interaction models for retrieval + +```python +from mteb import MTEB +import mteb + + +colbert = mteb.get_model("colbert-ir/colbertv2.0") +tasks = mteb.get_tasks(tasks=["NFCorpus"], languages=["eng"]) + +eval_splits = ["test"] + +evaluation = MTEB(tasks=tasks) + +evaluation.run( + colbert, + eval_splits=eval_splits, + corpus_chunk_size=500, +) +``` +This implementation employs the MaxSim operation to compute the similarity between sentences. While MaxSim provides high-quality results, it processes a larger number of embeddings, potentially leading to increased resource usage. To manage resource consumption, consider lowering the `corpus_chunk_size` parameter. + +
diff --git a/docs/tasks.md b/docs/tasks.md index b7be0d58cb..f4ec3c792e 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -1,7 +1,6 @@ -# Task Overview +## Available tasks +The following tables give you an overview of the tasks in MTEB. -The following tables give you an overview of the tasks within MTEB. -
@@ -64,7 +63,7 @@ The following tables give you an overview of the tasks within MTEB. | [CLSClusteringP2P.v2](https://arxiv.org/abs/2209.05034) (Yudong Li, 2022) | ['cmn'] | Clustering | p2p | [Academic, Written] | None | None | | [CLSClusteringS2S.v2](https://arxiv.org/abs/2209.05034) (Yudong Li, 2022) | ['cmn'] | Clustering | s2s | [Academic, Written] | None | None | | [CMedQAv1-reranking](https://github.com/zhangsheng93/cMedQA) (Zhang et al., 2017) | ['cmn'] | Reranking | s2s | [Medical, Written] | None | None | -| [CMedQAv2-reranking](https://github.com/zhangsheng93/cMedQA2) (S. Zhang, 2018) | ['cmn'] | Reranking | s2s | | None | None | +| [CMedQAv2-reranking](https://github.com/zhangsheng93/cMedQA2) (S. Zhang, 2018) | ['cmn'] | Reranking | s2s | [Medical, Written] | None | None | | [COIRCodeSearchNetRetrieval](https://huggingface.co/datasets/code_search_net/) (Husain et al., 2019) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 1056326} | {'test': {'number_of_characters': 36843313, 'num_samples': 1056326, 'num_queries': 52561, 'num_documents': 1003765, 'min_document_length': 54, 'average_document_length': 34.71, 'max_document_length': 334374, 'unique_documents': 1003765, 'min_query_length': 2, 'average_query_length': 38.19, 'max_query_length': 2, 'unique_queries': 52561, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 52561, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 14574651, 'num_samples': 295228, 'num_queries': 14918, 'num_documents': 280310, 'min_document_length': 95, 'average_document_length': 49.99, 'max_document_length': 14008, 'unique_documents': 280310, 'min_query_length': 2, 'average_query_length': 37.58, 'max_query_length': 2, 'unique_queries': 14918, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 
14918}, 'javascript': {'number_of_characters': 2587540, 'num_samples': 68145, 'num_queries': 3291, 'num_documents': 64854, 'min_document_length': 87, 'average_document_length': 37.9, 'max_document_length': 334374, 'unique_documents': 64854, 'min_query_length': 2, 'average_query_length': 39.41, 'max_query_length': 2, 'unique_queries': 3291, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3291}, 'go': {'number_of_characters': 3641108, 'num_samples': 190562, 'num_queries': 8122, 'num_documents': 182440, 'min_document_length': 54, 'average_document_length': 17.96, 'max_document_length': 5280, 'unique_documents': 182440, 'min_query_length': 2, 'average_query_length': 44.92, 'max_query_length': 2, 'unique_queries': 8122, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 8122}, 'ruby': {'number_of_characters': 629446, 'num_samples': 28831, 'num_queries': 1261, 'num_documents': 27570, 'min_document_length': 83, 'average_document_length': 20.83, 'max_document_length': 3992, 'unique_documents': 27570, 'min_query_length': 2, 'average_query_length': 43.73, 'max_query_length': 2, 'unique_queries': 1261, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1261}, 'java': {'number_of_characters': 6791137, 'num_samples': 191821, 'num_queries': 10955, 'num_documents': 180866, 'min_document_length': 77, 'average_document_length': 35.55, 'max_document_length': 7615, 'unique_documents': 180866, 'min_query_length': 2, 'average_query_length': 33.02, 'max_query_length': 2, 'unique_queries': 10955, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10955}, 'php': {'number_of_characters': 8619431, 'num_samples': 281739, 'num_queries': 14014, 'num_documents': 267725, 
'min_document_length': 94, 'average_document_length': 30.2, 'max_document_length': 4904, 'unique_documents': 267725, 'min_query_length': 2, 'average_query_length': 38.21, 'max_query_length': 2, 'unique_queries': 14014, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14014}}}} | | [CPUSpeedTask](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/blob/c8376f967d1294419be1d3eb41217d04cd3a65d3/src/seb/registered_tasks/speed.py#L83-L96) | ['eng'] | Speed | s2s | [Fiction, Written] | None | None | | [CQADupstackAndroidRetrieval](http://nlp.cis.unimelb.edu.au/resources/cqadupstack/) (Hoogeveen et al., 2015) | ['eng'] | Retrieval | s2p | | None | None | @@ -120,11 +119,12 @@ The following tables give you an overview of the tasks within MTEB. | [CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [CUADVolumeRestrictionLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [CUADWarrantyDurationLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | +| [CUREv1](https://huggingface.co/datasets/clinia/CUREv1) | ['eng', 'fra', 'spa'] | Retrieval | s2p | [Medical, Academic, Written] | None | None | | [CanadaTaxCourtOutcomesLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [CataloniaTweetClassification](https://aclanthology.org/2020.lrec-1.171/) | ['cat', 'spa'] | Classification | s2s | [Social, Government, Written] | None | None | | 
[ClimateFEVER](https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | | None | None | | [ClimateFEVERHardNegatives](https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | | None | None | -| [CmedqaRetrieval](https://aclanthology.org/2022.emnlp-main.357.pdf) | ['cmn'] | Retrieval | s2p | | None | None | +| [CmedqaRetrieval](https://aclanthology.org/2022.emnlp-main.357.pdf) | ['cmn'] | Retrieval | s2p | [Medical, Written] | None | None | | [Cmnli](https://huggingface.co/datasets/clue/viewer/cmnli) | ['cmn'] | PairClassification | s2s | | None | None | | [CodeEditSearchRetrieval](https://huggingface.co/datasets/cassanof/CodeEditSearch/viewer) (Niklas Muennighoff, 2023) | ['c', 'c++', 'go', 'java', 'javascript', 'php', 'python', 'ruby', 'rust', 'scala', 'shell', 'swift', 'typescript'] | Retrieval | p2p | [Programming, Written] | {'train': 26000} | {'train': {'number_of_characters': 935841, 'num_samples': 26000, 'num_queries': 13000, 'num_documents': 13000, 'min_document_length': 18, 'average_document_length': 70.99, 'max_document_length': 2532, 'unique_documents': 13000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 13000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 13000, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 70519, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 21, 'average_document_length': 69.52, 'max_document_length': 1811, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'javascript': {'number_of_characters': 
57880, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 18, 'average_document_length': 56.88, 'max_document_length': 601, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'typescript': {'number_of_characters': 61092, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 60.09, 'max_document_length': 659, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'go': {'number_of_characters': 71797, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 70.8, 'max_document_length': 1529, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'ruby': {'number_of_characters': 67900, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 20, 'average_document_length': 66.9, 'max_document_length': 751, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'java': {'number_of_characters': 63984, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 23, 'average_document_length': 62.98, 'max_document_length': 807, 
'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'php': {'number_of_characters': 62927, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 21, 'average_document_length': 61.93, 'max_document_length': 766, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'c': {'number_of_characters': 98588, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 20, 'average_document_length': 97.59, 'max_document_length': 1672, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'c++': {'number_of_characters': 115480, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 22, 'average_document_length': 114.48, 'max_document_length': 1856, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'rust': {'number_of_characters': 68503, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 67.5, 'max_document_length': 2532, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'swift': {'number_of_characters': 58279, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 57.28, 'max_document_length': 727, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'scala': {'number_of_characters': 65833, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 22, 'average_document_length': 64.83, 'max_document_length': 685, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'shell': {'number_of_characters': 73059, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 18, 'average_document_length': 72.06, 'max_document_length': 813, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}}} | | [CodeFeedbackMT](https://arxiv.org/abs/2402.14658) (Tianyu Zheng, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 79660} | {'test': {'number_of_characters': 156266302, 'num_samples': 79660, 'num_queries': 13277, 'num_documents': 66383, 'min_document_length': 127, 'average_document_length': 885.13, 'max_document_length': 32432, 'unique_documents': 66383, 'min_query_length': 2, 'average_query_length': 7344.18, 'max_query_length': 9403, 'unique_queries': 13277, 
'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 13277}} | @@ -348,7 +348,7 @@ The following tables give you an overview of the tasks within MTEB. | [MultilingualSentiment](https://github.com/tyqiangz/multilingual-sentiment-datasets) | ['cmn'] | Classification | s2s | | None | None | | [MultilingualSentimentClassification](https://huggingface.co/datasets/mteb/multilingual-sentiment-classification) | ['ara', 'bam', 'bul', 'cmn', 'cym', 'deu', 'dza', 'ell', 'eng', 'eus', 'fas', 'fin', 'heb', 'hrv', 'ind', 'jpn', 'kor', 'mlt', 'nor', 'pol', 'rus', 'slk', 'spa', 'tha', 'tur', 'uig', 'urd', 'vie', 'zho'] | Classification | s2s | [Reviews, Written] | None | None | | [MyanmarNews](https://huggingface.co/datasets/myanmar_news) (A. H. Khine, 2017) | ['mya'] | Classification | p2p | [News, Written] | None | None | -| [NFCorpus](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | | {'test': 3956} | {'test': {'number_of_characters': 1612.55, 'num_samples': 3956, 'num_queries': 323, 'num_documents': 3633, 'average_document_length': 0.44, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 38.19}} | +| [NFCorpus](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | [Medical, Academic, Written] | {'test': 3956} | {'test': {'number_of_characters': 1612.55, 'num_samples': 3956, 'num_queries': 323, 'num_documents': 3633, 'average_document_length': 0.44, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 38.19}} | | [NFCorpus-PL](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | | [NLPJournalAbsIntroRetrieval](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Retrieval | s2s | [Academic, Written] | None | None | | [NLPJournalTitleAbsRetrieval](https://github.com/sbintuitions/JMTEB) | 
['jpn'] | Retrieval | s2s | [Academic, Written] | None | None | @@ -360,6 +360,20 @@ The following tables give you an overview of the tasks within MTEB. | [NTREXBitextMining](https://huggingface.co/datasets/davidstap/NTREX) | ['afr', 'amh', 'arb', 'aze', 'bak', 'bel', 'bem', 'ben', 'bod', 'bos', 'bul', 'cat', 'ces', 'ckb', 'cym', 'dan', 'deu', 'div', 'dzo', 'ell', 'eng', 'eus', 'ewe', 'fao', 'fas', 'fij', 'fil', 'fin', 'fra', 'fuc', 'gle', 'glg', 'guj', 'hau', 'heb', 'hin', 'hmn', 'hrv', 'hun', 'hye', 'ibo', 'ind', 'isl', 'ita', 'jpn', 'kan', 'kat', 'kaz', 'khm', 'kin', 'kir', 'kmr', 'kor', 'lao', 'lav', 'lit', 'ltz', 'mal', 'mar', 'mey', 'mkd', 'mlg', 'mlt', 'mon', 'mri', 'msa', 'mya', 'nde', 'nep', 'nld', 'nno', 'nob', 'nso', 'nya', 'orm', 'pan', 'pol', 'por', 'prs', 'pus', 'ron', 'rus', 'shi', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'spa', 'sqi', 'srp', 'ssw', 'swa', 'swe', 'tah', 'tam', 'tat', 'tel', 'tgk', 'tha', 'tir', 'ton', 'tsn', 'tuk', 'tur', 'uig', 'ukr', 'urd', 'uzb', 'ven', 'vie', 'wol', 'xho', 'yor', 'yue', 'zho', 'zul'] | BitextMining | s2s | [News, Written] | {'test': 3826252} | {'test': {'num_samples': 3826252, 'number_of_characters': 988355274, 'unique_pairs': 3820263, 'min_sentence1_length': 1, 'average_sentence1_length': 129.15, 'max_sentence1_length': 773, 'unique_sentence1': 241259, 'min_sentence2_length': 1, 'average_sentence2_length': 129.15, 'max_sentence2_length': 773, 'unique_sentence2': 241259, 'hf_subset_descriptive_stats': {'afr_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 520490, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'afr_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 564002, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 
'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'afr_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 516072, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'afr_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 526155, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'afr_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 530560, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'afr_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 549109, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'afr_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 560267, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'afr_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 516709, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 
134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'afr_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 519796, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'afr_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 520179, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'amh_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 415227, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'amh_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 437473, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'amh_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 413608, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'amh_Ethi-nso_Latn': {'num_samples': 1997, 'number_of_characters': 459006, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 
83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'amh_Ethi-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 404938, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'amh_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 458799, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'amh_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 455649, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'amh_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 440016, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'amh_Ethi-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 332745, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'amh_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 501790, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 
83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'amh_Ethi-wol_Latn': {'num_samples': 1997, 'number_of_characters': 407310, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'amh_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 435597, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'amh_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 483595, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'amh_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 425239, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'arb_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 474983, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'arb_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 483548, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 
115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'arb_Arab-deu_Latn': {'num_samples': 1997, 'number_of_characters': 526831, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'arb_Arab-ell_Grek': {'num_samples': 1997, 'number_of_characters': 530308, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'arb_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 478901, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'arb_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 474520, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'arb_Arab-fin_Latn': {'num_samples': 1997, 'number_of_characters': 500981, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'arb_Arab-fra_Latn': {'num_samples': 1997, 'number_of_characters': 524289, 'unique_pairs': 1996, 'min_sentence1_length': 5, 
'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'arb_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 431477, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'arb_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 492756, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'arb_Arab-hun_Latn': {'num_samples': 1997, 'number_of_characters': 509557, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'arb_Arab-ind_Latn': {'num_samples': 1997, 'number_of_characters': 518153, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'arb_Arab-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 342807, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'arb_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 477127, 'unique_pairs': 1996, 'min_sentence1_length': 5, 
'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'arb_Arab-kor_Hang': {'num_samples': 1997, 'number_of_characters': 364586, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'arb_Arab-lit_Latn': {'num_samples': 1997, 'number_of_characters': 490578, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'arb_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 445016, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'arb_Arab-nld_Latn': {'num_samples': 1997, 'number_of_characters': 523096, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'arb_Arab-pol_Latn': {'num_samples': 1997, 'number_of_characters': 509047, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'arb_Arab-por_Latn': {'num_samples': 1997, 'number_of_characters': 508396, 'unique_pairs': 1996, 'min_sentence1_length': 
5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'arb_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 473717, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'arb_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 473814, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'arb_Arab-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 506074, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'arb_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 446094, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'arb_Arab-spa_Latn': {'num_samples': 1997, 'number_of_characters': 519381, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'arb_Arab-swa_Latn': {'num_samples': 1997, 'number_of_characters': 503690, 'unique_pairs': 1997, 
'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'arb_Arab-swe_Latn': {'num_samples': 1997, 'number_of_characters': 483008, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'arb_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 541142, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'arb_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 505328, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'arb_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 496794, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'arb_Arab-vie_Latn': {'num_samples': 1997, 'number_of_characters': 502302, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'arb_Arab-zho_Hant': {'num_samples': 1997, 'number_of_characters': 322659, 'unique_pairs': 
1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'arb_Arab-zul_Latn': {'num_samples': 1997, 'number_of_characters': 488913, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'aze_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 515960, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'aze_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517354, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'aze_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 529910, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'aze_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 520498, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'aze_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 515560, 
'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'aze_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 554908, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'aze_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 535247, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'aze_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 580656, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'aze_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 563329, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'bak_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 515960, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'bak_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 
494046, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bak_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 506602, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'bak_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 497190, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'bak_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 492252, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'bak_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 531600, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'bak_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 511939, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'bak_Cyrl-uig_Arab': {'num_samples': 1997, 
'number_of_characters': 557348, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'bak_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 540021, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'bel_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 511000, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'bel_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 525979, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'bel_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497408, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bel_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bel_Cyrl-hrv_Latn': {'num_samples': 
1997, 'number_of_characters': 512015, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bel_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 523981, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bel_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 533956, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'bel_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 530983, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bel_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 509059, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bel_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 508986, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bel_Cyrl-srp_Cyrl': 
{'num_samples': 1997, 'number_of_characters': 508393, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bel_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 512231, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bel_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518873, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'bem_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 546212, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bem_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 537470, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'bem_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526972, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 
'bem_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 602279, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'bem_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 596231, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'bem_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582774, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'bem_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 596822, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'bem_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 598248, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'ben_Beng-arb_Arab': {'num_samples': 1997, 'number_of_characters': 474983, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 
1995}, 'ben_Beng-deu_Latn': {'num_samples': 1997, 'number_of_characters': 539452, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ben_Beng-div_Thaa': {'num_samples': 1997, 'number_of_characters': 547650, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'ben_Beng-ell_Grek': {'num_samples': 1997, 'number_of_characters': 542929, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'ben_Beng-eng_Latn': {'num_samples': 1997, 'number_of_characters': 491522, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ben_Beng-eus_Latn': {'num_samples': 1997, 'number_of_characters': 519005, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'ben_Beng-fas_Arab': {'num_samples': 1997, 'number_of_characters': 487141, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 
'unique_sentence2': 1995}, 'ben_Beng-fin_Latn': {'num_samples': 1997, 'number_of_characters': 513602, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ben_Beng-fra_Latn': {'num_samples': 1997, 'number_of_characters': 536910, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ben_Beng-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 488733, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'ben_Beng-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 444098, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ben_Beng-hin_Deva': {'num_samples': 1997, 'number_of_characters': 505377, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ben_Beng-hun_Latn': {'num_samples': 1997, 'number_of_characters': 522178, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 
508, 'unique_sentence2': 1997}, 'ben_Beng-ind_Latn': {'num_samples': 1997, 'number_of_characters': 530774, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ben_Beng-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 355428, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ben_Beng-kan_Knda': {'num_samples': 1997, 'number_of_characters': 509338, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'ben_Beng-kor_Hang': {'num_samples': 1997, 'number_of_characters': 377207, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ben_Beng-lit_Latn': {'num_samples': 1997, 'number_of_characters': 503199, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ben_Beng-mar_Deva': {'num_samples': 1997, 'number_of_characters': 504689, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 
443, 'unique_sentence2': 1995}, 'ben_Beng-nep_Deva': {'num_samples': 1997, 'number_of_characters': 492025, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'ben_Beng-nld_Latn': {'num_samples': 1997, 'number_of_characters': 535717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ben_Beng-pan_Guru': {'num_samples': 1997, 'number_of_characters': 494224, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'ben_Beng-pol_Latn': {'num_samples': 1997, 'number_of_characters': 521668, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ben_Beng-por_Latn': {'num_samples': 1997, 'number_of_characters': 521017, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ben_Beng-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 518695, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 
'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ben_Beng-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 502543, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'ben_Beng-snd_Arab': {'num_samples': 1997, 'number_of_characters': 464129, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'ben_Beng-spa_Latn': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ben_Beng-swa_Latn': {'num_samples': 1997, 'number_of_characters': 516311, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ben_Beng-swe_Latn': {'num_samples': 1997, 'number_of_characters': 495629, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ben_Beng-tam_Taml': {'num_samples': 1997, 'number_of_characters': 553763, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 
155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ben_Beng-tel_Telu': {'num_samples': 1997, 'number_of_characters': 491329, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'ben_Beng-tur_Latn': {'num_samples': 1997, 'number_of_characters': 509415, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'ben_Beng-urd_Arab': {'num_samples': 1997, 'number_of_characters': 491800, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'ben_Beng-vie_Latn': {'num_samples': 1997, 'number_of_characters': 514923, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ben_Beng-zho_Hant': {'num_samples': 1997, 'number_of_characters': 335280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ben_Beng-zul_Latn': {'num_samples': 1997, 'number_of_characters': 501534, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 
'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'bod_Tibt-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 543850, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'bod_Tibt-eng_Latn': {'num_samples': 1997, 'number_of_characters': 548349, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bod_Tibt-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 589120, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'bod_Tibt-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 567609, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'bod_Tibt-mon_Mong': {'num_samples': 1997, 'number_of_characters': 559677, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'bod_Tibt-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 612483, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 
'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'bod_Tibt-tha_Thai': {'num_samples': 1997, 'number_of_characters': 538097, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'bos_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 511000, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'bos_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 524799, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'bos_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 496228, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bos_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 502630, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bos_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 510835, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 
1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bos_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 522801, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bos_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 532776, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'bos_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 529803, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bos_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 507879, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bos_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 507806, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bos_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 507213, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 
'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bos_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 511051, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bos_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 517693, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'bul_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 525979, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'bul_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 524799, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'bul_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 511207, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bul_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517609, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 
493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bul_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 525814, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bul_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 537780, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bul_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 547755, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'bul_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 544782, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bul_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 522858, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bul_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 522785, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 
'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bul_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 522192, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bul_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 526030, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bul_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 532672, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'cat_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 530680, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'cat_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 576068, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'cat_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 554946, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 
141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'cat_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 572177, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'cat_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 560435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'cat_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 560175, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'cat_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 575445, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'cat_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 571160, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ces_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 497408, 'unique_pairs': 1997, 'min_sentence1_length': 8, 
'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'ces_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 496228, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'ces_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 511207, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'ces_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 489038, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ces_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 497243, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ces_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 509209, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'ces_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 519184, 'unique_pairs': 1997, 
'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ces_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 516211, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ces_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 494287, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'ces_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 494214, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ces_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 493621, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'ces_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 497459, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'ces_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 504101, 'unique_pairs': 
1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'ckb_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 483548, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ckb_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500087, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ckb_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 495706, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ckb_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 452663, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ckb_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 498313, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'ckb_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 466202, 
'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'ckb_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 494903, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'ckb_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 495000, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'ckb_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 467280, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'ckb_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 526514, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'cym_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514225, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.45, 'max_sentence1_length': 444, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'cym_Latn-gle_Latn': {'num_samples': 1997, 'number_of_characters': 
561314, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.45, 'max_sentence1_length': 444, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 147.63, 'max_sentence2_length': 461, 'unique_sentence2': 1997}, 'dan_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 520490, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'dan_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547788, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'dan_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499858, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'dan_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509941, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'dan_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 514346, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'dan_Latn-ltz_Latn': {'num_samples': 1997, 
'number_of_characters': 532895, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'dan_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 544053, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'dan_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 500495, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'dan_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 503582, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'dan_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 503965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'deu_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 564002, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'deu_Latn-arb_Arab': {'num_samples': 
1997, 'number_of_characters': 526831, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'deu_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 539452, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'deu_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 547788, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'deu_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 594777, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'deu_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 543370, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'deu_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 553453, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'deu_Latn-fas_Arab': 
{'num_samples': 1997, 'number_of_characters': 538989, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'deu_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 565450, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'deu_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 588758, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'deu_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 495946, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'deu_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 557225, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'deu_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 574026, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 
'deu_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 582622, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'deu_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 557858, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'deu_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 407276, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'deu_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 429055, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'deu_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 555047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'deu_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 576407, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 
'deu_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 587565, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'deu_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 544007, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'deu_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 547094, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'deu_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 573516, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'deu_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 572865, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'deu_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 570543, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 
1996}, 'deu_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 583850, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'deu_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 568159, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'deu_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 547477, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'deu_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 605611, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'deu_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 561263, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'deu_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 566771, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 
'unique_sentence2': 1996}, 'deu_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 387128, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'deu_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 553382, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'div_Thaa-ben_Beng': {'num_samples': 1997, 'number_of_characters': 547650, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'div_Thaa-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551568, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'div_Thaa-eus_Latn': {'num_samples': 1997, 'number_of_characters': 579051, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'div_Thaa-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 548779, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 
378, 'unique_sentence2': 1997}, 'div_Thaa-hin_Deva': {'num_samples': 1997, 'number_of_characters': 565423, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'div_Thaa-kan_Knda': {'num_samples': 1997, 'number_of_characters': 569384, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'div_Thaa-mar_Deva': {'num_samples': 1997, 'number_of_characters': 564735, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'div_Thaa-nep_Deva': {'num_samples': 1997, 'number_of_characters': 552071, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'div_Thaa-pan_Guru': {'num_samples': 1997, 'number_of_characters': 554270, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'div_Thaa-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 562589, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 
'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'div_Thaa-snd_Arab': {'num_samples': 1997, 'number_of_characters': 524175, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'div_Thaa-tam_Taml': {'num_samples': 1997, 'number_of_characters': 613809, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'div_Thaa-tel_Telu': {'num_samples': 1997, 'number_of_characters': 551375, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'div_Thaa-urd_Arab': {'num_samples': 1997, 'number_of_characters': 551846, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'dzo_Tibt-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 543850, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'dzo_Tibt-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490941, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 
124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'dzo_Tibt-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 531712, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'dzo_Tibt-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 510201, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'dzo_Tibt-mon_Mong': {'num_samples': 1997, 'number_of_characters': 502269, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'dzo_Tibt-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 555075, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'dzo_Tibt-tha_Thai': {'num_samples': 1997, 'number_of_characters': 480689, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'ell_Grek-arb_Arab': {'num_samples': 1997, 'number_of_characters': 530308, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 
'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ell_Grek-ben_Beng': {'num_samples': 1997, 'number_of_characters': 542929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'ell_Grek-deu_Latn': {'num_samples': 1997, 'number_of_characters': 594777, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ell_Grek-eng_Latn': {'num_samples': 1997, 'number_of_characters': 546847, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ell_Grek-fas_Arab': {'num_samples': 1997, 'number_of_characters': 542466, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ell_Grek-fin_Latn': {'num_samples': 1997, 'number_of_characters': 568927, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ell_Grek-fra_Latn': {'num_samples': 1997, 'number_of_characters': 592235, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 
8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ell_Grek-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 499423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ell_Grek-hin_Deva': {'num_samples': 1997, 'number_of_characters': 560702, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ell_Grek-hun_Latn': {'num_samples': 1997, 'number_of_characters': 577503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ell_Grek-hye_Armn': {'num_samples': 1997, 'number_of_characters': 563842, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'ell_Grek-ind_Latn': {'num_samples': 1997, 'number_of_characters': 586099, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ell_Grek-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 410753, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 
'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ell_Grek-kat_Geor': {'num_samples': 1997, 'number_of_characters': 565719, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'ell_Grek-kor_Hang': {'num_samples': 1997, 'number_of_characters': 432532, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ell_Grek-lit_Latn': {'num_samples': 1997, 'number_of_characters': 558524, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ell_Grek-nld_Latn': {'num_samples': 1997, 'number_of_characters': 591042, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ell_Grek-pol_Latn': {'num_samples': 1997, 'number_of_characters': 576993, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ell_Grek-por_Latn': {'num_samples': 1997, 'number_of_characters': 576342, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 
'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ell_Grek-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 574020, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ell_Grek-spa_Latn': {'num_samples': 1997, 'number_of_characters': 587327, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ell_Grek-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 582734, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'ell_Grek-swa_Latn': {'num_samples': 1997, 'number_of_characters': 571636, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ell_Grek-swe_Latn': {'num_samples': 1997, 'number_of_characters': 550954, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ell_Grek-tam_Taml': {'num_samples': 1997, 'number_of_characters': 609088, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 
1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ell_Grek-tur_Latn': {'num_samples': 1997, 'number_of_characters': 564740, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'ell_Grek-vie_Latn': {'num_samples': 1997, 'number_of_characters': 570248, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ell_Grek-zho_Hant': {'num_samples': 1997, 'number_of_characters': 390605, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ell_Grek-zul_Latn': {'num_samples': 1997, 'number_of_characters': 556859, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'eng_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 516072, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'eng_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 415227, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 
'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'eng_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 478901, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'eng_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 517354, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'eng_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 494046, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'eng_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 503810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'eng_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 546212, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'eng_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491522, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 
437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'eng_Latn-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 548349, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'eng_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 502630, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'eng_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 517609, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'eng_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 530680, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'eng_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 489038, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'eng_Latn-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 500087, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 
'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'eng_Latn-cym_Latn': {'num_samples': 1997, 'number_of_characters': 514225, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.45, 'max_sentence2_length': 444, 'unique_sentence2': 1997}, 'eng_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 499858, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'eng_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 543370, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'eng_Latn-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551568, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'eng_Latn-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 490941, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'eng_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 546847, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 
124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'eng_Latn-eus_Latn': {'num_samples': 1997, 'number_of_characters': 522923, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'eng_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 486698, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'eng_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 505523, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'eng_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 491059, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'eng_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 548225, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'eng_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 541140, 'unique_pairs': 1997, 'min_sentence1_length': 7, 
'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'eng_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 517520, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'eng_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 540828, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'eng_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 476200, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'eng_Latn-gle_Latn': {'num_samples': 1997, 'number_of_characters': 542529, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 147.63, 'max_sentence2_length': 461, 'unique_sentence2': 1997}, 'eng_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 519706, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'eng_Latn-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492651, 'unique_pairs': 1997, 
'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'eng_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 517686, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'eng_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 448016, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'eng_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509295, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'eng_Latn-hmn_Latn': {'num_samples': 1997, 'number_of_characters': 578510, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 165.64, 'max_sentence2_length': 643, 'unique_sentence2': 1997}, 'eng_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 503645, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'eng_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 526096, 'unique_pairs': 
1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'eng_Latn-hye_Armn': {'num_samples': 1997, 'number_of_characters': 512435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eng_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 493821, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'eng_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 534692, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'eng_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 509928, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'eng_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 536937, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'eng_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 359346, 
'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'eng_Latn-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513256, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'eng_Latn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 514312, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'eng_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 507996, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'eng_Latn-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 536211, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'eng_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 551507, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'eng_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 
498584, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'eng_Latn-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 493666, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'eng_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 381125, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'eng_Latn-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 514700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'eng_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 515908, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'eng_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 507117, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'eng_Latn-ltz_Latn': {'num_samples': 1997, 
'number_of_characters': 528477, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'eng_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 551872, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'eng_Latn-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'eng_Latn-mey_Arab': {'num_samples': 1997, 'number_of_characters': 461555, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'eng_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 515611, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'eng_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 568028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'eng_Latn-mlt_Latn': {'num_samples': 
1997, 'number_of_characters': 525195, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'eng_Latn-mon_Mong': {'num_samples': 1997, 'number_of_characters': 506768, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'eng_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 521844, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'eng_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 524903, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'eng_Latn-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 559574, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'eng_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 545459, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'eng_Latn-nep_Deva': 
{'num_samples': 1997, 'number_of_characters': 495943, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eng_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 539635, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'eng_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 496077, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'eng_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 499164, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'eng_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 539219, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'eng_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 
'eng_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 485151, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'eng_Latn-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498142, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'eng_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 525586, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'eng_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 524935, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'eng_Latn-prs_Arab': {'num_samples': 1997, 'number_of_characters': 490256, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'eng_Latn-pus_Arab': {'num_samples': 1997, 'number_of_characters': 490353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 
1996}, 'eng_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 540205, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'eng_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 522613, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'eng_Latn-shi_Arab': {'num_samples': 1997, 'number_of_characters': 462633, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'eng_Latn-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506461, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eng_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 500689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'eng_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 500616, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 
'unique_sentence2': 1996}, 'eng_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 525575, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'eng_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 546050, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'eng_Latn-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468047, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'eng_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 539012, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'eng_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 535920, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'eng_Latn-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 531327, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 
461, 'unique_sentence2': 1996}, 'eng_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 500023, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'eng_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 503861, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'eng_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 535862, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'eng_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 520229, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'eng_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 499547, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'eng_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 557343, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 
'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'eng_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557681, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'eng_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 493646, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'eng_Latn-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495247, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eng_Latn-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 521867, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'eng_Latn-tha_Thai': {'num_samples': 1997, 'number_of_characters': 485188, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'eng_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 412958, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 
82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'eng_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 561360, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'eng_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 582003, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'eng_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 532994, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'eng_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 513333, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'eng_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 558742, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'eng_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 510503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 
'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'eng_Latn-urd_Arab': {'num_samples': 1997, 'number_of_characters': 495718, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'eng_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 541415, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'eng_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 547476, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'eng_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 518841, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'eng_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 487523, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'eng_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 515810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'eng_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 563808, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'eng_Latn-yue_Hant': {'num_samples': 1997, 'number_of_characters': 326607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'eng_Latn-zho_Hans': {'num_samples': 1997, 'number_of_characters': 332681, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'eng_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 339198, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'eng_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 505452, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'eus_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 519005, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'eus_Latn-div_Thaa': {'num_samples': 1997, 'number_of_characters': 579051, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'eus_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 522923, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'eus_Latn-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 520134, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'eus_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 536778, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'eus_Latn-kan_Knda': {'num_samples': 1997, 'number_of_characters': 540739, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'eus_Latn-mar_Deva': {'num_samples': 1997, 'number_of_characters': 536090, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 
1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'eus_Latn-nep_Deva': {'num_samples': 1997, 'number_of_characters': 523426, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eus_Latn-pan_Guru': {'num_samples': 1997, 'number_of_characters': 525625, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'eus_Latn-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 533944, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eus_Latn-snd_Arab': {'num_samples': 1997, 'number_of_characters': 495530, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'eus_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 585164, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'eus_Latn-tel_Telu': {'num_samples': 1997, 'number_of_characters': 522730, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 
'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eus_Latn-urd_Arab': {'num_samples': 1997, 'number_of_characters': 523201, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'ewe_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 537470, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'ewe_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 486698, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ewe_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 467458, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'ewe_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 542765, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'ewe_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 536717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 
'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'ewe_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 523260, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'ewe_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 537308, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'ewe_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 538734, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'fao_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 526155, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fao_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 509941, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'fao_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 553453, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 
129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fao_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 505523, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fao_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 520011, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'fao_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 538560, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'fao_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 549718, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fao_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 506160, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'fao_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 509247, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 
129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'fao_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 509630, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fas_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 474520, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'fas_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 487141, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fas_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 495706, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'fas_Arab-deu_Latn': {'num_samples': 1997, 'number_of_characters': 538989, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fas_Arab-ell_Grek': {'num_samples': 1997, 'number_of_characters': 542466, 'unique_pairs': 1996, 'min_sentence1_length': 9, 
'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fas_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 491059, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fas_Arab-fin_Latn': {'num_samples': 1997, 'number_of_characters': 513139, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'fas_Arab-fra_Latn': {'num_samples': 1997, 'number_of_characters': 536447, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'fas_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 443635, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fas_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 504914, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fas_Arab-hun_Latn': {'num_samples': 1997, 'number_of_characters': 521715, 'unique_pairs': 1997, 'min_sentence1_length': 
9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fas_Arab-ind_Latn': {'num_samples': 1997, 'number_of_characters': 530311, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fas_Arab-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 354965, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'fas_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 489285, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'fas_Arab-kor_Hang': {'num_samples': 1997, 'number_of_characters': 376744, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fas_Arab-lit_Latn': {'num_samples': 1997, 'number_of_characters': 502736, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fas_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 457174, 'unique_pairs': 1996, 'min_sentence1_length': 
9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'fas_Arab-nld_Latn': {'num_samples': 1997, 'number_of_characters': 535254, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fas_Arab-pol_Latn': {'num_samples': 1997, 'number_of_characters': 521205, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fas_Arab-por_Latn': {'num_samples': 1997, 'number_of_characters': 520554, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fas_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 485875, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'fas_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 485972, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'fas_Arab-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 518232, 'unique_pairs': 1996, 
'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fas_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 458252, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'fas_Arab-spa_Latn': {'num_samples': 1997, 'number_of_characters': 531539, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fas_Arab-swa_Latn': {'num_samples': 1997, 'number_of_characters': 515848, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fas_Arab-swe_Latn': {'num_samples': 1997, 'number_of_characters': 495166, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fas_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 553300, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fas_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 517486, 'unique_pairs': 
1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'fas_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 508952, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fas_Arab-vie_Latn': {'num_samples': 1997, 'number_of_characters': 514460, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fas_Arab-zho_Hant': {'num_samples': 1997, 'number_of_characters': 334817, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fas_Arab-zul_Latn': {'num_samples': 1997, 'number_of_characters': 501071, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fij_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 548225, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fij_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 593925, 
'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'fij_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 587477, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fij_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 604657, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'fij_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 620813, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'fij_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 574629, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'fij_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 577688, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'fij_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 
578360, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'fij_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 610128, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'fij_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 614145, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'fil_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 541140, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fil_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 593925, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'fil_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 580392, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fil_Latn-mal_Mlym': {'num_samples': 1997, 
'number_of_characters': 597572, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'fil_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 613728, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'fil_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 567544, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'fil_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 570603, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'fil_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 571275, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'fil_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 603043, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'fil_Latn-ton_Latn': 
{'num_samples': 1997, 'number_of_characters': 607060, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'fin_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 500981, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'fin_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 513602, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fin_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 565450, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fin_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 568927, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fin_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517520, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fin_Latn-fas_Arab': 
{'num_samples': 1997, 'number_of_characters': 513139, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'fin_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 562908, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'fin_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 470096, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fin_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 531375, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fin_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 548176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fin_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 556772, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fin_Latn-jpn_Jpan': 
{'num_samples': 1997, 'number_of_characters': 381426, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'fin_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 403205, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fin_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 537988, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'fin_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 529197, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fin_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 561715, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fin_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 547666, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fin_Latn-por_Latn': 
{'num_samples': 1997, 'number_of_characters': 547015, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fin_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 544693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fin_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 558000, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fin_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 542309, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fin_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 521627, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fin_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 579761, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fin_Latn-tur_Latn': 
{'num_samples': 1997, 'number_of_characters': 535413, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fin_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 540921, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fin_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 361278, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fin_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 527532, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fra_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 524289, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'fra_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 536910, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fra_Latn-cat_Latn': 
{'num_samples': 1997, 'number_of_characters': 576068, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'fra_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 588758, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fra_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 592235, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fra_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 540828, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fra_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 536447, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'fra_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 562908, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 
'fra_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 565094, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fra_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 493404, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fra_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 554683, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fra_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 571484, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fra_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 580080, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fra_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 582325, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 
'fra_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 404734, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'fra_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 426513, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fra_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 552505, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fra_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 570583, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'fra_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 585023, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fra_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 570974, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 
'fra_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 570323, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fra_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 585593, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'fra_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 568001, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fra_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 581308, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fra_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 565617, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fra_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 544935, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 
1996}, 'fra_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 603069, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fra_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 558721, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fra_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 564229, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fra_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 384586, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fra_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 550840, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fuc_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 526972, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 
'unique_sentence2': 1997}, 'fuc_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 476200, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fuc_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 467458, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'fuc_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 532267, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'fuc_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 526219, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'fuc_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 512762, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'fuc_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 526810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 
'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'fuc_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 528236, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'gle_Latn-cym_Latn': {'num_samples': 1997, 'number_of_characters': 561314, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 147.63, 'max_sentence1_length': 461, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.45, 'max_sentence2_length': 444, 'unique_sentence2': 1997}, 'gle_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 542529, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 147.63, 'max_sentence1_length': 461, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'glg_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 554946, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'glg_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 519706, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'glg_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 565094, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 
146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'glg_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 561203, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'glg_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 549461, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'glg_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 549201, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'glg_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 564471, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'glg_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 560186, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'guj_Gujr-ben_Beng': {'num_samples': 1997, 'number_of_characters': 488733, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 
122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'guj_Gujr-div_Thaa': {'num_samples': 1997, 'number_of_characters': 548779, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'guj_Gujr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 492651, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'guj_Gujr-eus_Latn': {'num_samples': 1997, 'number_of_characters': 520134, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'guj_Gujr-hin_Deva': {'num_samples': 1997, 'number_of_characters': 506506, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'guj_Gujr-kan_Knda': {'num_samples': 1997, 'number_of_characters': 510467, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'guj_Gujr-mar_Deva': {'num_samples': 1997, 'number_of_characters': 505818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 
'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'guj_Gujr-nep_Deva': {'num_samples': 1997, 'number_of_characters': 493154, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'guj_Gujr-pan_Guru': {'num_samples': 1997, 'number_of_characters': 495353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'guj_Gujr-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 503672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'guj_Gujr-snd_Arab': {'num_samples': 1997, 'number_of_characters': 465258, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'guj_Gujr-tam_Taml': {'num_samples': 1997, 'number_of_characters': 554892, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'guj_Gujr-tel_Telu': {'num_samples': 1997, 'number_of_characters': 492458, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 
10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'guj_Gujr-urd_Arab': {'num_samples': 1997, 'number_of_characters': 492929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'hau_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 437473, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'hau_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517686, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hau_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 516067, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'hau_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 561465, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'hau_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 507397, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 
'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'hau_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 561258, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'hau_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 558108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'hau_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 542475, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hau_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 435204, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'hau_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 604249, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'hau_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 509769, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 
1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'hau_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 538056, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'hau_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 586054, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'hau_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 527698, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'heb_Hebr-arb_Arab': {'num_samples': 1997, 'number_of_characters': 431477, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'heb_Hebr-ben_Beng': {'num_samples': 1997, 'number_of_characters': 444098, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'heb_Hebr-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 452663, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 
'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'heb_Hebr-deu_Latn': {'num_samples': 1997, 'number_of_characters': 495946, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'heb_Hebr-ell_Grek': {'num_samples': 1997, 'number_of_characters': 499423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'heb_Hebr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 448016, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'heb_Hebr-fas_Arab': {'num_samples': 1997, 'number_of_characters': 443635, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'heb_Hebr-fin_Latn': {'num_samples': 1997, 'number_of_characters': 470096, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'heb_Hebr-fra_Latn': {'num_samples': 1997, 'number_of_characters': 493404, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 
'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'heb_Hebr-hin_Deva': {'num_samples': 1997, 'number_of_characters': 461871, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'heb_Hebr-hun_Latn': {'num_samples': 1997, 'number_of_characters': 478672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'heb_Hebr-ind_Latn': {'num_samples': 1997, 'number_of_characters': 487268, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'heb_Hebr-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 311922, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'heb_Hebr-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 446242, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'heb_Hebr-kor_Hang': {'num_samples': 1997, 'number_of_characters': 333701, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 
'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'heb_Hebr-lit_Latn': {'num_samples': 1997, 'number_of_characters': 459693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'heb_Hebr-mey_Arab': {'num_samples': 1997, 'number_of_characters': 414131, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'heb_Hebr-nld_Latn': {'num_samples': 1997, 'number_of_characters': 492211, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'heb_Hebr-pol_Latn': {'num_samples': 1997, 'number_of_characters': 478162, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'heb_Hebr-por_Latn': {'num_samples': 1997, 'number_of_characters': 477511, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'heb_Hebr-prs_Arab': {'num_samples': 1997, 'number_of_characters': 442832, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 
'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'heb_Hebr-pus_Arab': {'num_samples': 1997, 'number_of_characters': 442929, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'heb_Hebr-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 475189, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'heb_Hebr-shi_Arab': {'num_samples': 1997, 'number_of_characters': 415209, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'heb_Hebr-spa_Latn': {'num_samples': 1997, 'number_of_characters': 488496, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'heb_Hebr-swa_Latn': {'num_samples': 1997, 'number_of_characters': 472805, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'heb_Hebr-swe_Latn': {'num_samples': 1997, 'number_of_characters': 452123, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 
'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'heb_Hebr-tam_Taml': {'num_samples': 1997, 'number_of_characters': 510257, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'heb_Hebr-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 474443, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'heb_Hebr-tur_Latn': {'num_samples': 1997, 'number_of_characters': 465909, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'heb_Hebr-vie_Latn': {'num_samples': 1997, 'number_of_characters': 471417, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'heb_Hebr-zho_Hant': {'num_samples': 1997, 'number_of_characters': 291774, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'heb_Hebr-zul_Latn': {'num_samples': 1997, 'number_of_characters': 458028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 
'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hin_Deva-arb_Arab': {'num_samples': 1997, 'number_of_characters': 492756, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'hin_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 505377, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'hin_Deva-deu_Latn': {'num_samples': 1997, 'number_of_characters': 557225, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'hin_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 565423, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'hin_Deva-ell_Grek': {'num_samples': 1997, 'number_of_characters': 560702, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hin_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 509295, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 
394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hin_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 536778, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'hin_Deva-fas_Arab': {'num_samples': 1997, 'number_of_characters': 504914, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'hin_Deva-fin_Latn': {'num_samples': 1997, 'number_of_characters': 531375, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hin_Deva-fra_Latn': {'num_samples': 1997, 'number_of_characters': 554683, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'hin_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 506506, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'hin_Deva-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 461871, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 
'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'hin_Deva-hun_Latn': {'num_samples': 1997, 'number_of_characters': 539951, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'hin_Deva-ind_Latn': {'num_samples': 1997, 'number_of_characters': 548547, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'hin_Deva-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 373201, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'hin_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 527111, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'hin_Deva-kor_Hang': {'num_samples': 1997, 'number_of_characters': 394980, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'hin_Deva-lit_Latn': {'num_samples': 1997, 'number_of_characters': 520972, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 
'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'hin_Deva-mar_Deva': {'num_samples': 1997, 'number_of_characters': 522462, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'hin_Deva-nep_Deva': {'num_samples': 1997, 'number_of_characters': 509798, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'hin_Deva-nld_Latn': {'num_samples': 1997, 'number_of_characters': 553490, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'hin_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 511997, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'hin_Deva-pol_Latn': {'num_samples': 1997, 'number_of_characters': 539441, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hin_Deva-por_Latn': {'num_samples': 1997, 'number_of_characters': 538790, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 
130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'hin_Deva-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 536468, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hin_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 520316, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'hin_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 481902, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'hin_Deva-spa_Latn': {'num_samples': 1997, 'number_of_characters': 549775, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'hin_Deva-swa_Latn': {'num_samples': 1997, 'number_of_characters': 534084, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hin_Deva-swe_Latn': {'num_samples': 1997, 'number_of_characters': 513402, 'unique_pairs': 1996, 'min_sentence1_length': 5, 
'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'hin_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 571536, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'hin_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 509102, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'hin_Deva-tur_Latn': {'num_samples': 1997, 'number_of_characters': 527188, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'hin_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 509573, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'hin_Deva-vie_Latn': {'num_samples': 1997, 'number_of_characters': 532696, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'hin_Deva-zho_Hant': {'num_samples': 1997, 'number_of_characters': 353053, 'unique_pairs': 1996, 
'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'hin_Deva-zul_Latn': {'num_samples': 1997, 'number_of_characters': 519307, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hmn_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 578510, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 165.64, 'max_sentence1_length': 643, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hrv_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 512015, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'hrv_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 510835, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'hrv_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 525814, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'hrv_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497243, 'unique_pairs': 
1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'hrv_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503645, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hrv_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 523816, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'hrv_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 533791, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hrv_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 530818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hrv_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 508894, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'hrv_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 508821, 
'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hrv_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508228, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'hrv_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 512066, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'hrv_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518708, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'hun_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 509557, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'hun_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 522178, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'hun_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 
574026, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'hun_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 577503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hun_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 526096, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hun_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 521715, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'hun_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 548176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hun_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 571484, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'hun_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 
478672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'hun_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 539951, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'hun_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 565348, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'hun_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 390002, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'hun_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 411781, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'hun_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 546564, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'hun_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 537773, 
'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'hun_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 570291, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'hun_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 556242, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hun_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 555591, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'hun_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 553269, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hun_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 566576, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'hun_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 550885, 
'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hun_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 530203, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'hun_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 588337, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'hun_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 543989, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'hun_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 549497, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'hun_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 369854, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'hun_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 536108, 
'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hye_Armn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 563842, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hye_Armn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 512435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hye_Armn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 531307, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'hye_Armn-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 548322, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'ibo_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 413608, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'ibo_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 
493821, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ibo_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 516067, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'ibo_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 537600, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'ibo_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 483532, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'ibo_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 537393, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'ibo_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 534243, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'ibo_Latn-swa_Latn': {'num_samples': 1997, 
'number_of_characters': 518610, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ibo_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 411339, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'ibo_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 580384, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'ibo_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 485904, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'ibo_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 514191, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'ibo_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 562189, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ibo_Latn-zul_Latn': {'num_samples': 
1997, 'number_of_characters': 503833, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ind_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 518153, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ind_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 530774, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'ind_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 582622, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ind_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 586099, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'ind_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 534692, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ind_Latn-fas_Arab': {'num_samples': 
1997, 'number_of_characters': 530311, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ind_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 587477, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'ind_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 580392, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'ind_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 556772, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ind_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 580080, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ind_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 487268, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ind_Latn-hin_Deva': {'num_samples': 
1997, 'number_of_characters': 548547, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ind_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 565348, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ind_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 398598, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ind_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 420377, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ind_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 546369, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ind_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 591124, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'ind_Latn-mlg_Latn': {'num_samples': 1997, 
'number_of_characters': 607280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'ind_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 561096, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'ind_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 564155, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'ind_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 578887, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ind_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 564838, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ind_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 564187, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ind_Latn-rus_Cyrl': {'num_samples': 1997, 
'number_of_characters': 561865, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ind_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 564827, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'ind_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 575172, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ind_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 559481, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ind_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 538799, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ind_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 596595, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'ind_Latn-tam_Taml': {'num_samples': 1997, 
'number_of_characters': 596933, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ind_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 600612, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'ind_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 552585, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'ind_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 558093, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ind_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 378450, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ind_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 544704, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'isl_Latn-afr_Latn': {'num_samples': 1997, 
'number_of_characters': 530560, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'isl_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 514346, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'isl_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 557858, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'isl_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 509928, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'isl_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 520011, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'isl_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 542965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'isl_Latn-nld_Latn': {'num_samples': 1997, 
'number_of_characters': 554123, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'isl_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 510565, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'isl_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 513652, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'isl_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 514035, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ita_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 572177, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'ita_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 536937, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ita_Latn-fra_Latn': {'num_samples': 1997, 
'number_of_characters': 582325, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ita_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 561203, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ita_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 566692, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ita_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 566432, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ita_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 581702, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'ita_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 577417, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'jpn_Jpan-arb_Arab': {'num_samples': 
1997, 'number_of_characters': 342807, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'jpn_Jpan-ben_Beng': {'num_samples': 1997, 'number_of_characters': 355428, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'jpn_Jpan-deu_Latn': {'num_samples': 1997, 'number_of_characters': 407276, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'jpn_Jpan-ell_Grek': {'num_samples': 1997, 'number_of_characters': 410753, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'jpn_Jpan-eng_Latn': {'num_samples': 1997, 'number_of_characters': 359346, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'jpn_Jpan-fas_Arab': {'num_samples': 1997, 'number_of_characters': 354965, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'jpn_Jpan-fin_Latn': {'num_samples': 1997, 
'number_of_characters': 381426, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'jpn_Jpan-fra_Latn': {'num_samples': 1997, 'number_of_characters': 404734, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'jpn_Jpan-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 311922, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'jpn_Jpan-hin_Deva': {'num_samples': 1997, 'number_of_characters': 373201, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'jpn_Jpan-hun_Latn': {'num_samples': 1997, 'number_of_characters': 390002, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'jpn_Jpan-ind_Latn': {'num_samples': 1997, 'number_of_characters': 398598, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'jpn_Jpan-kor_Hang': {'num_samples': 1997, 
'number_of_characters': 245031, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'jpn_Jpan-lit_Latn': {'num_samples': 1997, 'number_of_characters': 371023, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'jpn_Jpan-nld_Latn': {'num_samples': 1997, 'number_of_characters': 403541, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'jpn_Jpan-pol_Latn': {'num_samples': 1997, 'number_of_characters': 389492, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'jpn_Jpan-por_Latn': {'num_samples': 1997, 'number_of_characters': 388841, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'jpn_Jpan-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 386519, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'jpn_Jpan-spa_Latn': {'num_samples': 1997, 
'number_of_characters': 399826, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'jpn_Jpan-swa_Latn': {'num_samples': 1997, 'number_of_characters': 384135, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'jpn_Jpan-swe_Latn': {'num_samples': 1997, 'number_of_characters': 363453, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'jpn_Jpan-tam_Taml': {'num_samples': 1997, 'number_of_characters': 421587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'jpn_Jpan-tur_Latn': {'num_samples': 1997, 'number_of_characters': 377239, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'jpn_Jpan-vie_Latn': {'num_samples': 1997, 'number_of_characters': 382747, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'jpn_Jpan-yue_Hant': {'num_samples': 1997, 
'number_of_characters': 190513, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'jpn_Jpan-zho_Hans': {'num_samples': 1997, 'number_of_characters': 196587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'jpn_Jpan-zho_Hant': {'num_samples': 1997, 'number_of_characters': 203104, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'jpn_Jpan-zul_Latn': {'num_samples': 1997, 'number_of_characters': 369358, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'kan_Knda-ben_Beng': {'num_samples': 1997, 'number_of_characters': 509338, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'kan_Knda-div_Thaa': {'num_samples': 1997, 'number_of_characters': 569384, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'kan_Knda-eng_Latn': {'num_samples': 1997, 
'number_of_characters': 513256, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kan_Knda-eus_Latn': {'num_samples': 1997, 'number_of_characters': 540739, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'kan_Knda-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 510467, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'kan_Knda-hin_Deva': {'num_samples': 1997, 'number_of_characters': 527111, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'kan_Knda-mar_Deva': {'num_samples': 1997, 'number_of_characters': 526423, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'kan_Knda-nep_Deva': {'num_samples': 1997, 'number_of_characters': 513759, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'kan_Knda-pan_Guru': {'num_samples': 
1997, 'number_of_characters': 515958, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'kan_Knda-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 524277, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'kan_Knda-snd_Arab': {'num_samples': 1997, 'number_of_characters': 485863, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'kan_Knda-tam_Taml': {'num_samples': 1997, 'number_of_characters': 575497, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'kan_Knda-tel_Telu': {'num_samples': 1997, 'number_of_characters': 513063, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'kan_Knda-urd_Arab': {'num_samples': 1997, 'number_of_characters': 513534, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'kat_Geor-ell_Grek': 
{'num_samples': 1997, 'number_of_characters': 565719, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'kat_Geor-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514312, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kat_Geor-hye_Armn': {'num_samples': 1997, 'number_of_characters': 531307, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'kat_Geor-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 550199, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'kaz_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 529910, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'kaz_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 506602, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 
'kaz_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 507996, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kaz_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 511140, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'kaz_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 506202, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kaz_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 545550, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'kaz_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 525889, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kaz_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 571298, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 
1996}, 'kaz_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 553971, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'khm_Khmr-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 589120, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'khm_Khmr-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 531712, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'khm_Khmr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 536211, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'khm_Khmr-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 555471, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'khm_Khmr-mon_Mong': {'num_samples': 1997, 'number_of_characters': 547539, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 
'unique_sentence2': 1997}, 'khm_Khmr-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 600345, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'khm_Khmr-tha_Thai': {'num_samples': 1997, 'number_of_characters': 525959, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'kin_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 602279, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'kin_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551507, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kin_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 542765, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'kin_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 532267, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 
'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'kin_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 601526, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'kin_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 588069, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'kin_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 602117, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'kin_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 603543, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'kir_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 520498, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'kir_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 497190, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 
'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'kir_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 498584, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kir_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 511140, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'kir_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 496790, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kir_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 536138, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'kir_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 516477, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kir_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 561886, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 
'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'kir_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 544559, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'kmr_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 477127, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'kmr_Latn-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 498313, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'kmr_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493666, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kmr_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 489285, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'kmr_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 446242, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 
1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'kmr_Latn-mey_Arab': {'num_samples': 1997, 'number_of_characters': 459781, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'kmr_Latn-prs_Arab': {'num_samples': 1997, 'number_of_characters': 488482, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'kmr_Latn-pus_Arab': {'num_samples': 1997, 'number_of_characters': 488579, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'kmr_Latn-shi_Arab': {'num_samples': 1997, 'number_of_characters': 460859, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'kmr_Latn-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 520093, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'kor_Hang-arb_Arab': {'num_samples': 1997, 'number_of_characters': 364586, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 
1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'kor_Hang-ben_Beng': {'num_samples': 1997, 'number_of_characters': 377207, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'kor_Hang-deu_Latn': {'num_samples': 1997, 'number_of_characters': 429055, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'kor_Hang-ell_Grek': {'num_samples': 1997, 'number_of_characters': 432532, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'kor_Hang-eng_Latn': {'num_samples': 1997, 'number_of_characters': 381125, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kor_Hang-fas_Arab': {'num_samples': 1997, 'number_of_characters': 376744, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'kor_Hang-fin_Latn': {'num_samples': 1997, 'number_of_characters': 403205, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 
'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'kor_Hang-fra_Latn': {'num_samples': 1997, 'number_of_characters': 426513, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'kor_Hang-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 333701, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'kor_Hang-hin_Deva': {'num_samples': 1997, 'number_of_characters': 394980, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'kor_Hang-hun_Latn': {'num_samples': 1997, 'number_of_characters': 411781, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'kor_Hang-ind_Latn': {'num_samples': 1997, 'number_of_characters': 420377, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'kor_Hang-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 245031, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 
'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'kor_Hang-lit_Latn': {'num_samples': 1997, 'number_of_characters': 392802, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'kor_Hang-nld_Latn': {'num_samples': 1997, 'number_of_characters': 425320, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kor_Hang-pol_Latn': {'num_samples': 1997, 'number_of_characters': 411271, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'kor_Hang-por_Latn': {'num_samples': 1997, 'number_of_characters': 410620, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'kor_Hang-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 408298, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'kor_Hang-spa_Latn': {'num_samples': 1997, 'number_of_characters': 421605, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 
'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'kor_Hang-swa_Latn': {'num_samples': 1997, 'number_of_characters': 405914, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'kor_Hang-swe_Latn': {'num_samples': 1997, 'number_of_characters': 385232, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'kor_Hang-tam_Taml': {'num_samples': 1997, 'number_of_characters': 443366, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'kor_Hang-tur_Latn': {'num_samples': 1997, 'number_of_characters': 399018, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kor_Hang-vie_Latn': {'num_samples': 1997, 'number_of_characters': 404526, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'kor_Hang-yue_Hant': {'num_samples': 1997, 'number_of_characters': 212292, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 
'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'kor_Hang-zho_Hans': {'num_samples': 1997, 'number_of_characters': 218366, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'kor_Hang-zho_Hant': {'num_samples': 1997, 'number_of_characters': 224883, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'kor_Hang-zul_Latn': {'num_samples': 1997, 'number_of_characters': 391137, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'lao_Laoo-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 567609, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'lao_Laoo-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 510201, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'lao_Laoo-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 
'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lao_Laoo-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 555471, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'lao_Laoo-mon_Mong': {'num_samples': 1997, 'number_of_characters': 526028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'lao_Laoo-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 578834, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'lao_Laoo-tha_Thai': {'num_samples': 1997, 'number_of_characters': 504448, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'lav_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515908, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lav_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 537988, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 
1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'lav_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 546564, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'lav_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 527585, 'unique_pairs': 1995, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'lit_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 490578, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'lit_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 503199, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'lit_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 555047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'lit_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 558524, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 
1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'lit_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 507117, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lit_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 502736, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'lit_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 529197, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'lit_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 552505, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'lit_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 459693, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'lit_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 520972, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 
'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'lit_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 537773, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'lit_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 546369, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'lit_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 371023, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'lit_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 392802, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'lit_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 527585, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'lit_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 551312, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 
'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'lit_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 537263, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'lit_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 536612, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'lit_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 534290, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'lit_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 547597, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'lit_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 531906, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'lit_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 511224, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 
446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'lit_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 569358, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'lit_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 525010, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'lit_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 530518, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'lit_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 350875, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'lit_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 517129, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ltz_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 549109, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 
'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ltz_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 532895, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'ltz_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 576407, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ltz_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 528477, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ltz_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 538560, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'ltz_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 542965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'ltz_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 572672, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 
140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ltz_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 529114, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'ltz_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 532201, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'ltz_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 532584, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'mal_Mlym-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551872, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mal_Mlym-fij_Latn': {'num_samples': 1997, 'number_of_characters': 604657, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mal_Mlym-fil_Latn': {'num_samples': 1997, 'number_of_characters': 597572, 'unique_pairs': 1997, 'min_sentence1_length': 7, 
'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mal_Mlym-ind_Latn': {'num_samples': 1997, 'number_of_characters': 591124, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mal_Mlym-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 624460, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'mal_Mlym-mri_Latn': {'num_samples': 1997, 'number_of_characters': 578276, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'mal_Mlym-msa_Latn': {'num_samples': 1997, 'number_of_characters': 581335, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mal_Mlym-smo_Latn': {'num_samples': 1997, 'number_of_characters': 582007, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mal_Mlym-tah_Latn': {'num_samples': 1997, 'number_of_characters': 613775, 'unique_pairs': 1997, 'min_sentence1_length': 7, 
'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mal_Mlym-ton_Latn': {'num_samples': 1997, 'number_of_characters': 617792, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mar_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 504689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'mar_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 564735, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'mar_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 508607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mar_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 536090, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'mar_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 505818, 'unique_pairs': 1997, 'min_sentence1_length': 
7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'mar_Deva-hin_Deva': {'num_samples': 1997, 'number_of_characters': 522462, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'mar_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 526423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'mar_Deva-nep_Deva': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'mar_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 511309, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'mar_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 519628, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'mar_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 481214, 'unique_pairs': 1996, 
'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'mar_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 570848, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'mar_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 508414, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'mar_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 508885, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'mey_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 445016, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'mey_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 466202, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'mey_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 461555, 'unique_pairs': 
1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mey_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 457174, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'mey_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 414131, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'mey_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 459781, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'mey_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 456371, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'mey_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 456468, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'mey_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 428748, 
'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'mey_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 487982, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'mkd_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 523981, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'mkd_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 522801, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'mkd_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 537780, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'mkd_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 509209, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'mkd_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 
515611, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mkd_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 523816, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'mkd_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 545757, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'mkd_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 542784, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'mkd_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 520860, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'mkd_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 520787, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'mkd_Cyrl-srp_Cyrl': {'num_samples': 1997, 
'number_of_characters': 520194, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'mkd_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 524032, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'mkd_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 530674, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'mlg_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 568028, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mlg_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 620813, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mlg_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 613728, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mlg_Latn-ind_Latn': {'num_samples': 
1997, 'number_of_characters': 607280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mlg_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 624460, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'mlg_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 594432, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'mlg_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 597491, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mlg_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 598163, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mlg_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 629931, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mlg_Latn-ton_Latn': 
{'num_samples': 1997, 'number_of_characters': 633948, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mlt_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 560435, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'mlt_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525195, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mlt_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570583, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'mlt_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 549461, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'mlt_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 566692, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 
'mlt_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 554690, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'mlt_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 569960, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'mlt_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 565675, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'mon_Mong-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 559677, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'mon_Mong-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 502269, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'mon_Mong-eng_Latn': {'num_samples': 1997, 'number_of_characters': 506768, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 
1997}, 'mon_Mong-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 547539, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'mon_Mong-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 526028, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'mon_Mong-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 570902, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'mon_Mong-tha_Thai': {'num_samples': 1997, 'number_of_characters': 496516, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'mri_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 521844, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mri_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 574629, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 
'unique_sentence2': 1988}, 'mri_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 567544, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mri_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 561096, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mri_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 578276, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'mri_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 594432, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'mri_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 551307, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mri_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 551979, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 
431, 'unique_sentence2': 1996}, 'mri_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 583747, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mri_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 587764, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'msa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 524903, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'msa_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 577688, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'msa_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 570603, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'msa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564155, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 
'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'msa_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 581335, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'msa_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 597491, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'msa_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 551307, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'msa_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 555038, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'msa_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 586806, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'msa_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 590823, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 
'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mya_Mymr-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 612483, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'mya_Mymr-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 555075, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'mya_Mymr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 559574, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mya_Mymr-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 600345, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'mya_Mymr-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 578834, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'mya_Mymr-mon_Mong': {'num_samples': 1997, 'number_of_characters': 570902, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 
129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'mya_Mymr-tha_Thai': {'num_samples': 1997, 'number_of_characters': 549322, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'nde_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 596231, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'nde_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 545459, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nde_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 536717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'nde_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526219, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'nde_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 601526, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 
'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'nde_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582021, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'nde_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 596069, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'nde_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 597495, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'nep_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 492025, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'nep_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 552071, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'nep_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495943, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 
7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nep_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 523426, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'nep_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 493154, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'nep_Deva-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509798, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'nep_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513759, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'nep_Deva-mar_Deva': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'nep_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498645, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 
7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'nep_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506964, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'nep_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468550, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'nep_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 558184, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'nep_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495750, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'nep_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 496221, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'nld_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 560267, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 
7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nld_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 523096, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'nld_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 535717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'nld_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 544053, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'nld_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 587565, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nld_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 591042, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'nld_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 539635, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 
'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nld_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 549718, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nld_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 535254, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'nld_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 561715, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'nld_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 585023, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'nld_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 492211, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'nld_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 553490, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 
1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'nld_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 570291, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'nld_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 578887, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'nld_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 554123, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'nld_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 403541, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'nld_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 425320, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'nld_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 551312, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 
1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'nld_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 572672, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nld_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 540272, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'nld_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 543359, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'nld_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 569781, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'nld_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 569130, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'nld_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 566808, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 
'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'nld_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 580115, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'nld_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 564424, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'nld_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 543742, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nld_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 601876, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'nld_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 557528, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'nld_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 563036, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 
539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nld_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 383393, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'nld_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549647, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'nno_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 516709, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nno_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 500495, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'nno_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 544007, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nno_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 496077, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 
'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nno_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 506160, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nno_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 510565, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'nno_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 529114, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nno_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 540272, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'nno_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 499801, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'nno_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 500184, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 
124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nob_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 519796, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nob_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 503582, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'nob_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547094, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nob_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499164, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nob_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509247, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nob_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 513652, 'unique_pairs': 1996, 'min_sentence1_length': 8, 
'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'nob_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532201, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nob_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 543359, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'nob_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 499801, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'nob_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 503271, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nso_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 459006, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'nso_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 539219, 'unique_pairs': 1997, 'min_sentence1_length': 
5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nso_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 561465, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'nso_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 537600, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'nso_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 528930, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'nso_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 582791, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'nso_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 579641, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'nso_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 564008, 'unique_pairs': 1997, 
'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'nso_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 456737, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'nso_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 625782, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'nso_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 531302, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'nso_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 559589, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'nso_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 607587, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'nso_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549231, 'unique_pairs': 
1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'nya_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 582774, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'nya_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nya_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 523260, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'nya_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 512762, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'nya_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 588069, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'nya_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 582021, 
'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'nya_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 582612, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'nya_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 584038, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'orm_Ethi-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 404938, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'orm_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 485151, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'orm_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 507397, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'orm_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 
483532, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'orm_Ethi-nso_Latn': {'num_samples': 1997, 'number_of_characters': 528930, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'orm_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 528723, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'orm_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 525573, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'orm_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 509940, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'orm_Ethi-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 402669, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'orm_Ethi-tsn_Latn': {'num_samples': 1997, 
'number_of_characters': 571714, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'orm_Ethi-wol_Latn': {'num_samples': 1997, 'number_of_characters': 477234, 'unique_pairs': 1992, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'orm_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 505521, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'orm_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 553519, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'orm_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 495163, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'pan_Guru-ben_Beng': {'num_samples': 1997, 'number_of_characters': 494224, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'pan_Guru-div_Thaa': {'num_samples': 
1997, 'number_of_characters': 554270, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'pan_Guru-eng_Latn': {'num_samples': 1997, 'number_of_characters': 498142, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pan_Guru-eus_Latn': {'num_samples': 1997, 'number_of_characters': 525625, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'pan_Guru-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 495353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'pan_Guru-hin_Deva': {'num_samples': 1997, 'number_of_characters': 511997, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'pan_Guru-kan_Knda': {'num_samples': 1997, 'number_of_characters': 515958, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'pan_Guru-mar_Deva': {'num_samples': 
1997, 'number_of_characters': 511309, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'pan_Guru-nep_Deva': {'num_samples': 1997, 'number_of_characters': 498645, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'pan_Guru-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 509163, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'pan_Guru-snd_Arab': {'num_samples': 1997, 'number_of_characters': 470749, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'pan_Guru-tam_Taml': {'num_samples': 1997, 'number_of_characters': 560383, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'pan_Guru-tel_Telu': {'num_samples': 1997, 'number_of_characters': 497949, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'pan_Guru-urd_Arab': {'num_samples': 
1997, 'number_of_characters': 498420, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'pol_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 509047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'pol_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 533956, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'pol_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 521668, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'pol_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 532776, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'pol_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 547755, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'pol_Latn-ces_Latn': 
{'num_samples': 1997, 'number_of_characters': 519184, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'pol_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 573516, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'pol_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 576993, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'pol_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525586, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pol_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 521205, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'pol_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 547666, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 
'pol_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570974, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'pol_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 478162, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'pol_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 539441, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'pol_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 533791, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'pol_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 556242, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'pol_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564838, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 
1997}, 'pol_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 389492, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'pol_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 411271, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'pol_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 537263, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'pol_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 545757, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'pol_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 569781, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'pol_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 555081, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 
1996}, 'pol_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 552759, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'pol_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 530835, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'pol_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 530762, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'pol_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 566066, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'pol_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 530169, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'pol_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 534007, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 
'unique_sentence2': 1996}, 'pol_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 550375, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'pol_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 529693, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'pol_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 587827, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'pol_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 543479, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'pol_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 540649, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'pol_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 548987, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 
437, 'unique_sentence2': 1996}, 'pol_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 369344, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'pol_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 535598, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'por_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 508396, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'por_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 521017, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'por_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 560175, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'por_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 572865, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 
'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'por_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 576342, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'por_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 524935, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'por_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 520554, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'por_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 547015, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'por_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570323, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'por_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 549201, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 
136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'por_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 477511, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'por_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 538790, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'por_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 555591, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'por_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564187, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'por_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 566432, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'por_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 388841, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 
55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'por_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 410620, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'por_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 536612, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'por_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 554690, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'por_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 569130, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'por_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 555081, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'por_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 569700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 
146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'por_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 552108, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'por_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 565415, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'por_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 549724, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'por_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 529042, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'por_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 587176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'por_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 542828, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 
'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'por_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 548336, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'por_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 368693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'por_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 534947, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'prs_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 473717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'prs_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 494903, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'prs_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490256, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 
7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'prs_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 485875, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'prs_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 442832, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'prs_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 488482, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'prs_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 456371, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'prs_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 485169, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'prs_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 457449, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 
'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'prs_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 516683, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'pus_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 473814, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'pus_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 495000, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'pus_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490353, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pus_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 485972, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'pus_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 442929, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 
'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'pus_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 488579, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'pus_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 456468, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'pus_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 485169, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'pus_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 457546, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'pus_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 516780, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'ron_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 575445, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 
'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'ron_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 540205, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ron_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 585593, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ron_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 564471, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ron_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 581702, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'ron_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 569960, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ron_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 569700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 
1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ron_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 580685, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'rus_Cyrl-arb_Arab': {'num_samples': 1997, 'number_of_characters': 506074, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'rus_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 530983, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'rus_Cyrl-ben_Beng': {'num_samples': 1997, 'number_of_characters': 518695, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'rus_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 529803, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'rus_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 544782, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 
'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'rus_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 516211, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'rus_Cyrl-deu_Latn': {'num_samples': 1997, 'number_of_characters': 570543, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'rus_Cyrl-ell_Grek': {'num_samples': 1997, 'number_of_characters': 574020, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'rus_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 522613, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'rus_Cyrl-fas_Arab': {'num_samples': 1997, 'number_of_characters': 518232, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'rus_Cyrl-fin_Latn': {'num_samples': 1997, 'number_of_characters': 544693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 
419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'rus_Cyrl-fra_Latn': {'num_samples': 1997, 'number_of_characters': 568001, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'rus_Cyrl-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 475189, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'rus_Cyrl-hin_Deva': {'num_samples': 1997, 'number_of_characters': 536468, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'rus_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 530818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'rus_Cyrl-hun_Latn': {'num_samples': 1997, 'number_of_characters': 553269, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'rus_Cyrl-ind_Latn': {'num_samples': 1997, 'number_of_characters': 561865, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 
'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'rus_Cyrl-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 386519, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'rus_Cyrl-kor_Hang': {'num_samples': 1997, 'number_of_characters': 408298, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'rus_Cyrl-lit_Latn': {'num_samples': 1997, 'number_of_characters': 534290, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'rus_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 542784, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'rus_Cyrl-nld_Latn': {'num_samples': 1997, 'number_of_characters': 566808, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'rus_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 552759, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 
'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'rus_Cyrl-por_Latn': {'num_samples': 1997, 'number_of_characters': 552108, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'rus_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 527862, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'rus_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 527789, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'rus_Cyrl-spa_Latn': {'num_samples': 1997, 'number_of_characters': 563093, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'rus_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 527196, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'rus_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 531034, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 
137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'rus_Cyrl-swa_Latn': {'num_samples': 1997, 'number_of_characters': 547402, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'rus_Cyrl-swe_Latn': {'num_samples': 1997, 'number_of_characters': 526720, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'rus_Cyrl-tam_Taml': {'num_samples': 1997, 'number_of_characters': 584854, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'rus_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 540506, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'rus_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 537676, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'rus_Cyrl-vie_Latn': {'num_samples': 1997, 'number_of_characters': 546014, 'unique_pairs': 1997, 'min_sentence1_length': 7, 
'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'rus_Cyrl-zho_Hant': {'num_samples': 1997, 'number_of_characters': 366371, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'rus_Cyrl-zul_Latn': {'num_samples': 1997, 'number_of_characters': 532625, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'shi_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 446094, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'shi_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 467280, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'shi_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 462633, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'shi_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 458252, 'unique_pairs': 1997, 'min_sentence1_length': 
3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'shi_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 415209, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'shi_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 460859, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'shi_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 428748, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'shi_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 457449, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'shi_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 457546, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'shi_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 489060, 'unique_pairs': 1997, 
'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'sin_Sinh-ben_Beng': {'num_samples': 1997, 'number_of_characters': 502543, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'sin_Sinh-div_Thaa': {'num_samples': 1997, 'number_of_characters': 562589, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'sin_Sinh-eng_Latn': {'num_samples': 1997, 'number_of_characters': 506461, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sin_Sinh-eus_Latn': {'num_samples': 1997, 'number_of_characters': 533944, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'sin_Sinh-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 503672, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'sin_Sinh-hin_Deva': {'num_samples': 1997, 'number_of_characters': 520316, 'unique_pairs': 
1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'sin_Sinh-kan_Knda': {'num_samples': 1997, 'number_of_characters': 524277, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'sin_Sinh-mar_Deva': {'num_samples': 1997, 'number_of_characters': 519628, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'sin_Sinh-nep_Deva': {'num_samples': 1997, 'number_of_characters': 506964, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'sin_Sinh-pan_Guru': {'num_samples': 1997, 'number_of_characters': 509163, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'sin_Sinh-snd_Arab': {'num_samples': 1997, 'number_of_characters': 479068, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'sin_Sinh-tam_Taml': {'num_samples': 1997, 'number_of_characters': 568702, 
'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'sin_Sinh-tel_Telu': {'num_samples': 1997, 'number_of_characters': 506268, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'sin_Sinh-urd_Arab': {'num_samples': 1997, 'number_of_characters': 506739, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'slk_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 509059, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'slk_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507879, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'slk_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522858, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'slk_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 
494287, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'slk_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500689, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'slk_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508894, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'slk_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520860, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'slk_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530835, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'slk_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527862, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'slk_Latn-slv_Latn': {'num_samples': 1997, 
'number_of_characters': 505865, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'slk_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 505272, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'slk_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'slk_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515752, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'slv_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 508986, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'slv_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507806, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'slv_Latn-bul_Cyrl': {'num_samples': 
1997, 'number_of_characters': 522785, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'slv_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 494214, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'slv_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500616, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'slv_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508821, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'slv_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520787, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'slv_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530762, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'slv_Latn-rus_Cyrl': 
{'num_samples': 1997, 'number_of_characters': 527789, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'slv_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 505865, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'slv_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 505199, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'slv_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 509037, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'slv_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515679, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'smo_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525575, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 
'smo_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 578360, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'smo_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 571275, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'smo_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564827, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'smo_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 582007, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'smo_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 598163, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'smo_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 551979, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 
1997}, 'smo_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 555038, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'smo_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 587478, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'smo_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 591495, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'sna_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 596822, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'sna_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 546050, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sna_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 537308, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 
'unique_sentence2': 1994}, 'sna_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'sna_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 602117, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'sna_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 596069, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'sna_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582612, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'sna_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 598086, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'snd_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 464129, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 
'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'snd_Arab-div_Thaa': {'num_samples': 1997, 'number_of_characters': 524175, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'snd_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 468047, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'snd_Arab-eus_Latn': {'num_samples': 1997, 'number_of_characters': 495530, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'snd_Arab-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 465258, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'snd_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 481902, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'snd_Arab-kan_Knda': {'num_samples': 1997, 'number_of_characters': 485863, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 
132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'snd_Arab-mar_Deva': {'num_samples': 1997, 'number_of_characters': 481214, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'snd_Arab-nep_Deva': {'num_samples': 1997, 'number_of_characters': 468550, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'snd_Arab-pan_Guru': {'num_samples': 1997, 'number_of_characters': 470749, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'snd_Arab-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 479068, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'snd_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 530288, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'snd_Arab-tel_Telu': {'num_samples': 1997, 'number_of_characters': 467854, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 
'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'snd_Arab-urd_Arab': {'num_samples': 1997, 'number_of_characters': 468325, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'som_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 458799, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'som_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 539012, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'som_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 561258, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'som_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 537393, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'som_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 582791, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 
5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'som_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 528723, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'som_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 579434, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'som_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 563801, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'som_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 456530, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'som_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 625575, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'som_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 531095, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'som_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 559382, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'som_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 607380, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'som_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549024, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'spa_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 519381, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'spa_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'spa_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 571160, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 
1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'spa_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 583850, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'spa_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 587327, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'spa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 535920, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'spa_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 531539, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'spa_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 558000, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'spa_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 581308, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 
'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'spa_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 560186, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'spa_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 488496, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'spa_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 549775, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'spa_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 566576, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'spa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 575172, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'spa_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 577417, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 
'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'spa_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 399826, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'spa_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 421605, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'spa_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 547597, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'spa_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 565675, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'spa_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 580115, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'spa_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 566066, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 
'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'spa_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 565415, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'spa_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 580685, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'spa_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 563093, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'spa_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 560709, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'spa_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 540027, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'spa_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 598161, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 
504, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'spa_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 553813, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'spa_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 559321, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'spa_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 379678, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'spa_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 545932, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'sqi_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 582734, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'sqi_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 531327, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 
'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sqi_Latn-hye_Armn': {'num_samples': 1997, 'number_of_characters': 548322, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'sqi_Latn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 550199, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'srp_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 508393, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'srp_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507213, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'srp_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522192, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'srp_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 493621, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 
126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'srp_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500023, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'srp_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508228, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'srp_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520194, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'srp_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530169, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'srp_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527196, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'srp_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 505272, 'unique_pairs': 1996, 'min_sentence1_length': 6, 
'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'srp_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 505199, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'srp_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 508444, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'srp_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515086, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'srp_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 512231, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'srp_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 511051, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'srp_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 526030, 'unique_pairs': 1996, 
'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'srp_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497459, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'srp_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503861, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'srp_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 512066, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'srp_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 524032, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'srp_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 534007, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'srp_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 531034, 'unique_pairs': 
1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'srp_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'srp_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 509037, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'srp_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508444, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'srp_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518924, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'ssw_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 455649, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'ssw_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 535862, 
'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ssw_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 558108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'ssw_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 534243, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'ssw_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 579641, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'ssw_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 525573, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'ssw_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 579434, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'ssw_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 
560651, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ssw_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 453380, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'ssw_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 622425, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'ssw_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 527945, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'ssw_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 556232, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'ssw_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 604230, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ssw_Latn-zul_Latn': {'num_samples': 1997, 
'number_of_characters': 545874, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'swa_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 440016, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'swa_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 503690, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'swa_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 516311, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'swa_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 568159, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'swa_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 571636, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'swa_Latn-eng_Latn': 
{'num_samples': 1997, 'number_of_characters': 520229, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'swa_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 515848, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'swa_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 542309, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'swa_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 565617, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'swa_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 542475, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'swa_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 472805, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 
'swa_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 534084, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'swa_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 550885, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'swa_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 518610, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'swa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 559481, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'swa_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 384135, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'swa_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 405914, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 
1995}, 'swa_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 531906, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'swa_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 564424, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'swa_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 564008, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'swa_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 509940, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'swa_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 550375, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'swa_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 549724, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 
'unique_sentence2': 1996}, 'swa_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 547402, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'swa_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 563801, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'swa_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 560709, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'swa_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 560651, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'swa_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 524336, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'swa_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 582470, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 
'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'swa_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 437747, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'swa_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 606792, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'swa_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 538122, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'swa_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 543630, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swa_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 512312, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'swa_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 540599, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 
'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'swa_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 588597, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'swa_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 363987, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'swa_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 530241, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'swe_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 520179, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swe_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 483008, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'swe_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 495629, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 
6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'swe_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 503965, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'swe_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547477, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'swe_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 550954, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'swe_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499547, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'swe_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509630, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'swe_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 495166, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 
'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'swe_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 521627, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'swe_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 544935, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'swe_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 452123, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'swe_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 513402, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'swe_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 530203, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'swe_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 538799, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 
'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'swe_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 514035, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'swe_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 363453, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'swe_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 385232, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'swe_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 511224, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'swe_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532584, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'swe_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 543742, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 
'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'swe_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 500184, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'swe_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 503271, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'swe_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 529693, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'swe_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 529042, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'swe_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 526720, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'swe_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 540027, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 
'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'swe_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 524336, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'swe_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 561788, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'swe_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 517440, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'swe_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 522948, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swe_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 343305, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'swe_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 509559, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 
'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tah_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 557343, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tah_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 610128, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'tah_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 603043, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'tah_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 596595, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tah_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 613775, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'tah_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 629931, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'tah_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 583747, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'tah_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 586806, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'tah_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 587478, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'tah_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 623263, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'tam_Taml-arb_Arab': {'num_samples': 1997, 'number_of_characters': 541142, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tam_Taml-ben_Beng': {'num_samples': 1997, 'number_of_characters': 553763, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 
1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tam_Taml-deu_Latn': {'num_samples': 1997, 'number_of_characters': 605611, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'tam_Taml-div_Thaa': {'num_samples': 1997, 'number_of_characters': 613809, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'tam_Taml-ell_Grek': {'num_samples': 1997, 'number_of_characters': 609088, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'tam_Taml-eng_Latn': {'num_samples': 1997, 'number_of_characters': 557681, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tam_Taml-eus_Latn': {'num_samples': 1997, 'number_of_characters': 585164, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'tam_Taml-fas_Arab': {'num_samples': 1997, 'number_of_characters': 553300, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 
'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tam_Taml-fin_Latn': {'num_samples': 1997, 'number_of_characters': 579761, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'tam_Taml-fra_Latn': {'num_samples': 1997, 'number_of_characters': 603069, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'tam_Taml-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 554892, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'tam_Taml-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 510257, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tam_Taml-hin_Deva': {'num_samples': 1997, 'number_of_characters': 571536, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tam_Taml-hun_Latn': {'num_samples': 1997, 'number_of_characters': 588337, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 
'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'tam_Taml-ind_Latn': {'num_samples': 1997, 'number_of_characters': 596933, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tam_Taml-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 421587, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'tam_Taml-kan_Knda': {'num_samples': 1997, 'number_of_characters': 575497, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'tam_Taml-kor_Hang': {'num_samples': 1997, 'number_of_characters': 443366, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'tam_Taml-lit_Latn': {'num_samples': 1997, 'number_of_characters': 569358, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'tam_Taml-mar_Deva': {'num_samples': 1997, 'number_of_characters': 570848, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 
155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'tam_Taml-nep_Deva': {'num_samples': 1997, 'number_of_characters': 558184, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tam_Taml-nld_Latn': {'num_samples': 1997, 'number_of_characters': 601876, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tam_Taml-pan_Guru': {'num_samples': 1997, 'number_of_characters': 560383, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'tam_Taml-pol_Latn': {'num_samples': 1997, 'number_of_characters': 587827, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'tam_Taml-por_Latn': {'num_samples': 1997, 'number_of_characters': 587176, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'tam_Taml-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 584854, 'unique_pairs': 1997, 'min_sentence1_length': 11, 
'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'tam_Taml-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 568702, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'tam_Taml-snd_Arab': {'num_samples': 1997, 'number_of_characters': 530288, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'tam_Taml-spa_Latn': {'num_samples': 1997, 'number_of_characters': 598161, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'tam_Taml-swa_Latn': {'num_samples': 1997, 'number_of_characters': 582470, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tam_Taml-swe_Latn': {'num_samples': 1997, 'number_of_characters': 561788, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'tam_Taml-tel_Telu': {'num_samples': 1997, 'number_of_characters': 557488, 'unique_pairs': 1997, 
'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tam_Taml-tur_Latn': {'num_samples': 1997, 'number_of_characters': 575574, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tam_Taml-urd_Arab': {'num_samples': 1997, 'number_of_characters': 557959, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'tam_Taml-vie_Latn': {'num_samples': 1997, 'number_of_characters': 581082, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'tam_Taml-zho_Hant': {'num_samples': 1997, 'number_of_characters': 401439, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'tam_Taml-zul_Latn': {'num_samples': 1997, 'number_of_characters': 567693, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tat_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 515560, 
'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tat_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 492252, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'tat_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493646, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tat_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 506202, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tat_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 496790, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tat_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 531200, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'tat_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 
511539, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tat_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 556948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'tat_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 539621, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tel_Telu-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491329, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tel_Telu-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551375, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'tel_Telu-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495247, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tel_Telu-eus_Latn': {'num_samples': 1997, 
'number_of_characters': 522730, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'tel_Telu-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492458, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'tel_Telu-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509102, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tel_Telu-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513063, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'tel_Telu-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508414, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'tel_Telu-nep_Deva': {'num_samples': 1997, 'number_of_characters': 495750, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tel_Telu-pan_Guru': 
{'num_samples': 1997, 'number_of_characters': 497949, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'tel_Telu-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506268, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'tel_Telu-snd_Arab': {'num_samples': 1997, 'number_of_characters': 467854, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'tel_Telu-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557488, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'tel_Telu-urd_Arab': {'num_samples': 1997, 'number_of_characters': 495525, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'tgk_Cyrl-arb_Arab': {'num_samples': 1997, 'number_of_characters': 505328, 'unique_pairs': 1995, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 
'tgk_Cyrl-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 526514, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'tgk_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 521867, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tgk_Cyrl-fas_Arab': {'num_samples': 1997, 'number_of_characters': 517486, 'unique_pairs': 1995, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tgk_Cyrl-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 474443, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tgk_Cyrl-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 520093, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'tgk_Cyrl-mey_Arab': {'num_samples': 1997, 'number_of_characters': 487982, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 
1993}, 'tgk_Cyrl-prs_Arab': {'num_samples': 1997, 'number_of_characters': 516683, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'tgk_Cyrl-pus_Arab': {'num_samples': 1997, 'number_of_characters': 516780, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'tgk_Cyrl-shi_Arab': {'num_samples': 1997, 'number_of_characters': 489060, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'tha_Thai-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 538097, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'tha_Thai-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 480689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'tha_Thai-eng_Latn': {'num_samples': 1997, 'number_of_characters': 485188, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 
'unique_sentence2': 1997}, 'tha_Thai-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 525959, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'tha_Thai-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 504448, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'tha_Thai-mon_Mong': {'num_samples': 1997, 'number_of_characters': 496516, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'tha_Thai-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 549322, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'tir_Ethi-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 332745, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'tir_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 412958, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 
437, 'unique_sentence2': 1997}, 'tir_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 435204, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'tir_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 411339, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'tir_Ethi-nso_Latn': {'num_samples': 1997, 'number_of_characters': 456737, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'tir_Ethi-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 402669, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'tir_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 456530, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'tir_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 453380, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 
510, 'unique_sentence2': 1996}, 'tir_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 437747, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tir_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 499521, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'tir_Ethi-wol_Latn': {'num_samples': 1997, 'number_of_characters': 405041, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'tir_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 433328, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'tir_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 481326, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'tir_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 422970, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 
494, 'unique_sentence2': 1996}, 'ton_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 561360, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ton_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 614145, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'ton_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 607060, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'ton_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 600612, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ton_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 617792, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'ton_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 633948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 
'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'ton_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 587764, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'ton_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 590823, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'ton_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 591495, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'ton_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 623263, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'tsn_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 501790, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'tsn_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 582003, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 
124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tsn_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 604249, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'tsn_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 580384, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'tsn_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 625782, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'tsn_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 571714, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'tsn_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 625575, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'tsn_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 622425, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 
'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'tsn_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 606792, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tsn_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 499521, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'tsn_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 574086, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'tsn_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 602373, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'tsn_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 650371, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'tsn_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 592015, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 
'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tuk_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 554908, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tuk_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 531600, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'tuk_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 532994, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tuk_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 545550, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tuk_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 536138, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tuk_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 531200, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 
1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tuk_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 550887, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tuk_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 596296, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'tuk_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 578969, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tur_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 496794, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tur_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 535247, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tur_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 511939, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 
'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'tur_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 509415, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tur_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 561263, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'tur_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 564740, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'tur_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 513333, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tur_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 508952, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tur_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 535413, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 
504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'tur_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 558721, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'tur_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 465909, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tur_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 527188, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tur_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 543989, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'tur_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 552585, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tur_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 377239, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 
504, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'tur_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 525889, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tur_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 516477, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tur_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 399018, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'tur_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 525010, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'tur_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 557528, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tur_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 543479, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 
504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'tur_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 542828, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'tur_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 540506, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'tur_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 553813, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'tur_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 538122, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tur_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 517440, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'tur_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 575574, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 
'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'tur_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 511539, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tur_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 550887, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'tur_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 576635, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'tur_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 559308, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tur_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 536734, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'tur_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 357091, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 
133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'tur_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 523345, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'uig_Arab-aze_Latn': {'num_samples': 1997, 'number_of_characters': 580656, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'uig_Arab-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 557348, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'uig_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 558742, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'uig_Arab-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 571298, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'uig_Arab-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 561886, 'unique_pairs': 1997, 'min_sentence1_length': 7, 
'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'uig_Arab-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 556948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'uig_Arab-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 596296, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'uig_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 576635, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'uig_Arab-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 604717, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'ukr_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 518873, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'ukr_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 517693, 'unique_pairs': 1996, 
'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'ukr_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 532672, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'ukr_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 504101, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'ukr_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 510503, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ukr_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 518708, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ukr_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 530674, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'ukr_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 540649, 'unique_pairs': 
1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ukr_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 537676, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ukr_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 515752, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'ukr_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 515679, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ukr_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 515086, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'ukr_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 518924, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'urd_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491800, 
'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'urd_Arab-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551846, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'urd_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495718, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'urd_Arab-eus_Latn': {'num_samples': 1997, 'number_of_characters': 523201, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'urd_Arab-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'urd_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509573, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'urd_Arab-kan_Knda': {'num_samples': 1997, 'number_of_characters': 
513534, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'urd_Arab-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508885, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'urd_Arab-nep_Deva': {'num_samples': 1997, 'number_of_characters': 496221, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'urd_Arab-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498420, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'urd_Arab-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506739, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'urd_Arab-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468325, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'urd_Arab-tam_Taml': {'num_samples': 1997, 
'number_of_characters': 557959, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'urd_Arab-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495525, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'uzb_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 563329, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'uzb_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 540021, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'uzb_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 541415, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'uzb_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 553971, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'uzb_Latn-kir_Cyrl': 
{'num_samples': 1997, 'number_of_characters': 544559, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'uzb_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 539621, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'uzb_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 578969, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'uzb_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 559308, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'uzb_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 604717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'ven_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 598248, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 
'ven_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 547476, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ven_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 538734, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'ven_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 528236, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'ven_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 603543, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'ven_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 597495, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'ven_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 584038, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 
1993}, 'ven_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 598086, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'vie_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 502302, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'vie_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 514923, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'vie_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 566771, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'vie_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 570248, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'vie_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 518841, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 
'unique_sentence2': 1997}, 'vie_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 514460, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'vie_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 540921, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'vie_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 564229, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'vie_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 471417, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'vie_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 532696, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'vie_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 549497, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 
508, 'unique_sentence2': 1997}, 'vie_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 558093, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'vie_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 382747, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'vie_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 404526, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'vie_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 530518, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'vie_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 563036, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'vie_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 548987, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 
468, 'unique_sentence2': 1996}, 'vie_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 548336, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'vie_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 546014, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'vie_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 559321, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'vie_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 543630, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'vie_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 522948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'vie_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 581082, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 
'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'vie_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 536734, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'vie_Latn-yue_Hant': {'num_samples': 1997, 'number_of_characters': 350008, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'vie_Latn-zho_Hans': {'num_samples': 1997, 'number_of_characters': 356082, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'vie_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 362599, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'vie_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 528853, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'wol_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 407310, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 
'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'wol_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 487523, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'wol_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 509769, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'wol_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 485904, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'wol_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 531302, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'wol_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 477234, 'unique_pairs': 1992, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'wol_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 531095, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 
145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'wol_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 527945, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'wol_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 512312, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'wol_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 405041, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'wol_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 574086, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'wol_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 507893, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'wol_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 555891, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 7, 
'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'wol_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 497535, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'xho_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 435597, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'xho_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'xho_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 538056, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'xho_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 514191, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'xho_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 559589, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 
5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'xho_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 505521, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'xho_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 559382, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'xho_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 556232, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'xho_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 540599, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'xho_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 433328, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'xho_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 602373, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'xho_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 507893, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'xho_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 584178, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'xho_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 525822, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'yor_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 483595, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'yor_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 563808, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'yor_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 586054, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 
1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'yor_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 562189, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'yor_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 607587, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'yor_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 553519, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'yor_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 607380, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'yor_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 604230, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'yor_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 588597, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 
'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'yor_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 481326, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'yor_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 650371, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'yor_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 555891, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'yor_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 584178, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'yor_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 573820, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'yue_Hant-eng_Latn': {'num_samples': 1997, 'number_of_characters': 326607, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 
133, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'yue_Hant-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 190513, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'yue_Hant-kor_Hang': {'num_samples': 1997, 'number_of_characters': 212292, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'yue_Hant-vie_Latn': {'num_samples': 1997, 'number_of_characters': 350008, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'yue_Hant-zho_Hans': {'num_samples': 1997, 'number_of_characters': 163848, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'yue_Hant-zho_Hant': {'num_samples': 1997, 'number_of_characters': 170365, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'zho_Hans-eng_Latn': {'num_samples': 1997, 'number_of_characters': 332681, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 
'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zho_Hans-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 196587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zho_Hans-kor_Hang': {'num_samples': 1997, 'number_of_characters': 218366, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zho_Hans-vie_Latn': {'num_samples': 1997, 'number_of_characters': 356082, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zho_Hans-yue_Hant': {'num_samples': 1997, 'number_of_characters': 163848, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'zho_Hans-zho_Hant': {'num_samples': 1997, 'number_of_characters': 176439, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'zho_Hant-arb_Arab': {'num_samples': 1997, 'number_of_characters': 322659, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 
'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'zho_Hant-ben_Beng': {'num_samples': 1997, 'number_of_characters': 335280, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'zho_Hant-deu_Latn': {'num_samples': 1997, 'number_of_characters': 387128, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'zho_Hant-ell_Grek': {'num_samples': 1997, 'number_of_characters': 390605, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'zho_Hant-eng_Latn': {'num_samples': 1997, 'number_of_characters': 339198, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zho_Hant-fas_Arab': {'num_samples': 1997, 'number_of_characters': 334817, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'zho_Hant-fin_Latn': {'num_samples': 1997, 'number_of_characters': 361278, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 
'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'zho_Hant-fra_Latn': {'num_samples': 1997, 'number_of_characters': 384586, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'zho_Hant-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 291774, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'zho_Hant-hin_Deva': {'num_samples': 1997, 'number_of_characters': 353053, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'zho_Hant-hun_Latn': {'num_samples': 1997, 'number_of_characters': 369854, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'zho_Hant-ind_Latn': {'num_samples': 1997, 'number_of_characters': 378450, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'zho_Hant-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 203104, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 
'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zho_Hant-kor_Hang': {'num_samples': 1997, 'number_of_characters': 224883, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zho_Hant-lit_Latn': {'num_samples': 1997, 'number_of_characters': 350875, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'zho_Hant-nld_Latn': {'num_samples': 1997, 'number_of_characters': 383393, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'zho_Hant-pol_Latn': {'num_samples': 1997, 'number_of_characters': 369344, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'zho_Hant-por_Latn': {'num_samples': 1997, 'number_of_characters': 368693, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'zho_Hant-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 366371, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 
'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'zho_Hant-spa_Latn': {'num_samples': 1997, 'number_of_characters': 379678, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'zho_Hant-swa_Latn': {'num_samples': 1997, 'number_of_characters': 363987, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'zho_Hant-swe_Latn': {'num_samples': 1997, 'number_of_characters': 343305, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'zho_Hant-tam_Taml': {'num_samples': 1997, 'number_of_characters': 401439, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'zho_Hant-tur_Latn': {'num_samples': 1997, 'number_of_characters': 357091, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'zho_Hant-vie_Latn': {'num_samples': 1997, 'number_of_characters': 362599, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 
'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zho_Hant-yue_Hant': {'num_samples': 1997, 'number_of_characters': 170365, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'zho_Hant-zho_Hans': {'num_samples': 1997, 'number_of_characters': 176439, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'zho_Hant-zul_Latn': {'num_samples': 1997, 'number_of_characters': 349210, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'zul_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 425239, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'zul_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 488913, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'zul_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 501534, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 
'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'zul_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 553382, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'zul_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 556859, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'zul_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 505452, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zul_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 501071, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'zul_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 527532, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'zul_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 550840, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 
494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'zul_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 527698, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'zul_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 458028, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'zul_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 519307, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'zul_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 536108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'zul_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 503833, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'zul_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 544704, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 
'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'zul_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 369358, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zul_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 391137, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zul_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 517129, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'zul_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 549647, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'zul_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 549231, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'zul_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 495163, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 
'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'zul_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 535598, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'zul_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 534947, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'zul_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 532625, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'zul_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 549024, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'zul_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 545932, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'zul_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 545874, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 
129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'zul_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 530241, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'zul_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 509559, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'zul_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 567693, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'zul_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 422970, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'zul_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 592015, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'zul_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 523345, 'unique_pairs': 1997, 'min_sentence1_length': 8, 
'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'zul_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 528853, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zul_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 497535, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'zul_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 525822, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'zul_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 573820, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'zul_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 349210, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}}}} | | [NYSJudicialEthicsLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | 
['eng'] | Classification | s2s | [Legal, Written] | None | None | | [NaijaSenti](https://github.com/hausanlp/NaijaSenti) | ['hau', 'ibo', 'pcm', 'yor'] | Classification | s2s | [Social, Written] | None | None | +| [NamaaMrTydiReranking](https://huggingface.co/NAMAA-Space) (Muennighoff et al., 2022) | ['ara'] | Reranking | s2s | [Encyclopaedic, Written] | None | None | +| [NanoArguAnaRetrieval](http://argumentation.bplaced.net/arguana/data) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | [Medical, Written] | None | None | +| [NanoClimateFeverRetrieval](https://arxiv.org/abs/2012.00614) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | [Non-fiction, Academic, News] | None | None | +| [NanoDBPediaRetrieval](https://huggingface.co/datasets/zeta-alpha-ai/NanoDBPedia) (Lehmann et al., 2015) | ['eng'] | Retrieval | s2p | [Encyclopaedic] | None | None | +| [NanoFEVERRetrieval](https://fever.ai/) | ['eng'] | Retrieval | s2p | [Academic, Encyclopaedic] | None | None | +| [NanoFiQA2018Retrieval](https://sites.google.com/view/fiqa/) (Nandan Thakur, 2021) | ['eng'] | Retrieval | s2p | [Academic, Social] | None | None | +| [NanoHotpotQARetrieval](https://hotpotqa.github.io/) | ['eng'] | Retrieval | s2p | [Web, Written] | None | None | +| [NanoMSMARCORetrieval](https://microsoft.github.io/msmarco/) (Tri Nguyen and Mir Rosenberg and Xia Song and Jianfeng Gao and Saurabh Tiwary and Rangan Majumder and Li Deng, 2016) | ['eng'] | Retrieval | s2p | [Web] | None | None | +| [NanoNFCorpusRetrieval](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | [Medical, Academic, Written] | None | None | +| [NanoNQRetrieval](https://ai.google.com/research/NaturalQuestions) (Tom Kwiatkowski, 2019) | ['eng'] | Retrieval | s2p | [Academic, Web] | None | None | +| [NanoQuoraRetrieval](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) (DataCanary et al., 2017) | ['eng'] | Retrieval | s2s | [Social] | None | None | 
+| [NanoSCIDOCSRetrieval](https://allenai.org/data/scidocs) (Arman Cohan, 2020) | ['eng'] | Retrieval | s2p | [Academic, Written, Non-fiction] | None | None | +| [NanoSciFactRetrieval](https://github.com/allenai/scifact) (Arman Cohan, 2020) | ['eng'] | Retrieval | s2p | [Academic, Medical, Written] | None | None | +| [NanoTouche2020Retrieval](https://webis.de/events/touche-20/shared-task-1.html) | ['eng'] | Retrieval | s2p | [Academic] | None | None | | [NarrativeQARetrieval](https://metatext.io/datasets/narrativeqa) (Tomáš Kočiský, 2017) | ['eng'] | Retrieval | s2p | | None | None | | [NepaliNewsClassification](https://github.com/goru001/nlp-for-nepali) | ['nep'] | Classification | s2s | [News, Written] | None | None | | [NeuCLIR2022Retrieval](https://neuclir.github.io/) (Lawrie et al., 2023) | ['fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | None | None | @@ -459,7 +473,7 @@ The following tables give you an overview of the tasks within MTEB. | [SICK-BR-PC](https://linux.ime.usp.br/~thalen/SICK_PT.pdf) | ['por'] | PairClassification | s2s | [Web, Written] | None | None | | [SICK-BR-STS](https://linux.ime.usp.br/~thalen/SICK_PT.pdf) | ['por'] | STS | s2s | [Web, Written] | None | None | | [SICK-E-PL](https://aclanthology.org/2020.lrec-1.207) | ['pol'] | PairClassification | s2s | | None | None | -| [SICK-R](https://aclanthology.org/2020.lrec-1.207) | ['eng'] | STS | s2s | | None | None | +| [SICK-R](https://aclanthology.org/L14-1314/) | ['eng'] | STS | s2s | [Web, Written] | None | None | | [SICK-R-PL](https://aclanthology.org/2020.lrec-1.207) | ['pol'] | STS | s2s | [Web, Written] | None | None | | [SICKFr](https://huggingface.co/datasets/Lajavaness/SICK-fr) | ['fra'] | STS | s2s | | None | None | | [SIQA](https://leaderboard.allenai.org/socialiqa/submissions/get-started) (Xiao et al., 2024) | ['eng'] | Retrieval | s2s | [Encyclopaedic, Written] | None | None | @@ -483,8 +497,8 @@ The following tables give you an overview of the tasks within MTEB. 
| [SanskritShlokasClassification](https://github.com/goru001/nlp-for-sanskrit) | ['san'] | Classification | s2s | [Religious, Written] | None | None | | [ScalaClassification](https://aclanthology.org/2023.nodalida-1.20/) | ['dan', 'nno', 'nob', 'swe'] | Classification | s2s | [Fiction, News, Non-fiction, Blog, Spoken, Web, Written] | None | None | | [SciDocsRR](https://allenai.org/data/scidocs) | ['eng'] | Reranking | s2s | [Academic, Non-fiction, Written] | None | None | -| [SciFact](https://github.com/allenai/scifact) (Arman Cohan, 2020) | ['eng'] | Retrieval | s2p | | None | None | -| [SciFact-PL](https://github.com/allenai/scifact) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | +| [SciFact](https://github.com/allenai/scifact) (Arman Cohan, 2020) | ['eng'] | Retrieval | s2p | [Academic, Medical, Written] | None | None | +| [SciFact-PL](https://github.com/allenai/scifact) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | [Academic, Medical, Written] | None | None | | [SemRel24STS](https://huggingface.co/datasets/SemRel/SemRel2024) (Nedjma Ousidhoum, 2024) | ['afr', 'amh', 'arb', 'arq', 'ary', 'eng', 'hau', 'hin', 'ind', 'kin', 'mar', 'tel'] | STS | s2s | [Spoken, Written] | None | None | | [SensitiveTopicsClassification](https://aclanthology.org/2021.bsnlp-1.4) | ['rus'] | MultilabelClassification | s2s | [Web, Social, Written] | None | None | | [SentimentAnalysisHindi](https://huggingface.co/datasets/OdiaGenAI/sentiment_analysis_hindi) (Shantipriya Parida, 2023) | ['hin'] | Classification | s2s | [Reviews, Written] | None | None | @@ -524,8 +538,8 @@ The following tables give you an overview of the tasks within MTEB. 
| [T2Retrieval](https://arxiv.org/abs/2304.03679) (Xiaohui Xie, 2023) | ['cmn'] | Retrieval | s2p | | None | None | | [TERRa](https://arxiv.org/pdf/2010.15925) (Shavrina et al., 2020) | ['rus'] | PairClassification | s2s | [News, Web, Written] | None | None | | [TNews](https://www.cluebenchmarks.com/introduce.html) | ['cmn'] | Classification | s2s | | None | None | -| [TRECCOVID](https://ir.nist.gov/covidSubmit/index.html) (Kirk Roberts, 2021) | ['eng'] | Retrieval | s2p | | None | None | -| [TRECCOVID-PL](https://ir.nist.gov/covidSubmit/index.html) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | [Academic, Non-fiction, Written] | None | None | +| [TRECCOVID](https://ir.nist.gov/covidSubmit/index.html) (Kirk Roberts, 2021) | ['eng'] | Retrieval | s2p | [Medical, Academic, Written] | None | None | +| [TRECCOVID-PL](https://ir.nist.gov/covidSubmit/index.html) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | [Academic, Medical, Non-fiction, Written] | None | None | | [TV2Nordretrieval](https://huggingface.co/datasets/alexandrainst/nordjylland-news-summarization) | ['dan'] | Retrieval | p2p | [News, Non-fiction, Written] | None | None | | [TamilNewsClassification](https://github.com/vanangamudi/tamil-news-classification) (Anoop Kunchukuttan, 2020) | ['tam'] | Classification | s2s | [News, Written] | None | None | | [Tatoeba](https://github.com/facebookresearch/LASER/tree/main/data/tatoeba/v1) (Tatoeba community, 2021) | ['afr', 'amh', 'ang', 'ara', 'arq', 'arz', 'ast', 'awa', 'aze', 'bel', 'ben', 'ber', 'bos', 'bre', 'bul', 'cat', 'cbk', 'ceb', 'ces', 'cha', 'cmn', 'cor', 'csb', 'cym', 'dan', 'deu', 'dsb', 'dtp', 'ell', 'eng', 'epo', 'est', 'eus', 'fao', 'fin', 'fra', 'fry', 'gla', 'gle', 'glg', 'gsw', 'heb', 'hin', 'hrv', 'hsb', 'hun', 'hye', 'ido', 'ile', 'ina', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kat', 'kaz', 'khm', 'kor', 'kur', 'kzj', 'lat', 'lfn', 'lit', 'lvs', 'mal', 'mar', 'max', 'mhr', 'mkd', 'mon', 'nds', 'nld', 'nno', 'nob', 'nov', 'oci', 
'orv', 'pam', 'pes', 'pms', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'swg', 'swh', 'tam', 'tat', 'tel', 'tgl', 'tha', 'tuk', 'tur', 'tzl', 'uig', 'ukr', 'urd', 'uzb', 'vie', 'war', 'wuu', 'xho', 'yid', 'yue', 'zsm'] | BitextMining | s2s | [Written] | None | None | @@ -608,1060 +622,1060 @@ The following tables give you an overview of the tasks within MTEB.
-| Language | BitextMining | Classification | Clustering | InstructionRetrieval | MultilabelClassification | PairClassification | Reranking | Retrieval | STS | Speed | Summarization | -|---|------|------|------|------|------|------|------|------|------|------|---| -| aai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aau | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aaz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| abs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| abt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| abx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aby | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ace | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| acf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| acm | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| acq | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| acr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| acu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| adz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aeb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aer | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| afr | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| agd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aia | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aii | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ajp | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aka | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ake | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| alp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| alq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | -| als | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| aly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ame | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amh | 3 | 6 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| amk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ang | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| anh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| anp | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| anv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aoi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aoj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aom | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apc | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ape | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ara | 2 | 12 | 0 | 0 | 0 | 2 | 1 | 9 | 2 | 0 | 0 | -| arb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | -| are | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| arl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| arn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| arp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| arq | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | -| ars | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ary | 1 | 3 | 1 | 0 
| 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| arz | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| asm | 5 | 3 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | -| aso | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ast | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ata | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| atb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| atd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| atg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| att | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| auc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| auy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| avt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| awa | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| awb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| awk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| awx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ayr | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| azb | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aze | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| azg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| azj | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| azz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bak | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bam | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ban | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bba | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bbc | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bbr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bdd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bef | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bel | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bem | 
2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ben | 7 | 9 | 2 | 0 | 0 | 1 | 2 | 6 | 1 | 0 | 0 | -| beo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ber | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| beu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bew | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bgc | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bgs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bgt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bhb | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bhd | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bhl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bho | 2 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | -| bhp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| big | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjj | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjn | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bkd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bki | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bkq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bkx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| blw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| blz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bmh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bmk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bmr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bmu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bnp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bns | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| boa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bod | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| boj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | -| bon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bos | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| box | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| boy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bpr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bqc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bqp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bra | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bre | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| brx | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bsj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bsn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bsp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bug | 2 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| buk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bul | 3 | 4 | 1 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | -| bus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bvd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bvr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bxh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| byr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| byx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bzd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bzh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| caa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| caf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| car | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cat | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| cav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cax | 1 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | -| cbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbk | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ceb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| cek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ces | 4 | 5 | 2 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | -| cgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cha | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chv | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cjk | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cjo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cjv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ckb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| cle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| clu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cme | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cmn | 4 | 10 | 4 | 0 | 0 | 3 | 4 | 10 | 9 | 0 | 0 | -| cmo | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cni | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cnl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cnt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| code | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | -| cof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| con | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cor | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cot | 1 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cpa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cpb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cpc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cpu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cpy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| crh | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| crn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| crx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| csb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cso | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| csy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cta | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cth | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ctp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ctu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cuk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cwe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cya | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cym | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| daa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dan | 5 | 9 | 2 | 0 | 1 | 0 | 1 | 5 | 0 | 0 | 0 | -| ded | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| deu | 6 | 14 | 7 | 0 | 1 | 6 | 2 | 18 | 4 | 0 | 0 | -| dgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dgr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dgz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dik | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| div | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| -| dji | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| djk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| djr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dob | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| doi | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dov | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dsb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dtp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dwr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dww | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dwy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dyu | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dza | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dzo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ebk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| eko | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ell | 3 | 6 | 1 | 0 | 1 | 2 | 0 | 3 | 0 | 0 | 0 | -| emi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| emp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| eng | 16 | 143 | 16 | 3 | 1 | 8 | 8 | 91 | 13 | 2 | 1 | -| enq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| epo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| eri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ese | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| esk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| est | 2 | 2 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | -| etr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| eus | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ewe | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| faa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fao | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | -| far | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fas | 1 | 4 | 0 | 0 | 0 | 1 | 2 | 9 | 0 | 0 | 0 | -| ffm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fij | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fil | 1 | 2 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | -| fin | 3 | 5 | 1 | 0 | 1 | 1 | 2 | 5 | 1 | 0 | 0 | -| fon | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| for | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fra | 7 | 13 | 8 | 0 | 1 | 5 | 3 | 14 | 4 | 0 | 1 | -| fry | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fuf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fuh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fur | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fuv | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| gah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gaw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gaz | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| gbm | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gdn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gdr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| geb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gfk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ghs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gla | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gle | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| glg | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| glk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| glv | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gmv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gng | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gnw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gom | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| grc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| grn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| gsw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| guh | 1 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| guj | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | -| gul | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gum | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gun | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| guo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gvc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gvf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gvs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gym | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gyr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hat | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| hau | 4 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| haw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hbo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| heb | 4 | 5 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| heg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hin | 9 | 12 | 2 | 0 | 0 | 1 | 2 | 10 | 2 | 0 | 0 | -| hix | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hla | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hlt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hmn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hne | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hns | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hot | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hrv | 4 | 3 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | -| hsb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| -| hun | 5 | 3 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | -| hus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| huu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| huv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hye | 3 | 3 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | -| ian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ibo | 3 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ido | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ign | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ikk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ikw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ile | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ilo | 2 | 1 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| imo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ina | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| inb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ind | 6 | 7 | 1 | 0 | 0 | 1 | 1 | 4 | 1 | 0 | 0 | -| ino | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| iou | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ipi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| isl | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| isn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ita | 5 | 9 | 1 | 0 | 1 | 2 | 1 | 5 | 3 | 0 | 0 | -| iws | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ixl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jae | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jav | 4 | 7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| jic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jiv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jni | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jpn | 5 | 8 | 3 | 0 | 0 | 1 | 3 | 13 | 2 | 0 | 0 | -| jvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kab | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kac | 1 | 1 | 1 | 0 | 0 | 0 | 0 
| 1 | 0 | 0 | 0 | -| kam | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kan | 6 | 7 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | -| kaq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kas | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kat | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | -| kaz | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| kbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kbh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kbm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kbp | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kbq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kdc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kde | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kdl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kea | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| kek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ken | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kew | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kfg | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kfy | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kgf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kgk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kgp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| khk | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| khm | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| khs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| khz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kik | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kin | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| kir | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| kiw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kiz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kje | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kjs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kkc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kkl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| klt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| klv | 1 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmr | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kms | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| knc | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kne | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| knf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| knj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| knv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kon | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kor | 4 | 8 | 1 | 0 | 1 | 2 | 1 | 9 | 3 | 0 | 0 | -| kos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kqa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kqc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kqf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kql | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kqw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| krc | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ksd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ksj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ksr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ktm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kud | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kur | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kvg 
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kwd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kwf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kwj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kyc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kyf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kyg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kyq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kyz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kze | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lao | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| lat | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lav | 1 | 2 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | -| lbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lbk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lcm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| leu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lex | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lfn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lgl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lij | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lim | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lin | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| lit | 4 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | -| llg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lmo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ltg | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ltz | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lua | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lug | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| luo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| lus | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | -| lvs | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| lww | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| maa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mad | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mag | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mai | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| maj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mak | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mal | 7 | 7 | 2 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | -| mam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| maq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mar | 7 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 2 | 0 | 0 | -| mau | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| max | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| maz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mdy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| med | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mee | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| meq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| met | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| meu | 1 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | -| mey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mgh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mgw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mhl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mhr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mib | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mie | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mig | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mih | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mil | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| min | 3 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mir | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| miz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mjc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mkd | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| mkj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mkl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mkn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mks | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mlg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mlh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mlp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mlt | 2 | 2 | 2 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | -| mmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mmx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mni | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mon | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mos | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mox | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mph | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mpj | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mpm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mpp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mpt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mpx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mqb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mqj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mri | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| msa | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| msb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| msc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| msk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| msm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| msy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mti | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mui | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mup | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| muy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mva | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mwc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mwe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mwf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mwp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mwr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mxb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mxp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mxq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mxt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mya | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| myk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| myu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| myw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| myy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mzz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| 
nab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| naf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nas | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nbl | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nbq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ncj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ncl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ncu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nde | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ndg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ndj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nds | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nep | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nfa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ngp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ngu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nho | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nii | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nij | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nin | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nko | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nld | 6 | 6 | 1 | 0 | 1 | 0 | 1 | 2 | 2 | 0 | 0 | -| nlg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nno | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nnq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| noa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | -| nob | 4 | 7 | 5 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | -| noe | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nor | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | -| not | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nou | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nov | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| npi | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| npl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nqo | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nsn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nso | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| nss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ntj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ntp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ntu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nus | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nuy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nvm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nya | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| nys | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nyu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| obo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| oci | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| okv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| omw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ong | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ons | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ood | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| opm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ori | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| orm | 1 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| orv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ory | 5 | 4 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | -| ote | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| otm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| otn | 1 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | -| otq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ots | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pag | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pan | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | -| pao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pap | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pbt | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| pcm | 1 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pes | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| pib | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pir | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| piu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pjt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pls | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| plt | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| plu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pma | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pms | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| poe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| poh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| poi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pol | 4 | 11 | 4 | 0 | 1 | 4 | 0 | 18 | 4 | 0 | 0 | -| pon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| por | 4 | 9 | 1 | 0 | 2 | 2 | 1 | 5 | 3 | 0 | 0 | -| poy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ppo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| prf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| prs | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ptp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ptu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pus | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pwg | 1 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| quc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| quf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| quh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qul | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| quy | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qve | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qwh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qxh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qxn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qxo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| raj | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| reg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rej | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rgu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rkb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rmc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rmy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rom | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ron | 5 | 6 | 1 | 0 | 1 | 0 | 1 | 3 | 1 | 0 | 0 | -| roo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| row | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rro | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ruf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rug | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| run | 1 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rus | 5 | 13 | 6 | 0 | 2 | 4 | 2 | 16 | 4 | 0 
| 0 | -| rwo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sag | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sah | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| san | 5 | 3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | -| sat | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sbe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sbk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| scn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sco | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| seh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sgb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sgz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| shi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| shj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| shn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| shp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sin | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| sja | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| slk | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | -| sll | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| slv | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | -| smk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| smo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sna | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| snc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| snd | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| snn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| snp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| snx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sny | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| som | 3 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| soq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sot | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| soy | 1 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | -| spa | 4 | 13 | 4 | 0 | 1 | 2 | 2 | 12 | 4 | 0 | 0 | -| spl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| spm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| spp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| spy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sqi | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| srd | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| srm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| srn | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| srp | 4 | 1 | 1 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | -| srq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ssd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ssg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ssw | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ssx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| stp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sua | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sun | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| sus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| suz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| svk | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| swa | 1 | 7 | 2 | 0 | 0 | 1 | 1 | 3 | 0 | 0 | 0 | -| swe | 4 | 8 | 3 | 0 | 1 | 1 | 1 | 4 | 0 | 0 | 0 | -| swg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| swh | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| swp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sxb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| szl | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| taj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tam | 7 | 7 | 2 | 0 | 0 | 1 | 0 | 3 | 1 | 0 | 0 | -| taq | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tat | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tav | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| taw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tbf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tbg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tbo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tbz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tcs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tcz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tdt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tee | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tel | 7 | 7 | 2 | 0 | 0 | 0 | 1 | 5 | 2 | 0 | 0 | -| ter | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tet | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tew | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tfr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tgk | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| tgl | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| tgo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tgp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tha | 4 | 8 | 1 | 0 | 0 | 1 | 1 | 6 | 0 | 0 | 0 | -| tif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tir | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| tiw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tiy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tke | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tku | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tlf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tmd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tnc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tnk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tnp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| toc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tod | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| 
tof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| toj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ton | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| too | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| top | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tpa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tpi | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tpt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tpz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| trc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tsn | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| tso | 1 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| tsw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ttc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tte | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tuf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tuk | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tum | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tuo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tur | 4 | 7 | 1 | 0 | 0 | 2 | 0 | 3 | 2 | 0 | 0 | -| tvk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| twi | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| txq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| txu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tyv | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tzl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tzm | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tzo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ubr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ubu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| udu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| uig | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ukr | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| uli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | -| ulk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| umb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| upv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ura | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| urb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| urd | 7 | 8 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| uri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| urt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| urw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| usa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| usp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| uvh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| uvl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| uzb | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| uzn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| vec | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ven | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| vid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| vie | 5 | 6 | 1 | 0 | 0 | 1 | 0 | 5 | 0 | 0 | 0 | -| viv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| vmy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| waj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wal | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| war | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| wat | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wbp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wed | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wer | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wiu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wiv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wln | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wmt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wmw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wnc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wnu | 1 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | -| wol | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| wos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wrk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wro | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wrs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wsk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wuu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wuv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xed | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xho | 3 | 3 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| xla | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xsi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xtd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xtm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yaa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yal | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yaq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yby | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ycn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ydd | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yka | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yml | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yor | 4 | 5 | 3 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | -| yrb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yre | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yue | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yuj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yut | 1 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yuw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yva | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zaa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zaj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zar | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zas | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zat | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zaw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zga | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zho | 2 | 2 | 1 | 0 | 0 | 1 | 1 | 13 | 0 | 0 | 0 | -| zia | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ziw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zlm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zsm | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| zsr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ztq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zty | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zul | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| zyp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| -| Total | 1394 | 795 | 304 | 3 | 28 | 67 | 50 | 457 | 85 | 2 | 2 | +| ISO Code | Language | Family | BitextMining | Classification | Clustering | InstructionRetrieval | MultilabelClassification | PairClassification | Reranking | Retrieval | STS | Speed | Summarization | Sum | +|---|------|------|------|------|------|------|------|------|------|------|------|------|------|---| +| aai | Arifama-Miniafia | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aak | Ankave | Angan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aau | Abau | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aaz | Amarasi | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| abs | Ambonese Malay | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| abt | Ambulas | Ndu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| abx | Inabaknon | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aby | Aneme Wake | Yareban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ace | Achinese | Austronesian | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| acf | Saint Lucian Creole French | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| acm | Mesopotamian Arabic | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| acq | Ta'izzi-Adeni Arabic | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| acr | Achi | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| acu | Achuar-Shiwiar | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| adz | Adzera | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aeb | Tunisian Arabic | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| aer | Eastern Arrernte | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aey | Amele | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| afr | Afrikaans | Indo-European | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 10 | +| agd | Agarabi 
| Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agg | Angor | Senagi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agm | Angaataha | Angan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agn | Agutaynen | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agr | Aguaruna | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agt | Central Cagayan Agta | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agu | Aguacateco | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aia | Arosi | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aii | Assyrian Neo-Aramaic | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ajp | South Levantine Arabic | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| aka | Akan | Atlantic-Congo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ake | Akawaio | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| alp | Alune | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| alq | Algonquin | Algic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| als | Tosk Albanian | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| aly | Alyawarr | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ame | Yanesha' | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amf | Hamer-Banna | South Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amh | Amharic | Afro-Asiatic | 3 | 6 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 14 | +| amk | Ambai | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amm | Ama (Papua New Guinea) | Left May | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amn | Amanab | Border | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amo | Amo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amp | Alamblak | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amr | Amarakaeri | 
Harakmbut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amu | Guerrero Amuzgo | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amx | Anmatyerre | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ang | Old English (ca. 450-1100) | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| anh | Nend | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| anp | Angika | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| anv | Denya | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aoi | Anindilyakwa | Gunwinyguan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aoj | Mufian | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aom | Ömie | Koiarian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aon | Bumbita Arapesh | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apb | Sa'a | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apc | Levantine Arabic | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| ape | Bukiyip | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apn | Apinayé | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apr | Arop-Lokep | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apu | Apurinã | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apw | Western Apache | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apz | Safeyoka | Angan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ara | Arabic | Unclassified | 2 | 12 | 0 | 0 | 0 | 2 | 2 | 9 | 2 | 0 | 0 | 29 | +| arb | Standard Arabic | Afro-Asiatic | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 8 | +| are | Western Arrarnta | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arl | Arabela | Zaparoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arn | Mapudungun | Araucanian | 1 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arp | Arapaho | Algic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arq | Algerian Arabic | Afro-Asiatic | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 4 | +| ars | Najdi Arabic | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| ary | Moroccan Arabic | Afro-Asiatic | 1 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 7 | +| arz | Egyptian Arabic | Afro-Asiatic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| asm | Assamese | Indo-European | 5 | 3 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 14 | +| aso | Dano | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ast | Asturian | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ata | Pele-Ata | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| atb | Zaiwa | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| atd | Ata Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| atg | Ivbie North-Okpela-Arhe | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| att | Pamplona Atta | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| auc | Waorani | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aui | Anuki | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| auy | Awiyaana | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| avt | Au | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| awa | Awadhi | Indo-European | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| awb | Awa (Papua New Guinea) | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| awk | Awabakal | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| awx | Awara | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ayr | Central Aymara | Aymaran | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| azb | South Azerbaijani | Turkic | 
2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| aze | Azerbaijani | Unclassified | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| azg | San Pedro Amuzgos Amuzgo | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| azj | North Azerbaijani | Turkic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| azz | Highland Puebla Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bak | Bashkir | Turkic | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| bam | Bambara | Mande | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| ban | Balinese | Austronesian | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bao | Waimaha | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bba | Baatonum | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bbb | Barai | Koiarian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bbc | Batak Toba | Austronesian | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bbr | Girawa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bch | Bariai | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bco | Kaluli | Bosavi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bdd | Bunama | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bea | Beaver | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bef | Benabena | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bel | Belarusian | Indo-European | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| bem | Bemba (Zambia) | Atlantic-Congo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ben | Bengali | Indo-European | 7 | 9 | 2 | 0 | 0 | 1 | 2 | 6 | 1 | 0 | 0 | 28 | +| beo | Beami | Bosavi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ber | Berber (Other) | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| beu | Blagar | Timor-Alor-Pantar | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 
+| bew | Betawi | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| bgc | Haryanvi | Indo-European | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| bgs | Tagabawa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bgt | Bughotu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhb | Bhili | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhd | Bhadrawahi | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhg | Binandere | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhl | Bimin | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bho | Bhojpuri | Indo-European | 2 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 6 | +| bhp | Bima | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| big | Biangai | Kunimaipan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjj | Kanauji | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjk | Barok | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjn | Banjar | Austronesian | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bjp | Fanamaket | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjr | Binumarien | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjv | Bedjond | Central Sudanic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjz | Baruga | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bkd | Binukid | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bki | Baki | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bkq | Bakairí | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bkx | Baikeno | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| blw | Balangao | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| blz | Balantak | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 1 | +| bmh | Kein | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmk | Ghayavi | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmr | Muinane | Boran | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmu | Somba-Siawari | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bnp | Bola | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bns | Bundeli | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| boa | Bora | Boran | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bod | Tibetan | Sino-Tibetan | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| boj | Anjam | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bon | Bine | Eastern Trans-Fly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bos | Bosnian | Indo-European | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| box | Buamu | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| boy | Bodo (Central African Republic) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bpr | Koronadal Blaan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bps | Sarangani Blaan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bqc | Boko (Benin) | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bqp | Busa | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bra | Braj | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bre | Breton | Indo-European | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| brx | Bodo (India) | Sino-Tibetan | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bsj | Bangwinji | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bsn | Barasana-Eduria | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bsp | Baga Sitemu | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bss | Akoose | 
Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bug | Buginese | Austronesian | 2 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| buk | Bugawac | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bul | Bulgarian | Indo-European | 3 | 4 | 1 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | 13 | +| bus | Bokobaru | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bvd | Baeggu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bvr | Burarra | Maningrida | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bxh | Buhutu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| byr | Baruya | Angan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| byx | Qaqet | Baining | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bzd | Bribri | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bzh | Mapos Buang | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bzj | Belize Kriol English | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| caa | Chortí | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cab | Garifuna | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cac | Chuj | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| caf | Southern Carrier | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cak | Kaqchikel | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cao | Chácobo | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cap | Chipaya | Uru-Chipaya | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| car | Galibi Carib | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cat | Catalan | Indo-European | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| cav | Cavineña | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cax | Chiquitano | Chiquitano | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbc | Carapana | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 1 | +| cbi | Chachi | Barbacoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbk | Chavacano | Indo-European | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| cbr | Cashibo-Cacataibo | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbs | Cashinahua | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbt | Chayahuita | Cahuapanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbu | Candoshi-Shapra | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbv | Cacua | Kakua-Nukak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cco | Comaltepec Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ceb | Cebuano | Austronesian | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| cek | Eastern Khumi Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ces | Czech | Indo-European | 4 | 5 | 2 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | 16 | +| cgc | Kagayanen | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cha | Chamorro | Austronesian | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| chd | Highland Oaxaca Chontal | Tequistlatecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chf | Tabasco Chontal | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chk | Chuukese | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chq | Quiotepec Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chv | Chuvash | Turkic | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chz | Ozumacín Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cjk | Chokwe | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| cjo | Ashéninka Pajonal | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cjv | Chuave | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ckb | Central Kurdish | Indo-European | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 
0 | 0 | 6 | +| cle | Lealao Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| clu | Caluyanun | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cme | Cerma | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cmn | Mandarin Chinese | Sino-Tibetan | 4 | 10 | 4 | 0 | 0 | 3 | 4 | 10 | 9 | 0 | 0 | 44 | +| cmo | Central Mnong | Austroasiatic | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| cni | Asháninka | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cnl | Lalana Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cnt | Tepetotutla Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| code | unknown | Unclassified | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 37 | +| cof | Colorado | Barbacoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| con | Cofán | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cop | Coptic | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cor | Cornish | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cot | Caquinte | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpa | Palantla Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpb | Ucayali-Yurúa Ashéninka | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpc | Ajyíninka Apurucayali | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpu | Pichis Ashéninka | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpy | South Ucayali Ashéninka | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| crh | Crimean Tatar | Turkic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| crn | El Nayar Cora | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| crx | Carrier | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| csb | Kashubian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 1 | +| cso | Sochiapam Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| csy | Siyin Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cta | Tataltepec Chatino | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cth | Thaiphum Chin | Bookkeeping | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ctp | Western Highland Chatino | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ctu | Chol | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cub | Cubeo | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cuc | Usila Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cui | Cuiba | Guahiboan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cuk | San Blas Kuna | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cut | Teutila Cuicatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cux | Tepeuxila Cuicatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cwe | Kwere | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cya | Nopala Chatino | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cym | Welsh | Indo-European | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | +| daa | Dangaléat | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dad | Marik | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dah | Gwahatike | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dan | Danish | Indo-European | 5 | 9 | 2 | 0 | 1 | 0 | 1 | 5 | 0 | 0 | 0 | 23 | +| ded | Dedua | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| deu | German | Indo-European | 6 | 14 | 7 | 0 | 1 | 6 | 2 | 18 | 4 | 0 | 0 | 58 | +| dgc | Casiguran Dumagat Agta | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dgr | Dogrib | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 1 | +| dgz | Daga | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dhg | Dhangu-Djangu | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dif | Dieri | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dik | Southwestern Dinka | Nilotic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| div | Dhivehi | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dji | Djinang | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| djk | Eastern Maroon Creole | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| djr | Djambarrpuyngu | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dob | Dobu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| doi | Dogri (macrolanguage) | Unclassified | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| dop | Lukpa | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dov | Dombe | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dsb | Lower Sorbian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dtp | Kadazan Dusun | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dwr | Dawro | Ta-Ne-Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dww | Dawawa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dwy | Dhuwaya | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dyu | Dyula | Mande | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| dza | Tunzu | Atlantic-Congo | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dzo | Dzongkha | Sino-Tibetan | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ebk | Eastern Bontok | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| eko | Koti | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ell | Modern Greek (1453-) | Indo-European | 3 | 6 | 1 | 0 | 1 | 2 | 0 | 3 | 0 | 0 | 0 | 16 | +| emi | Mussau-Emira | Austronesian | 1 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| emp | Northern Emberá | Chocoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| eng | English | Indo-European | 16 | 143 | 16 | 3 | 1 | 8 | 8 | 105 | 13 | 2 | 1 | 316 | +| enq | Enga | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| epo | Esperanto | Artificial Language | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| eri | Ogea | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ese | Ese Ejja | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| esk | Northwest Alaska Inupiatun | Eskimo-Aleut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| est | Estonian | Uralic | 2 | 2 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 8 | +| etr | Edolo | Bosavi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| eus | Basque | Unclassified | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| ewe | Ewe | Atlantic-Congo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| faa | Fasu | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fai | Faiwol | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fao | Faroese | Indo-European | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | +| far | Fataleka | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fas | Persian | Indo-European | 1 | 4 | 0 | 0 | 0 | 1 | 2 | 9 | 0 | 0 | 0 | 17 | +| ffm | Maasina Fulfulde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fij | Fijian | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| fil | Filipino | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| fin | Finnish | Uralic | 3 | 5 | 1 | 0 | 1 | 1 | 2 | 5 | 1 | 0 | 0 | 19 | +| fon | Fon | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| for | Fore | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fra | French | Indo-European | 7 | 13 | 8 | 0 | 1 | 5 | 3 | 15 | 4 | 0 | 1 | 57 | +| 
fry | Western Frisian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fuc | Pulaar | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fue | Borgu Fulfulde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fuf | Pular | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fuh | Western Niger Fulfulde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fur | Friulian | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| fuv | Nigerian Fulfulde | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| gah | Alekano | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gai | Borei | Ramu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gam | Kandawo | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gaw | Nobonob | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gaz | West Central Oromo | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| gbm | Garhwali | Indo-European | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| gdn | Umanakaina | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gdr | Wipi | Eastern Trans-Fly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| geb | Kire | Ramu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gfk | Patpatar | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ghs | Guhu-Samane | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gla | Scottish Gaelic | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| gle | Irish | Indo-European | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| glg | Galician | Indo-European | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| glk | Gilaki | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| glv | Manx | Indo-European | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gmv | Gamo | 
Ta-Ne-Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gng | Ngangam | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gnn | Gumatj | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gnw | Western Bolivian Guaraní | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gof | Gofa | Ta-Ne-Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gom | Goan Konkani | Indo-European | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| grc | Ancient Greek (to 1453) | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| grn | Guarani | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| gsw | Swiss German | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gub | Guajajára | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| guh | Guahibo | Guahiboan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gui | Eastern Bolivian Guaraní | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| guj | Gujarati | Indo-European | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 18 | +| gul | Sea Island Creole English | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gum | Guambiano | Barbacoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gun | Mbyá Guaraní | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| guo | Guayabero | Guahiboan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gup | Gunwinggu | Gunwinyguan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gux | Gourmanchéma | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvc | Guanano | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvf | Golin | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvn | Kuku-Yalanji | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvs | Gumawana | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gwi | Gwichʼin | Athabaskan-Eyak-Tlingit | 
1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gym | Ngäbere | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gyr | Guarayu | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hat | Haitian | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| hau | Hausa | Afro-Asiatic | 4 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 14 | +| haw | Hawaiian | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hbo | Ancient Hebrew | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hch | Huichol | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| heb | Hebrew | Afro-Asiatic | 4 | 5 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 11 | +| heg | Helong | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hin | Hindi | Indo-European | 9 | 12 | 2 | 0 | 0 | 1 | 2 | 10 | 2 | 0 | 0 | 38 | +| hix | Hixkaryána | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hla | Halia | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hlt | Matu Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hmn | Hmong | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hmo | Hiri Motu | Pidgin | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hne | Chhattisgarhi | Indo-European | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| hns | Caribbean Hindustani | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hop | Hopi | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hot | Hote | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hrv | Croatian | Indo-European | 4 | 3 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | +| hsb | Upper Sorbian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hto | Minica Huitoto | Huitotoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hub | Huambisa | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hui | Huli | Nuclear Trans New Guinea | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hun | Hungarian | Uralic | 5 | 3 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 12 | +| hus | Huastec | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| huu | Murui Huitoto | Huitotoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| huv | San Mateo Del Mar Huave | Huavean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hvn | Sabu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hye | Armenian | Indo-European | 3 | 3 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 9 | +| ian | Iatmul | Ndu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ibo | Igbo | Atlantic-Congo | 3 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 12 | +| ido | Ido | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ign | Ignaciano | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ikk | Ika | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ikw | Ikwere | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ile | Interlingue | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ilo | Iloko | Austronesian | 2 | 1 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| imo | Imbongu | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ina | Interlingua (International Auxiliary Language Association) | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| inb | Inga | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ind | Indonesian | Austronesian | 6 | 7 | 1 | 0 | 0 | 1 | 1 | 4 | 1 | 0 | 0 | 21 | +| ino | Inoke-Yate | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| iou | Tuma-Irumu | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ipi | Ipili | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| isl | Icelandic | Indo-European | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| isn | Isanzu | Atlantic-Congo | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ita | Italian | Indo-European | 5 | 9 | 1 | 0 | 1 | 2 | 1 | 5 | 3 | 0 | 0 | 27 | +| iws | Sepik Iwam | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ixl | Ixil | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jac | Popti' | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jae | Yabem | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jao | Yanyuwa | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jav | Javanese | Austronesian | 4 | 7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 13 | +| jic | Tol | Jicaquean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jid | Bu (Kaduna State) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jiv | Shuar | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jni | Janji | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jpn | Japanese | Japonic | 5 | 8 | 3 | 0 | 0 | 1 | 3 | 13 | 2 | 0 | 0 | 35 | +| jvn | Caribbean Javanese | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kab | Kabyle | Afro-Asiatic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| kac | Kachin | Sino-Tibetan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| kam | Kamba (Kenya) | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kan | Kannada | Dravidian | 6 | 7 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 19 | +| kaq | Capanahua | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kas | Kashmiri | Indo-European | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| kat | Georgian | Kartvelian | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 10 | +| kaz | Kazakh | Turkic | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| kbc | Kadiwéu | Guaicuruan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kbh | Camsá | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kbm | Iwal | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kbp | 
Kabiyè | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kbq | Kamano | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kdc | Kutu | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kde | Makonde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kdl | Tsikimba | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kea | Kabuverdianu | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| kek | Kekchí | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ken | Kenyang | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kew | West Kewa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kfg | Kudiya | Dravidian | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kfy | Kumaoni | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kgf | Kube | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kgk | Kaiwá | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kgp | Kaingang | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| khk | Halh Mongolian | Mongolic-Khitan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| khm | Khmer | Austroasiatic | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| khs | Kasua | Bosavi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| khz | Keapara | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kik | Kikuyu | Atlantic-Congo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| kin | Kinyarwanda | Atlantic-Congo | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 8 | +| kir | Kirghiz | Turkic | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| kiw | Northeast Kiwai | Kiwaian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kiz | Kisi | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kje | Kisar | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| 
kjs | East Kewa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kkc | Odoodee | East Strickland | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kkl | Kosarek Yale | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| klt | Nukna | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| klv | Maskelynes | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmb | Kimbundu | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kmg | Kâte | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmh | Kalam | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmk | Limos Kalinga | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmo | Kwoma | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmr | Northern Kurdish | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| kms | Kamasau | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmu | Kanite | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knc | Central Kanuri | Saharan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kne | Kankanaey | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knf | Mankanya | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knj | Western Kanjobal | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knv | Tabo | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kon | Kongo | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kor | Korean | Koreanic | 4 | 8 | 1 | 0 | 1 | 2 | 1 | 9 | 3 | 0 | 0 | 29 | +| kos | Kosraean | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpf | Komba | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpg | Kapingamarangi | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| 
kpj | Karajá | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpr | Korafe-Yegha | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpw | Kobon | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpx | Mountain Koiali | Koiarian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqa | Mum | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqc | Doromu-Koki | Manubaran | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqf | Kakabai | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kql | Kyenele | Yuat | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqw | Kandas | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| krc | Karachay-Balkar | Turkic | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ksd | Kuanua | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ksj | Uare | Kwalean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ksr | Borong | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ktm | Kurti | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kto | Kuot | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kud | 'Auhelawa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kue | Kuman (Papua New Guinea) | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kup | Kunimaipa | Kunimaipan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kur | Kurdish | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kvg | Kuni-Boazi | Anim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kvn | Border Kuna | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwd | Kwaio | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwf | Kwara'ae | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwi | Awa-Cuaiquer | Barbacoan | 1 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 1 | +| kwj | Kwanga | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyc | Kyaka | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyf | Kouya | Kru | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyg | Keyagana | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyq | Kenga | Central Sudanic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyz | Kayabí | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kze | Kosena | Bookkeeping | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kzj | Coastal Kadazan | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lac | Lacandon | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lao | Lao | Tai-Kadai | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| lat | Latin | Indo-European | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| lav | Latvian | Indo-European | 1 | 2 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| lbb | Label | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lbk | Central Bontok | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lcm | Tungag | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| leu | Kara (Papua New Guinea) | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lex | Luang | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lfn | Lingua Franca Nova | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lgl | Wala | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lid | Nyindrou | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lif | Limbu | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lij | Ligurian | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lim | Limburgan | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lin | Lingala | Atlantic-Congo | 2 | 2 | 3 | 0 | 0 | 0 | 
0 | 1 | 0 | 0 | 0 | 8 | +| lit | Lithuanian | Indo-European | 4 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| llg | Lole | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lmo | Lombard | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| ltg | Latgalian | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| ltz | Luxembourgish | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| lua | Luba-Lulua | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lug | Ganda | Atlantic-Congo | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| luo | Luo (Kenya and Tanzania) | Nilotic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| lus | Lushai | Sino-Tibetan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lvs | Standard Latvian | Unclassified | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| lww | Lewo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| maa | San Jerónimo Tecóatl Mazatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mad | Madurese | Austronesian | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| mag | Magahi | Indo-European | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| mai | Maithili | Indo-European | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| maj | Jalapa De Díaz Mazatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mak | Makasar | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| mal | Malayalam | Dravidian | 7 | 7 | 2 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 19 | +| mam | Mam | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| maq | Chiquihuitlán Mazatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mar | Marathi | Indo-European | 7 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 2 | 0 | 0 | 20 | +| mau | Huautla Mazatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mav | Sateré-Mawé | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| max | 
North Moluccan Malay | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| maz | Central Mazahua | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbb | Western Bukidnon Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbc | Macushi | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbh | Mangseng | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbj | Nadëb | Naduhup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbl | Maxakalí | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbs | Sarangani Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbt | Matigsalug Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mca | Maca | Mataguayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcb | Machiguenga | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcd | Sharanahua | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcf | Matsés | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mco | Coatlán Mixe | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcp | Makaa | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcq | Ese | Koiarian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcr | Menya | Angan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mdy | Male (Ethiopia) | Ta-Ne-Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| med | Melpa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mee | Mengen | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mek | Mekeo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| meq | Merey | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| met | Mato | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| meu | Motu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 
+| mey | Hassaniyya | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mgc | Morokodo | Central Sudanic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mgh | Makhuwa-Meetto | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mgw | Matumbi | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mhl | Mauwake | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mhr | Eastern Mari | Uralic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mib | Atatláhuca Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mic | Mi'kmaq | Algic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mie | Ocotepec Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mig | San Miguel El Grande Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mih | Chayuco Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mil | Peñoles Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| min | Minangkabau | Austronesian | 3 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | +| mio | Pinotepa Nacional Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mir | Isthmus Mixe | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mit | Southern Puebla Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| miz | Coatzospan Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mjc | San Juan Colorado Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mkd | Macedonian | Indo-European | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| mkj | Mokilese | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mkl | Mokole | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mkn | Kupang Malay | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mks | Silacayoapan Mixtec | Otomanguean | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mle | Manambu | Ndu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlg | Malagasy | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlh | Mape | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlp | Bargam | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlt | Maltese | Afro-Asiatic | 2 | 2 | 2 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| mmo | Mangga Buang | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mmx | Madak | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mna | Mbula | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mni | Manipuri | Sino-Tibetan | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| mon | Mongolian | Unclassified | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| mop | Mopán Maya | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mos | Mossi | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| mox | Molima | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mph | Maung | Iwaidjan Proper | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpj | Martu Wangka | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpm | Yosondúa Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpp | Migabac | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mps | Dadibi | Teberan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpt | Mian | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpx | Misima-Panaeati | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mqb | Mbuko | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mqj | Mamasa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mri | Maori | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| msa | Malay (macrolanguage) 
| Unclassified | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| msb | Masbatenyo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msc | Sankaran Maninka | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msk | Mansaka | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msm | Agusan Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msy | Aruamu | Ramu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mti | Maiwa (Papua New Guinea) | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mto | Totontepec Mixe | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mui | Musi | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| mup | Malvi | Indo-European | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| mux | Bo-Ung | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| muy | Muyang | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mva | Manam | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mvn | Minaveha | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwc | Are | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwe | Mwera (Chimwera) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwf | Murrinh-Patha | Southern Daly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwp | Kala Lagaw Ya | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwr | Marwari | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxb | Tezoatlán Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxp | Tlahuitoltepec Mixe | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxq | Juquila Mixe | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxt | Jamiltepec Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mya | Burmese | Sino-Tibetan | 3 | 4 | 1 | 0 | 0 | 0 | 
0 | 1 | 0 | 0 | 0 | 9 | +| myk | Mamara Senoufo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| myu | Mundurukú | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| myw | Muyuw | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| myy | Macuna | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mzz | Maiadomu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nab | Southern Nambikuára | Nambiquaran | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| naf | Nabak | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nak | Nakanai | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nas | Naasioi | South Bougainville | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nbl | South Ndebele | Unclassified | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nbq | Nggem | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nca | Iyo | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nch | Central Huasteca Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ncj | Northern Puebla Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ncl | Michoacán Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ncu | Chumburung | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nde | North Ndebele | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ndg | Ndengereko | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ndj | Ndamba | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nds | Low German | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nep | Nepali (macrolanguage) | Unclassified | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| nfa | Dhao | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ngp | Ngulu | Atlantic-Congo | 1 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ngu | Guerrero Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhe | Eastern Huasteca Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhg | Tetelcingo Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhi | Zacatlán-Ahuacatlán-Tepetzintla Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nho | Takuu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhr | Naro | Khoe-Kwadi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhu | Noone | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhw | Western Huasteca Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhy | Northern Oaxaca Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nif | Nek | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nii | Nii | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nij | Ngaju | Austronesian | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| nin | Ninzo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nko | Nkonya | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nld | Dutch | Indo-European | 6 | 6 | 1 | 0 | 1 | 0 | 1 | 2 | 2 | 0 | 0 | 19 | +| nlg | Gela | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nna | Nyangumarta | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nno | Norwegian Nynorsk | Unclassified | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | +| nnq | Ngindo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| noa | Woun Meu | Chocoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nob | Norwegian Bokmål | Unclassified | 4 | 7 | 5 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 19 | +| noe | Nimadi | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nop | Numanggang | Nuclear Trans 
New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nor | Norwegian | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 3 | +| not | Nomatsiguenga | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nou | Ewage-Notu | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nov | Novial | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| npi | Nepali (individual language) | Indo-European | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| npl | Southeastern Puebla Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nqo | N'Ko | Artificial Language | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| nsn | Nehan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nso | Pedi | Atlantic-Congo | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| nss | Nali | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ntj | Ngaanyatjarra | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ntp | Northern Tepehuan | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ntu | Natügu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nus | Nuer | Nilotic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| nuy | Nunggubuyu | Gunwinyguan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nvm | Namiae | Koiarian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nwi | Southwest Tanna | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nya | Nyanja | Atlantic-Congo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| nys | Nyungar | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nyu | Nyungwe | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| obo | Obo Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| oci | Occitan (post 1500) | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| okv | Orokaiva | Nuclear Trans New 
Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| omw | South Tairora | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ong | Olo | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ons | Ono | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ood | Tohono O'odham | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| opm | Oksapmin | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ori | Oriya (macrolanguage) | Unclassified | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| orm | Oromo | Unclassified | 1 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| orv | Old Russian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ory | Odia | Indo-European | 5 | 4 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 15 | +| ote | Mezquital Otomi | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| otm | Eastern Highland Otomi | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| otn | Tenango Otomi | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| otq | Querétaro Otomi | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ots | Estado de México Otomi | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pab | Parecís | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pad | Paumarí | Arawan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pag | Pangasinan | Austronesian | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| pah | Tenharim | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pam | Pampanga | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pan | Panjabi | Indo-European | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 18 | +| pao | Northern Paiute | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pap | Papiamento | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| pbt | Southern Pashto | 
Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| pcm | Nigerian Pidgin | Indo-European | 1 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| pes | Iranian Persian | Indo-European | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| pib | Yine | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pio | Piapoco | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pir | Piratapuyo | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| piu | Pintupi-Luritja | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pjt | Pitjantjatjara | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pls | San Marcos Tlacoyalco Popoloca | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| plt | Plateau Malagasy | Austronesian | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| plu | Palikúr | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pma | Paama | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pms | Piemontese | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| poe | San Juan Atzingo Popoloca | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| poh | Poqomchi' | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| poi | Highland Popoluca | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pol | Polish | Indo-European | 4 | 11 | 4 | 0 | 1 | 4 | 0 | 18 | 4 | 0 | 0 | 46 | +| pon | Pohnpeian | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| por | Portuguese | Indo-European | 4 | 9 | 1 | 0 | 2 | 2 | 1 | 5 | 3 | 0 | 0 | 27 | +| poy | Pogolo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ppo | Folopa | Teberan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| prf | Paranan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pri | Paicî | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| prs | Dari | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 4 | +| ptp | Patep | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ptu | Bambam | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pus | Pushto | Unclassified | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| pwg | Gapapaiwa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qub | Huallaga Huánuco Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quc | K'iche' | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quf | Lambayeque Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quh | South Bolivian Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qul | North Bolivian Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qup | Southern Pastaza Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quy | Ayacucho Quechua | Quechuan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| qvc | Cajamarca Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qve | Eastern Apurímac Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvh | Huamalíes-Dos de Mayo Huánuco Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvm | Margos-Yarowilca-Lauricocha Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvn | North Junín Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvs | San Martín Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvw | Huaylla Wanca Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvz | Northern Pastaza Quichua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qwh | Huaylas Ancash Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qxh | Panao Huánuco Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qxn | Northern Conchucos Ancash Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qxo | Southern Conchucos Ancash Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rai | Ramoaaina | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| raj | Rajasthani | Unclassified | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| reg | Kara (Tanzania) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rej | Rejang | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| rgu | Ringgou | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rkb | Rikbaktsa | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rmc | Carpathian Romani | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rmy | Vlax Romani | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rom | Romany | Unclassified | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| ron | Romanian | Indo-European | 5 | 6 | 1 | 0 | 1 | 0 | 1 | 3 | 1 | 0 | 0 | 18 | +| roo | Rotokas | North Bougainville | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rop | Kriol | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| row | Dela-Oenale | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rro | Waima | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ruf | Luguru | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rug | Roviana | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| run | Rundi | Atlantic-Congo | 1 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| rus | Russian | Indo-European | 5 | 13 | 6 | 0 | 2 | 4 | 2 | 16 | 4 | 0 | 0 | 52 | +| rwo | Rawa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sab | Buglere | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sag | Sango | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| sah | Yakut | Turkic | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| san | 
Sanskrit | Indo-European | 5 | 3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 10 | +| sat | Santali | Austroasiatic | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| sbe | Saliba | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sbk | Safwa | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sbs | Subiya | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| scn | Sicilian | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| sco | Scots | Indo-European | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| seh | Sena | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sey | Secoya | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sgb | Mag-antsi Ayta | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sgz | Sursurunga | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| shi | Tachelhit | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| shj | Shatt | Dajuic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| shn | Shan | Tai-Kadai | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| shp | Shipibo-Conibo | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sim | Mende (Papua New Guinea) | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sin | Sinhala | Indo-European | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| sja | Epena | Chocoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| slk | Slovak | Indo-European | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 12 | +| sll | Salt-Yui | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| slv | Slovenian | Indo-European | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | +| smk | Bolinao | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| smo | Samoan | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| sna | Shona | Atlantic-Congo | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| snc | 
Sinaugoro | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| snd | Sindhi | Indo-European | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| snn | Siona | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| snp | Siane | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| snx | Sam | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sny | Saniyo-Hiyewe | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| som | Somali | Afro-Asiatic | 3 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| soq | Kanasi | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sot | Southern Sotho | Atlantic-Congo | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| soy | Miyobe | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spa | Spanish | Indo-European | 4 | 13 | 4 | 0 | 1 | 2 | 2 | 13 | 4 | 0 | 0 | 43 | +| spl | Selepet | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spm | Akukem | Ramu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spp | Supyire Senoufo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sps | Saposa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spy | Sabaot | Nilotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sqi | Albanian | Unclassified | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| srd | Sardinian | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| sri | Siriano | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| srm | Saramaccan | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| srn | Sranan Tongo | Indo-European | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| srp | Serbian | Indo-European | 4 | 1 | 1 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 9 | +| srq | Sirionó | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ssd | Siroi | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| 
ssg | Seimat | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ssw | Swati | Atlantic-Congo | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| ssx | Samberigi | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| stp | Southeastern Tepehuan | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sua | Sulka | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sue | Suena | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sun | Sundanese | Austronesian | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| sus | Susu | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| suz | Sunwar | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| svk | Slovakian Sign Language | Sign Language | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| swa | Swahili (macrolanguage) | Atlantic-Congo | 1 | 7 | 2 | 0 | 0 | 1 | 1 | 3 | 0 | 0 | 0 | 15 | +| swe | Swedish | Indo-European | 4 | 8 | 3 | 0 | 1 | 1 | 1 | 4 | 0 | 0 | 0 | 22 | +| swg | Swabian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| swh | Swahili (individual language) | Atlantic-Congo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| swp | Suau | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sxb | Suba | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| szl | Silesian | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tac | Lowland Tarahumara | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tah | Tahitian | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| taj | Eastern Tamang | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tam | Tamil | Dravidian | 7 | 7 | 2 | 0 | 0 | 1 | 0 | 3 | 1 | 0 | 0 | 21 | +| taq | Tamasheq | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tat | Tatar | Turkic | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| tav | Tatuyo | 
Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| taw | Tai | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbc | Takia | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbf | Mandara | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbg | North Tairora | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbo | Tawala | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbz | Ditammari | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tca | Ticuna | Ticuna-Yuri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tcs | Torres Strait Creole | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tcz | Thado Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tdt | Tetun Dili | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tee | Huehuetla Tepehua | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tel | Telugu | Dravidian | 7 | 7 | 2 | 0 | 0 | 0 | 1 | 5 | 2 | 0 | 0 | 24 | +| ter | Tereno | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tet | Tetum | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tew | Tewa (USA) | Kiowa-Tanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tfr | Teribe | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tgk | Tajik | Indo-European | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| tgl | Tagalog | Austronesian | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| tgo | Sudest | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tgp | Tangoa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tha | Thai | Tai-Kadai | 4 | 8 | 1 | 0 | 0 | 1 | 1 | 6 | 0 | 0 | 0 | 21 | +| tif | Tifal | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tim | Timbe | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| 
tir | Tigrinya | Afro-Asiatic | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| tiw | Tiwi | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tiy | Tiruray | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tke | Takwane | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tku | Upper Necaxa Totonac | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tlf | Telefol | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tmd | Haruai | Piawi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tna | Tacana | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnc | Tanimuca-Retuarã | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnk | Kwamera | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnn | North Tanna | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnp | Whitesands | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| toc | Coyutla Totonac | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tod | Toma | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tof | Gizrra | Eastern Trans-Fly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| toj | Tojolabal | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ton | Tonga (Tonga Islands) | Austronesian | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| too | Xicotepec De Juárez Totonac | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| top | Papantla Totonac | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tos | Highland Totonac | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tpa | Taupota | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tpi | Tok Pisin | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| tpt | Tlachichilco Tepehua | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tpz | Tinputz | Austronesian | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| trc | Copala Triqui | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tsn | Tswana | Atlantic-Congo | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| tso | Tsonga | Atlantic-Congo | 1 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| tsw | Tsishingini | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ttc | Tektiteko | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tte | Bwanabwana | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tuc | Mutu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tue | Tuyuca | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tuf | Central Tunebo | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tuk | Turkmen | Turkic | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| tum | Tumbuka | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tuo | Tucano | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tur | Turkish | Turkic | 4 | 7 | 1 | 0 | 0 | 2 | 0 | 3 | 2 | 0 | 0 | 19 | +| tvk | Southeast Ambrym | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| twi | Twi | Unclassified | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| txq | Tii | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| txu | Kayapó | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tyv | Tuvinian | Turkic | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tzj | Tz'utujil | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tzl | Talossan | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tzm | Central Atlas Tamazight | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tzo | Tzotzil | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ubr | Ubir | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ubu | Umbu-Ungu | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 1 | +| udu | Uduk | Koman | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uig | Uighur | Turkic | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| ukr | Ukrainian | Indo-European | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| uli | Ulithian | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ulk | Meriam Mir | Eastern Trans-Fly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| umb | Umbundu | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| upv | Uripiv-Wala-Rano-Atchin | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ura | Urarina | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urb | Urubú-Kaapor | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urd | Urdu | Indo-European | 7 | 8 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 19 | +| uri | Urim | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urt | Urat | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urw | Sop | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| usa | Usarufa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| usp | Uspanteco | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uvh | Uri | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uvl | Lote | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uzb | Uzbek | Unclassified | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| uzn | Northern Uzbek | Turkic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| vec | Venetian | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| ven | Venda | Atlantic-Congo | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| vid | Vidunda | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| vie | Vietnamese | Austroasiatic | 5 | 6 | 1 | 0 | 0 | 1 | 0 | 5 | 0 | 0 | 0 | 18 | +| viv | Iduna | Austronesian | 1 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| vmy | Ayautla Mazatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| waj | Waffa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wal | Wolaytta | Ta-Ne-Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wap | Wapishana | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| war | Waray (Philippines) | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| wat | Kaninuwa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wbi | Vwanji | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wbp | Warlpiri | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wed | Wedau | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wer | Weri | Kunimaipan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wim | Wik-Mungkan | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wiu | Wiru | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wiv | Vitu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wln | Walloon | Indo-European | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wmt | Walmajarri | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wmw | Mwani | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wnc | Wantoat | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wnu | Usan | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wol | Wolof | Atlantic-Congo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| wos | Hanga Hundi | Ndu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wrk | Garrwa | Garrwan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wro | Worrorra | Worrorran | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wrs | Waris | Border | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wsk | Waskia | Nuclear Trans New Guinea | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wuu | Wu Chinese | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wuv | Wuvulu-Aua | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xav | Xavánte | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xbi | Kombio | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xed | Hdi | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xho | Xhosa | Atlantic-Congo | 3 | 3 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | +| xla | Kamula | Kamula-Elevala | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xnn | Northern Kankanay | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xon | Konkomba | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xsi | Sio | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xtd | Diuxi-Tilantongo Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xtm | Magdalena Peñasco Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yaa | Yaminahua | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yad | Yagua | Peba-Yagua | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yal | Yalunka | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yap | Yapese | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yaq | Yaqui | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yby | Yaweyuha | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ycn | Yucuna | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ydd | Eastern Yiddish | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| yid | Yiddish | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yka | Yakan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yle | Yele | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yml | Iamalele | 
Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yon | Yongkom | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yor | Yoruba | Atlantic-Congo | 4 | 5 | 3 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 16 | +| yrb | Yareba | Yareban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yre | Yaouré | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yss | Yessan-Mayo | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yue | Yue Chinese | Sino-Tibetan | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| yuj | Karkar-Yuri | Pauwasi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yut | Yopno | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yuw | Yau (Morobe Province) | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yva | Yawa | Yawa-Saweru | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zaa | Sierra de Juárez Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zab | Western Tlacolula Valley Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zac | Ocotlán Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zad | Cajonos Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zai | Isthmus Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zaj | Zaramo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zam | Miahuatlán Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zao | Ozolotepec Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zap | Zapotec | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zar | Rincón Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zas | Santo Domingo Albarradas Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zat | Tabaa Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 1 | +| zav | Yatzachi Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zaw | Mitla Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zca | Coatecas Altas Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zga | Kinga | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zho | Chinese | Unclassified | 2 | 2 | 1 | 0 | 0 | 1 | 1 | 13 | 0 | 0 | 0 | 20 | +| zia | Zia | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ziw | Zigula | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zlm | Malay (individual language) | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zos | Francisco León Zoque | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpc | Choapan Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpl | Lachixío Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpm | Mixtepec Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpo | Amatlán Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpq | Zoogocho Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpu | Yalálag Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpv | Chichicapan Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpz | Texmelucan Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zsm | Standard Malay | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| zsr | Southern Rincon Zapotec | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ztq | Quioquitani-Quierí Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zty | Yatee Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zul | Zulu | Atlantic-Congo | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | 
+| zyp | Zyphe Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| Total | None | None | None | 1394 | 795 | 304 | 3 | 28 | 67 | 51 | 473 | 85 | 2 | 2 |
diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index 127b251b7e..cbdcac6372 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -109,17 +109,18 @@ def evaluate( self, model: Encoder, split: str = "test", + subsets_to_run: list[HFSubset] | None = None, *, encode_kwargs: dict[str, Any] = {}, **kwargs: Any, ) -> dict[HFSubset, ScoresDict]: """Evaluates a Sentence Embedding Model on the task. - Returns a dict (that can be serialized to json). Args: model: Sentence embedding method. Implements a encode(sentences) method, that encodes sentences and returns a numpy matrix with the sentence embeddings split: Which datasplit to be used. + subsets_to_run: List of HFSubsets to evaluate. If None, all subsets are evaluated. encode_kwargs: Additional keyword arguments that are passed to the model's `encode` method. kwargs: Additional keyword arguments that are passed to the _evaluate_subset method. """ @@ -131,6 +132,9 @@ def evaluate( scores = {} hf_subsets = list(self.dataset.keys()) if self.is_multilingual else ["default"] + if subsets_to_run is not None: + hf_subsets = [s for s in hf_subsets if s in subsets_to_run] + for hf_subset in hf_subsets: logger.info( f"\nTask: {self.metadata_dict['name']}, split: {split}, subset: {hf_subset}. Running..." 
diff --git a/mteb/abstasks/AbsTaskBitextMining.py b/mteb/abstasks/AbsTaskBitextMining.py index 4be4ec1562..635f0a67b1 100644 --- a/mteb/abstasks/AbsTaskBitextMining.py +++ b/mteb/abstasks/AbsTaskBitextMining.py @@ -67,7 +67,8 @@ def __init__(self, **kwargs): def evaluate( self, model: Encoder, - split: str, + split: str = "test", + subsets_to_run: list[HFSubset] | None = None, *, encode_kwargs: dict[str, Any] = {}, **kwargs, @@ -77,6 +78,10 @@ def evaluate( hf_subsets = list(self.dataset) if self.is_multilingual else ["default"] + # If subsets_to_run is specified, filter the hf_subsets accordingly + if subsets_to_run is not None: + hf_subsets = [s for s in hf_subsets if s in subsets_to_run] + scores = {} if self.parallel_subsets: scores = self._evaluate_subset( diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py index 5e48dfab49..42de8dd273 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskClassification.py @@ -95,6 +95,7 @@ def evaluate( model, eval_split: str = "test", train_split: str = "train", + subsets_to_run: list[HFSubset] | None = None, *, encode_kwargs: dict[str, Any] = {}, **kwargs, @@ -104,6 +105,8 @@ def evaluate( scores = {} hf_subsets = list(self.dataset) if self.is_multilingual else ["default"] + if subsets_to_run is not None: + hf_subsets = [s for s in hf_subsets if s in subsets_to_run] for hf_subset in hf_subsets: logger.info( diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py index 1c3cba33e5..5172b06d60 100644 --- a/mteb/abstasks/AbsTaskMultilabelClassification.py +++ b/mteb/abstasks/AbsTaskMultilabelClassification.py @@ -121,6 +121,7 @@ def evaluate( model: Encoder, eval_split: str = "test", train_split: str = "train", + subsets_to_run: list[HFSubset] | None = None, *, encode_kwargs: dict[str, Any] = {}, **kwargs: Any, @@ -130,6 +131,9 @@ def evaluate( scores = {} hf_subsets = list(self.dataset) if 
self.is_multilingual else ["default"] + # If subsets_to_run is specified, filter the hf_subsets accordingly + if subsets_to_run is not None: + hf_subsets = [s for s in hf_subsets if s in subsets_to_run] for hf_subset in hf_subsets: logger.info( diff --git a/mteb/abstasks/AbsTaskReranking.py b/mteb/abstasks/AbsTaskReranking.py index 2ae1f5c359..e8ea495366 100644 --- a/mteb/abstasks/AbsTaskReranking.py +++ b/mteb/abstasks/AbsTaskReranking.py @@ -26,6 +26,7 @@ "MMarcoReranking", "CMedQAv1-reranking", "CMedQAv2-reranking", + "NamaaMrTydiReranking", ] diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index b8f255b356..b709c046f2 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -237,6 +237,7 @@ def evaluate( self, model, split: str = "test", + subsets_to_run: list[HFSubset] | None = None, *, encode_kwargs: dict[str, Any] = {}, **kwargs, @@ -250,6 +251,8 @@ def evaluate( scores = {} hf_subsets = list(self.hf_subsets) if self.is_multilingual else ["default"] + if subsets_to_run is not None: + hf_subsets = [s for s in hf_subsets if s in subsets_to_run] for hf_subset in hf_subsets: logger.info(f"Subset: {hf_subset}") diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 8ff56acb4e..0954c2de26 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -979,3 +979,27 @@ def load_results( year={2024} }""", ) + +NANOBEIR = Benchmark( + name="NanoBEIR", + tasks=get_tasks( + tasks=[ + "NanoArguAnaRetrieval", + "NanoClimateFeverRetrieval", + "NanoDBPediaRetrieval", + "NanoFEVERRetrieval", + "NanoFiQA2018Retrieval", + "NanoHotpotQARetrieval", + "NanoMSMARCORetrieval", + "NanoNFCorpusRetrieval", + "NanoNQRetrieval", + "NanoQuoraRetrieval", + "NanoSCIDOCSRetrieval", + "NanoSciFactRetrieval", + "NanoTouche2020Retrieval", + ], + ), + description="A benchmark to evaluate with subsets of BEIR datasets to use less computational power", + 
reference="https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6", + citation=None, +) diff --git a/mteb/descriptive_stats/Reranking/NamaaMrTydiReranking.json b/mteb/descriptive_stats/Reranking/NamaaMrTydiReranking.json new file mode 100644 index 0000000000..74da38e4f0 --- /dev/null +++ b/mteb/descriptive_stats/Reranking/NamaaMrTydiReranking.json @@ -0,0 +1,31 @@ +{ + "test": { + "num_samples": 5504, + "number_of_characters": 1293166, + "num_documents": 4586, + "min_document_length": 0, + "average_document_length": 275.8353685128652, + "max_document_length": 4158, + "unique_documents": 4586, + "num_queries": 918, + "min_query_length": 13, + "average_query_length": 30.702614379084967, + "max_query_length": 93, + "unique_queries": 918, + "none_queries": 0, + "num_relevant_docs": 4586, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 4586, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": 918, + "min_top_ranked_per_query": 2, + "average_top_ranked_per_query": 4.995642701525054, + "max_top_ranked_per_query": 6 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoArguAnaRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoArguAnaRetrieval.json new file mode 100644 index 0000000000..51d1be4353 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoArguAnaRetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 3685, + "number_of_characters": 3737951, + "num_documents": 3635, + "min_document_length": 70, + "average_document_length": 1011.7914718019257, + "max_document_length": 6673, + "unique_documents": 3635, + "num_queries": 50, + "min_query_length": 504, + "average_query_length": 1201.78, + "max_query_length": 2164, + "unique_queries": 50, + "none_queries": 0, + 
"num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoClimateFeverRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoClimateFeverRetrieval.json new file mode 100644 index 0000000000..5a408ec517 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoClimateFeverRetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 3458, + "number_of_characters": 5525784, + "num_documents": 3408, + "min_document_length": 33, + "average_document_length": 1619.531690140845, + "max_document_length": 6619, + "unique_documents": 3408, + "num_queries": 50, + "min_query_length": 38, + "average_query_length": 128.4, + "max_query_length": 265, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 38, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoDBPediaRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoDBPediaRetrieval.json new file mode 100644 index 0000000000..cd6f035639 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoDBPediaRetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + 
"num_samples": 6095, + "number_of_characters": 2034629, + "num_documents": 6045, + "min_document_length": 1, + "average_document_length": 336.30669975186106, + "max_document_length": 1390, + "unique_documents": 6045, + "num_queries": 50, + "min_query_length": 8, + "average_query_length": 33.1, + "max_query_length": 63, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoFEVERRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoFEVERRetrieval.json new file mode 100644 index 0000000000..eb7f3d6e95 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoFEVERRetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 5046, + "number_of_characters": 6140916, + "num_documents": 4996, + "min_document_length": 25, + "average_document_length": 1228.7119695756605, + "max_document_length": 8491, + "unique_documents": 4996, + "num_queries": 50, + "min_query_length": 17, + "average_query_length": 45.42, + "max_query_length": 83, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + 
"max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoFiQA2018Retrieval.json b/mteb/descriptive_stats/Retrieval/NanoFiQA2018Retrieval.json new file mode 100644 index 0000000000..92c11900a9 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoFiQA2018Retrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 4648, + "number_of_characters": 4139437, + "num_documents": 4598, + "min_document_length": 0, + "average_document_length": 899.6326663766855, + "max_document_length": 10506, + "unique_documents": 4598, + "num_queries": 50, + "min_query_length": 18, + "average_query_length": 58.52, + "max_query_length": 97, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoHotpotQARetrieval.json b/mteb/descriptive_stats/Retrieval/NanoHotpotQARetrieval.json new file mode 100644 index 0000000000..ec35252f78 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoHotpotQARetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 5140, + "number_of_characters": 1784059, + "num_documents": 5090, + "min_document_length": 24, + "average_document_length": 349.6349705304519, + "max_document_length": 2079, + "unique_documents": 5090, + "num_queries": 50, + "min_query_length": 37, + "average_query_length": 88.34, + "max_query_length": 184, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoMSMARCORetrieval.json b/mteb/descriptive_stats/Retrieval/NanoMSMARCORetrieval.json new file mode 100644 index 0000000000..2deebc7918 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoMSMARCORetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 5093, + "number_of_characters": 1666607, + "num_documents": 5043, + "min_document_length": 32, + "average_document_length": 330.159825500694, + "max_document_length": 990, + "unique_documents": 5043, + "num_queries": 50, + "min_query_length": 13, + "average_query_length": 32.22, + "max_query_length": 101, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoNFCorpusRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoNFCorpusRetrieval.json new file mode 100644 index 0000000000..b6e2e3fb1a --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoNFCorpusRetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 3003, + "number_of_characters": 4468144, + "num_documents": 2953, + 
"min_document_length": 90, + "average_document_length": 1512.7301049779885, + "max_document_length": 9939, + "unique_documents": 2953, + "num_queries": 50, + "min_query_length": 4, + "average_query_length": 21.04, + "max_query_length": 53, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 42, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json new file mode 100644 index 0000000000..254a0ac40f --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 5085, + "number_of_characters": 2648727, + "num_documents": 5035, + "min_document_length": 1, + "average_document_length": 525.5958291956306, + "max_document_length": 6138, + "unique_documents": 5035, + "num_queries": 50, + "min_query_length": 32, + "average_query_length": 47.04, + "max_query_length": 83, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/Retrieval/NanoQuoraRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoQuoraRetrieval.json new file mode 100644 index 0000000000..540b0fd3aa --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoQuoraRetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 5096, + "number_of_characters": 278960, + "num_documents": 5046, + "min_document_length": 2, + "average_document_length": 54.808164883075705, + "max_document_length": 332, + "unique_documents": 5046, + "num_queries": 50, + "min_query_length": 19, + "average_query_length": 47.96, + "max_query_length": 139, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoSCIDOCSRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoSCIDOCSRetrieval.json new file mode 100644 index 0000000000..78e927e208 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoSCIDOCSRetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 2260, + "number_of_characters": 2044730, + "num_documents": 2210, + "min_document_length": 0, + "average_document_length": 923.5705882352942, + "max_document_length": 10000, + "unique_documents": 2210, + "num_queries": 50, + "min_query_length": 38, + "average_query_length": 72.78, + "max_query_length": 143, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + 
"num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoSciFactRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoSciFactRetrieval.json new file mode 100644 index 0000000000..00e8cb4be1 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoSciFactRetrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 2969, + "number_of_characters": 4182563, + "num_documents": 2919, + "min_document_length": 260, + "average_document_length": 1431.2343268242548, + "max_document_length": 10000, + "unique_documents": 2919, + "num_queries": 50, + "min_query_length": 37, + "average_query_length": 95.8, + "max_query_length": 200, + "unique_queries": 50, + "none_queries": 0, + "num_relevant_docs": 50, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 50, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NanoTouche2020Retrieval.json b/mteb/descriptive_stats/Retrieval/NanoTouche2020Retrieval.json new file mode 100644 index 0000000000..de076dae57 --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NanoTouche2020Retrieval.json @@ -0,0 +1,31 @@ +{ + "train": { + "num_samples": 5794, + "number_of_characters": 12311190, + "num_documents": 5745, + "min_document_length": 3, + "average_document_length": 2142.56953872933, + "max_document_length": 
37100, + "unique_documents": 5745, + "num_queries": 49, + "min_query_length": 16, + "average_query_length": 43.42857142857143, + "max_query_length": 83, + "unique_queries": 49, + "none_queries": 0, + "num_relevant_docs": 49, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 49, + "num_instructions": null, + "min_instruction_length": null, + "average_instruction_length": null, + "max_instruction_length": null, + "unique_instructions": null, + "num_top_ranked": null, + "min_top_ranked_per_query": null, + "average_top_ranked_per_query": null, + "max_top_ranked_per_query": null + } +} \ No newline at end of file diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index 6ca449e1c5..4b1e06e077 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -299,8 +299,8 @@ def load_tasks_data(self): def _run_eval( task: AbsTask, model: Encoder, - split, - output_folder, + split: str, + subsets_to_run: list[str] | None = None, *, encode_kwargs: dict[str, Any], **kwargs: Any, @@ -309,7 +309,7 @@ def _run_eval( results = task.evaluate( model, split, - output_folder=output_folder, + subsets_to_run=subsets_to_run, encode_kwargs=encode_kwargs, **kwargs, ) @@ -385,7 +385,8 @@ def run( model: SentenceTransformer | Encoder, verbosity: int = 1, output_folder: str | None = "results", - eval_splits=None, + eval_splits: list[str] | None = None, + eval_subsets: list[str] | None = None, overwrite_results: bool = False, raise_error: bool = True, co2_tracker: bool = True, @@ -404,6 +405,7 @@ def run( output_folder: Folder where the results will be saved. Default to 'results'. Where it will save the results in the format: `{output_folder}/{model_name}/{model_revision}/{task_name}.json`. eval_splits: List of splits to evaluate on. If None, the splits are taken from the task metadata. + eval_subsets: List of subsets to evaluate on. If None, the subsets are taken from the task metadata. 
overwrite_results: Whether to overwrite existing results. raise_error: Whether to raise an error if an exception occurs during evaluation. co2_tracker: Whether to enable or disable CO2 emissions tracker using codecarbon. @@ -438,6 +440,10 @@ def run( if isinstance(model, (SentenceTransformer, CrossEncoder)): model = SentenceTransformerWrapper(model) + ## Disable co2_tracker for API models + if "API" in meta.framework: + co2_tracker = False + if output_path: self._save_model_metadata(meta, output_path) @@ -461,9 +467,17 @@ def run( f"\n\n********************** Evaluating {task.metadata.name} **********************" ) + task_eval_splits = ( + eval_splits if eval_splits is not None else task.eval_splits + ) + task_subsets = list(task.metadata.hf_subsets_to_langscripts.keys()) + + existing_results = None + save_path = None + if output_path: + kwargs["output_folder"] = output_folder # needed for retrieval tasks save_path = output_path / f"{task.metadata.name}{task.save_suffix}.json" - existing_results = None if save_path.exists(): existing_results = TaskResult.from_disk(save_path) @@ -475,38 +489,53 @@ def run( del self.tasks[0] # empty memory continue - task_eval_splits = ( - eval_splits if eval_splits is not None else task.eval_splits - ) - missing_splits = self._get_missing_splits( - existing_results, task_eval_splits - ) - - if not missing_splits and existing_results: - evaluation_results.append(existing_results) + # Unified call to get missing splits and subsets + missing_evaluations = self._get_missing_evaluations( + existing_results, + task_eval_splits, + task_subsets, + eval_subsets, + ) - # no splits are evaluated. 
- self.last_evaluated_splits[task.metadata.name] = [] - del self.tasks[0] - continue + # Determine final splits to run + final_splits_to_run = [] + # We need to run any split that is fully missing or has missing subsets + for sp, info in missing_evaluations.items(): + if info["whole_split_missing"] or info["missing_subsets"]: + final_splits_to_run.append(sp) - if missing_splits: + # If no splits need to be run and results exist, skip + if not final_splits_to_run: + if existing_results is not None: + evaluation_results.append(existing_results) + else: logger.info( - f"Running evaluation for missing splits: {missing_splits}" + f"No splits to evaluate for {task.metadata.name}. Skipping evaluation." ) + self.last_evaluated_splits[task.metadata.name] = [] + del self.tasks[0] + continue try: task.check_if_dataset_is_superseded() task.load_data(eval_splits=task_eval_splits, **kwargs) - # run evaluation task_results = {} evaluation_time = 0 kg_co2_emissions: int | None = 0 if co2_tracker else None self.last_evaluated_splits[task.metadata.name] = [] - for split in missing_splits: + for split in final_splits_to_run: + info = missing_evaluations[split] + + # Determine subsets to run for this split + # If the whole split is missing, run all required subsets + # If only some subsets are missing, run only those + subsets_to_run = info["missing_subsets"] + if info["whole_split_missing"] and task_subsets is None: + subsets_to_run = ["default"] + if co2_tracker: logger.warning( "Evaluating multiple MTEB runs simultaniously will produce incorrect CO₂ results" @@ -521,8 +550,8 @@ def run( task, model, split, - output_folder, encode_kwargs=encode_kwargs, + subsets_to_run=subsets_to_run, **kwargs, ) @@ -535,12 +564,11 @@ def run( model, split, output_folder, + subsets_to_run=subsets_to_run, encode_kwargs=encode_kwargs, **kwargs, ) - self.last_evaluated_splits[task.metadata.name].append(split) - logger.info( f"Evaluation for {task.metadata_dict['name']} on {split} took {tock - tick:.2f} 
seconds" ) @@ -548,8 +576,11 @@ def run( task_results[split] = results if verbosity >= 1: - logger.info(f"Scores: {results}") + logger.info(f"Scores: {task_results[split]}") + + self.last_evaluated_splits[task.metadata.name].append(split) + # Create new TaskResult new_results = TaskResult.from_task_results( task, task_results, @@ -557,6 +588,9 @@ def run( kg_co2_emissions=kg_co2_emissions, ) + # Merge with existing if needed + if output_path and save_path.exists(): + existing_results = TaskResult.from_disk(save_path) if existing_results: merged_results = self._merge_results(existing_results, new_results) else: @@ -642,3 +676,56 @@ def get_last_evaluated_splits(self): return deepcopy( {task: list(splits) for task, splits in self.last_evaluated_splits.items()} ) + + @staticmethod + def _get_missing_evaluations( + existing_results: TaskResult | None, + task_eval_splits: list[str], + task_eval_langs: list[str], + eval_subsets: list[str] | None, + ) -> dict[str, dict[str, Any]]: + """Return a dictionary for each split, indicating if the whole split is missing and which subsets are missing.""" + missing_evaluations = { + split: {"whole_split_missing": False, "missing_subsets": []} + for split in task_eval_splits + } + + # Determine subsets to consider if multilingual + if eval_subsets is None: + # If no eval_langs specified, consider all subsets + subsets_to_consider = task_eval_langs + else: + subsets_to_consider = [ + subset for subset in task_eval_langs if subset in eval_subsets + ] + + # If no existing results, all splits and subsets are missing + if existing_results is None: + for split in task_eval_splits: + missing_evaluations[split]["whole_split_missing"] = True + missing_evaluations[split]["missing_subsets"] = list( + subsets_to_consider + ) + return missing_evaluations + + # If we have existing results, check which splits and subsets are missing + for split in task_eval_splits: + if split not in existing_results.scores: + # Whole split missing + 
missing_evaluations[split]["whole_split_missing"] = True + missing_evaluations[split]["missing_subsets"] = list( + subsets_to_consider + ) + else: + # Some subsets may be missing + existing_subsets = { + score_dict["hf_subset"] + for score_dict in existing_results.scores[split] + } + missing_subsets = [ + s for s in subsets_to_consider if s not in existing_subsets + ] + if missing_subsets: + missing_evaluations[split]["missing_subsets"] = missing_subsets + + return missing_evaluations diff --git a/mteb/evaluation/evaluators/BitextMiningEvaluator.py b/mteb/evaluation/evaluators/BitextMiningEvaluator.py index 4fa7022ed6..4b068653da 100644 --- a/mteb/evaluation/evaluators/BitextMiningEvaluator.py +++ b/mteb/evaluation/evaluators/BitextMiningEvaluator.py @@ -62,7 +62,7 @@ def compute_metrics(self, model: Encoder, encode_kwargs: dict[str, Any] = {}): tqdm.tqdm(self.pairs, desc="Matching sentences") ): scores[f"{key1}-{key2}"] = self._compute_metrics( - embeddings[key1], embeddings[key2] + embeddings[key1], embeddings[key2], model ) # in case of default pair unnest the dict @@ -76,10 +76,13 @@ def _compute_metrics( self, embeddings1, embeddings2, + model: Encoder, ): # Find nearest neighbors logger.info("Finding nearest neighbors...") - nearest_neighbors = self._similarity_search(embeddings1, embeddings2, top_k=1) + nearest_neighbors = self._similarity_search( + embeddings1, embeddings2, model, top_k=1 + ) # Compute errors logger.info("Computing metrics...") @@ -106,10 +109,10 @@ def _similarity_search( self, query_embeddings, corpus_embeddings, + model: Encoder, query_chunk_size: int = 100, corpus_chunk_size: int = 500000, top_k: int = 10, - score_function=cos_sim, ): """This function performs a cosine similarity search between a list of query embeddings and a list of corpus embeddings. It can be used for Information Retrieval / Semantic Search for corpora up to about 1 Million entries. 
@@ -117,10 +120,10 @@ def _similarity_search( Args: query_embeddings: A 2 dimensional tensor with the query embeddings. corpus_embeddings: A 2 dimensional tensor with the corpus embeddings. + model: The model used to encode the queries and corpus. This is used to check if the embeddings are on the same device and to encode the queries and corpus if they are not already tensors. query_chunk_size: Process 100 queries simultaneously. Increasing that value increases the speed, but requires more memory. corpus_chunk_size: Scans the corpus 100k entries at a time. Increasing that value increases the speed, but requires more memory. top_k: Retrieve top k matching entries. - score_function: Function for computing scores. By default, cosine similarity. Returns: Returns a list with one entry for each query. Each entry is a list of dictionaries with the keys 'corpus_id' and 'score', sorted by decreasing cosine similarity scores. @@ -142,7 +145,7 @@ def _similarity_search( # Iterate over chunks of the corpus for corpus_start_idx in range(0, len(corpus_embeddings), corpus_chunk_size): # Compute cosine similarities - cos_scores = score_function( + similarity_scores = cos_sim( query_embeddings[ query_start_idx : query_start_idx + query_chunk_size ], @@ -151,10 +154,20 @@ def _similarity_search( ], ) + if hasattr(model, "similarity"): + similarity_scores = model.similarity( + query_embeddings[ + query_start_idx : query_start_idx + query_chunk_size + ], + corpus_embeddings[ + corpus_start_idx : corpus_start_idx + corpus_chunk_size + ], + ) + # Get top-k scores cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk( - cos_scores, - min(top_k, len(cos_scores[0])), + similarity_scores, + min(top_k, len(similarity_scores[0])), dim=1, largest=True, sorted=False, @@ -162,7 +175,7 @@ def _similarity_search( cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist() cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist() - for query_itr in range(len(cos_scores)): + for 
query_itr in range(len(similarity_scores)): for sub_corpus_id, score in zip( cos_scores_top_k_idx[query_itr], cos_scores_top_k_values[query_itr], diff --git a/mteb/evaluation/evaluators/RetrievalEvaluator.py b/mteb/evaluation/evaluators/RetrievalEvaluator.py index 3dca66b0fa..be2f5af1f0 100644 --- a/mteb/evaluation/evaluators/RetrievalEvaluator.py +++ b/mteb/evaluation/evaluators/RetrievalEvaluator.py @@ -3,13 +3,12 @@ import logging from typing import Any -from mteb.evaluation.evaluators.model_classes import ( +from .Evaluator import Evaluator +from .model_classes import ( DenseRetrievalExactSearch, DRESModel, is_cross_encoder_compatible, ) - -from .Evaluator import Evaluator from .utils import ( add_task_specific_scores, calculate_retrieval_scores, @@ -30,7 +29,6 @@ def __init__( retriever, task_name: str | None = None, k_values: list[int] = [1, 3, 5, 10, 20, 100, 1000], - score_function: str = "cos_sim", encode_kwargs: dict[str, Any] = {}, **kwargs, ): @@ -52,14 +50,6 @@ def __init__( self.top_k = ( max(k_values) if "top_k" not in kwargs else kwargs["top_k"] ) # can lower it if reranking - self.score_function = ( - retriever.mteb_model_meta.similarity_fn_name - if ( - hasattr(retriever, "mteb_model_meta") - and retriever.mteb_model_meta.similarity_fn_name - ) - else score_function - ) self.task_name = task_name def __call__( @@ -90,7 +80,6 @@ def __call__( corpus, queries, self.top_k, - self.score_function, task_name=self.task_name, # type: ignore instructions=instructions, **kwargs, @@ -100,7 +89,6 @@ def __call__( corpus, queries, self.top_k, - self.score_function, instructions=instructions, request_qid=qid, task_name=self.task_name, @@ -120,6 +108,7 @@ def evaluate( dict[str, float], dict[str, float], dict[str, float], + dict[str, float], ]: if ignore_identical_ids: logger.debug( diff --git a/mteb/evaluation/evaluators/model_classes.py b/mteb/evaluation/evaluators/model_classes.py index 146d529dc9..7e66f22e65 100644 --- 
a/mteb/evaluation/evaluators/model_classes.py +++ b/mteb/evaluation/evaluators/model_classes.py @@ -7,14 +7,15 @@ from pathlib import Path from typing import Any +import numpy as np import torch import tqdm -from sentence_transformers import CrossEncoder, SentenceTransformer +from sentence_transformers import SentenceTransformer from mteb.encoder_interface import Encoder, PromptType from mteb.model_meta import ModelMeta -from .utils import convert_conv_history_to_query, cos_sim, dot_score, download +from .utils import convert_conv_history_to_query, cos_sim, download logger = logging.getLogger(__name__) @@ -53,32 +54,24 @@ def __init__( ): # Model is class that provides encode_corpus() and encode_queries() self.model = model - self.encode_kwargs = encode_kwargs + self.encode_kwargs = encode_kwargs.copy() - if "batch_size" not in encode_kwargs: - encode_kwargs["batch_size"] = 128 if "show_progress_bar" not in encode_kwargs: - encode_kwargs["show_progress_bar"] = True - - self.score_functions = {"cos_sim": cos_sim, "dot": dot_score, "cosine": cos_sim} - self.score_function_desc = { - "cos_sim": "Cosine Similarity", - "cosine": "Cosine Similarity", - "dot": "Dot Product", - } + self.encode_kwargs["show_progress_bar"] = True + self.corpus_chunk_size = corpus_chunk_size if isinstance(previous_results, Path): self.previous_results = str(previous_results) else: self.previous_results = previous_results - self.batch_size = encode_kwargs.get("batch_size") - self.show_progress_bar = encode_kwargs.get("show_progress_bar") + self.batch_size = self.encode_kwargs.get("batch_size", 32) + self.show_progress_bar = self.encode_kwargs.get("show_progress_bar") self.results = {} if self.previous_results is not None: self.previous_results = self.load_results_file() - if isinstance(self.model, CrossEncoder): + if hasattr(self.model, "predict"): # load the predict instance from the CrossEncoder # custom functions can be used by extending the DenseRetrievalExactSearch class self.predict = 
self.model.predict @@ -88,7 +81,6 @@ def search( corpus: dict[str, dict[str, str]], queries: dict[str, str], top_k: int, - score_function: str, task_name: str, instructions: dict[str, str] | None = None, request_qid: str | None = None, @@ -102,7 +94,6 @@ def search( corpus: Dictionary mapping corpus IDs to document dictionaries queries: Dictionary mapping query IDs to query strings top_k: Number of top results to return - score_function: Scoring function to use ('cos_sim' or 'dot') task_name: Name of the task instructions: Optional instructions to append to queries request_qid: Optional request query ID @@ -110,11 +101,6 @@ def search( top_ranked: Optional dict mapping query IDs to lists of pre-ranked corpus IDs **kwargs: Additional keyword arguments passed to the underlying model """ - if score_function not in self.score_functions: - raise ValueError( - f"score function: {score_function} must be either (cos_sim) for cosine similarity or (dot) for dot product" - ) - logger.info("Encoding Queries.") query_ids = list(queries.keys()) self.results = {qid: {} for qid in query_ids} @@ -159,10 +145,6 @@ def search( # Map back to original order but reuse embeddings query_embeddings = unique_query_embeddings[query_idx_mapping] - logger.info( - f"Scoring Function: {self.score_function_desc[score_function]} ({score_function})" - ) - if top_ranked is not None: logger.info("Performing reranking on pre-ranked documents...") result_heaps = self._rerank_documents( @@ -171,7 +153,6 @@ def search( corpus=corpus, top_ranked=top_ranked, top_k=top_k, - score_function=score_function, task_name=task_name, request_qid=request_qid, return_sorted=return_sorted, @@ -183,7 +164,6 @@ def search( query_embeddings=query_embeddings, corpus=corpus, top_k=top_k, - score_function=score_function, task_name=task_name, request_qid=request_qid, return_sorted=return_sorted, @@ -198,11 +178,10 @@ def search( def _rerank_documents( self, query_ids: list[str], - query_embeddings: torch.Tensor, + 
query_embeddings: np.ndarray, corpus: dict[str, dict[str, str]], top_ranked: dict[str, list[str]], top_k: int, - score_function: str, task_name: str, request_qid: str | None = None, return_sorted: bool = False, @@ -259,8 +238,12 @@ def _rerank_documents( # Ensure query embedding is on the correct device and has correct shape query_embedding = query_embeddings[query_idx].unsqueeze(0) + score_function = ( + self.model.similarity if hasattr(self.model, "similarity") else cos_sim + ) + with torch.inference_mode(): - scores = self.score_functions[score_function]( + scores = score_function( query_embedding, query_doc_embeddings, ) @@ -305,7 +288,6 @@ def _full_corpus_search( query_embeddings: torch.Tensor, corpus: dict[str, dict[str, str]], top_k: int, - score_function: str, task_name: str, request_qid: str | None = None, return_sorted: bool = False, @@ -338,17 +320,20 @@ def _full_corpus_search( logging.info("Computing Similarities...") query_embeddings = torch.as_tensor(query_embeddings).to(device) sub_corpus_embeddings = torch.as_tensor(sub_corpus_embeddings).to(device) + + score_function = ( + self.model.similarity if hasattr(self.model, "similarity") else cos_sim + ) + with torch.inference_mode(): - cos_scores = self.score_functions[score_function]( - query_embeddings, sub_corpus_embeddings - ) + scores = score_function(query_embeddings, sub_corpus_embeddings) # get top-k values cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk( - cos_scores, + scores, min( top_k + 1, - len(cos_scores[1]) if len(cos_scores) > 1 else len(cos_scores[-1]), + len(scores[1]) if len(scores) > 1 else len(scores[-1]), ), dim=1, largest=True, @@ -411,7 +396,7 @@ def search_cross_encoder( for qid in queries.keys(): if self.previous_results is None: # try to use all of them - logging.logging( + logging.info( f"previous_results is None. 
Using all the documents to rerank: {len(corpus)}" ) q_results = {doc_id: 0.0 for doc_id in corpus.keys()} @@ -461,7 +446,7 @@ def search_cross_encoder( len(queries_in_pair) == len(corpus_in_pair) == len(instructions_in_pair) ) - if isinstance(self.model.model, CrossEncoder): + if hasattr(self.model, "predict"): # can't take instructions, so add them here queries_in_pair = [ f"{q} {i}".strip() @@ -527,7 +512,6 @@ def encode_corpus( self, corpus: list[dict[str, str]], task_name: str, - batch_size: int, prompt_type: PromptType = PromptType.passage, **kwargs, ): @@ -536,7 +520,6 @@ def encode_corpus( sentences, task_name=task_name, prompt_type=prompt_type, - batch_size=batch_size, **kwargs, ) return corpus_embeddings diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index a6dd1c7325..7cc658d0cb 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -1,10 +1,13 @@ from __future__ import annotations import json +import tempfile from collections import defaultdict from pathlib import Path +from urllib.parse import urlencode import gradio as gr +import pandas as pd from gradio_rangeslider import RangeSlider import mteb @@ -24,6 +27,30 @@ def load_results(): return mteb.BenchmarkResults.from_validated(**json.load(cache_file)) +def produce_benchmark_link(benchmark_name: str, request: gr.Request) -> str: + """Produces a URL for the selected benchmark.""" + params = urlencode( + { + "benchmark_name": benchmark_name, + } + ) + base_url = request.request.base_url + url = f"{base_url}?{params}" + md = f"```\n{url}\n```" + return md + + +def set_benchmark_on_load(request: gr.Request): + query_params = request.query_params + return query_params.get("benchmark_name", "MTEB(Multilingual, beta)") + + +def download_table(table: pd.DataFrame) -> Path: + file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv") + table.to_csv(file) + return file.name + + def update_citation(benchmark_name: str) -> str: benchmark = mteb.get_benchmark(benchmark_name) if 
str(benchmark.citation) != "None": @@ -66,6 +93,7 @@ def update_task_info(task_names: str) -> gr.DataFrame: properties=["name", "type", "languages", "domains", "reference", "main_score"] ) df["languages"] = df["languages"].map(format_list) + df = df.sort_values("name") df["domains"] = df["domains"].map(format_list) df["name"] = "[" + df["name"] + "](" + df["reference"] + ")" df = df.rename( @@ -217,24 +245,50 @@ def update_task_info(task_names: str) -> gr.DataFrame: inputs=[benchmark_select, lang_select, type_select, domain_select], ) citation = gr.Markdown(update_citation, inputs=[benchmark_select]) + with gr.Accordion("Share this benchmark:", open=False): + gr.Markdown(produce_benchmark_link, inputs=[benchmark_select]) with gr.Column(): - with gr.Tab("Performance-Size Plot"): + with gr.Tab("Performance per Model Size"): plot = gr.Plot(performance_size_plot, inputs=[summary_table]) gr.Markdown( "*We only display models that have been run on all tasks in the benchmark*" ) - with gr.Tab("Top 5 Radar Chart"): + with gr.Tab("Performance per Task Type (Radar Chart)"): radar_plot = gr.Plot(radar_chart, inputs=[summary_table]) gr.Markdown( "*We only display models that have been run on all task types in the benchmark*" ) with gr.Tab("Summary"): + with gr.Accordion( + "What do aggregate measures (Rank(Borda), Mean(Task), etc.) mean?", + open=False, + ): + gr.Markdown( + """ + **Rank(borda)** is computed based on the [borda count](https://en.wikipedia.org/wiki/Borda_count), where each task is treated as a preference voter, which gives votes on the models in accordance with their relative performance on the task. The best model obtains the highest number of votes. The model with the highest number of votes across tasks obtains the highest rank. The Borda rank tends to prefer models that perform well broadly across tasks. However, given that it is a rank it can be unclear if the two models perform similarly. 
+ + **Mean(Task)**: This is a naïve average computed across all the tasks within the benchmark. This score is simple to understand and is continuous as opposed to the Borda rank. However, the mean can overvalue tasks with higher variance in its scores. + + **Mean(TaskType)**: This is a weighted average across different task categories, such as classification or retrieval. It is computed by first computing the average by task category and then computing the average on each category. Similar to the Mean(Task) this measure is continuous and tends to overvalue tasks with higher variance. This score also prefers models that perform well across all task categories. + """ + ) summary_table.render() + download_summary = gr.DownloadButton("Download Table") + download_summary.click( + download_table, inputs=[summary_table], outputs=[download_summary] + ) with gr.Tab("Performance per task"): per_task_table.render() + download_per_task = gr.DownloadButton("Download Table") + download_per_task.click( + download_table, inputs=[per_task_table], outputs=[download_per_task] + ) with gr.Tab("Task information"): task_info_table = gr.DataFrame(update_task_info, inputs=[task_select]) + # This sets the benchmark from the URL query parameters + demo.load(set_benchmark_on_load, inputs=[], outputs=[benchmark_select]) + @gr.on(inputs=[scores, searchbar], outputs=[summary_table, per_task_table]) def update_tables(scores, search_query: str): summary, per_task = scores_to_tables(scores, search_query) diff --git a/mteb/leaderboard/figures.py b/mteb/leaderboard/figures.py index 9f3e73f7a4..35f91dd363 100644 --- a/mteb/leaderboard/figures.py +++ b/mteb/leaderboard/figures.py @@ -6,6 +6,28 @@ import plotly.graph_objects as go +def text_plot(text: str): + """Returns empty scatter plot with text added, this can be great for error messages.""" + return px.scatter(template="plotly_white").add_annotation( + text=text, showarrow=False, font=dict(size=20) + ) + + +def failsafe_plot(fun): + """Decorator 
that turns the function producing a figure failsafe. + This is necessary, because once a Callback encounters an exception it + becomes useless in Gradio. + """ + + def wrapper(*args, **kwargs): + try: + return fun(*args, **kwargs) + except Exception: + return text_plot("Couldn't produce plot.") + + return wrapper + + def parse_n_params(text: str) -> int: if text.endswith("M"): return float(text[:-1]) * 1e6 @@ -37,6 +59,48 @@ def parse_float(value) -> float: ] +def add_size_guide(fig: go.Figure): + xpos = [5 * 1e9] * 4 + ypos = [7.8, 8.5, 9, 10] + sizes = [256, 1024, 2048, 4096] + fig.add_trace( + go.Scatter( + showlegend=False, + opacity=0.3, + mode="markers", + marker=dict( + size=np.sqrt(sizes), + color="rgba(0,0,0,0)", + line=dict(color="black", width=2), + ), + x=xpos, + y=ypos, + ) + ) + fig.add_annotation( + text="Embedding Size:", + font=dict(size=16), + x=np.log10(1.5e9), + y=10, + showarrow=False, + opacity=0.3, + ) + for x, y, size in zip(xpos, np.linspace(7.5, 14, 4), sizes): + fig.add_annotation( + text=f"{size}", + font=dict(size=12), + x=np.log10(x), + y=y, + showarrow=True, + ay=0, + ax=50, + opacity=0.3, + arrowwidth=2, + ) + return fig + + +@failsafe_plot def performance_size_plot(df: pd.DataFrame) -> go.Figure: df = df.copy() df["Number of Parameters"] = df["Number of Parameters"].map(parse_n_params) @@ -50,6 +114,7 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: if not len(df.index): return go.Figure() min_score, max_score = df["Mean (Task)"].min(), df["Mean (Task)"].max() + df["sqrt(dim)"] = np.sqrt(df["Embedding Dimensions"]) fig = px.scatter( df, x="Number of Parameters", @@ -57,7 +122,7 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: log_x=True, template="plotly_white", text="model_text", - size="Embedding Dimensions", + size="sqrt(dim)", color="Log(Tokens)", range_color=[2, 5], range_x=[8 * 1e6, 11 * 1e9], @@ -69,10 +134,21 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: "Mean (Task)": True, "Rank 
(Borda)": True, "Log(Tokens)": False, + "sqrt(dim)": False, "model_text": False, }, hover_name="Model", ) + # Note: it's important that this comes before setting the size mode + fig = add_size_guide(fig) + fig.update_traces( + marker=dict( + sizemode="diameter", + sizeref=1.5, + sizemin=0, + ) + ) + fig.add_annotation(x=1e9, y=10, text="Model size:") fig.update_layout( coloraxis_colorbar=dict( # noqa title="Max Tokens", @@ -124,14 +200,15 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: "#3CBBB1", ] fill_colors = [ - "rgba(238,66,102,0.2)", - "rgba(0,166,237,0.2)", - "rgba(236,167,44,0.2)", - "rgba(180,35,24,0.2)", - "rgba(60,187,177,0.2)", + "rgba(238,66,102,0.05)", + "rgba(0,166,237,0.05)", + "rgba(236,167,44,0.05)", + "rgba(180,35,24,0.05)", + "rgba(60,187,177,0.05)", ] +@failsafe_plot def radar_chart(df: pd.DataFrame) -> go.Figure: df = df.copy() df["Model"] = df["Model"].map(parse_model_name) @@ -139,6 +216,10 @@ def radar_chart(df: pd.DataFrame) -> go.Figure: task_type_columns = [ column for column in df.columns if "".join(column.split()) in task_types ] + if len(task_type_columns) <= 1: + raise ValueError( + "Couldn't produce radar chart, the benchmark only contains one task category." 
+ ) df = df[["Model", *task_type_columns]].set_index("Model") df = df.replace("", np.nan) df = df.dropna() @@ -156,7 +237,7 @@ def radar_chart(df: pd.DataFrame) -> go.Figure: mode="lines", line=dict(width=2, color=line_colors[i]), fill="toself", - fillcolor=fill_colors[i], + fillcolor="rgba(0,0,0,0)", ) ) fig.update_layout( diff --git a/mteb/model_meta.py b/mteb/model_meta.py index 52b2a8872d..27bc1ffd41 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -26,8 +26,10 @@ "API", "Tevatron", "NumPy", + "PyLate", + "ColBERT", ] -DISTANCE_METRICS = Literal["cosine", "dot"] +DISTANCE_METRICS = Literal["cosine", "max_sim", "dot"] def sentence_transformers_loader( diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py index ce1db29bbd..6240962b27 100644 --- a/mteb/models/arctic_models.py +++ b/mteb/models/arctic_models.py @@ -4,6 +4,84 @@ from mteb.model_meta import ModelMeta, sentence_transformers_loader +LANGUAGES_V2_0 = [ + "afr_Latn", + "ara_Arab", + "aze_Latn", + "bel_Cyrl", + "bul_Cyrl", + "ben_Beng", + "cat_Latn", + "ceb_Latn", + "ces_Latn", + "cym_Latn", + "dan_Latn", + "deu_Latn", + "ell_Grek", + "eng_Latn", + "spa_Latn", + "est_Latn", + "eus_Latn", + "fas_Arab", + "fin_Latn", + "fra_Latn", + "glg_Latn", + "guj_Gujr", + "heb_Hebr", + "hin_Deva", + "hrv_Latn", + "hat_Latn", + "hun_Latn", + "hye_Armn", + "ind_Latn", + "isl_Latn", + "ita_Latn", + "jpn_Jpan", + "jav_Latn", + "kat_Geor", + "kaz_Cyrl", + "khm_Khmr", + "kan_Knda", + "kor_Hang", + "kir_Cyrl", + "lao_Laoo", + "lit_Latn", + "lav_Latn", + "mkd_Cyrl", + "mal_Mlym", + "mon_Cyrl", + "mar_Deva", + "msa_Latn", + "mya_Mymr", + "nep_Deva", + "nld_Latn", + "pan_Guru", + "pol_Latn", + "por_Latn", + "que_Latn", + "ron_Latn", + "rus_Cyrl", + "sin_Sinh", + "slk_Latn", + "slv_Latn", + "som_Latn", + "sqi_Latn", + "srp_Cyrl", + "swe_Latn", + "swa_Latn", + "tam_Taml", + "tel_Telu", + "tha_Thai", + "tgl_Latn", + "tur_Latn", + "ukr_Cyrl", + "urd_Arab", + "vie_Latn", + "yor_Latn", + "zho_Hans", +] + 
+ arctic_m_v1_5 = ModelMeta( loader=partial( sentence_transformers_loader, @@ -60,7 +138,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-xs", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="sentence-transformers/all-MiniLM-L6-v2", superseded_by=None, ) @@ -85,7 +163,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-s", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="intfloat/e5-small-unsupervised", superseded_by=None, ) @@ -110,7 +188,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="intfloat/e5-base-unsupervised", superseded_by="Snowflake/snowflake-arctic-embed-m-v1.5", ) @@ -127,19 +205,18 @@ languages=["eng_Latn"], open_weights=True, framework=["Sentence Transformers", "PyTorch"], - n_parameters=109_000_000, + n_parameters=137_000_000, memory_usage=None, max_tokens=2048, embed_dim=768, license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-long", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="nomic-ai/nomic-embed-text-v1-unsupervised", - superseded_by=None, + superseded_by="Snowflake/snowflake-arctic-embed-m-v2.0", ) - arctic_embed_l = ModelMeta( loader=partial( sentence_transformers_loader, @@ -152,14 +229,89 @@ languages=["eng_Latn"], open_weights=True, framework=["Sentence Transformers", "PyTorch"], - n_parameters=109_000_000, + n_parameters=335_000_000, memory_usage=None, max_tokens=512, - embed_dim=768, + embed_dim=1024, license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-l", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="intfloat/e5-base-unsupervised", + 
superseded_by="Snowflake/snowflake-arctic-embed-l-v2.0", +) + +arctic_embed_m_v1_5 = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-m-v1.5", + revision="97eab2e17fcb7ccb8bb94d6e547898fa1a6a0f47", + model_prompts={ + "query": "Represent this sentence for searching relevant passages: " + }, + ), + name="Snowflake/snowflake-arctic-embed-m-v1.5", + revision="97eab2e17fcb7ccb8bb94d6e547898fa1a6a0f47", + release_date="2024-07-08", # initial commit of hf model. + languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=109_000_000, + memory_usage=None, + max_tokens=512, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5", + similarity_fn_name="cosine", + use_instructions=True, + adapted_from=None, + superseded_by="Snowflake/snowflake-arctic-embed-m-v2.0", +) + +arctic_embed_m_v2_0 = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-m-v2.0", + revision="f2a7d59d80dfda5b1d14f096f3ce88bb6bf9ebdc", + ), + name="Snowflake/snowflake-arctic-embed-m-v2.0", + revision="f2a7d59d80dfda5b1d14f096f3ce88bb6bf9ebdc", + release_date="2024-12-04", # initial commit of hf model. 
+ languages=LANGUAGES_V2_0, + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=305_000_000, + memory_usage=None, + max_tokens=8192, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v2.0", + similarity_fn_name="cosine", + use_instructions=True, + adapted_from="Alibaba-NLP/gte-multilingual-base", + superseded_by=None, +) + +arctic_embed_l_v2_0 = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-l-v2.0", + revision="edc2df7b6c25794b340229ca082e7c78782e6374", + ), + name="Snowflake/snowflake-arctic-embed-l-v2.0", + revision="edc2df7b6c25794b340229ca082e7c78782e6374", + release_date="2024-12-04", # initial commit of hf model. + languages=LANGUAGES_V2_0, + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=568_000_000, + memory_usage=None, + max_tokens=8192, + embed_dim=1024, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-l-v2.0", + similarity_fn_name="cosine", + use_instructions=True, + adapted_from="BAAI/bge-m3-retromae", superseded_by=None, ) diff --git a/mteb/models/bge_models.py b/mteb/models/bge_models.py index 0fb3c0242e..d136ccd834 100644 --- a/mteb/models/bge_models.py +++ b/mteb/models/bge_models.py @@ -15,7 +15,7 @@ }""" bge_small_en_v1_5 = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="BAAI/bge-small-en-v1.5", revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a", @@ -34,12 +34,12 @@ reference="https://huggingface.co/BAAI/bge-small-en-v1.5", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, + use_instructions=True, citation=BGE_15_CITATION, ) bge_base_en_v1_5 = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="BAAI/bge-base-en-v1.5", 
revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a", @@ -58,12 +58,12 @@ reference="https://huggingface.co/BAAI/bge-base-en-v1.5", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, + use_instructions=True, citation=BGE_15_CITATION, ) bge_large_en_v1_5 = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="BAAI/bge-large-en-v1.5", revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09", @@ -82,6 +82,6 @@ reference="https://huggingface.co/BAAI/bge-large-en-v1.5", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, + use_instructions=True, citation=BGE_15_CITATION, ) diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index 3f07a0d23b..43a797342d 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -216,7 +216,7 @@ def encode( } cohere_mult_3 = ModelMeta( - loader=partial( + loader=partial( # type: ignore CohereTextEmbeddingModel, model_name="embed-multilingual-v3.0", model_prompts=model_prompts, @@ -234,11 +234,11 @@ def encode( license=None, similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) cohere_eng_3 = ModelMeta( - loader=partial( + loader=partial( # type: ignore CohereTextEmbeddingModel, model_name="embed-english-v3.0", model_prompts=model_prompts, @@ -256,7 +256,7 @@ def encode( license=None, similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) cohere_mult_light_3 = ModelMeta( @@ -278,7 +278,7 @@ def encode( license=None, similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) cohere_eng_light_3 = ModelMeta( @@ -300,5 +300,5 @@ def encode( license=None, similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) diff --git a/mteb/models/colbert_models.py 
b/mteb/models/colbert_models.py new file mode 100644 index 0000000000..8753791bff --- /dev/null +++ b/mteb/models/colbert_models.py @@ -0,0 +1,218 @@ +from __future__ import annotations + +import logging +from collections.abc import Sequence +from functools import partial +from typing import Any + +import numpy as np +import torch + +from mteb.encoder_interface import PromptType +from mteb.model_meta import ModelMeta + +from .wrapper import Wrapper + +logger = logging.getLogger(__name__) + + +class ColBERTWrapper(Wrapper): + def __init__( + self, + model_name: str, + revision: str | None = None, + model_prompts: dict[str, str] | None = None, + **kwargs, + ) -> None: + """Wrapper for ColBERT models. + + Args: + model_name: The ColBERT model to load from HuggingFace Hub. + revision: The revision of the model to use. + model_prompts: A dictionary mapping task names to prompt names. + First priority is given to the composed prompt of task name + prompt type (query or passage), then to the specific task prompt, + then to the composed prompt of task type + prompt type, then to the specific task type prompt, + and finally to the specific prompt type. + **kwargs: Additional arguments to pass to the model. + """ + try: + from pylate import models as colbert_model + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`." 
+ ) from e + + self.model_name = model_name + self.model = colbert_model.ColBERT(self.model_name, revision=revision, **kwargs) + if ( + model_prompts is None + and hasattr(self.model, "prompts") + and len(self.model.prompts) > 0 + ): + try: + model_prompts = self.validate_task_to_prompt_name(self.model.prompts) + except ValueError: + model_prompts = None + elif model_prompts is not None and hasattr(self.model, "prompts"): + logger.info(f"Model prompts will be overwritten with {model_prompts}") + self.model.prompts = model_prompts + self.model_prompts = self.validate_task_to_prompt_name(model_prompts) + + def encode( + self, + sentences: Sequence[str], + *, + task_name: str, + prompt_type: PromptType | None = None, + **kwargs: Any, + ) -> np.ndarray: + """Encodes the given sentences using the encoder. + + Args: + sentences: The sentences to encode. + task_name: The name of the task. Pylate uses this to + determine which prompt to use from a specified dictionary. + prompt_type: The name type of prompt. (query or passage) + **kwargs: Additional arguments to pass to the encoder. + + The order of priorities for prompt selection are: + 1. Composed prompt of task name + prompt type (query or passage) + 2. Specific task prompt + 3. Composed prompt of task type + prompt type (query or passage) + 4. Specific task type prompt + 5. Specific prompt type (query or passage) + + Returns: + The encoded sentences as a numpy array. 
+ """ + prompt_name = None + if self.model_prompts is not None: + prompt_name = self.get_prompt_name( + self.model_prompts, task_name, prompt_type + ) + if prompt_name: + logger.info( + f"Using prompt_name={prompt_name} for task={task_name} prompt_type={prompt_type}" + ) + else: + logger.info( + f"No model prompts found for task={task_name} prompt_type={prompt_type}" + ) + logger.info(f"Encoding {len(sentences)} sentences.") + + pred = self.model.encode( + sentences, + prompt_name=prompt_name, + is_query=True if prompt_type == PromptType.query else False, + **kwargs, + ) + + # encode returns a list of tensors shaped (x, token_dim) where x is the number of tokens in the sentence + # we need to pad these tensors to the same length + # Tensors have varying lengths; therefore, they need to be padded with zeros to ensure uniformity before being combined + # output shape will be (batch_size, len(max(tokens)), embedding_token_dim) + pred = torch.nn.utils.rnn.pad_sequence(pred, batch_first=True, padding_value=0) + + return pred.cpu().numpy() + + def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: + """Computes the max-similarity max_sim(a[i], b[j]) for all i and j. 
+ Works with a Tensor of the shape (batch_size, num_tokens, token_dim) + + Return: + Matrix with res[i][j] = max_sim(a[i], b[j]) + """ # noqa: D402 + if not isinstance(a, torch.Tensor): + a = torch.tensor(a, dtype=torch.float32) + + if not isinstance(b, torch.Tensor): + b = torch.tensor(b, dtype=torch.float32) + + if len(a.shape) == 2: + a = a.unsqueeze(0) + + if len(b.shape) == 2: + b = b.unsqueeze(0) + + scores = torch.einsum( + "ash,bth->abst", + a, + b, + ) + + return scores.max(axis=-1).values.sum(axis=-1) + + +colbert_v2 = ModelMeta( + loader=partial( + ColBERTWrapper, + model_name="colbert-ir/colbertv2.0", + ), + name="colbert-ir/colbertv2.0", + languages=["eng_Latn"], + open_weights=True, + revision="c1e84128e85ef755c096a95bdb06b47793b13acf", + public_training_code=True, + release_date="2024-09-21", + n_parameters=110 * 1e6, + max_tokens=180, # Reduced for Benchmarking - see ColBERT paper + embed_dim=None, # Bag of Embeddings (128) for each token + license="mit", + similarity_fn_name="max_sim", + framework=["PyLate", "ColBERT"], + reference="https://huggingface.co/colbert-ir/colbertv2.0", + use_instructions=False, + adapted_from=None, + superseded_by=None, +) + + +jina_colbert_v2 = ModelMeta( + loader=partial( + ColBERTWrapper, + model_name="jinaai/jina-colbert-v2", + query_prefix="[QueryMarker]", + document_prefix="[DocumentMarker]", + attend_to_expansion_tokens=True, + trust_remote_code=True, + ), + name="jinaai/jina-colbert-v2", + languages=[ # list of languages the model has been evaluated on + "ara-Arab", # Arabic + "ben-Beng", # Bengali + "deu-Latn", # German + "spa-Latn", # Spanish + "eng-Latn", # English + "fas-Arab", # Persian + "fin-Latn", # Finnish + "fra-Latn", # French + "hin-Deva", # Hindi + "ind-Latn", # Indonesian + "jpn-Jpan", # Japanese + "kor-Kore", # Korean + "rus-Cyrl", # Russian + "swa-Latn", # Swahili + "tel-Telu", # Telugu + "tha-Thai", # Thai + "yor-Latn", # Yoruba + "zho-Hans", # Chinese (Simplified) + "nld-Latn", # Dutch + 
"ita-Latn", # Italian + "por-Latn", # Portuguese + "vie-Latn", # Vietnamese + ], + open_weights=True, + revision="4cf816e5e2b03167b132a3c847a9ecd48ba708e1", + public_training_code=False, + release_date="2024-08-16", + n_parameters=559 * 1e6, + max_tokens=8192, + embed_dim=None, # Bag of Embeddings (128) for each token + license="cc-by-nc-4.0", + similarity_fn_name="max_sim", + framework=["PyLate", "ColBERT"], + reference="https://huggingface.co/jinaai/jina-colbert-v2", + use_instructions=False, + adapted_from=None, + superseded_by=None, +) diff --git a/mteb/models/e5_instruct.py b/mteb/models/e5_instruct.py index 8441ba5978..312b7c671a 100644 --- a/mteb/models/e5_instruct.py +++ b/mteb/models/e5_instruct.py @@ -12,15 +12,14 @@ MISTRAL_LANGUAGES = ["eng_Latn", "fra_Latn", "deu_Latn", "ita_Latn", "spa_Latn"] -def e5_instruction(instruction: str) -> str: - return f"Instruct: {instruction}\nQuery: " +E5_INSTRUCTION = "Instruct: {instruction}\nQuery: " e5_instruct = ModelMeta( - loader=partial( + loader=partial( # type: ignore instruct_wrapper, model_name_or_path="intfloat/multilingual-e5-large-instruct", - instruction_template=e5_instruction, + instruction_template=E5_INSTRUCTION, attn="cccc", pooling_method="mean", mode="embedding", @@ -50,10 +49,10 @@ def e5_instruction(instruction: str) -> str: ) e5_mistral = ModelMeta( - loader=partial( + loader=partial( # type: ignore instruct_wrapper, model_name_or_path="intfloat/e5-mistral-7b-instruct", - instruction_template=e5_instruction, + instruction_template=E5_INSTRUCTION, attn="cccc", pooling_method="lasttoken", mode="embedding", diff --git a/mteb/models/e5_models.py b/mteb/models/e5_models.py index 3bce039f02..83f6dec08d 100644 --- a/mteb/models/e5_models.py +++ b/mteb/models/e5_models.py @@ -132,7 +132,7 @@ } e5_mult_small = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="intfloat/multilingual-e5-small", revision="fd1525a9fd15316a2d503bf26ab031a61d056e98", @@ 
-156,7 +156,7 @@ ) e5_mult_base = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="intfloat/multilingual-e5-base", model_prompts=model_prompts, @@ -179,7 +179,7 @@ ) e5_mult_large = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="intfloat/multilingual-e5-large", revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb", @@ -203,7 +203,7 @@ ) e5_eng_small_v2 = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="intfloat/e5-small-v2", model_prompts=model_prompts, @@ -226,7 +226,7 @@ ) e5_eng_small = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="intfloat/e5-small", revision="e272f3049e853b47cb5ca3952268c6662abda68f", @@ -250,7 +250,7 @@ ) e5_eng_base_v2 = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="intfloat/e5-base-v2", revision="1c644c92ad3ba1efdad3f1451a637716616a20e8", @@ -276,7 +276,7 @@ ) e5_eng_large_v2 = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="intfloat/e5-large-v2", revision="b322e09026e4ea05f42beadf4d661fb4e101d311", diff --git a/mteb/models/gritlm_models.py b/mteb/models/gritlm_models.py index 596169b9b4..384f5c71cf 100644 --- a/mteb/models/gritlm_models.py +++ b/mteb/models/gritlm_models.py @@ -29,7 +29,7 @@ def gritlm_instruction(instruction: str = "") -> str: gritlm7b = ModelMeta( - loader=partial( + loader=partial( # type: ignore instruct_wrapper, model_name_or_path="GritLM/GritLM-7B", instruction_template=gritlm_instruction, @@ -53,7 +53,7 @@ def gritlm_instruction(instruction: str = "") -> str: citation=GRITLM_CITATION, ) gritlm8x7b = ModelMeta( - loader=partial( + loader=partial( # type: ignore instruct_wrapper, model_name_or_path="GritLM/GritLM-8x7B", instruction_template=gritlm_instruction, diff --git 
a/mteb/models/gte_models.py b/mteb/models/gte_models.py index 5c7043dd8d..7a6396ba9e 100644 --- a/mteb/models/gte_models.py +++ b/mteb/models/gte_models.py @@ -3,8 +3,12 @@ from functools import partial from mteb.model_meta import ModelMeta +from mteb.models.instruct_wrapper import instruct_wrapper + + +def instruction_template(instruction: str) -> str: + return f"Instruct: {instruction}\nQuery: " if instruction else "" -from .instruct_wrapper import instruct_wrapper GTE_CITATION = """ @article{li2023towards, @@ -16,10 +20,10 @@ """ gte_Qwen2_7B_instruct = ModelMeta( - loader=partial( + loader=partial( # type: ignore instruct_wrapper, model_name_or_path="Alibaba-NLP/gte-Qwen2-7B-instruct", - instruction_template="Instruct: {instruction}\nQuery: ", + instruction_template=instruction_template, attn="cccc", pooling_method="lasttoken", mode="embedding", @@ -43,3 +47,59 @@ use_instructions=True, citation=GTE_CITATION, ) + + +gte_Qwen1_5_7B_instruct = ModelMeta( + loader=partial( # type: ignore + instruct_wrapper, + model_name_or_path="Alibaba-NLP/gte-Qwen1.5-7B-instruct", + instruction_template=instruction_template, + attn="cccc", + pooling_method="lasttoken", + mode="embedding", + torch_dtype="auto", + normalized=True, + ), + name="Alibaba-NLP/gte-Qwen1.5-7B-instruct", + languages=["eng_Latn"], + open_weights=True, + revision="07d27e5226328010336563bc1b564a5e3436a298", + release_date="2024-04-20", # initial commit of hf model. 
+ n_parameters=7_720_000_000, + memory_usage=None, + embed_dim=4096, + license="apache-2.0", + max_tokens=32768, + reference="https://huggingface.co/Alibaba-NLP/gte-Qwen1.5-7B-instruct", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, +) + + +gte_Qwen2_1_5B_instruct = ModelMeta( + loader=partial( # type: ignore + instruct_wrapper, + model_name_or_path="Alibaba-NLP/gte-Qwen2-1.5B-instruct", + instruction_template=instruction_template, + attn="cccc", + pooling_method="lasttoken", + mode="embedding", + torch_dtype="auto", + normalized=True, + ), + name="Alibaba-NLP/gte-Qwen2-1.5B-instruct", + languages=["eng_Latn"], + open_weights=True, + revision="c6c1b92f4a3e1b92b326ad29dd3c8433457df8dd", + release_date="2024-07-29", # initial commit of hf model. + n_parameters=1_780_000_000, + memory_usage=None, + embed_dim=8960, + license="apache-2.0", + max_tokens=131072, + reference="https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, +) diff --git a/mteb/models/ibm_granite_models.py b/mteb/models/ibm_granite_models.py new file mode 100644 index 0000000000..c2443de233 --- /dev/null +++ b/mteb/models/ibm_granite_models.py @@ -0,0 +1,114 @@ +from __future__ import annotations + +from functools import partial + +from mteb.model_meta import ModelMeta, sentence_transformers_loader + +GRANITE_LANGUAGES = [ + "ara_Latn", + "ces_Latn", + "deu_Latn", + "eng_Latn", + "spa_Latn", + "fra_Latn", + "ita_Latn", + "jpn_Latn", + "kor_Latn", + "nld_Latn", + "por_Latn", + "zho_Hant", + "zho_Hans", +] + + +granite_107m_multilingual = ModelMeta( + loader=partial( # type: ignore + sentence_transformers_loader, + model_name="ibm-granite/granite-embedding-107m-multilingual", + revision="47db56afe692f731540413c67dd818ff492277e7", + ), + name="ibm-granite/granite-embedding-107m-multilingual", + languages=GRANITE_LANGUAGES, + 
open_weights=True, + revision="47db56afe692f731540413c67dd818ff492277e7", + release_date="2024-12-18", + n_parameters=107_000_000, + memory_usage=None, + embed_dim=384, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/ibm-granite/granite-embedding-107m-multilingual", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + adapted_from=None, + superseded_by=None, +) + +granite_278m_multilingual = ModelMeta( + loader=partial( # type: ignore + sentence_transformers_loader, + model_name="ibm-granite/granite-embedding-278m-multilingual", + revision="84e3546b88b0cb69f8078608a1df558020bcbf1f", + ), + name="ibm-granite/granite-embedding-278m-multilingual", + languages=GRANITE_LANGUAGES, + open_weights=True, + revision="84e3546b88b0cb69f8078608a1df558020bcbf1f", + release_date="2024-12-18", + n_parameters=278_000_000, + memory_usage=None, + embed_dim=768, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/ibm-granite/granite-embedding-278m-multilingual", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + adapted_from=None, + superseded_by=None, +) + +granite_30m_english = ModelMeta( + loader=partial( # type: ignore + sentence_transformers_loader, + model_name="ibm-granite/granite-embedding-30m-english", + revision="eddbb57470f896b5f8e2bfcb823d8f0e2d2024a5", + ), + name="ibm-granite/granite-embedding-30m-english", + languages=["eng_Latn"], + open_weights=True, + revision="eddbb57470f896b5f8e2bfcb823d8f0e2d2024a5", + release_date="2024-12-18", + n_parameters=30_000_000, + memory_usage=None, + embed_dim=384, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/ibm-granite/granite-embedding-30m-english", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + adapted_from=None, + superseded_by=None, +) + +granite_125m_english = ModelMeta( + loader=partial( # type: ignore + sentence_transformers_loader, + 
model_name="ibm-granite/granite-embedding-125m-english", + revision="e48d3a5b47eaa18e3fe07d4676e187fd80f32730", + ), + name="ibm-granite/granite-embedding-125m-english", + languages=["eng_Latn"], + open_weights=True, + revision="e48d3a5b47eaa18e3fe07d4676e187fd80f32730", + release_date="2024-12-18", + n_parameters=125_000_000, + memory_usage=None, + embed_dim=768, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/ibm-granite/granite-embedding-125m-english", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + adapted_from=None, + superseded_by=None, +) diff --git a/mteb/models/instruct_wrapper.py b/mteb/models/instruct_wrapper.py index 30c173c779..d6b5cc0388 100644 --- a/mteb/models/instruct_wrapper.py +++ b/mteb/models/instruct_wrapper.py @@ -47,6 +47,11 @@ def __init__( "No instruction template provided. Instructions will be used as-is." ) + if "gte-Qwen" in model_name_or_path: + logger.warning( + "Instructions are used in both query and docs, which may cause performance discrepancies from the original implementation." 
+ ) + self.instruction_template = instruction_template super().__init__(model_name_or_path=model_name_or_path, mode=mode, **kwargs) diff --git a/mteb/models/jina_models.py b/mteb/models/jina_models.py index a9bc680585..192ad4cc5c 100644 --- a/mteb/models/jina_models.py +++ b/mteb/models/jina_models.py @@ -191,7 +191,7 @@ def encode( jina_embeddings_v3 = ModelMeta( - loader=partial( + loader=partial( # type: ignore JinaWrapper, model="jinaai/jina-embeddings-v3", revision="215a6e121fa0183376388ac6b1ae230326bfeaed", @@ -220,7 +220,7 @@ def encode( license="cc-by-nc-4.0", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, + use_instructions=True, reference="https://huggingface.co/jinaai/jina-embeddings-v3", citation=""" @misc{sturua2024jinaembeddingsv3multilingualembeddingstask, diff --git a/mteb/models/linq_models.py b/mteb/models/linq_models.py new file mode 100644 index 0000000000..48e86ac8d5 --- /dev/null +++ b/mteb/models/linq_models.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from functools import partial + +import torch + +from mteb.model_meta import ModelMeta +from mteb.models.instruct_wrapper import instruct_wrapper + + +def instruction_template(instruction: str) -> str: + return f"Instruct: {instruction}\nQuery: " if instruction else "" + + +Linq_Embed_Mistral = ModelMeta( + loader=partial( # type: ignore + instruct_wrapper, + model_name_or_path="Linq-AI-Research/Linq-Embed-Mistral", + instruction_template=instruction_template, + attn="cccc", + pooling_method="lasttoken", + mode="embedding", + torch_dtype=torch.bfloat16, + normalized=True, + ), + name="Linq-AI-Research/Linq-Embed-Mistral", + languages=["eng_Latn"], + open_weights=True, + revision="0c1a0b0589177079acc552433cad51d7c9132379", + release_date="2024-05-29", # initial commit of hf model. 
+ n_parameters=7_110_000_000, + memory_usage=None, + embed_dim=4096, + license="cc-by-nc-4.0", + max_tokens=32768, + reference="https://huggingface.co/Linq-AI-Research/Linq-Embed-Mistral", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, +) diff --git a/mteb/models/mxbai_models.py b/mteb/models/mxbai_models.py index 7df0247e7f..5507bab8c0 100644 --- a/mteb/models/mxbai_models.py +++ b/mteb/models/mxbai_models.py @@ -5,7 +5,7 @@ from mteb.model_meta import ModelMeta, sentence_transformers_loader mxbai_embed_large_v1 = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="mixedbread-ai/mxbai-embed-large-v1", revision="990580e27d329c7408b3741ecff85876e128e203", diff --git a/mteb/models/nvidia_models.py b/mteb/models/nvidia_models.py new file mode 100644 index 0000000000..0c0170de6e --- /dev/null +++ b/mteb/models/nvidia_models.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +import logging +from collections.abc import Sequence +from functools import partial +from typing import Any + +import numpy as np +import torch +from sentence_transformers import CrossEncoder, SentenceTransformer + +from mteb.encoder_interface import PromptType +from mteb.model_meta import ModelMeta +from mteb.models.sentence_transformer_wrapper import SentenceTransformerWrapper + +logger = logging.getLogger(__name__) + + +def instruction_template(instruction: str) -> str: + return f"Instruct: {instruction}\nQuery: " if instruction else "" + + +class NvEmbedWrapper(SentenceTransformerWrapper): + def __init__( + self, + model: str | SentenceTransformer | CrossEncoder, + revision: str | None = None, + model_prompts: dict[str, str] | None = None, + **kwargs, + ) -> None: + super().__init__(model, revision, model_prompts, **kwargs) + self.model.max_seq_length = 32768 + self.model.tokenizer.padding_side = "right" + logger.warning( + "Instructions are used in both query and docs, which 
may cause performance discrepancies from the original implementation." + ) + + def encode( + self, + sentences: Sequence[str], + *, + task_name: str, + prompt_type: PromptType | None = None, + **kwargs: Any, + ) -> np.ndarray: + # Add eos token to each input example + sentences = [example + self.model.tokenizer.eos_token for example in sentences] + + instruction = "" + if prompt_type == PromptType.query: + instruction = self.get_instruction(task_name, prompt_type) + + prompt = instruction_template(instruction) + + if prompt: + logger.info(f"Using {prompt=} for task={task_name} {prompt_type=}") + else: + logger.info(f"No model prompts found for task={task_name} {prompt_type=}") + + logger.info(f"Encoding {len(sentences)} sentences.") + + embeddings = self.model.encode( + sentences, + prompt=prompt, + normalize_embeddings=True, + **kwargs, + ) + if isinstance(embeddings, torch.Tensor): + embeddings = embeddings.cpu().detach().float().numpy() + return embeddings + + +NV_embed_v2 = ModelMeta( + loader=partial( # type: ignore + NvEmbedWrapper, + model="nvidia/NV-Embed-v2", + trust_remote_code=True, + ), + name="nvidia/NV-Embed-v2", + languages=["eng_Latn"], + open_weights=True, + revision="7604d305b621f14095a1aa23d351674c2859553a", + release_date="2024-09-09", # initial commit of hf model. + n_parameters=7_850_000_000, + memory_usage=None, + embed_dim=4096, + license="cc-by-nc-4.0", + max_tokens=32768, + reference="https://huggingface.co/nvidia/NV-Embed-v2", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, +) + +NV_embed_v1 = ModelMeta( + loader=partial( # type: ignore + NvEmbedWrapper, + model="nvidia/NV-Embed-v1", + trust_remote_code=True, + ), + name="nvidia/NV-Embed-v1", + languages=["eng_Latn"], + open_weights=True, + revision="570834afd5fef5bf3a3c2311a2b6e0a66f6f4f2c", + release_date="2024-09-13", # initial commit of hf model. 
+ n_parameters=7_850_000_000, + memory_usage=None, + embed_dim=4096, + license="cc-by-nc-4.0", + max_tokens=32768, + reference="https://huggingface.co/nvidia/NV-Embed-v1", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, +) diff --git a/mteb/models/overview.py b/mteb/models/overview.py index f1b90f6c05..5e6cd0184c 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -15,17 +15,21 @@ bge_models, bm25, cohere_models, + colbert_models, e5_instruct, e5_models, google_models, gritlm_models, gte_models, + ibm_granite_models, jina_models, + linq_models, llm2vec_models, misc_models, model2vec_models, mxbai_models, nomic_models, + nvidia_models, openai_models, promptriever_models, repllama_models, @@ -46,29 +50,33 @@ bge_models, bm25, cohere_models, + colbert_models, e5_instruct, e5_models, google_models, + google_models, gritlm_models, gte_models, + ibm_granite_models, + jina_models, + linq_models, llm2vec_models, mxbai_models, model2vec_models, misc_models, nomic_models, + nvidia_models, openai_models, + promptriever_models, + repllama_models, + rerankers_custom, + rerankers_monot5_based, ru_sentence_models, salesforce_models, sentence_transformers_models, - voyage_models, - google_models, - repllama_models, - promptriever_models, - jina_models, - uae_models, stella_models, - rerankers_monot5_based, - rerankers_custom, + uae_models, + voyage_models, ] MODEL_REGISTRY = {} diff --git a/mteb/models/rerankers_custom.py b/mteb/models/rerankers_custom.py index 4555888be0..d9ab7a061e 100644 --- a/mteb/models/rerankers_custom.py +++ b/mteb/models/rerankers_custom.py @@ -175,7 +175,7 @@ def loader_inner(**kwargs: Any) -> Encoder: monobert_large = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=MonoBERTReranker, model_name_or_path="castorini/monobert-large-msmarco", @@ -190,7 +190,7 @@ def loader_inner(**kwargs: Any) -> Encoder: # languages unclear: 
https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual/discussions/28 jina_reranker_multilingual = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=JinaReranker, model_name_or_path="jinaai/jina-reranker-v2-base-multilingual", @@ -204,7 +204,7 @@ def loader_inner(**kwargs: Any) -> Encoder: ) bge_reranker_v2_m3 = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=BGEReranker, model_name_or_path="BAAI/bge-reranker-v2-m3", diff --git a/mteb/models/rerankers_monot5_based.py b/mteb/models/rerankers_monot5_based.py index d40c3409ed..d95639e041 100644 --- a/mteb/models/rerankers_monot5_based.py +++ b/mteb/models/rerankers_monot5_based.py @@ -94,8 +94,10 @@ def get_prediction_tokens( token_true_id = tokenizer.get_vocab()[token_true] return token_false_id, token_true_id else: - raise Exception(f"We don't know the indexes for the non-relevant/relevant tokens for\ - the checkpoint {model_name_or_path} and you did not provide any.") + raise Exception( + f"We don't know the indexes for the non-relevant/relevant tokens for\ + the checkpoint {model_name_or_path} and you did not provide any." 
+ ) else: token_false_id = tokenizer.get_vocab()[token_false] token_true_id = tokenizer.get_vocab()[token_true] @@ -276,7 +278,7 @@ def get_prediction_tokens(self, *args, **kwargs): monot5_small = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=MonoT5Reranker, model_name_or_path="castorini/monot5-small-msmarco-10k", @@ -299,7 +301,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) monot5_base = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=MonoT5Reranker, model_name_or_path="castorini/monot5-base-msmarco-10k", @@ -322,7 +324,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) monot5_large = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=MonoT5Reranker, model_name_or_path="castorini/monot5-large-msmarco-10k", @@ -345,7 +347,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) monot5_3b = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=MonoT5Reranker, model_name_or_path="castorini/monot5-3b-msmarco-10k", @@ -368,7 +370,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) flant5_base = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=FLANT5Reranker, model_name_or_path="google/flan-t5-base", @@ -393,7 +395,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) flant5_large = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=FLANT5Reranker, model_name_or_path="google/flan-t5-large", @@ -418,7 +420,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) flant5_xl = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=FLANT5Reranker, model_name_or_path="google/flan-t5-xl", @@ -443,7 +445,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) flant5_xxl = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=FLANT5Reranker, model_name_or_path="google/flan-t5-xxl", @@ -469,7 +471,7 @@ def get_prediction_tokens(self, 
*args, **kwargs): llama2_7b = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=LlamaReranker, model_name_or_path="meta-llama/Llama-2-7b-hf", @@ -492,7 +494,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) llama2_7b_chat = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=LlamaReranker, model_name_or_path="meta-llama/Llama-2-7b-chat-hf", @@ -515,7 +517,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) mistral_7b = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=MistralReranker, model_name_or_path="mistralai/Mistral-7B-Instruct-v0.2", @@ -538,7 +540,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) followir_7b = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=FollowIRReranker, model_name_or_path="jhu-clsp/FollowIR-7B", @@ -667,7 +669,7 @@ def get_prediction_tokens(self, *args, **kwargs): ] mt5_base_mmarco_v2 = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=MonoT5Reranker, model_name_or_path="unicamp-dl/mt5-base-mmarco-v2", @@ -690,7 +692,7 @@ def get_prediction_tokens(self, *args, **kwargs): ) mt5_13b_mmarco_100k = ModelMeta( - loader=partial( + loader=partial( # type: ignore _loader, wrapper=MonoT5Reranker, model_name_or_path="unicamp-dl/mt5-13b-mmarco-100k", diff --git a/mteb/models/ru_sentence_models.py b/mteb/models/ru_sentence_models.py index 027b7c4840..a25353faf5 100644 --- a/mteb/models/ru_sentence_models.py +++ b/mteb/models/ru_sentence_models.py @@ -75,11 +75,11 @@ ) user_base_ru = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="deepvk/USER-base", revision="436a489a2087d61aa670b3496a9915f84e46c861", - prompts={"query": "query: ", "passage": "passage: "}, + model_prompts={"query": "query: ", "passage": "passage: "}, ), name="deepvk/USER-base", languages=["rus_Cyrl"], @@ -94,7 +94,7 @@ 
reference="https://huggingface.co/ai-forever/sbert_large_mt_nlu_ru", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, + use_instructions=True, citation="""@misc{deepvk2024user, title={USER: Universal Sentence Encoder for Russian}, author={Malashenko, Boris and Zemerov, Anton and Spirin, Egor}, @@ -245,7 +245,7 @@ rosberta_ru_en = ModelMeta( - loader=partial( + loader=partial( # type: ignore sentence_transformers_loader, model_name="ai-forever/ru-en-RoSBERTa", revision="89fb1651989adbb1cfcfdedafd7d102951ad0555", @@ -261,6 +261,7 @@ open_weights=True, revision="89fb1651989adbb1cfcfdedafd7d102951ad0555", release_date="2024-07-29", + use_instructions=True, citation="""@misc{snegirev2024russianfocusedembeddersexplorationrumteb, title={The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design}, author={Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and Alexander Abramov}, diff --git a/mteb/models/salesforce_models.py b/mteb/models/salesforce_models.py index 3a2ab12670..ab63b30eca 100644 --- a/mteb/models/salesforce_models.py +++ b/mteb/models/salesforce_models.py @@ -2,26 +2,23 @@ from functools import partial -import torch - from mteb.model_meta import ModelMeta - -from .instruct_wrapper import instruct_wrapper +from mteb.models.instruct_wrapper import instruct_wrapper -def sfr_instruction(instruction: str) -> str: - return f"Instruct: {instruction}\nQuery: " +def instruction_template(instruction: str) -> str: + return f"Instruct: {instruction}\nQuery: " if instruction else "" SFR_Embedding_2_R = ModelMeta( - loader=partial( + loader=partial( # type: ignore instruct_wrapper, model_name_or_path="Salesforce/SFR-Embedding-2_R", - instruction_template=sfr_instruction, + instruction_template=instruction_template, attn="cccc", pooling_method="lasttoken", mode="embedding", - torch_dtype=torch.bfloat16, + torch_dtype="auto", # The ST script does not normalize 
while the HF one does so unclear what to do # https://huggingface.co/Salesforce/SFR-Embedding-2_R normalized=True, @@ -48,3 +45,31 @@ def sfr_instruction(instruction: str) -> str: } """, ) + + +SFR_Embedding_Mistral = ModelMeta( + loader=partial( # type: ignore + instruct_wrapper, + model_name_or_path="Salesforce/SFR-Embedding-Mistral", + instruction_template=instruction_template, + attn="cccc", + pooling_method="lasttoken", + mode="embedding", + torch_dtype="auto", + normalized=True, + ), + name="Salesforce/SFR-Embedding-Mistral", + languages=["eng_Latn"], + open_weights=True, + revision="938c560d1c236aa563b2dbdf084f28ab28bccb11", + release_date="2024-01-24", # initial commit of hf model. + n_parameters=7_110_000_000, + memory_usage=None, + embed_dim=4096, + license="cc-by-nc-4.0", + max_tokens=32768, + reference="https://huggingface.co/Salesforce/SFR-Embedding-Mistral", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, +) diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index 13d39e4031..763fa7e154 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -56,6 +56,9 @@ def __init__( if isinstance(self.model, CrossEncoder): self.predict = self._predict + if hasattr(self.model, "similarity"): + self.similarity = self.model.similarity + def encode( self, sentences: Sequence[str], diff --git a/mteb/models/stella_models.py b/mteb/models/stella_models.py index 8fc19fd06d..153ee6aa99 100644 --- a/mteb/models/stella_models.py +++ b/mteb/models/stella_models.py @@ -7,7 +7,7 @@ stella_en_400M = ModelMeta( # https://huggingface.co/dunzhang/stella_en_400M_v5/discussions/21#671a6205ac1e2416090f2bf4 - loader=partial( + loader=partial( # type: ignore instruct_wrapper, model_name_or_path="dunzhang/stella_en_400M_v5", attn="cccc", @@ -31,7 +31,7 @@ ) stella_en_1_5b = ModelMeta( - loader=partial( + loader=partial( # 
type: ignore instruct_wrapper, model_name_or_path="dunzhang/stella_en_1.5B_v5", attn="cccc", diff --git a/mteb/models/uae_models.py b/mteb/models/uae_models.py index 33f2cb03ac..cb83d57c77 100644 --- a/mteb/models/uae_models.py +++ b/mteb/models/uae_models.py @@ -52,7 +52,7 @@ def encode( uae_large_v1 = ModelMeta( - loader=partial( + loader=partial( # type: ignore UAEWrapper, model="WhereIsAI/UAE-Large-V1", revision="369c368f70f16a613f19f5598d4f12d9f44235d4", @@ -74,7 +74,7 @@ def encode( similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], reference="https://huggingface.co/WhereIsAI/UAE-Large-V1", - use_instructions=False, + use_instructions=True, citation=""" @article{li2023angle, title={AnglE-optimized Text Embeddings}, diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index 9f42808b37..0e6ef71d94 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -145,7 +145,7 @@ def _batched_encode( revision="1", release_date="2024-05-05", languages=None, # supported languages not specified - loader=partial( + loader=partial( # type: ignore VoyageWrapper, model_name="voyage-large-2-instruct", model_prompts=model_prompts, @@ -167,7 +167,7 @@ def _batched_encode( revision="1", release_date="2024-05-30", languages=None, # supported languages not specified - loader=partial( + loader=partial( # type: ignore VoyageWrapper, model_name="voyage-finance-2", model_prompts=model_prompts, @@ -181,7 +181,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/06/03/domain-specific-embeddings-finance-edition-voyage-finance-2/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_law_2 = ModelMeta( @@ -189,7 +189,7 @@ def _batched_encode( revision="1", release_date="2024-04-15", languages=None, # supported languages not specified - loader=partial( + loader=partial( # type: ignore VoyageWrapper, model_name="voyage-law-2", model_prompts=model_prompts, 
@@ -203,7 +203,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/04/15/domain-specific-embeddings-and-retrieval-legal-edition-voyage-law-2/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_code_2 = ModelMeta( @@ -211,7 +211,7 @@ def _batched_encode( revision="1", release_date="2024-01-23", languages=None, # supported languages not specified - loader=partial( + loader=partial( # type: ignore VoyageWrapper, model_name="voyage-code-2", model_prompts=model_prompts, @@ -225,7 +225,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/01/23/voyage-code-2-elevate-your-code-retrieval/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_large_2 = ModelMeta( @@ -233,7 +233,7 @@ def _batched_encode( revision="1", release_date="2023-10-29", languages=None, # supported languages not specified - loader=partial( + loader=partial( # type: ignore VoyageWrapper, model_name="voyage-large-2", model_prompts=model_prompts, @@ -247,7 +247,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2023/10/29/voyage-embeddings/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_2 = ModelMeta( @@ -255,7 +255,7 @@ def _batched_encode( revision="1", release_date="2023-10-29", languages=None, # supported languages not specified - loader=partial( + loader=partial( # type: ignore VoyageWrapper, model_name="voyage-2", model_prompts=model_prompts, @@ -269,14 +269,14 @@ def _batched_encode( reference="https://blog.voyageai.com/2023/10/29/voyage-embeddings/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_multilingual_2 = ModelMeta( name="voyageai/voyage-multilingual-2", revision="1", release_date="2024-06-10", languages=None, # supported languages not specified - loader=partial( + loader=partial( # type: ignore VoyageWrapper, 
model_name="voyage-multilingual-2", model_prompts=model_prompts, @@ -290,7 +290,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/06/10/voyage-multilingual-2-multilingual-embedding-model/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_3 = ModelMeta( @@ -312,7 +312,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/09/18/voyage-3/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_3_lite = ModelMeta( @@ -334,5 +334,5 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/09/18/voyage-3/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) diff --git a/mteb/tasks/Classification/multilingual/HinDialectClassification.py b/mteb/tasks/Classification/multilingual/HinDialectClassification.py index 6565d4b71a..c9d6b36669 100644 --- a/mteb/tasks/Classification/multilingual/HinDialectClassification.py +++ b/mteb/tasks/Classification/multilingual/HinDialectClassification.py @@ -3,29 +3,29 @@ from mteb.abstasks.AbsTaskClassification import AbsTaskClassification from mteb.abstasks.TaskMetadata import TaskMetadata -_LANGUAGES = { - "pan": ["pan-Guru"], - "bgc": ["bgc-Deva"], - "mag": ["mag-Deva"], - "bns": ["bns-Deva"], - "kfq": ["kfg-Deva"], - "noe": ["noe-Deva"], - "bhb": ["bhb-Deva"], - "bho": ["bho-Deva"], - "gbm": ["gbm-Deva"], - "mup": ["mup-Deva"], - "anp": ["anp-Deva"], - "hne": ["hne-Deva"], - "bra": ["bra-Deva"], - "raj": ["raj-Deva"], - "awa": ["awa-Deva"], - "guj": ["guj-Gujr"], - "ben": ["ben-Beng"], - "bhd": ["bhd-Deva"], - "kfy": ["kfy-Deva"], - "mar": ["mar-Deva"], - "bjj": ["bjj-Deva"], -} +_LANGUAGES = [ + "pan-Guru", + "bgc-Deva", + "mag-Deva", + "bns-Deva", + "kfg-Deva", + "noe-Deva", + "bhb-Deva", + "bho-Deva", + "gbm-Deva", + "mup-Deva", + "anp-Deva", + "hne-Deva", + "bra-Deva", + "raj-Deva", + "awa-Deva", + "guj-Gujr", + "ben-Beng", + 
"bhd-Deva", + "kfy-Deva", + "mar-Deva", + "bjj-Deva", +] class HinDialectClassification(AbsTaskClassification): diff --git a/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py b/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py index 4cef2c0604..217d300ec0 100644 --- a/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py +++ b/mteb/tasks/Classification/multilingual/SouthAfricanLangClassification.py @@ -3,19 +3,19 @@ from mteb.abstasks.AbsTaskClassification import AbsTaskClassification from mteb.abstasks.TaskMetadata import TaskMetadata -_LANGUAGES = { - "afr": ["afr-Latn"], - "eng": ["eng-Latn"], - "nbl": ["nbl-Latn"], - "nso": ["nso-Latn"], - "sot": ["sot-Latn"], - "ssw": ["ssw-Latn"], - "tsn": ["tsn-Latn"], - "tso": ["tso-Latn"], - "ven": ["ven-Latn"], - "xho": ["xho-Latn"], - "zul": ["zul-Latn"], -} +_LANGUAGES = [ + "afr-Latn", + "eng-Latn", + "nbl-Latn", + "nso-Latn", + "sot-Latn", + "ssw-Latn", + "tsn-Latn", + "tso-Latn", + "ven-Latn", + "xho-Latn", + "zul-Latn", +] class SouthAfricanLangClassification(AbsTaskClassification): diff --git a/mteb/tasks/Reranking/__init__.py b/mteb/tasks/Reranking/__init__.py index 497e2751b8..1e34adfc44 100644 --- a/mteb/tasks/Reranking/__init__.py +++ b/mteb/tasks/Reranking/__init__.py @@ -1,5 +1,6 @@ from __future__ import annotations +from .ara import NamaaMrTydiReranking from .eng import ( AskUbuntuDupQuestions, MindSmallReranking, @@ -19,6 +20,7 @@ "CMedQAv2", "MMarcoReranking", "T2Reranking", + "NamaaMrTydiReranking", "AskUbuntuDupQuestions", "WebLINXCandidatesReranking", "StackOverflowDupQuestions", diff --git a/mteb/tasks/Reranking/ara/NamaaMrTydiReranking.py b/mteb/tasks/Reranking/ara/NamaaMrTydiReranking.py new file mode 100644 index 0000000000..4a9d755747 --- /dev/null +++ b/mteb/tasks/Reranking/ara/NamaaMrTydiReranking.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from mteb.abstasks.TaskMetadata import TaskMetadata + +from 
....abstasks.AbsTaskReranking import AbsTaskReranking + + +class NamaaMrTydiReranking(AbsTaskReranking): + metadata = TaskMetadata( + name="NamaaMrTydiReranking", + description="Mr. TyDi is a multi-lingual benchmark dataset built on TyDi, covering eleven typologically diverse languages. It is designed for monolingual retrieval, specifically to evaluate ranking with learned dense representations. This dataset adapts the arabic test split for Reranking evaluation purposes by the addition of multiple (Hard) Negatives to each query and positive", + reference="https://huggingface.co/NAMAA-Space", + dataset={ + "path": "NAMAA-Space/mteb-eval-mrtydi", + "revision": "502637220a7ad0ecc5c39ff5518d7508d2624af8", + }, + type="Reranking", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["ara-Arab"], + main_score="map", + date=("2023-11-01", "2024-05-15"), + domains=["Encyclopaedic", "Written"], + task_subtypes=[], + license="cc-by-sa-3.0", + annotations_creators="human-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@article{muennighoff2022mteb, + doi = {10.48550/ARXIV.2210.07316}, + url = {https://arxiv.org/abs/2210.07316}, + author = {Muennighoff, Niklas and Tazi, Nouamane and Magne, Lo{\"\i}c and Reimers, Nils}, + title = {MTEB: Massive Text Embedding Benchmark}, + publisher = {arXiv}, + journal={arXiv preprint arXiv:2210.07316}, + year = {2022} +}""", + ) diff --git a/mteb/tasks/Reranking/ara/__init__.py b/mteb/tasks/Reranking/ara/__init__.py new file mode 100644 index 0000000000..8f56ceada7 --- /dev/null +++ b/mteb/tasks/Reranking/ara/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .NamaaMrTydiReranking import NamaaMrTydiReranking + +__all__ = ["NamaaMrTydiReranking"] diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index 859e8d3a49..7118699ac8 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -77,6 +77,19 @@ 
MLQuestionsRetrieval, MSMARCOHardNegatives, MSMARCOv2, + NanoArguAnaRetrieval, + NanoClimateFeverRetrieval, + NanoDBPediaRetrieval, + NanoFEVERRetrieval, + NanoFiQA2018Retrieval, + NanoHotpotQARetrieval, + NanoMSMARCORetrieval, + NanoNFCorpusRetrieval, + NanoNQRetrieval, + NanoQuoraRetrieval, + NanoSCIDOCSRetrieval, + NanoSciFactRetrieval, + NanoTouche2020Retrieval, NarrativeQARetrieval, NFCorpus, NQHardNegatives, @@ -246,6 +259,7 @@ "LEMBPasskeyRetrieval", "CQADupstackAndroidRetrieval", "TempReasonL2Context", + "NanoDBPediaRetrieval", "ARCChallenge", "LegalBenchCorporateLobbying", "SCIDOCS", @@ -260,22 +274,29 @@ "ClimateFEVERHardNegatives", "CQADupstackWordpressRetrieval", "CQADupstackEnglishRetrieval", + "NanoTouche2020Retrieval", "CQADupstackStatsRetrieval", "MLQuestionsRetrieval", "TempReasonL2Fact", + "NanoSciFactRetrieval", "CQADupstackGamingRetrieval", "CQADupstackWebmastersRetrieval", + "NanoFiQA2018Retrieval", "CQADupstackUnixRetrieval", "TempReasonL3Pure", "CQADupstackPhysicsRetrieval", "FiQA2018", "LitSearchRetrieval", + "NanoFEVERRetrieval", + "NanoMSMARCORetrieval", "FeedbackQARetrieval", "HagridRetrieval", + "NanoNFCorpusRetrieval", "FaithDialRetrieval", "SciFact", "CQADupstackMathematicaRetrieval", "RARbMath", + "NanoNQRetrieval", "HellaSwag", "PIQA", "SpartQA", @@ -283,13 +304,18 @@ "TempReasonL1", "HotpotQA", "HotpotQAHardNegatives", + "NanoClimateFeverRetrieval", + "NanoQuoraRetrieval", + "NanoArguAnaRetrieval", "LegalBenchConsumerContractsQA", + "NanoHotpotQARetrieval", "ArguAna", "LEMBWikimQARetrieval", "TempReasonL3Fact", "FEVER", "FEVERHardNegatives", "CQADupstackGisRetrieval", + "NanoSCIDOCSRetrieval", "AILACasedocs", "NFCorpus", "LEMBSummScreenFDRetrieval", diff --git a/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py b/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py new file mode 100644 index 0000000000..2230368b94 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoArguAnaRetrieval.py @@ -0,0 +1,85 @@ +from __future__ import annotations + 
+from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoArguAnaRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoArguAnaRetrieval", + description="NanoArguAna is a smaller subset of ArguAna, a dataset for argument retrieval in debate contexts.", + reference="http://argumentation.bplaced.net/arguana/data", + dataset={ + "path": "zeta-alpha-ai/NanoArguAna", + "revision": "8f4a982d470a32c45817738b9d29042ca55d75ad", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2020-01-01", "2020-12-31"], + domains=["Medical", "Written"], + task_subtypes=["Discourse coherence"], + license="cc-by-4.0", + annotations_creators="expert-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@inproceedings{boteva2016, + author = {Boteva, Vera and Gholipour, Demian and Sokolov, Artem and Riezler, Stefan}, + title = {A Full-Text Learning to Rank Dataset for Medical Information Retrieval}, + journal = {Proceedings of the 38th European Conference on Information Retrieval}, + journal-abbrev = {ECIR}, + year = {2016}, + city = {Padova}, + country = {Italy}, + url = {http://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf} +}""", + prompt={"query": "Given a claim, find documents that refute the claim"}, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoArguAna", + "corpus", + revision="8f4a982d470a32c45817738b9d29042ca55d75ad", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoArguAna", + "queries", + revision="8f4a982d470a32c45817738b9d29042ca55d75ad", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoArguAna", + "qrels", + revision="8f4a982d470a32c45817738b9d29042ca55d75ad", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": 
sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py b/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py new file mode 100644 index 0000000000..0185a454d3 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoClimateFeverRetrieval.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoClimateFeverRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoClimateFeverRetrieval", + description="NanoClimateFever is a small version of the BEIR dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change.", + reference="https://arxiv.org/abs/2012.00614", + dataset={ + "path": "zeta-alpha-ai/NanoClimateFEVER", + "revision": "96741bfa30b9f56db8c9eb7d08e775ed6474f206", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2020-01-01", "2020-12-31"], + domains=["Non-fiction", "Academic", "News"], + task_subtypes=["Claim verification"], + license="cc-by-4.0", + annotations_creators="expert-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@misc{diggelmann2021climatefever, + title={CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims}, + author={Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold}, + year={2021}, + 
eprint={2012.00614}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +}""", + prompt={ + "query": "Given a claim about climate change, retrieve documents that support or refute the claim" + }, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoClimateFEVER", + "corpus", + revision="96741bfa30b9f56db8c9eb7d08e775ed6474f206", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoClimateFEVER", + "queries", + revision="96741bfa30b9f56db8c9eb7d08e775ed6474f206", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoClimateFEVER", + "qrels", + revision="96741bfa30b9f56db8c9eb7d08e775ed6474f206", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py b/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py new file mode 100644 index 0000000000..caa638743c --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoDBPediaRetrieval.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoDBPediaRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoDBPediaRetrieval", + description="NanoDBPediaRetrieval is a small version of the standard test collection for entity search over the DBpedia knowledge base.", + reference="https://huggingface.co/datasets/zeta-alpha-ai/NanoDBPedia", + dataset={ + "path": "zeta-alpha-ai/NanoDBPedia", + "revision": 
"438f1c25129f05db6238699b5afdc9c6b58d2096", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2015-01-01", "2015-12-31"], + domains=["Encyclopaedic"], + task_subtypes=["Topic classification"], + license="cc-by-4.0", + annotations_creators="expert-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@article{lehmann2015dbpedia, title={DBpedia: A large-scale, multilingual knowledge base extracted from Wikipedia}, author={Lehmann, Jens and et al.}, journal={Semantic Web}, year={2015}}""", + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoDBPedia", + "corpus", + revision="438f1c25129f05db6238699b5afdc9c6b58d2096", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoDBPedia", + "queries", + revision="438f1c25129f05db6238699b5afdc9c6b58d2096", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoDBPedia", + "qrels", + revision="438f1c25129f05db6238699b5afdc9c6b58d2096", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py b/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py new file mode 100644 index 0000000000..6bdd0ab4cf --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoFEVERRetrieval.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata 
import TaskMetadata + + +class NanoFEVERRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoFEVERRetrieval", + description="NanoFEVER is a smaller version of " + + "FEVER (Fact Extraction and VERification), which consists of 185,445 claims generated by altering sentences" + + " extracted from Wikipedia and subsequently verified without knowledge of the sentence they were" + + " derived from.", + reference="https://fever.ai/", + dataset={ + "path": "zeta-alpha-ai/NanoFEVER", + "revision": "a8bfdf1bf15181167a7e22e69cf8754bdea9b4c8", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2018-01-01", "2018-12-31"], + domains=["Academic", "Encyclopaedic"], + task_subtypes=["Claim verification"], + license="cc-by-4.0", + annotations_creators="expert-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@inproceedings{thorne-etal-2018-fever, + title = "{FEVER}: a Large-scale Dataset for Fact Extraction and {VER}ification", + author = "Thorne, James and + Vlachos, Andreas and + Christodoulopoulos, Christos and + Mittal, Arpit", + editor = "Walker, Marilyn and + Ji, Heng and + Stent, Amanda", + booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)", + month = jun, + year = "2018", + address = "New Orleans, Louisiana", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/N18-1074", + doi = "10.18653/v1/N18-1074", + pages = "809--819", + abstract = "In this paper we introduce a new publicly available dataset for verification against textual sources, FEVER: Fact Extraction and VERification. It consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. 
The claims are classified as Supported, Refuted or NotEnoughInfo by annotators achieving 0.6841 in Fleiss kappa. For the first two classes, the annotators also recorded the sentence(s) forming the necessary evidence for their judgment. To characterize the challenge of the dataset presented, we develop a pipeline approach and compare it to suitably designed oracles. The best accuracy we achieve on labeling a claim accompanied by the correct evidence is 31.87{\%}, while if we ignore the evidence we achieve 50.91{\%}. Thus we believe that FEVER is a challenging testbed that will help stimulate progress on claim verification against textual sources.", +}""", + prompt={ + "query": "Given a claim, retrieve documents that support or refute the claim" + }, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoFEVER", + "corpus", + revision="a8bfdf1bf15181167a7e22e69cf8754bdea9b4c8", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoFEVER", + "queries", + revision="a8bfdf1bf15181167a7e22e69cf8754bdea9b4c8", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoFEVER", + "qrels", + revision="a8bfdf1bf15181167a7e22e69cf8754bdea9b4c8", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py b/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py new file mode 100644 index 0000000000..1a3467c1d7 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoFiQA2018Retrieval.py @@ -0,0 +1,85 @@ +from __future__ import 
annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoFiQA2018Retrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoFiQA2018Retrieval", + description="NanoFiQA2018 is a smaller subset of the Financial Opinion Mining and Question Answering dataset.", + reference="https://sites.google.com/view/fiqa/", + dataset={ + "path": "zeta-alpha-ai/NanoFiQA2018", + "revision": "4163ba032953d5044a7a6244261413f609c14342", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2018-01-01", "2018-12-31"], + domains=["Academic", "Social"], + task_subtypes=["Sentiment/Hate speech"], + license="cc-by-4.0", + annotations_creators="human-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@inproceedings{ +thakur2021beir, +title={{BEIR}: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models}, +author={Nandan Thakur and Nils Reimers and Andreas R{\"u}ckl{\'e} and Abhishek Srivastava and Iryna Gurevych}, +booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, +year={2021}, +url={https://openreview.net/forum?id=wCu6T5xFjeJ} +}""", + prompt={ + "query": "Given a financial question, retrieve user replies that best answer the question" + }, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoFiQA2018", + "corpus", + revision="4163ba032953d5044a7a6244261413f609c14342", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoFiQA2018", + "queries", + revision="4163ba032953d5044a7a6244261413f609c14342", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoFiQA2018", + "qrels", + revision="4163ba032953d5044a7a6244261413f609c14342", + ) + + self.corpus = { + split: { + 
sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py new file mode 100644 index 0000000000..4389aeafa8 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoHotpotQARetrieval.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoHotpotQARetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoHotpotQARetrieval", + description="NanoHotpotQARetrieval is a smaller subset of the " + + "HotpotQA dataset, which is a question answering dataset featuring natural, multi-hop questions, with strong" + + " supervision for supporting facts to enable more explainable question answering systems.", + reference="https://hotpotqa.github.io/", + dataset={ + "path": "zeta-alpha-ai/NanoHotpotQA", + "revision": "d79c0cdda980aba54842756770928035e1b61a51", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2018-01-01", "2018-12-31"], + domains=["Web", "Written"], + task_subtypes=["Question answering"], + license="cc-by-4.0", + annotations_creators="human-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@inproceedings{yang-etal-2018-hotpotqa, + title = "{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering", + author = "Yang, Zhilin and + Qi, Peng and + 
Zhang, Saizheng and + Bengio, Yoshua and + Cohen, William and + Salakhutdinov, Ruslan and + Manning, Christopher D.", + editor = "Riloff, Ellen and + Chiang, David and + Hockenmaier, Julia and + Tsujii, Jun{'}ichi", + booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing", + month = oct # "-" # nov, + year = "2018", + address = "Brussels, Belgium", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/D18-1259", + doi = "10.18653/v1/D18-1259", + pages = "2369--2380", + abstract = "Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. 
We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.", +}""", + prompt={ + "query": "Given a multi-hop question, retrieve documents that can help answer the question" + }, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoHotpotQA", + "corpus", + revision="d79c0cdda980aba54842756770928035e1b61a51", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoHotpotQA", + "queries", + revision="d79c0cdda980aba54842756770928035e1b61a51", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoHotpotQA", + "qrels", + revision="d79c0cdda980aba54842756770928035e1b61a51", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py b/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py new file mode 100644 index 0000000000..8a2f51e7fd --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoMSMARCORetrieval.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoMSMARCORetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoMSMARCORetrieval", + description="NanoMSMARCORetrieval is a smaller subset of MS MARCO, a collection of datasets focused on deep learning in search.", + reference="https://microsoft.github.io/msmarco/", + dataset={ + 
"path": "zeta-alpha-ai/NanoMSMARCO", + "revision": "7b8ff22f2771dc65ac5b439f222eb19a1f56abda", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2016-01-01", "2016-12-31"], + domains=["Web"], + task_subtypes=["Question answering"], + license="cc-by-4.0", + annotations_creators="human-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@article{DBLP:journals/corr/NguyenRSGTMD16, + author = {Tri Nguyen and + Mir Rosenberg and + Xia Song and + Jianfeng Gao and + Saurabh Tiwary and + Rangan Majumder and + Li Deng}, + title = {{MS} {MARCO:} {A} Human Generated MAchine Reading COmprehension Dataset}, + journal = {CoRR}, + volume = {abs/1611.09268}, + year = {2016}, + url = {http://arxiv.org/abs/1611.09268}, + archivePrefix = {arXiv}, + eprint = {1611.09268}, + timestamp = {Mon, 13 Aug 2018 16:49:03 +0200}, + biburl = {https://dblp.org/rec/journals/corr/NguyenRSGTMD16.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +}""", + prompt={ + "query": "Given a web search query, retrieve relevant passages that answer the query" + }, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoMSMARCO", + "corpus", + revision="7b8ff22f2771dc65ac5b439f222eb19a1f56abda", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoMSMARCO", + "queries", + revision="7b8ff22f2771dc65ac5b439f222eb19a1f56abda", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoMSMARCO", + "qrels", + revision="7b8ff22f2771dc65ac5b439f222eb19a1f56abda", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + 
sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py b/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py new file mode 100644 index 0000000000..0f6ac8533a --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoNFCorpusRetrieval.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoNFCorpusRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoNFCorpusRetrieval", + description="NanoNFCorpus is a smaller subset of NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval.", + reference="https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/", + dataset={ + "path": "zeta-alpha-ai/NanoNFCorpus", + "revision": "dd542a7efb9ad2136b9e00768b60fca9038f8156", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2016-01-01", "2016-12-31"], + domains=["Medical", "Academic", "Written"], + task_subtypes=["Question answering"], + license="cc-by-4.0", + annotations_creators="expert-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@inproceedings{boteva2016, + author = {Boteva, Vera and Gholipour, Demian and Sokolov, Artem and Riezler, Stefan}, + title = {A Full-Text Learning to Rank Dataset for Medical Information Retrieval}, + journal = {Proceedings of the 38th European Conference on Information Retrieval}, + journal-abbrev = {ECIR}, + year = {2016}, + city = {Padova}, + country = {Italy}, + url = {http://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf} +}""", + prompt={ + "query": "Given a question, retrieve relevant documents that best answer the question" + }, + ) + + def 
load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoNFCorpus", + "corpus", + revision="dd542a7efb9ad2136b9e00768b60fca9038f8156", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoNFCorpus", + "queries", + revision="dd542a7efb9ad2136b9e00768b60fca9038f8156", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoNFCorpus", + "qrels", + revision="dd542a7efb9ad2136b9e00768b60fca9038f8156", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py b/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py new file mode 100644 index 0000000000..5aa831f799 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoNQRetrieval.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoNQRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoNQRetrieval", + description="NanoNQ is a smaller subset of a dataset which contains questions from real users, and it requires QA systems to read and comprehend an entire Wikipedia article that may or may not contain the answer to the question.", + reference="https://ai.google.com/research/NaturalQuestions", + dataset={ + "path": "zeta-alpha-ai/NanoNQ", + "revision": "77540146379abf95df8326a3c5bb9eb21c7146c3", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + 
main_score="ndcg_at_10", + date=["2019-01-01", "2019-12-31"], + domains=["Academic", "Web"], + task_subtypes=["Question answering"], + license="cc-by-4.0", + annotations_creators="human-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@article{47761,title = {Natural Questions: a Benchmark for Question Answering Research}, + author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh + and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee + and Kristina N. Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le + and Slav Petrov},year = {2019},journal = {Transactions of the Association of Computational + Linguistics}}""", + prompt={ + "query": "Given a question, retrieve Wikipedia passages that answer the question" + }, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoNQ", + "corpus", + revision="77540146379abf95df8326a3c5bb9eb21c7146c3", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoNQ", + "queries", + revision="77540146379abf95df8326a3c5bb9eb21c7146c3", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoNQ", + "qrels", + revision="77540146379abf95df8326a3c5bb9eb21c7146c3", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py b/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py new file mode 100644 index 
0000000000..1391d12b93 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoQuoraRetrieval.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoQuoraRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoQuoraRetrieval", + description="NanoQuoraRetrieval is a smaller subset of the " + + "QuoraRetrieval dataset, which is based on questions that are marked as duplicates on the Quora platform. Given a" + + " question, find other (duplicate) questions.", + reference="https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs", + dataset={ + "path": "zeta-alpha-ai/NanoQuoraRetrieval", + "revision": "2ab2d73e6c862026282808b913a34f4136928545", + }, + type="Retrieval", + category="s2s", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2017-01-01", "2017-12-31"], + domains=["Social"], + task_subtypes=["Duplicate Detection"], + license="cc-by-4.0", + annotations_creators="human-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@misc{quora-question-pairs, + author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung}, + title = {Quora Question Pairs}, + publisher = {Kaggle}, + year = {2017}, + url = {https://kaggle.com/competitions/quora-question-pairs} +}""", + prompt={ + "query": "Given a question, retrieve questions that are semantically equivalent to the given question" + }, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoQuoraRetrieval", + "corpus", + revision="2ab2d73e6c862026282808b913a34f4136928545", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoQuoraRetrieval", + "queries", + revision="2ab2d73e6c862026282808b913a34f4136928545", + ) + self.relevant_docs = load_dataset( + 
"zeta-alpha-ai/NanoQuoraRetrieval", + "qrels", + revision="2ab2d73e6c862026282808b913a34f4136928545", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py b/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py new file mode 100644 index 0000000000..2d27e1a2dc --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoSCIDOCSRetrieval.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoSCIDOCSRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoSCIDOCSRetrieval", + description="NanoSCIDOCS is a smaller subset of " + + "SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation" + + " prediction, to document classification and recommendation.", + reference="https://allenai.org/data/scidocs", + dataset={ + "path": "zeta-alpha-ai/NanoSCIDOCS", + "revision": "484eb90549fc3f0b9c42b3551e80ceb999515537", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2020-01-01", "2020-12-31"], + domains=["Academic", "Written", "Non-fiction"], + task_subtypes=[], + license="cc-by-4.0", + annotations_creators="expert-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@inproceedings{specter2020cohan, + title={SPECTER: Document-level Representation Learning
using Citation-informed Transformers}, + author={Arman Cohan and Sergey Feldman and Iz Beltagy and Doug Downey and Daniel S. Weld}, + booktitle={ACL}, + year={2020} +}""", + prompt={ + "query": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper" + }, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoSCIDOCS", + "corpus", + revision="484eb90549fc3f0b9c42b3551e80ceb999515537", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoSCIDOCS", + "queries", + revision="484eb90549fc3f0b9c42b3551e80ceb999515537", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoSCIDOCS", + "qrels", + revision="484eb90549fc3f0b9c42b3551e80ceb999515537", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py b/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py new file mode 100644 index 0000000000..aff949d319 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoSciFactRetrieval.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoSciFactRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoSciFactRetrieval", + description="NanoSciFact is a smaller subset of SciFact, which verifies scientific claims using evidence from the research literature containing scientific paper abstracts.", + 
reference="https://github.com/allenai/scifact", + dataset={ + "path": "zeta-alpha-ai/NanoSciFact", + "revision": "309f1d1ae3ae2e092444a8a0c25bed59b82318bc", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=["2018-01-01", "2018-12-31"], + domains=["Academic", "Medical", "Written"], + task_subtypes=["Claim verification"], + license="cc-by-4.0", + annotations_creators="expert-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@inproceedings{specter2020cohan, + title={SPECTER: Document-level Representation Learning using Citation-informed Transformers}, + author={Arman Cohan and Sergey Feldman and Iz Beltagy and Doug Downey and Daniel S. Weld}, + booktitle={ACL}, + year={2020} +}""", + prompt={ + "query": "Given a scientific claim, retrieve documents that support or refute the claim" + }, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoSciFact", + "corpus", + revision="309f1d1ae3ae2e092444a8a0c25bed59b82318bc", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoSciFact", + "queries", + revision="309f1d1ae3ae2e092444a8a0c25bed59b82318bc", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoSciFact", + "qrels", + revision="309f1d1ae3ae2e092444a8a0c25bed59b82318bc", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py 
b/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py new file mode 100644 index 0000000000..656b5494a0 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/NanoTouche2020Retrieval.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +from datasets import load_dataset + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class NanoTouche2020Retrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NanoTouche2020Retrieval", + description="NanoTouche2020 is a smaller subset of Touché Task 1: Argument Retrieval for Controversial Questions.", + reference="https://webis.de/events/touche-20/shared-task-1.html", + dataset={ + "path": "zeta-alpha-ai/NanoTouche2020", + "revision": "0d2f26ed8c5ad309f95c7f9499c70a40e140fccd", + }, + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2020-09-23", "2020-09-23"), + domains=["Academic"], + task_subtypes=["Question answering"], + license="cc-by-4.0", + annotations_creators="human-annotated", + dialect=[], + sample_creation="found", + bibtex_citation="""@dataset{potthast_2022_6862281, + author = {Potthast, Martin and + Gienapp, Lukas and + Wachsmuth, Henning and + Hagen, Matthias and + Fröbe, Maik and + Bondarenko, Alexander and + Ajjour, Yamen and + Stein, Benno}, + title = {{Touché20-Argument-Retrieval-for-Controversial- + Questions}}, + month = jul, + year = 2022, + publisher = {Zenodo}, + doi = {10.5281/zenodo.6862281}, + url = {https://doi.org/10.5281/zenodo.6862281} +}""", + prompt={ + "query": "Given a question, retrieve detailed and persuasive arguments that answer the question" + }, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + self.corpus = load_dataset( + "zeta-alpha-ai/NanoTouche2020", + "corpus", + revision="0d2f26ed8c5ad309f95c7f9499c70a40e140fccd", + ) + self.queries = load_dataset( + "zeta-alpha-ai/NanoTouche2020", + 
"queries", + revision="0d2f26ed8c5ad309f95c7f9499c70a40e140fccd", + ) + self.relevant_docs = load_dataset( + "zeta-alpha-ai/NanoTouche2020", + "qrels", + revision="0d2f26ed8c5ad309f95c7f9499c70a40e140fccd", + ) + + self.corpus = { + split: { + sample["_id"]: {"_id": sample["_id"], "text": sample["text"]} + for sample in self.corpus[split] + } + for split in self.corpus + } + + self.queries = { + split: {sample["_id"]: sample["text"] for sample in self.queries[split]} + for split in self.queries + } + + self.relevant_docs = { + split: { + sample["query-id"]: {sample["corpus-id"]: 1} + for sample in self.relevant_docs[split] + } + for split in self.relevant_docs + } + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/__init__.py b/mteb/tasks/Retrieval/eng/__init__.py index 47e2498709..9f1717a89f 100644 --- a/mteb/tasks/Retrieval/eng/__init__.py +++ b/mteb/tasks/Retrieval/eng/__init__.py @@ -41,6 +41,19 @@ from .MLQuestions import MLQuestionsRetrieval from .MSMARCORetrieval import MSMARCO, MSMARCOHardNegatives from .MSMARCOv2Retrieval import MSMARCOv2 +from .NanoArguAnaRetrieval import NanoArguAnaRetrieval +from .NanoClimateFeverRetrieval import NanoClimateFeverRetrieval +from .NanoDBPediaRetrieval import NanoDBPediaRetrieval +from .NanoFEVERRetrieval import NanoFEVERRetrieval +from .NanoFiQA2018Retrieval import NanoFiQA2018Retrieval +from .NanoHotpotQARetrieval import NanoHotpotQARetrieval +from .NanoMSMARCORetrieval import NanoMSMARCORetrieval +from .NanoNFCorpusRetrieval import NanoNFCorpusRetrieval +from .NanoNQRetrieval import NanoNQRetrieval +from .NanoQuoraRetrieval import NanoQuoraRetrieval +from .NanoSCIDOCSRetrieval import NanoSCIDOCSRetrieval +from .NanoSciFactRetrieval import NanoSciFactRetrieval +from .NanoTouche2020Retrieval import NanoTouche2020Retrieval from .NarrativeQARetrieval import NarrativeQARetrieval from .NFCorpusRetrieval import NFCorpus from .NQRetrieval import NQ, NQHardNegatives @@ -90,6 +103,7 @@ "LEMBPasskeyRetrieval", 
"CQADupstackAndroidRetrieval", "TempReasonL2Context", + "NanoDBPediaRetrieval", "ARCChallenge", "LegalBenchCorporateLobbying", "SCIDOCS", @@ -104,22 +118,29 @@ "ClimateFEVERHardNegatives", "CQADupstackWordpressRetrieval", "CQADupstackEnglishRetrieval", + "NanoTouche2020Retrieval", "CQADupstackStatsRetrieval", "MLQuestionsRetrieval", "TempReasonL2Fact", + "NanoSciFactRetrieval", "CQADupstackGamingRetrieval", "CQADupstackWebmastersRetrieval", + "NanoFiQA2018Retrieval", "CQADupstackUnixRetrieval", "TempReasonL3Pure", "CQADupstackPhysicsRetrieval", "FiQA2018", "LitSearchRetrieval", + "NanoFEVERRetrieval", + "NanoMSMARCORetrieval", "FeedbackQARetrieval", "HagridRetrieval", + "NanoNFCorpusRetrieval", "FaithDialRetrieval", "SciFact", "CQADupstackMathematicaRetrieval", "RARbMath", + "NanoNQRetrieval", "HellaSwag", "PIQA", "SpartQA", @@ -127,13 +148,18 @@ "TempReasonL1", "HotpotQA", "HotpotQAHardNegatives", + "NanoClimateFeverRetrieval", + "NanoQuoraRetrieval", + "NanoArguAnaRetrieval", "LegalBenchConsumerContractsQA", + "NanoHotpotQARetrieval", "ArguAna", "LEMBWikimQARetrieval", "TempReasonL3Fact", "FEVER", "FEVERHardNegatives", "CQADupstackGisRetrieval", + "NanoSCIDOCSRetrieval", "AILACasedocs", "NFCorpus", "LEMBSummScreenFDRetrieval", diff --git a/mteb/tasks/__init__.py b/mteb/tasks/__init__.py index 6e09541e10..745d4066ae 100644 --- a/mteb/tasks/__init__.py +++ b/mteb/tasks/__init__.py @@ -426,6 +426,7 @@ MindSmallReranking, MIRACLReranking, MMarcoReranking, + NamaaMrTydiReranking, NevIR, RuBQReranking, SciDocsReranking, @@ -547,6 +548,19 @@ MSMARCOPLHardNegatives, MSMARCOv2, MultiLongDocRetrieval, + NanoArguAnaRetrieval, + NanoClimateFeverRetrieval, + NanoDBPediaRetrieval, + NanoFEVERRetrieval, + NanoFiQA2018Retrieval, + NanoHotpotQARetrieval, + NanoMSMARCORetrieval, + NanoNFCorpusRetrieval, + NanoNQRetrieval, + NanoQuoraRetrieval, + NanoSCIDOCSRetrieval, + NanoSciFactRetrieval, + NanoTouche2020Retrieval, NarrativeQARetrieval, NeuCLIR2022Retrieval, 
NeuCLIR2022RetrievalHardNegatives, @@ -1140,6 +1154,7 @@ "LEMBPasskeyRetrieval", "CQADupstackAndroidRetrieval", "TempReasonL2Context", + "NanoDBPediaRetrieval", "ARCChallenge", "LegalBenchCorporateLobbying", "SCIDOCS", @@ -1154,22 +1169,29 @@ "ClimateFEVERHardNegatives", "CQADupstackWordpressRetrieval", "CQADupstackEnglishRetrieval", + "NanoTouche2020Retrieval", "CQADupstackStatsRetrieval", "MLQuestionsRetrieval", "TempReasonL2Fact", + "NanoSciFactRetrieval", "CQADupstackGamingRetrieval", "CQADupstackWebmastersRetrieval", + "NanoFiQA2018Retrieval", "CQADupstackUnixRetrieval", "TempReasonL3Pure", "CQADupstackPhysicsRetrieval", "FiQA2018", "LitSearchRetrieval", + "NanoFEVERRetrieval", + "NanoMSMARCORetrieval", "FeedbackQARetrieval", "HagridRetrieval", + "NanoNFCorpusRetrieval", "FaithDialRetrieval", "SciFact", "CQADupstackMathematicaRetrieval", "RARbMath", + "NanoNQRetrieval", "HellaSwag", "PIQA", "SpartQA", @@ -1177,13 +1199,18 @@ "TempReasonL1", "HotpotQA", "HotpotQAHardNegatives", + "NanoClimateFeverRetrieval", + "NanoQuoraRetrieval", + "NanoArguAnaRetrieval", "LegalBenchConsumerContractsQA", + "NanoHotpotQARetrieval", "ArguAna", "LEMBWikimQARetrieval", "TempReasonL3Fact", "FEVER", "FEVERHardNegatives", "CQADupstackGisRetrieval", + "NanoSCIDOCSRetrieval", "AILACasedocs", "NFCorpus", "LEMBSummScreenFDRetrieval", @@ -1280,6 +1307,7 @@ "CMedQAv2", "MMarcoReranking", "T2Reranking", + "NamaaMrTydiReranking", "AskUbuntuDupQuestions", "WebLINXCandidatesReranking", "StackOverflowDupQuestions", diff --git a/pyproject.toml b/pyproject.toml index 4ab7f1f88e..58d1b4d3e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.23.2" +version = "1.25.3" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ @@ -70,6 +70,7 @@ jina = ["einops>=0.8.0"] flash_attention = ["flash-attn>=2.6.3"] openai = ["openai>=1.41.0", "tiktoken>=0.8.0"] model2vec = ["model2vec>=0.3.0"] 
+pylate = ["pylate>=1.1.4"] [tool.coverage.report] diff --git a/scripts/generate_imports.py b/scripts/generate_imports.py index 469d894441..ae331de9df 100644 --- a/scripts/generate_imports.py +++ b/scripts/generate_imports.py @@ -7,8 +7,7 @@ import types from pathlib import Path -# Adjust this import to the correct location of AbsTask. -from mteb.tasks import AbsTask +from mteb.abstasks import AbsTask BASE_DIR = Path("../mteb/tasks") diff --git a/tests/test_TaskMetadata.py b/tests/test_TaskMetadata.py index f3e6b48260..701abab729 100644 --- a/tests/test_TaskMetadata.py +++ b/tests/test_TaskMetadata.py @@ -523,3 +523,15 @@ def test_empty_descriptive_stat_in_new_datasets(task: AbsTask): task.metadata.descriptive_stats is not None ), f"Dataset {task.metadata.name} should have descriptive stats. You can add metadata to your task by running `YorTask().calculate_metadata_metrics()`" assert task.metadata.n_samples is not None + + +@pytest.mark.parametrize("task", get_tasks()) +def test_eval_langs_correctly_specified(task: AbsTask): + if task.is_multilingual: + assert isinstance( + task.metadata.eval_langs, dict + ), f"{task.metadata.name} should have eval_langs as a dict" + else: + assert isinstance( + task.metadata.eval_langs, list + ), f"{task.metadata.name} should have eval_langs as a list" diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index 7a096828e5..142b4b42ad 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -1447,6 +1447,95 @@ def load_data(self, **kwargs): class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): expected_stats = { + "val": { + "num_samples": 8, + "number_of_characters": 224, + "num_documents": 4, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, + "unique_documents": 4, + "num_queries": 4, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 4, + 
"none_queries": 0, + "num_relevant_docs": 8, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "num_top_ranked": None, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, + "hf_subset_descriptive_stats": { + "eng": { + "num_samples": 4, + "number_of_characters": 112, + "num_documents": 2, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, + "unique_documents": 2, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "none_queries": 0, + "num_relevant_docs": 4, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "num_top_ranked": None, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, + }, + "fra": { + "num_samples": 4, + "number_of_characters": 112, + "num_documents": 2, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, + "unique_documents": 2, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "none_queries": 0, + "num_relevant_docs": 4, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": 
None, + "unique_instructions": None, + "num_top_ranked": None, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, + }, + }, + }, "test": { "num_samples": 8, "number_of_characters": 224, @@ -1535,14 +1624,14 @@ class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): "max_top_ranked_per_query": None, }, }, - } + }, } metadata = TaskMetadata( type="Retrieval", name="MockMultilingualRetrievalTask", main_score="ndcg_at_10", - **general_args, # type: ignore + **dict(general_args | {"eval_splits": ["val", "test"]}), # type: ignore ) metadata.eval_langs = multilingual_eval_langs @@ -1551,14 +1640,22 @@ def load_data(self, **kwargs): "test": { "q1": "This is a test sentence", "q2": "This is another test sentence", - } + }, + "val": { + "q1": "This is a test sentence", + "q2": "This is another test sentence", + }, } self.queries = {"eng": queries, "fra": queries} corpus = { "test": { "d1": "This is a positive sentence", "d2": "This is another positive sentence", - } + }, + "val": { + "d1": "This is a positive sentence", + "d2": "This is another positive sentence", + }, } self.corpus = {"eng": corpus, "fra": corpus} @@ -1567,6 +1664,10 @@ def load_data(self, **kwargs): "q1": {"d1": 1, "d2": 0}, "q2": {"d1": 0, "d2": 1}, }, + "val": { + "q1": {"d1": 1, "d2": 0}, + "q2": {"d1": 0, "d2": 1}, + }, } self.relevant_docs = { "eng": relevant_docs, diff --git a/tests/test_evaluation/test_split_evaluation.py b/tests/test_evaluation/test_split_evaluation.py index a2ca249747..c79f25ac42 100644 --- a/tests/test_evaluation/test_split_evaluation.py +++ b/tests/test_evaluation/test_split_evaluation.py @@ -7,6 +7,7 @@ MockSentenceTransformer, ) from tests.test_benchmark.mock_tasks import ( + MockMultilingualRetrievalTask, MockRetrievalTask, ) @@ -21,6 +22,11 @@ def tasks(): return [MockRetrievalTask()] +@pytest.fixture +def multilingual_tasks(): + return [MockMultilingualRetrievalTask()] + + def 
test_all_splits_evaluated(model, tasks, tmp_path): evaluation = MTEB(tasks=tasks) results = evaluation.run( @@ -34,6 +40,7 @@ def test_all_splits_evaluated(model, tasks, tmp_path): last_evaluated_splits = evaluation.get_last_evaluated_splits() assert set(last_evaluated_splits["MockRetrievalTask"]) == {"val", "test"} assert len(last_evaluated_splits["MockRetrievalTask"]) == 2 + assert results[0].scores.keys() == {"val", "test"} def test_one_missing_split(model, tasks, tmp_path): @@ -49,6 +56,7 @@ def test_one_missing_split(model, tasks, tmp_path): last_evaluated_splits = evaluation.get_last_evaluated_splits() assert set(last_evaluated_splits["MockRetrievalTask"]) == {"val"} assert len(last_evaluated_splits["MockRetrievalTask"]) == 1 + assert results[0].scores.keys() == {"val"} results2 = evaluation.run( model, @@ -62,11 +70,12 @@ def test_one_missing_split(model, tasks, tmp_path): last_evaluated_splits = evaluation.get_last_evaluated_splits() assert set(last_evaluated_splits["MockRetrievalTask"]) == {"test"} assert len(last_evaluated_splits["MockRetrievalTask"]) == 1 + assert results2[0].scores.keys() == {"test", "val"} def test_no_missing_splits(model, tasks, tmp_path): evaluation = MTEB(tasks=tasks) - _ = evaluation.run( + results = evaluation.run( model, eval_splits=["val", "test"], output_folder=str(tmp_path / "testcase3"), @@ -76,9 +85,10 @@ def test_no_missing_splits(model, tasks, tmp_path): last_evaluated_splits = evaluation.get_last_evaluated_splits() assert "MockRetrievalTask" in last_evaluated_splits assert len(last_evaluated_splits["MockRetrievalTask"]) == 2 + assert results[0].scores.keys() == {"test", "val"} evaluation = MTEB(tasks=tasks) - _ = evaluation.run( + results = evaluation.run( model, eval_splits=["val", "test"], output_folder=str(tmp_path / "testcase3"), @@ -89,3 +99,209 @@ def test_no_missing_splits(model, tasks, tmp_path): last_evaluated_splits = evaluation.get_last_evaluated_splits() assert "MockRetrievalTask" in last_evaluated_splits 
assert len(last_evaluated_splits["MockRetrievalTask"]) == 0 + assert results[0].scores.keys() == {"test", "val"} + + +def test_all_languages_evaluated(model, multilingual_tasks, tmp_path): + evaluation = MTEB(tasks=multilingual_tasks) + results = evaluation.run( + model, + eval_splits=["test"], + output_folder=str(tmp_path / "all_lang_evaluated"), + verbosity=2, + eval_subsets=None, + ) + assert "MockMultilingualRetrievalTask" == results[0].task_name + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert "MockMultilingualRetrievalTask" in last_evaluated_splits + assert len(last_evaluated_splits["MockMultilingualRetrievalTask"]) == 1 + assert last_evaluated_splits["MockMultilingualRetrievalTask"] == ["test"] + assert results[0].scores.keys() == {"test"} + assert len(results[0].scores["test"]) == 2 + + +def test_missing_language(model, multilingual_tasks, tmp_path): + evaluation = MTEB(tasks=multilingual_tasks) + results = evaluation.run( + model, + eval_splits=["test"], + output_folder=str(tmp_path / "missing_lang_test"), + verbosity=2, + eval_subsets=["eng"], + ) + + assert "MockMultilingualRetrievalTask" == results[0].task_name + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert "MockMultilingualRetrievalTask" in last_evaluated_splits + assert len(last_evaluated_splits["MockMultilingualRetrievalTask"]) == 1 + assert last_evaluated_splits["MockMultilingualRetrievalTask"] == ["test"] + assert results[0].scores.keys() == {"test"} + assert results[0].languages == ["eng"] + + results = evaluation.run( + model, + eval_splits=["test"], + output_folder=str(tmp_path / "missing_lang_test"), + verbosity=2, + eval_subsets=["eng", "fra"], + overwrite_results=True, + ) + + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert len(last_evaluated_splits["MockMultilingualRetrievalTask"]) == 1 + assert last_evaluated_splits["MockMultilingualRetrievalTask"] == ["test"] + assert sorted(results[0].languages) == ["eng", "fra"] 
+ assert results[0].scores.keys() == {"test"} + assert len(results[0].scores["test"]) == 2 + + +def test_no_missing_languages(model, multilingual_tasks, tmp_path): + evaluation = MTEB(tasks=multilingual_tasks) + results = evaluation.run( + model, + eval_splits=["test"], + output_folder=str(tmp_path / "no_missing_lang_test"), + verbosity=2, + eval_subsets=["eng", "fra"], + ) + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert "MockMultilingualRetrievalTask" in last_evaluated_splits + assert len(last_evaluated_splits["MockMultilingualRetrievalTask"]) == 1 + assert results[0].scores.keys() == {"test"} + assert len(results[0].scores["test"]) == 2 + assert sorted(results[0].languages) == ["eng", "fra"] + + evaluation = MTEB(tasks=multilingual_tasks) + results = evaluation.run( + model, + eval_splits=["test"], + output_folder=str(tmp_path / "no_missing_lang_test"), + verbosity=2, + eval_subsets=["eng", "fra"], + overwrite_results=True, + ) + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert "MockMultilingualRetrievalTask" in last_evaluated_splits + assert len(last_evaluated_splits["MockMultilingualRetrievalTask"]) == 0 + assert results[0].scores.keys() == {"test"} + assert len(results[0].scores["test"]) == 2 + assert sorted(results[0].languages) == ["eng", "fra"] + + +def test_partial_languages(model, multilingual_tasks, tmp_path): + evaluation = MTEB(tasks=multilingual_tasks) + results = evaluation.run( + model, + eval_splits=["test"], + output_folder=str(tmp_path / "partial_lang_test"), + verbosity=2, + eval_subsets=["fra"], + ) + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert len(last_evaluated_splits["MockMultilingualRetrievalTask"]) == 1 + assert last_evaluated_splits["MockMultilingualRetrievalTask"] == ["test"] + assert results[0].scores.keys() == {"test"} + assert len(results[0].scores["test"]) == 1 + assert results[0].languages == ["fra"] + + results = evaluation.run( + model, + 
eval_splits=["test"], + output_folder=str(tmp_path / "partial_lang_test"), + verbosity=2, + eval_subsets=["fra", "eng"], + overwrite_results=True, + ) + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert len(last_evaluated_splits["MockMultilingualRetrievalTask"]) == 1 + assert last_evaluated_splits["MockMultilingualRetrievalTask"] == ["test"] + assert results[0].scores.keys() == {"test"} + assert len(results[0].scores["test"]) == 2 + assert sorted(results[0].languages) == ["eng", "fra"] + + +def test_multilingual_one_missing_split_no_missing_lang( + model, multilingual_tasks, tmp_path +): + evaluation = MTEB(tasks=multilingual_tasks) + results = evaluation.run( + model, + eval_splits=["val"], + output_folder=str(tmp_path / "partial_langs_partial_splits"), + verbosity=2, + eval_subsets=["eng", "fra"], + ) + + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert len(last_evaluated_splits["MockMultilingualRetrievalTask"]) == 1 + assert set(last_evaluated_splits["MockMultilingualRetrievalTask"]) == {"val"} + assert sorted(results[0].languages) == ["eng", "fra"] + assert results[0].scores.keys() == {"val"} + assert len(results[0].scores["val"]) == 2 + + results = evaluation.run( + model, + eval_splits=["val", "test"], + output_folder=str(tmp_path / "partial_langs_partial_splits"), + verbosity=2, + eval_subsets=["eng", "fra"], + overwrite_results=True, + ) + + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert set(last_evaluated_splits["MockMultilingualRetrievalTask"]) == {"test"} + assert sorted(results[0].languages) == ["eng", "fra"] + assert results[0].scores.keys() == {"test", "val"} + assert len(results[0].scores["test"]) == 2 + assert len(results[0].scores["val"]) == 2 + + +def test_multilingual_one_missing_lang_in_one_split( + model, multilingual_tasks, tmp_path +): + evaluation = MTEB(tasks=multilingual_tasks) + results = evaluation.run( + model, + eval_splits=["val"], + output_folder=str(tmp_path / 
"one_lang_one_split"), + verbosity=2, + eval_subsets=["eng", "fra"], + ) + + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert set(last_evaluated_splits["MockMultilingualRetrievalTask"]) == {"val"} + assert sorted(results[0].languages) == ["eng", "fra"] + assert results[0].scores.keys() == {"val"} + assert len(results[0].scores["val"]) == 2 + + results = evaluation.run( + model, + eval_splits=["val", "test"], + output_folder=str(tmp_path / "one_lang_one_split"), + verbosity=2, + eval_subsets=["eng"], + overwrite_results=True, + ) + + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert set(last_evaluated_splits["MockMultilingualRetrievalTask"]) == {"test"} + assert sorted(results[0].languages) == ["eng", "fra"] + assert results[0].scores.keys() == {"test", "val"} + assert len(results[0].scores["test"]) == 1 + assert len(results[0].scores["val"]) == 2 + + results = evaluation.run( + model, + eval_splits=["test"], + output_folder=str(tmp_path / "one_lang_one_split"), + verbosity=2, + eval_subsets=["eng", "fra"], + overwrite_results=True, + ) + + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert set(last_evaluated_splits["MockMultilingualRetrievalTask"]) == {"test"} + assert sorted(results[0].languages) == ["eng", "fra"] + # output merged result with previous results + assert results[0].scores.keys() == {"test", "val"} + assert len(results[0].scores["test"]) == 2 From 71c46ea2a9a5b9753d5ba822f1b660301ae26d00 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Mon, 23 Dec 2024 03:17:13 +0500 Subject: [PATCH 24/40] fix: [V2] Update datasets wich can't be loaded with `datasets>=3.0` (#1619) * reupload datasets * fix loader * remove commented code * lint * update pyproject dependencies --- .../ara/HotelReviewSentimentClassification.py | 10 +--- .../ara/TweetEmotionClassification.py | 11 +--- .../deu/TenKGnadClassification.py | 5 +- .../Classification/eng/ArxivClassification.py | 5 +- 
.../eng/PatentClassification.py | 10 +--- .../fil/FilipinoHateSpeechClassification.py | 10 +--- mteb/tasks/Classification/mya/MyanmarNews.py | 11 +--- .../DutchBookReviewSentimentClassification.py | 5 +- .../swe/SwedishSentimentClassification.py | 10 +--- .../tha/WisesightSentimentClassification.py | 16 +----- .../urd/UrduRomanSentimentClassification.py | 13 +---- .../multilingual/IndicQARetrieval.py | 53 ++----------------- mteb/tasks/STS/jpn/JSTS.py | 9 +--- pyproject.toml | 2 +- 14 files changed, 28 insertions(+), 142 deletions(-) diff --git a/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py b/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py index 24b7bc33fc..bb6ad6aa18 100644 --- a/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py +++ b/mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py @@ -8,9 +8,8 @@ class HotelReviewSentimentClassification(AbsTaskClassification): metadata = TaskMetadata( name="HotelReviewSentimentClassification", dataset={ - "path": "Elnagara/hard", - "revision": "b108d2c32ee4e1f4176ea233e1a5ac17bceb9ef9", - "trust_remote_code": True, + "path": "mteb/HotelReviewSentimentClassification", + "revision": "273d5105974460d3979149e29e88c06a8214c541", }, description="HARD is a dataset of Arabic hotel reviews collected from the Booking.com website.", reference="https://link.springer.com/chapter/10.1007/978-3-319-67056-0_3", @@ -38,8 +37,3 @@ class HotelReviewSentimentClassification(AbsTaskClassification): } """, ) - - def dataset_transform(self): - self.dataset = self.stratified_subsampling( - self.dataset, seed=self.seed, splits=["train"] - ) diff --git a/mteb/tasks/Classification/ara/TweetEmotionClassification.py b/mteb/tasks/Classification/ara/TweetEmotionClassification.py index e7fb8687ac..d79956731c 100644 --- a/mteb/tasks/Classification/ara/TweetEmotionClassification.py +++ b/mteb/tasks/Classification/ara/TweetEmotionClassification.py @@ -8,9 +8,8 @@ class 
TweetEmotionClassification(AbsTaskClassification): metadata = TaskMetadata( name="TweetEmotionClassification", dataset={ - "path": "emotone-ar-cicling2017/emotone_ar", - "revision": "0ded8ff72cc68cbb7bb5c01b0a9157982b73ddaf", - "trust_remote_code": True, + "path": "mteb/TweetEmotionClassification", + "revision": "0d803980e91953cc67c21429f74b301b7b1b3f08", }, description="A dataset of 10,000 tweets that was created with the aim of covering the most frequently used emotion categories in Arabic tweets.", reference="https://link.springer.com/chapter/10.1007/978-3-319-77116-8_8", @@ -38,9 +37,3 @@ class TweetEmotionClassification(AbsTaskClassification): } """, ) - - def dataset_transform(self): - self.dataset = self.dataset.rename_column("tweet", "text") - self.dataset = self.stratified_subsampling( - self.dataset, seed=self.seed, splits=["train"] - ) diff --git a/mteb/tasks/Classification/deu/TenKGnadClassification.py b/mteb/tasks/Classification/deu/TenKGnadClassification.py index 592d66c983..f9bde63b6e 100644 --- a/mteb/tasks/Classification/deu/TenKGnadClassification.py +++ b/mteb/tasks/Classification/deu/TenKGnadClassification.py @@ -10,9 +10,8 @@ class TenKGnadClassification(AbsTaskClassification): description="10k German News Articles Dataset (10kGNAD) contains news articles from the online Austrian newspaper website DER Standard with their topic classification (9 classes).", reference="https://tblock.github.io/10kGNAD/", dataset={ - "path": "community-datasets/gnad10", - "revision": "0798affe9b3f88cfda4267b6fbc50fac67046ee5", - "trust_remote_code": True, + "path": "mteb/TenKGnadClassification", + "revision": "ae9862bbcddc27b4bd93e2a7b463b7b5d05c6c55", }, type="Classification", category="p2p", diff --git a/mteb/tasks/Classification/eng/ArxivClassification.py b/mteb/tasks/Classification/eng/ArxivClassification.py index 92bd473a74..d24b0b2a0c 100644 --- a/mteb/tasks/Classification/eng/ArxivClassification.py +++ b/mteb/tasks/Classification/eng/ArxivClassification.py 
@@ -9,9 +9,8 @@ class ArxivClassification(AbsTaskClassification): name="ArxivClassification", description="Classification Dataset of Arxiv Papers", dataset={ - "path": "ccdv/arxiv-classification", - "revision": "f9bd92144ed76200d6eb3ce73a8bd4eba9ffdc85", - "trust_remote_code": True, + "path": "mteb/ArxivClassification", + "revision": "5e80893bf045abefbf8cbe5d713bddc91ae158d5", }, reference="https://ieeexplore.ieee.org/document/8675939", type="Classification", diff --git a/mteb/tasks/Classification/eng/PatentClassification.py b/mteb/tasks/Classification/eng/PatentClassification.py index 9f10a8a794..f8cd3b49af 100644 --- a/mteb/tasks/Classification/eng/PatentClassification.py +++ b/mteb/tasks/Classification/eng/PatentClassification.py @@ -9,9 +9,8 @@ class PatentClassification(AbsTaskClassification): name="PatentClassification", description="Classification Dataset of Patents and Abstract", dataset={ - "path": "ccdv/patent-classification", - "revision": "2f38a1dfdecfacee0184d74eaeafd3c0fb49d2a6", - "trust_remote_code": True, + "path": "mteb/PatentClassification", + "revision": "6bd77eb030ab3bfbf1e6f7a2b069979daf167311", }, reference="https://aclanthology.org/P19-1212.pdf", type="Classification", @@ -46,8 +45,3 @@ class PatentClassification(AbsTaskClassification): abstract = "Most existing text summarization datasets are compiled from the news domain, where summaries have a flattened discourse structure. In such datasets, summary-worthy content often appears in the beginning of input articles. Moreover, large segments from input articles are present verbatim in their respective summaries. These issues impede the learning and evaluation of systems that can understand an article{'}s global content structure as well as produce abstractive summaries with high compression ratio. In this work, we present a novel dataset, BIGPATENT, consisting of 1.3 million records of U.S. patent documents along with human written abstractive summaries. 
Compared to existing summarization datasets, BIGPATENT has the following properties: i) summaries contain a richer discourse structure with more recurring entities, ii) salient content is evenly distributed in the input, and iii) lesser and shorter extractive fragments are present in the summaries. Finally, we train and evaluate baselines and popular learning models on BIGPATENT to shed light on new challenges and motivate future directions for summarization research.", }""", ) - - def dataset_transform(self): - self.dataset = self.stratified_subsampling( - self.dataset, seed=self.seed, splits=["test"] - ) diff --git a/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py b/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py index df6205d427..f5e8c1d66f 100644 --- a/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py +++ b/mteb/tasks/Classification/fil/FilipinoHateSpeechClassification.py @@ -12,9 +12,8 @@ class FilipinoHateSpeechClassification(AbsTaskClassification): description="Filipino Twitter dataset for sentiment classification.", reference="https://pcj.csp.org.ph/index.php/pcj/issue/download/29/PCJ%20V14%20N1%20pp1-14%202019", dataset={ - "path": "jcblaise/hatespeech_filipino", - "revision": "b01711587b073e55569de75ef04d7da4592a3618", - "trust_remote_code": True, + "path": "mteb/FilipinoHateSpeechClassification", + "revision": "087a17c0b7f9a78901c88aea00ad2892a319fdac", }, type="Classification", category="s2s", @@ -41,8 +40,3 @@ class FilipinoHateSpeechClassification(AbsTaskClassification): } """, ) - - def dataset_transform(self): - self.dataset = self.stratified_subsampling( - self.dataset, seed=self.seed, splits=["validation", "test"] - ) diff --git a/mteb/tasks/Classification/mya/MyanmarNews.py b/mteb/tasks/Classification/mya/MyanmarNews.py index 8418e20533..2248dd0032 100644 --- a/mteb/tasks/Classification/mya/MyanmarNews.py +++ b/mteb/tasks/Classification/mya/MyanmarNews.py @@ -8,9 +8,8 @@ class 
MyanmarNews(AbsTaskClassification): metadata = TaskMetadata( name="MyanmarNews", dataset={ - "path": "ayehninnkhine/myanmar_news", - "revision": "b899ec06227db3679b0fe3c4188a6b48cc0b65eb", - "trust_remote_code": True, + "path": "mteb/MyanmarNews", + "revision": "644419f24bc820bbf8af24e0b4714a069812e0a3", }, description="The Myanmar News dataset on Hugging Face contains news articles in Burmese. It is designed for tasks such as text classification, sentiment analysis, and language modeling. The dataset includes a variety of news topics in 4 categorie, providing a rich resource for natural language processing applications involving Burmese which is a low resource language.", reference="https://huggingface.co/datasets/myanmar_news", @@ -37,9 +36,3 @@ class MyanmarNews(AbsTaskClassification): pages = {401--408} }""", ) - - def dataset_transform(self): - self.dataset = self.dataset.rename_columns({"category": "label"}) - self.dataset = self.stratified_subsampling( - self.dataset, seed=self.seed, splits=["train"] - ) diff --git a/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py b/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py index f0ee1b07dc..9c85aca4df 100644 --- a/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py +++ b/mteb/tasks/Classification/nld/DutchBookReviewSentimentClassification.py @@ -10,9 +10,8 @@ class DutchBookReviewSentimentClassification(AbsTaskClassification): description="A Dutch book review for sentiment classification.", reference="https://github.com/benjaminvdb/DBRD", dataset={ - "path": "benjaminvdb/dbrd", - "revision": "3f756ab4572e071eb53e887ab629f19fa747d39e", - "trust_remote_code": True, + "path": "mteb/DutchBookReviewSentimentClassification", + "revision": "1c2815ad38cf4794eb8d678fb08f569ea79392f6", }, type="Classification", category="s2s", diff --git a/mteb/tasks/Classification/swe/SwedishSentimentClassification.py 
b/mteb/tasks/Classification/swe/SwedishSentimentClassification.py index 4c0fdc16cb..149be829fc 100644 --- a/mteb/tasks/Classification/swe/SwedishSentimentClassification.py +++ b/mteb/tasks/Classification/swe/SwedishSentimentClassification.py @@ -10,9 +10,8 @@ class SwedishSentimentClassification(AbsTaskClassification): description="Dataset of Swedish reviews scarped from various public available websites", reference="https://huggingface.co/datasets/swedish_reviews", dataset={ - "path": "timpal0l/swedish_reviews", - "revision": "105ba6b3cb99b9fd64880215be469d60ebf44a1b", - "trust_remote_code": True, + "path": "mteb/SwedishSentimentClassification", + "revision": "39e35f55d58338ebd602f8d83b52cfe027f5146a", }, type="Classification", category="s2s", @@ -29,8 +28,3 @@ class SwedishSentimentClassification(AbsTaskClassification): sample_creation="found", bibtex_citation="", ) - - def dataset_transform(self): - self.dataset = self.stratified_subsampling( - self.dataset, seed=self.seed, splits=["validation", "test"] - ) diff --git a/mteb/tasks/Classification/tha/WisesightSentimentClassification.py b/mteb/tasks/Classification/tha/WisesightSentimentClassification.py index 3a76003d5b..1a142529c7 100644 --- a/mteb/tasks/Classification/tha/WisesightSentimentClassification.py +++ b/mteb/tasks/Classification/tha/WisesightSentimentClassification.py @@ -10,9 +10,8 @@ class WisesightSentimentClassification(AbsTaskClassification): description="Wisesight Sentiment Corpus: Social media messages in Thai language with sentiment label (positive, neutral, negative, question)", reference="https://github.com/PyThaiNLP/wisesight-sentiment", dataset={ - "path": "pythainlp/wisesight_sentiment", - "revision": "14aa5773afa135ba835cc5179bbc4a63657a42ae", - "trust_remote_code": True, + "path": "mteb/WisesightSentimentClassification", + "revision": "727ea9bd253f9eedf16aebec6ac3f07791fb3db2", }, type="Classification", category="s2s", @@ -43,14 +42,3 @@ class 
WisesightSentimentClassification(AbsTaskClassification): """, ) - - def dataset_transform(self): - for split in self.dataset.keys(): - self.dataset[split] = self.dataset[split].rename_column("texts", "text") - self.dataset[split] = self.dataset[split].rename_column("category", "label") - - self.dataset = self.stratified_subsampling( - self.dataset, - seed=self.seed, - splits=["test"], - ) diff --git a/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py b/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py index 62440ef9c2..eb66927269 100644 --- a/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py +++ b/mteb/tasks/Classification/urd/UrduRomanSentimentClassification.py @@ -10,9 +10,8 @@ class UrduRomanSentimentClassification(AbsTaskClassification): description="The Roman Urdu dataset is a data corpus comprising of more than 20000 records tagged for sentiment (Positive, Negative, Neutral)", reference="https://archive.ics.uci.edu/dataset/458/roman+urdu+data+set", dataset={ - "path": "community-datasets/roman_urdu", - "revision": "566be6449bb30b9b9f2b59173391647fe0ca3224", - "trust_remote_code": True, + "path": "mteb/UrduRomanSentimentClassification", + "revision": "905c1121c002c4b9adc4ebc5faaf4d6f50d1b1ee", }, type="Classification", category="s2s", @@ -37,11 +36,3 @@ class UrduRomanSentimentClassification(AbsTaskClassification): } """, ) - - def dataset_transform(self): - self.dataset = self.dataset.rename_columns( - {"sentence": "text", "sentiment": "label"} - ) - self.dataset = self.stratified_subsampling( - self.dataset, seed=self.seed, splits=["train"] - ) diff --git a/mteb/tasks/Retrieval/multilingual/IndicQARetrieval.py b/mteb/tasks/Retrieval/multilingual/IndicQARetrieval.py index 62a166f89c..c0e2ef4cc3 100644 --- a/mteb/tasks/Retrieval/multilingual/IndicQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/IndicQARetrieval.py @@ -1,9 +1,5 @@ from __future__ import annotations -from hashlib import sha256 - -import 
datasets - from mteb.abstasks.MultilingualTask import MultilingualTask from mteb.abstasks.TaskMetadata import TaskMetadata @@ -24,13 +20,12 @@ } -class IndicQARetrieval(MultilingualTask, AbsTaskRetrieval): +class IndicQARetrieval(AbsTaskRetrieval, MultilingualTask): metadata = TaskMetadata( name="IndicQARetrieval", dataset={ - "path": "ai4bharat/IndicQA", - "revision": "570d90ae4f7b64fe4fdd5f42fc9f9279b8c9fd9d", - "trust_remote_code": True, + "path": "mteb/IndicQARetrieval", + "revision": "51e8b328988795d658f6f34acd34044e9346e2ee", }, description="IndicQA is a manually curated cloze-style reading comprehension dataset that can be used for evaluating question-answering models in 11 Indic languages. It is repurposed retrieving relevant context for each question.", reference="https://arxiv.org/abs/2212.05409", @@ -55,45 +50,3 @@ class IndicQARetrieval(MultilingualTask, AbsTaskRetrieval): doi = {10.18653/v1/2023.acl-long.693} }""", ) - - def load_data(self, **kwargs): - if self.data_loaded: - return - - split = "test" - queries = {lang: {split: {}} for lang in self.hf_subsets} - corpus = {lang: {split: {}} for lang in self.hf_subsets} - relevant_docs = {lang: {split: {}} for lang in self.hf_subsets} - - for lang in self.hf_subsets: - data = datasets.load_dataset( - name=f"indicqa.{lang}", **self.metadata_dict["dataset"] - )[split] - data = data.filter(lambda x: x["answers"]["text"] != "") - - question_ids = { - question: sha256(question.encode("utf-8")).hexdigest() - for question in set(data["question"]) - } - context_ids = { - context: sha256(context.encode("utf-8")).hexdigest() - for context in set(data["context"]) - } - - for row in data: - question = row["question"] - context = row["context"] - query_id = question_ids[question] - queries[lang][split][query_id] = question - - doc_id = context_ids[context] - corpus[lang][split][doc_id] = {"text": context} - if query_id not in relevant_docs[lang][split]: - relevant_docs[lang][split][query_id] = {} - 
relevant_docs[lang][split][query_id][doc_id] = 1 - - self.corpus = datasets.DatasetDict(corpus) - self.queries = datasets.DatasetDict(queries) - self.relevant_docs = datasets.DatasetDict(relevant_docs) - - self.data_loaded = True diff --git a/mteb/tasks/STS/jpn/JSTS.py b/mteb/tasks/STS/jpn/JSTS.py index 4993359190..bdd031c865 100644 --- a/mteb/tasks/STS/jpn/JSTS.py +++ b/mteb/tasks/STS/jpn/JSTS.py @@ -9,10 +9,8 @@ class JSTS(AbsTaskSTS): metadata = TaskMetadata( name="JSTS", dataset={ - "path": "shunk031/JGLUE", - "revision": "50e79c314a7603ebc92236b66a0973d51a00ed8c", - "name": "JSTS", - "trust_remote_code": True, + "path": "mteb/JSTS", + "revision": "5bac629e25799df4c9c80a6a5db983d6cba9e77d", }, description="Japanese Semantic Textual Similarity Benchmark dataset construct from YJ Image Captions Dataset " + "(Miyazaki and Shimizu, 2016) and annotated by crowdsource annotators.", @@ -65,6 +63,3 @@ def metadata_dict(self) -> dict[str, str]: metadata_dict["min_score"] = 0 metadata_dict["max_score"] = 5 return metadata_dict - - def dataset_transform(self) -> None: - self.dataset = self.dataset.rename_column("label", "score") diff --git a/pyproject.toml b/pyproject.toml index 58d1b4d3e4..f67d296ae6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ ] requires-python = ">=3.9" dependencies = [ - "datasets>=2.19.0,<3.0.0", + "datasets>=2.19.0", "numpy>=1.0.0,<3.0.0", "requests>=2.26.0", "scikit_learn>=1.0.2", From 2519c7a89babcc6a13b147dfb46426184e9d09e1 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Sat, 4 Jan 2025 19:35:45 +0300 Subject: [PATCH 25/40] update nanobenchmark stat --- .../Retrieval/NanoClimateFeverRetrieval.json | 8 ++++---- .../Retrieval/NanoDBPediaRetrieval.json | 8 ++++---- .../Retrieval/NanoFEVERRetrieval.json | 8 ++++---- .../Retrieval/NanoFiQA2018Retrieval.json | 8 ++++---- .../Retrieval/NanoHotpotQARetrieval.json | 10 +++++----- .../Retrieval/NanoNFCorpusRetrieval.json 
| 8 ++++---- mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json | 8 ++++---- .../Retrieval/NanoQuoraRetrieval.json | 8 ++++---- .../Retrieval/NanoSCIDOCSRetrieval.json | 10 +++++----- .../Retrieval/NanoSciFactRetrieval.json | 8 ++++---- .../Retrieval/NanoTouche2020Retrieval.json | 10 +++++----- 11 files changed, 47 insertions(+), 47 deletions(-) diff --git a/mteb/descriptive_stats/Retrieval/NanoClimateFeverRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoClimateFeverRetrieval.json index 5a408ec517..2e104199cd 100644 --- a/mteb/descriptive_stats/Retrieval/NanoClimateFeverRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoClimateFeverRetrieval.json @@ -13,11 +13,11 @@ "max_query_length": 265, "unique_queries": 50, "none_queries": 0, - "num_relevant_docs": 50, + "num_relevant_docs": 148, "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 38, + "average_relevant_docs_per_query": 2.96, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 115, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, diff --git a/mteb/descriptive_stats/Retrieval/NanoDBPediaRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoDBPediaRetrieval.json index cd6f035639..7f2ad604e2 100644 --- a/mteb/descriptive_stats/Retrieval/NanoDBPediaRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoDBPediaRetrieval.json @@ -13,11 +13,11 @@ "max_query_length": 63, "unique_queries": 50, "none_queries": 0, - "num_relevant_docs": 50, + "num_relevant_docs": 1158, "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 50, + "average_relevant_docs_per_query": 23.16, + "max_relevant_docs_per_query": 81, + "unique_relevant_docs": 1146, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, diff --git 
a/mteb/descriptive_stats/Retrieval/NanoFEVERRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoFEVERRetrieval.json index eb7f3d6e95..c05328b903 100644 --- a/mteb/descriptive_stats/Retrieval/NanoFEVERRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoFEVERRetrieval.json @@ -13,11 +13,11 @@ "max_query_length": 83, "unique_queries": 50, "none_queries": 0, - "num_relevant_docs": 50, + "num_relevant_docs": 57, "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 50, + "average_relevant_docs_per_query": 1.14, + "max_relevant_docs_per_query": 3, + "unique_relevant_docs": 57, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, diff --git a/mteb/descriptive_stats/Retrieval/NanoFiQA2018Retrieval.json b/mteb/descriptive_stats/Retrieval/NanoFiQA2018Retrieval.json index 92c11900a9..26c8ca1d25 100644 --- a/mteb/descriptive_stats/Retrieval/NanoFiQA2018Retrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoFiQA2018Retrieval.json @@ -13,11 +13,11 @@ "max_query_length": 97, "unique_queries": 50, "none_queries": 0, - "num_relevant_docs": 50, + "num_relevant_docs": 123, "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 50, + "average_relevant_docs_per_query": 2.46, + "max_relevant_docs_per_query": 15, + "unique_relevant_docs": 123, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, diff --git a/mteb/descriptive_stats/Retrieval/NanoHotpotQARetrieval.json b/mteb/descriptive_stats/Retrieval/NanoHotpotQARetrieval.json index ec35252f78..8eefbe3e1b 100644 --- a/mteb/descriptive_stats/Retrieval/NanoHotpotQARetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoHotpotQARetrieval.json @@ -13,11 +13,11 @@ "max_query_length": 184, "unique_queries": 50, "none_queries": 0, - "num_relevant_docs": 50, - 
"min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 50, + "num_relevant_docs": 100, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 100, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, diff --git a/mteb/descriptive_stats/Retrieval/NanoNFCorpusRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoNFCorpusRetrieval.json index b6e2e3fb1a..78cf87b0ea 100644 --- a/mteb/descriptive_stats/Retrieval/NanoNFCorpusRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoNFCorpusRetrieval.json @@ -13,11 +13,11 @@ "max_query_length": 53, "unique_queries": 50, "none_queries": 0, - "num_relevant_docs": 50, + "num_relevant_docs": 2518, "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 42, + "average_relevant_docs_per_query": 50.36, + "max_relevant_docs_per_query": 463, + "unique_relevant_docs": 1627, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, diff --git a/mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json index 254a0ac40f..2e8f469cc8 100644 --- a/mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json @@ -13,11 +13,11 @@ "max_query_length": 83, "unique_queries": 50, "none_queries": 0, - "num_relevant_docs": 50, + "num_relevant_docs": 57, "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 50, + "average_relevant_docs_per_query": 1.14, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 57, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, diff --git 
a/mteb/descriptive_stats/Retrieval/NanoQuoraRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoQuoraRetrieval.json index 540b0fd3aa..2940b34287 100644 --- a/mteb/descriptive_stats/Retrieval/NanoQuoraRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoQuoraRetrieval.json @@ -13,11 +13,11 @@ "max_query_length": 139, "unique_queries": 50, "none_queries": 0, - "num_relevant_docs": 50, + "num_relevant_docs": 70, "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 50, + "average_relevant_docs_per_query": 1.4, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 70, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, diff --git a/mteb/descriptive_stats/Retrieval/NanoSCIDOCSRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoSCIDOCSRetrieval.json index 78e927e208..813e6f6d90 100644 --- a/mteb/descriptive_stats/Retrieval/NanoSCIDOCSRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoSCIDOCSRetrieval.json @@ -13,11 +13,11 @@ "max_query_length": 143, "unique_queries": 50, "none_queries": 0, - "num_relevant_docs": 50, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 50, + "num_relevant_docs": 244, + "min_relevant_docs_per_query": 3, + "average_relevant_docs_per_query": 4.88, + "max_relevant_docs_per_query": 5, + "unique_relevant_docs": 236, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, diff --git a/mteb/descriptive_stats/Retrieval/NanoSciFactRetrieval.json b/mteb/descriptive_stats/Retrieval/NanoSciFactRetrieval.json index 00e8cb4be1..5e0355f4ec 100644 --- a/mteb/descriptive_stats/Retrieval/NanoSciFactRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoSciFactRetrieval.json @@ -13,11 +13,11 @@ "max_query_length": 200, "unique_queries": 50, "none_queries": 0, - "num_relevant_docs": 50, 
+ "num_relevant_docs": 56, "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 50, + "average_relevant_docs_per_query": 1.12, + "max_relevant_docs_per_query": 4, + "unique_relevant_docs": 55, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, diff --git a/mteb/descriptive_stats/Retrieval/NanoTouche2020Retrieval.json b/mteb/descriptive_stats/Retrieval/NanoTouche2020Retrieval.json index de076dae57..ae280045e5 100644 --- a/mteb/descriptive_stats/Retrieval/NanoTouche2020Retrieval.json +++ b/mteb/descriptive_stats/Retrieval/NanoTouche2020Retrieval.json @@ -13,11 +13,11 @@ "max_query_length": 83, "unique_queries": 49, "none_queries": 0, - "num_relevant_docs": 49, - "min_relevant_docs_per_query": 1, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 1, - "unique_relevant_docs": 49, + "num_relevant_docs": 932, + "min_relevant_docs_per_query": 6, + "average_relevant_docs_per_query": 19.020408163265305, + "max_relevant_docs_per_query": 32, + "unique_relevant_docs": 920, "num_instructions": null, "min_instruction_length": null, "average_instruction_length": null, From 9bc4a1a3629ac76c59427db72c5582fc5b6ea260 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Wed, 8 Jan 2025 11:13:08 +0300 Subject: [PATCH 26/40] [v2] Remove metadata dict (#1719) * remove metadata dict * lint * fix n_experiments --- mteb/abstasks/AbsTask.py | 4 +- mteb/abstasks/AbsTaskClassification.py | 7 +-- mteb/abstasks/AbsTaskClustering.py | 2 +- mteb/abstasks/AbsTaskClusteringFast.py | 4 +- .../AbsTaskMultilabelClassification.py | 2 +- mteb/abstasks/AbsTaskPairClassification.py | 2 +- mteb/abstasks/AbsTaskReranking.py | 6 +- mteb/abstasks/AbsTaskSTS.py | 12 +--- mteb/abstasks/AbsTaskSummarization.py | 13 ++--- mteb/evaluation/MTEB.py | 33 +++++------ .../kat/TbilisiCityHallBitextMining.py | 4 +- .../multilingual/BibleNLPBitextMining.py | 2 +- 
.../multilingual/DiaBLaBitextMining.py | 2 +- .../multilingual/FloresBitextMining.py | 2 +- .../multilingual/IN22ConvBitextMining.py | 2 +- .../multilingual/IN22GenBitextMining.py | 2 +- .../IndicGenBenchFloresBitextMining.py | 2 +- .../multilingual/RomaTalesBitextMining.py | 2 +- .../BitextMining/srn/SRNCorpusBitextMining.py | 2 +- .../eng/LegalBenchClassification.py | 4 +- ...esianMongabayConservationClassification.py | 2 +- .../multilingual/AfriSentiClassification.py | 2 +- .../multilingual/IndicLangClassification.py | 4 +- .../Classification/multilingual/NaijaSenti.py | 2 +- .../multilingual/TurkicClassification.py | 3 +- .../Classification/zho/CMTEBClassification.py | 7 +-- .../multilingual/MLSUMClusteringP2P.py | 4 +- .../multilingual/MLSUMClusteringS2S.py | 4 +- .../multilingual/MasakhaNEWSClusteringP2P.py | 2 +- .../multilingual/MasakhaNEWSClusteringS2S.py | 2 +- mteb/tasks/Clustering/nob/snl_clustering.py | 2 +- mteb/tasks/Clustering/nob/vg_clustering.py | 2 +- .../multilingual/mFollowIR.py | 12 ++-- .../PairClassification/eng/LegalBenchPC.py | 4 +- mteb/tasks/PairClassification/fas/FarsTail.py | 4 +- .../multilingual/OpusparcusPC.py | 2 +- .../multilingual/XStance.py | 6 +- mteb/tasks/Reranking/fra/AlloprofReranking.py | 4 +- mteb/tasks/Reranking/fra/SyntecReranking.py | 4 +- .../Reranking/multilingual/MIRACLReranking.py | 6 +- .../Retrieval/ara/SadeemQuestionRetrieval.py | 6 +- .../code/COIRCodeSearchNetRetrieval.py | 6 +- .../Retrieval/code/CodeEditSearchRetrieval.py | 2 +- .../code/CodeSearchNetCCRetrieval.py | 6 +- .../Retrieval/code/CodeSearchNetRetrieval.py | 2 +- mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py | 6 +- .../tasks/Retrieval/deu/GermanDPRRetrieval.py | 2 +- .../deu/GermanGovServiceRetrieval.py | 4 +- .../Retrieval/deu/GermanQuADRetrieval.py | 2 +- mteb/tasks/Retrieval/ell/GreekCivicsQA.py | 4 +- mteb/tasks/Retrieval/eng/BrightRetrieval.py | 6 +- mteb/tasks/Retrieval/eng/HagridRetrieval.py | 2 +- 
.../Retrieval/eng/LEMBNarrativeQARetrieval.py | 6 +- .../Retrieval/eng/LEMBNeedleRetrieval.py | 6 +- .../Retrieval/eng/LEMBPasskeyRetrieval.py | 6 +- .../tasks/Retrieval/eng/LEMBQMSumRetrieval.py | 6 +- .../eng/LEMBSummScreenFDRetrieval.py | 6 +- .../Retrieval/eng/LEMBWikimQARetrieval.py | 6 +- .../tasks/Retrieval/eng/LitSearchRetrieval.py | 2 +- mteb/tasks/Retrieval/eng/MLQuestions.py | 6 +- .../Retrieval/eng/NarrativeQARetrieval.py | 2 +- mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py | 6 +- mteb/tasks/Retrieval/fra/AlloprofRetrieval.py | 6 +- mteb/tasks/Retrieval/fra/BSARDRetrieval.py | 4 +- mteb/tasks/Retrieval/fra/FQuADRetrieval.py | 8 +-- mteb/tasks/Retrieval/fra/SyntecRetrieval.py | 6 +- .../tasks/Retrieval/jpn/JaGovFaqsRetrieval.py | 4 +- mteb/tasks/Retrieval/jpn/JaQuADRetrieval.py | 4 +- .../jpn/NLPJournalAbsIntroRetrieval.py | 4 +- .../jpn/NLPJournalTitleAbsRetrieval.py | 4 +- .../jpn/NLPJournalTitleIntroRetrieval.py | 4 +- .../Retrieval/kat/GeorgianFAQRetrieval.py | 4 +- .../Retrieval/multilingual/CUREv1Retrieval.py | 12 ++-- ...CrossLingualSemanticDiscriminationWMT19.py | 2 +- ...CrossLingualSemanticDiscriminationWMT21.py | 2 +- .../Retrieval/multilingual/MIRACLRetrieval.py | 12 ++-- .../Retrieval/multilingual/MLQARetrieval.py | 2 +- .../multilingual/MintakaRetrieval.py | 8 +-- .../Retrieval/multilingual/MrTidyRetrieval.py | 6 +- .../multilingual/MultiLongDocRetrieval.py | 6 +- .../multilingual/NeuCLIR2022Retrieval.py | 12 ++-- .../multilingual/NeuCLIR2023Retrieval.py | 12 ++-- .../multilingual/PublicHealthQARetrieval.py | 6 +- .../StatcanDialogueDatasetRetrieval.py | 6 +- .../multilingual/XMarketRetrieval.py | 6 +- .../Retrieval/multilingual/XPQARetrieval.py | 6 +- .../Retrieval/multilingual/XQuADRetrieval.py | 6 +- .../tasks/Retrieval/slk/SlovakSumRetrieval.py | 4 +- .../spa/SpanishPassageRetrievalS2P.py | 6 +- .../spa/SpanishPassageRetrievalS2S.py | 6 +- mteb/tasks/Retrieval/tur/TurHistQuad.py | 4 +- mteb/tasks/Retrieval/vie/VieQuADRetrieval.py | 4 +- 
mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py | 10 +--- mteb/tasks/STS/eng/BiossesSTS.py | 8 +-- mteb/tasks/STS/eng/STS12STS.py | 10 +--- mteb/tasks/STS/eng/STS13STS.py | 10 +--- mteb/tasks/STS/eng/STS14STS.py | 8 +-- mteb/tasks/STS/eng/STS15STS.py | 8 +-- mteb/tasks/STS/eng/STS16STS.py | 8 +-- mteb/tasks/STS/eng/STSBenchmarkSTS.py | 11 ++-- mteb/tasks/STS/eng/SickrSTS.py | 8 +-- mteb/tasks/STS/fao/FaroeseSTS.py | 8 +-- mteb/tasks/STS/fin/FinParaSTS.py | 8 +-- mteb/tasks/STS/fra/SickFrSTS.py | 8 +-- mteb/tasks/STS/jpn/JSICK.py | 8 +-- mteb/tasks/STS/jpn/JSTS.py | 8 +-- mteb/tasks/STS/kor/KlueSTS.py | 8 +-- mteb/tasks/STS/kor/KorSTS.py | 8 +-- .../STS/multilingual/IndicCrosslingualSTS.py | 8 +-- .../STS/multilingual/STS17CrosslingualSTS.py | 8 +-- .../STS/multilingual/STS22CrosslingualSTS.py | 16 ++---- .../STSBenchmarkMultilingualSTS.py | 8 +-- mteb/tasks/STS/multilingual/SemRel24STS.py | 8 +-- mteb/tasks/STS/pol/PolishSTS.py | 16 ++---- mteb/tasks/STS/por/Assin2STS.py | 8 +-- mteb/tasks/STS/por/SickBrSTS.py | 8 +-- mteb/tasks/STS/ron/RonSTS.py | 8 +-- mteb/tasks/STS/rus/RUParaPhraserSTS.py | 8 +-- mteb/tasks/STS/rus/RuSTSBenchmarkSTS.py | 8 +-- mteb/tasks/STS/spa/STSES.py | 8 +-- mteb/tasks/STS/zho/CMTEBSTS.py | 56 +++++-------------- .../eng/SummEvalSummarization.py | 16 ++---- .../fra/SummEvalFrSummarization.py | 18 ++---- scripts/data/create_task_table.py | 12 ++-- tests/test_benchmark/mock_tasks.py | 32 +++-------- 125 files changed, 321 insertions(+), 525 deletions(-) diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index cbdcac6372..4774be995c 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -137,7 +137,7 @@ def evaluate( for hf_subset in hf_subsets: logger.info( - f"\nTask: {self.metadata_dict['name']}, split: {split}, subset: {hf_subset}. Running..." + f"\nTask: {self.metadata.name}, split: {split}, subset: {hf_subset}. Running..." 
) if hf_subset not in self.dataset and hf_subset == "default": data_split = self.dataset[split] @@ -213,7 +213,7 @@ def load_data(self, **kwargs): """ if self.data_loaded: return - self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) # type: ignore + self.dataset = datasets.load_dataset(**self.metadata.dataset) # type: ignore self.dataset_transform() self.data_loaded = True diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py index 42de8dd273..bb5f6dc27e 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskClassification.py @@ -54,7 +54,7 @@ class AbsTaskClassification(AbsTask): """Abstract class for kNN classification tasks The similarity is computed between pairs and the results are ranked. - self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It + self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns: text: str label: int @@ -66,6 +66,7 @@ class AbsTaskClassification(AbsTask): abstask_prompt = "Classify user passages." samples_per_label: int = 8 + n_experiments: int = 10 def __init__( self, @@ -79,9 +80,7 @@ def __init__( # Bootstrap parameters self.n_experiments: int = ( # type: ignore - n_experiments - if n_experiments is not None - else self.metadata_dict.get("n_experiments", 10) + n_experiments if n_experiments is not None else self.n_experiments ) # kNN parameters diff --git a/mteb/abstasks/AbsTaskClustering.py b/mteb/abstasks/AbsTaskClustering.py index 095c44435c..e4ea7b0a02 100644 --- a/mteb/abstasks/AbsTaskClustering.py +++ b/mteb/abstasks/AbsTaskClustering.py @@ -57,7 +57,7 @@ class AbsTaskClustering(AbsTask): """Abstract class for Clustering tasks The similarity is computed between pairs and the results are ranked. 
- self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns: + self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns: sentences: list of str labels: list of str """ diff --git a/mteb/abstasks/AbsTaskClusteringFast.py b/mteb/abstasks/AbsTaskClusteringFast.py index af600eb7e0..ca4ef58457 100644 --- a/mteb/abstasks/AbsTaskClusteringFast.py +++ b/mteb/abstasks/AbsTaskClusteringFast.py @@ -129,7 +129,7 @@ class AbsTaskClusteringFast(AbsTask): If the clustering is hierarchical, and more than one label is specified in order for each observation, V-measures are calculated in the outlined way on each of the levels separately. - self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. + self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns: sentences: list[str] labels: list[str] | list[list[str]] @@ -147,7 +147,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) def _add_main_score(self, scores): - if self.metadata_dict["main_score"] in scores: + if self.metadata.main_score in scores: scores["main_score"] = scores[self.metadata.main_score] else: logger.warning( diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py index bcd21a387d..156fdd5e09 100644 --- a/mteb/abstasks/AbsTaskMultilabelClassification.py +++ b/mteb/abstasks/AbsTaskMultilabelClassification.py @@ -81,7 +81,7 @@ class AbsTaskMultilabelClassification(AbsTask): """Abstract class for multioutput classification tasks The similarity is computed between pairs and the results are ranked. 
- self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns: + self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns: text: str label: list[list[int]] diff --git a/mteb/abstasks/AbsTaskPairClassification.py b/mteb/abstasks/AbsTaskPairClassification.py index 4d39fd2c3f..0ac388c4e5 100644 --- a/mteb/abstasks/AbsTaskPairClassification.py +++ b/mteb/abstasks/AbsTaskPairClassification.py @@ -59,7 +59,7 @@ class AbsTaskPairClassification(AbsTask): The similarity is computed between pairs and the results are ranked. Average precision is computed to measure how well the methods can be used for pairwise pair classification. - self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns: + self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. 
It must contain the following columns: sentence1: list[str] sentence2: list[str] labels: list[int] diff --git a/mteb/abstasks/AbsTaskReranking.py b/mteb/abstasks/AbsTaskReranking.py index e8ea495366..76ee7df74a 100644 --- a/mteb/abstasks/AbsTaskReranking.py +++ b/mteb/abstasks/AbsTaskReranking.py @@ -107,14 +107,14 @@ def transform_old_dataset_format(self, given_dataset=None): for hf_subset in hf_subsets: if given_dataset: cur_dataset = given_dataset - elif "name" in self.metadata_dict["dataset"]: - cur_dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) # type: ignore + elif "name" in self.metadata.dataset: + cur_dataset = datasets.load_dataset(**self.metadata.dataset) # type: ignore assert ( hf_subset == "default" ), f"Only default subset is supported for {self.metadata.name} since `name` is given in the metadata." else: cur_dataset = datasets.load_dataset( - **self.metadata_dict["dataset"], name=hf_subset + **self.metadata.dataset, name=hf_subset ) # type: ignore for split in cur_dataset: diff --git a/mteb/abstasks/AbsTaskSTS.py b/mteb/abstasks/AbsTaskSTS.py index 2183cc08af..2f29f01cd4 100644 --- a/mteb/abstasks/AbsTaskSTS.py +++ b/mteb/abstasks/AbsTaskSTS.py @@ -54,25 +54,19 @@ class STSDescriptiveStatistics(DescriptiveStatistics): class AbsTaskSTS(AbsTask): """Abstract class for STS experiments. - self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns:: + self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns:: sentence1: str sentence2: str score: float """ abstask_prompt = "Retrieve semantically similar text." 
+ min_score: int + max_score: int def __init__(self, **kwargs): super().__init__(**kwargs) - @property - def min_score(self) -> int: - return self.metadata_dict["min_score"] - - @property - def max_score(self) -> int: - return self.metadata_dict["max_score"] - def _evaluate_subset( self, model, data_split, *, encode_kwargs: dict[str, Any] = {}, **kwargs ) -> ScoresDict: diff --git a/mteb/abstasks/AbsTaskSummarization.py b/mteb/abstasks/AbsTaskSummarization.py index 07fd420571..0ed06ba068 100644 --- a/mteb/abstasks/AbsTaskSummarization.py +++ b/mteb/abstasks/AbsTaskSummarization.py @@ -68,13 +68,16 @@ class SummarizationDescriptiveStatistics(DescriptiveStatistics): class AbsTaskSummarization(AbsTask): """Abstract class for summarization experiments. - self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns: + self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns: text: str human_summaries: list[str] machine_summaries: list[str] relevance: list[float] (the score of the machine generated summaries) """ + min_score: int + max_score: int + evalutor = SummarizationEvaluator abstask_prompt = ( "Given a news summary, retrieve other semantically similar summaries." 
@@ -83,14 +86,6 @@ class AbsTaskSummarization(AbsTask): def __init__(self, **kwargs): super().__init__(**kwargs) - @property - def min_score(self): - return self.metadata_dict["min_score"] - - @property - def max_score(self): - return self.metadata_dict["max_score"] - def _evaluate_subset( self, model: Encoder, data_split, *, encode_kwargs: dict[str, Any], **kwargs ) -> ScoresDict: diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index 6b25a0eb1e..ab317cadbd 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -121,16 +121,16 @@ def deprecation_warning( @property def available_tasks(self): - return [x.metadata_dict["name"] for x in self.tasks_cls] + return [x.metadata.name for x in self.tasks_cls] @property def available_task_types(self): # sort the task types - return sorted({x.metadata_dict["type"] for x in self.tasks_cls}) + return sorted({x.metadata.type for x in self.tasks_cls}) @property def available_task_categories(self): - return {x.metadata_dict["category"] for x in self.tasks_cls} + return {x.metadata.category for x in self.tasks_cls} def _extend_lang_code(self): # add all possible language codes @@ -241,12 +241,10 @@ def select_tasks(self, **kwargs): # If `task_list` is specified, select list of tasks if self._tasks is not None: self.tasks = list( - filter( - lambda x: (x.metadata_dict["name"] in self._tasks), self.tasks_cls - ) + filter(lambda x: (x.metadata.name in self._tasks), self.tasks_cls) ) if len(self.tasks) != len(self._tasks): - tasks_known = {x.metadata_dict["name"] for x in self.tasks_cls} + tasks_known = {x.metadata.name for x in self.tasks_cls} tasks_unknown = { x for x in self._tasks if isinstance(x, str) } - tasks_known @@ -265,23 +263,22 @@ def select_tasks(self, **kwargs): # Otherwise use filters to select tasks filtered_tasks = filter( lambda x: (self._task_types is None) - or (x.metadata_dict["type"] in self._task_types), + or (x.metadata.type in self._task_types), self.tasks_cls, ) filtered_tasks = 
filter( lambda x: (self._task_categories is None) - or (x.metadata_dict["category"] in self._task_categories), + or (x.metadata.category in self._task_categories), filtered_tasks, ) filtered_tasks = filter( - lambda x: (self._version is None) - or (x.metadata_dict["version"] >= self._version), + lambda x: (self._version is None) or (x.metadata.version >= self._version), filtered_tasks, ) # keep only tasks with at least one language in the filter filtered_tasks = filter( - lambda x: (not (self._task_langs)) - or (len(set(x.metadata_dict["eval_langs"]) & set(self._task_langs)) > 0), + lambda x: (not self._task_langs) + or (len(set(x.metadata.eval_langs) & set(self._task_langs)) > 0), filtered_tasks, ) @@ -292,7 +289,7 @@ def load_tasks_data(self): """Load datasets for the selected tasks.""" logger.info(f"\n\n## Loading datasets for {len(self.tasks)} tasks") for task in self.tasks: - logger.info(f"\n# Loading dataset for {task.metadata_dict['name']}") + logger.info(f"\n# Loading dataset for {task.metadata.name}") task.load_data() @staticmethod @@ -595,7 +592,7 @@ def run( ) logger.info( - f"Evaluation for {task.metadata_dict['name']} on {split} took {tock - tick:.2f} seconds" + f"Evaluation for {task.metadata.name} on {split} took {tock - tick:.2f} seconds" ) evaluation_time += tock - tick @@ -627,16 +624,14 @@ def run( evaluation_results.append(merged_results) except Exception as e: - logger.error( - f"Error while evaluating {task.metadata_dict['name']}: {e}" - ) + logger.error(f"Error while evaluating {task.metadata.name}: {e}") if raise_error: raise e logger.error( f"Please check all the error logs at: {self.err_logs_path}" ) with open(self.err_logs_path, "a") as f_out: - f_out.write(f"{datetime.now()} >>> {task.metadata_dict['name']}\n") + f_out.write(f"{datetime.now()} >>> {task.metadata.name}\n") f_out.write(traceback.format_exc()) f_out.write("\n\n") diff --git a/mteb/tasks/BitextMining/kat/TbilisiCityHallBitextMining.py 
b/mteb/tasks/BitextMining/kat/TbilisiCityHallBitextMining.py index 6c6816fb5d..c5bf62f5cf 100644 --- a/mteb/tasks/BitextMining/kat/TbilisiCityHallBitextMining.py +++ b/mteb/tasks/BitextMining/kat/TbilisiCityHallBitextMining.py @@ -51,10 +51,10 @@ def load_data(self, **kwargs) -> None: for lang in self.hf_subsets: l1, l2 = lang.split("-") dataset = load_dataset( - self.metadata_dict["dataset"]["path"], + self.metadata.dataset["path"], split=_EVAL_SPLIT, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) dataset = dataset.rename_columns( {_LANGUAGES[l1]: "sentence1", _LANGUAGES[l2]: "sentence2"} diff --git a/mteb/tasks/BitextMining/multilingual/BibleNLPBitextMining.py b/mteb/tasks/BitextMining/multilingual/BibleNLPBitextMining.py index 07724153c9..670723a38e 100644 --- a/mteb/tasks/BitextMining/multilingual/BibleNLPBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/BibleNLPBitextMining.py @@ -908,7 +908,7 @@ def load_data(self, **kwargs: Any) -> None: else: dataset = datasets.load_dataset( name=self._transform_lang_name_hf(lang), - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.dataset[lang] = datasets.DatasetDict({"train": dataset}) seen_pairs.append(hf_lang_name) diff --git a/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py b/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py index b7806d60ac..67a561d206 100644 --- a/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py @@ -52,7 +52,7 @@ def load_data(self, **kwargs): self.dataset = {} for lang in self.hf_subsets: - self.dataset[lang] = datasets.load_dataset(**self.metadata_dict["dataset"]) + self.dataset[lang] = datasets.load_dataset(**self.metadata.dataset) self.dataset_transform() self.data_loaded = True diff --git a/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py 
b/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py index 786b5f0fd9..25d489f5d6 100644 --- a/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py @@ -274,5 +274,5 @@ def load_data(self, **kwargs: Any) -> None: """Load dataset from HuggingFace hub""" if self.data_loaded: return - self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) + self.dataset = datasets.load_dataset(**self.metadata.dataset) self.data_loaded = True diff --git a/mteb/tasks/BitextMining/multilingual/IN22ConvBitextMining.py b/mteb/tasks/BitextMining/multilingual/IN22ConvBitextMining.py index 61a8717507..94b7f7cc02 100644 --- a/mteb/tasks/BitextMining/multilingual/IN22ConvBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/IN22ConvBitextMining.py @@ -106,5 +106,5 @@ def load_data(self, **kwargs: Any) -> None: """Load dataset from HuggingFace hub""" if self.data_loaded: return - self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) + self.dataset = datasets.load_dataset(**self.metadata.dataset) self.data_loaded = True diff --git a/mteb/tasks/BitextMining/multilingual/IN22GenBitextMining.py b/mteb/tasks/BitextMining/multilingual/IN22GenBitextMining.py index 503c64e5f0..3a196780a3 100644 --- a/mteb/tasks/BitextMining/multilingual/IN22GenBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/IN22GenBitextMining.py @@ -100,5 +100,5 @@ def load_data(self, **kwargs: Any) -> None: """Load dataset from HuggingFace hub""" if self.data_loaded: return - self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) + self.dataset = datasets.load_dataset(**self.metadata.dataset) self.data_loaded = True diff --git a/mteb/tasks/BitextMining/multilingual/IndicGenBenchFloresBitextMining.py b/mteb/tasks/BitextMining/multilingual/IndicGenBenchFloresBitextMining.py index 38efd482a0..843ed35f40 100644 --- a/mteb/tasks/BitextMining/multilingual/IndicGenBenchFloresBitextMining.py +++ 
b/mteb/tasks/BitextMining/multilingual/IndicGenBenchFloresBitextMining.py @@ -149,7 +149,7 @@ def load_data(self, **kwargs: Any) -> None: language = f"{coded_source_language}_en" self.dataset[lang] = datasets.load_dataset( - **self.metadata_dict["dataset"], + **self.metadata.dataset, field="examples", data_files={ "validation": f"flores_{language}_dev.json", diff --git a/mteb/tasks/BitextMining/multilingual/RomaTalesBitextMining.py b/mteb/tasks/BitextMining/multilingual/RomaTalesBitextMining.py index 28f11bfcbf..87c2f59ef8 100644 --- a/mteb/tasks/BitextMining/multilingual/RomaTalesBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/RomaTalesBitextMining.py @@ -42,7 +42,7 @@ def load_data(self, **kwargs): self.dataset = {} for lang in self.hf_subsets: - self.dataset[lang] = datasets.load_dataset(**self.metadata_dict["dataset"]) + self.dataset[lang] = datasets.load_dataset(**self.metadata.dataset) self.dataset_transform() self.data_loaded = True diff --git a/mteb/tasks/BitextMining/srn/SRNCorpusBitextMining.py b/mteb/tasks/BitextMining/srn/SRNCorpusBitextMining.py index b4072553b6..5adb51888b 100644 --- a/mteb/tasks/BitextMining/srn/SRNCorpusBitextMining.py +++ b/mteb/tasks/BitextMining/srn/SRNCorpusBitextMining.py @@ -71,7 +71,7 @@ def _clean_columns(batch, keys): dataset = datasets.load_dataset( name="srn-nl_other", split="test", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ).map(lambda batch: _clean_columns(batch, ["nl", "srn"]), batched=True) dataset = dataset.rename_columns( {_LANGUAGES[l1]: "sentence1", _LANGUAGES[l2]: "sentence2"} diff --git a/mteb/tasks/Classification/eng/LegalBenchClassification.py b/mteb/tasks/Classification/eng/LegalBenchClassification.py index 4e3f25554f..ac4172fd8b 100644 --- a/mteb/tasks/Classification/eng/LegalBenchClassification.py +++ b/mteb/tasks/Classification/eng/LegalBenchClassification.py @@ -4550,9 +4550,9 @@ def load_data(self, **kwargs: Any) -> None: class_count = 0 for dataset_col_map in 
_MAUD_DATASET_MAP: _dataset = datasets.load_dataset( - self.metadata_dict["dataset"]["path"], + self.metadata.dataset["path"], dataset_col_map["name"], - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], trust_remote_code=True, ) diff --git a/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py b/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py index 91e54bc137..fc986e8f91 100644 --- a/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py +++ b/mteb/tasks/Classification/ind/IndonesianMongabayConservationClassification.py @@ -57,7 +57,7 @@ class IndonesianMongabayConservationClassification(AbsTaskClassification): ) def dataset_transform(self): - splits = self.metadata_dict["eval_splits"] + splits = self.metadata.eval_splits class_labels = ["positif", "netral", "negatif"] ds = {} diff --git a/mteb/tasks/Classification/multilingual/AfriSentiClassification.py b/mteb/tasks/Classification/multilingual/AfriSentiClassification.py index 8a4a79d68b..9c1bd5daf7 100644 --- a/mteb/tasks/Classification/multilingual/AfriSentiClassification.py +++ b/mteb/tasks/Classification/multilingual/AfriSentiClassification.py @@ -65,7 +65,7 @@ def load_data(self, **kwargs): return self.dataset = {} for lang in self.hf_subsets: - metadata = self.metadata_dict.get("dataset", None) + metadata = self.metadata.dataset dataset = datasets.load_dataset(name=lang, **metadata) self.dataset[lang] = _transform(dataset, lang) self.dataset_transform() diff --git a/mteb/tasks/Classification/multilingual/IndicLangClassification.py b/mteb/tasks/Classification/multilingual/IndicLangClassification.py index 47564cf501..dcb5ca1004 100644 --- a/mteb/tasks/Classification/multilingual/IndicLangClassification.py +++ b/mteb/tasks/Classification/multilingual/IndicLangClassification.py @@ -110,9 +110,7 @@ def load_data(self, **kwargs: Any) -> None: labels = sorted(_LANGUAGES.keys()) - data = 
datasets.load_dataset(**self.metadata_dict["dataset"])["train"]["data"][ - 0 - ] + data = datasets.load_dataset(**self.metadata.dataset)["train"]["data"][0] dataset = {"train": [], "test": []} for lang, lang_code in LANG_MAP.items(): diff --git a/mteb/tasks/Classification/multilingual/NaijaSenti.py b/mteb/tasks/Classification/multilingual/NaijaSenti.py index b31333236e..347f84771d 100644 --- a/mteb/tasks/Classification/multilingual/NaijaSenti.py +++ b/mteb/tasks/Classification/multilingual/NaijaSenti.py @@ -70,7 +70,7 @@ def load_data(self, **kwargs: Any) -> None: for lang in self.hf_subsets: self.dataset[lang] = datasets.load_dataset( name=f"{lang}", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.dataset[lang] = datasets.DatasetDict( { diff --git a/mteb/tasks/Classification/multilingual/TurkicClassification.py b/mteb/tasks/Classification/multilingual/TurkicClassification.py index ec947fce4d..3ab0684df1 100644 --- a/mteb/tasks/Classification/multilingual/TurkicClassification.py +++ b/mteb/tasks/Classification/multilingual/TurkicClassification.py @@ -57,8 +57,7 @@ def load_data(self, **kwargs): if self.data_loaded: return dataset = {} - metadata = self.metadata_dict.get("dataset", None) - full_dataset = datasets.load_dataset(**metadata) + full_dataset = datasets.load_dataset(**self.metadata.dataset) full_dataset = full_dataset.rename_columns( {"processed_text": "text", "category": "label"} ) diff --git a/mteb/tasks/Classification/zho/CMTEBClassification.py b/mteb/tasks/Classification/zho/CMTEBClassification.py index 7e790ecf9a..8f7fc24694 100644 --- a/mteb/tasks/Classification/zho/CMTEBClassification.py +++ b/mteb/tasks/Classification/zho/CMTEBClassification.py @@ -145,12 +145,7 @@ class IFlyTek(AbsTaskClassification): ) samples_per_label = 32 - - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["n_experiments"] = 5 - return metadata_dict + n_experiments = 5 class 
MultilingualSentiment(AbsTaskClassification): diff --git a/mteb/tasks/Clustering/multilingual/MLSUMClusteringP2P.py b/mteb/tasks/Clustering/multilingual/MLSUMClusteringP2P.py index 0a832bb228..4699fdae85 100644 --- a/mteb/tasks/Clustering/multilingual/MLSUMClusteringP2P.py +++ b/mteb/tasks/Clustering/multilingual/MLSUMClusteringP2P.py @@ -60,7 +60,7 @@ def load_data(self, **kwargs): for lang in self.hf_subsets: self.dataset[lang] = datasets.load_dataset( name=lang, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.dataset_transform(lang) self.data_loaded = True @@ -130,7 +130,7 @@ def load_data(self, **kwargs): for lang in self.hf_subsets: self.dataset[lang] = datasets.load_dataset( name=lang, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.dataset_transform(lang) self.data_loaded = True diff --git a/mteb/tasks/Clustering/multilingual/MLSUMClusteringS2S.py b/mteb/tasks/Clustering/multilingual/MLSUMClusteringS2S.py index f5e19874a4..787391a7f4 100644 --- a/mteb/tasks/Clustering/multilingual/MLSUMClusteringS2S.py +++ b/mteb/tasks/Clustering/multilingual/MLSUMClusteringS2S.py @@ -60,7 +60,7 @@ def load_data(self, **kwargs): for lang in self.hf_subsets: self.dataset[lang] = datasets.load_dataset( name=lang, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.dataset_transform(lang) self.data_loaded = True @@ -125,7 +125,7 @@ def load_data(self, **kwargs): for lang in self.hf_subsets: self.dataset[lang] = datasets.load_dataset( name=lang, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.dataset_transform(lang) self.data_loaded = True diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py index 480cceff8f..788c334bba 100644 --- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py @@ -70,7 +70,7 @@ def load_data(self, **kwargs): for 
lang in self.hf_subsets: self.dataset[lang] = datasets.load_dataset( name=lang, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.dataset_transform(lang) self.data_loaded = True diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py index 7e8b22b9af..56bd8f3890 100644 --- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py @@ -69,7 +69,7 @@ def load_data(self, **kwargs): for lang in self.hf_subsets: self.dataset[lang] = datasets.load_dataset( name=lang, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.dataset_transform(lang) self.data_loaded = True diff --git a/mteb/tasks/Clustering/nob/snl_clustering.py b/mteb/tasks/Clustering/nob/snl_clustering.py index 9256fc66c0..f368a2efac 100644 --- a/mteb/tasks/Clustering/nob/snl_clustering.py +++ b/mteb/tasks/Clustering/nob/snl_clustering.py @@ -54,7 +54,7 @@ class SNLClustering(AbsTaskClustering): ) def dataset_transform(self): - splits = self.metadata_dict["eval_splits"] + splits = self.metadata.eval_splits documents: list = [] labels: list = [] diff --git a/mteb/tasks/Clustering/nob/vg_clustering.py b/mteb/tasks/Clustering/nob/vg_clustering.py index f1050e796b..812e9c41d2 100644 --- a/mteb/tasks/Clustering/nob/vg_clustering.py +++ b/mteb/tasks/Clustering/nob/vg_clustering.py @@ -54,7 +54,7 @@ class VGClustering(AbsTaskClustering): ) def dataset_transform(self): - splits = self.metadata_dict["eval_splits"] + splits = self.metadata.eval_splits documents: list = [] labels: list = [] diff --git a/mteb/tasks/InstructionReranking/multilingual/mFollowIR.py b/mteb/tasks/InstructionReranking/multilingual/mFollowIR.py index b42f86b7c5..9df4ba4aae 100644 --- a/mteb/tasks/InstructionReranking/multilingual/mFollowIR.py +++ b/mteb/tasks/InstructionReranking/multilingual/mFollowIR.py @@ -170,11 +170,11 @@ def load_data(self, 
**kwargs): self.relevant_docs, self.top_ranked, ) = load_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.metadata.eval_langs, - eval_splits=self.metadata_dict["eval_splits"], + eval_splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True @@ -221,11 +221,11 @@ def load_data(self, **kwargs): self.relevant_docs, self.top_ranked, ) = load_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.metadata.eval_langs, - eval_splits=self.metadata_dict["eval_splits"], + eval_splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True diff --git a/mteb/tasks/PairClassification/eng/LegalBenchPC.py b/mteb/tasks/PairClassification/eng/LegalBenchPC.py index 534d086264..8244906328 100644 --- a/mteb/tasks/PairClassification/eng/LegalBenchPC.py +++ b/mteb/tasks/PairClassification/eng/LegalBenchPC.py @@ -127,9 +127,9 @@ def load_data(self, **kwargs: Any) -> None: _hf_dataset = None for dataset_col_map in _DATASET_COLUMN_MAP: _dataset = datasets.load_dataset( - self.metadata_dict["dataset"]["path"], + self.metadata.dataset["path"], dataset_col_map["name"], - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], trust_remote_code=True, ) diff --git a/mteb/tasks/PairClassification/fas/FarsTail.py b/mteb/tasks/PairClassification/fas/FarsTail.py index 552e953f77..3bed9f5b74 100644 --- a/mteb/tasks/PairClassification/fas/FarsTail.py +++ b/mteb/tasks/PairClassification/fas/FarsTail.py @@ -41,8 +41,8 @@ class FarsTail(AbsTaskPairClassification): def load_data(self, **kwargs): if self.data_loaded: return - path = self.metadata_dict["dataset"]["path"] - revision = 
self.metadata_dict["dataset"]["revision"] + path = self.metadata.dataset["path"] + revision = self.metadata.dataset["revision"] data_files = { "test": f"https://huggingface.co/datasets/{path}/resolve/{revision}/data/Test-word.csv" } diff --git a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py index e6c9cc4ee0..8fb328aaf4 100644 --- a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py +++ b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py @@ -58,7 +58,7 @@ def load_data(self, **kwargs): self.dataset[lang] = datasets.load_dataset( lang=lang, quality=100, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.dataset_transform(lang) self.data_loaded = True diff --git a/mteb/tasks/PairClassification/multilingual/XStance.py b/mteb/tasks/PairClassification/multilingual/XStance.py index ca41208845..3c94ec4b60 100644 --- a/mteb/tasks/PairClassification/multilingual/XStance.py +++ b/mteb/tasks/PairClassification/multilingual/XStance.py @@ -56,8 +56,8 @@ def load_data(self, **kwargs): max_n_samples = 2048 self.dataset = {} - path = self.metadata_dict["dataset"]["path"] - revision = self.metadata_dict["dataset"]["revision"] + path = self.metadata.dataset["path"] + revision = self.metadata.dataset["revision"] raw_dataset = load_dataset( path, revision=revision, @@ -73,7 +73,7 @@ def convert_example(example): for lang in self.metadata.eval_langs: self.dataset[lang] = {} - for split in self.metadata_dict["eval_splits"]: + for split in self.metadata.eval_splits: # filter by language self.dataset[lang][split] = raw_dataset[split].filter( lambda row: row["language"] == lang diff --git a/mteb/tasks/Reranking/fra/AlloprofReranking.py b/mteb/tasks/Reranking/fra/AlloprofReranking.py index 150d2b314b..203db1c02e 100644 --- a/mteb/tasks/Reranking/fra/AlloprofReranking.py +++ b/mteb/tasks/Reranking/fra/AlloprofReranking.py @@ -47,11 +47,11 @@ def load_data(self, **kwargs): self.dataset 
= datasets.load_dataset( name="queries", - **self.metadata_dict["dataset"], + **self.metadata.dataset, split=self.metadata.eval_splits[0], ) documents = datasets.load_dataset( - name="documents", **self.metadata_dict["dataset"], split="test" + name="documents", **self.metadata.dataset, split="test" ) # replace documents ids in positive and negative column by their respective texts doc_id2txt = dict(list(zip(documents["doc_id"], documents["text"]))) diff --git a/mteb/tasks/Reranking/fra/SyntecReranking.py b/mteb/tasks/Reranking/fra/SyntecReranking.py index 8b74f2115a..8d21860dd2 100644 --- a/mteb/tasks/Reranking/fra/SyntecReranking.py +++ b/mteb/tasks/Reranking/fra/SyntecReranking.py @@ -45,11 +45,11 @@ def load_data(self, **kwargs): self.dataset = datasets.load_dataset( name="queries", - **self.metadata_dict["dataset"], + **self.metadata.dataset, split=self.metadata.eval_splits[0], ) documents = datasets.load_dataset( - name="documents", **self.metadata_dict["dataset"], split="test" + name="documents", **self.metadata.dataset, split="test" ) # replace documents ids in positive and negative column by their respective texts doc_id2txt = dict(list(zip(documents["doc_id"], documents["text"]))) diff --git a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py index e9e2677b3b..cd87ec3200 100644 --- a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py +++ b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py @@ -123,14 +123,14 @@ def load_data(self, **kwargs): hf_subsets = list(self.hf_subsets) if self.is_multilingual else ["default"] for hf_subset in hf_subsets: - if "name" in self.metadata_dict["dataset"]: - cur_dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) # type: ignore + if "name" in self.metadata.dataset: + cur_dataset = datasets.load_dataset(**self.metadata.dataset) # type: ignore assert ( hf_subset == "default" ), f"Only default subset is supported for {self.metadata.name} since `name` 
is given in the metadata." else: cur_dataset = datasets.load_dataset( - **self.metadata_dict["dataset"], name=hf_subset + **self.metadata.dataset, name=hf_subset ) # type: ignore for split in cur_dataset: diff --git a/mteb/tasks/Retrieval/ara/SadeemQuestionRetrieval.py b/mteb/tasks/Retrieval/ara/SadeemQuestionRetrieval.py index 2009a91c79..57df5e4e7f 100644 --- a/mteb/tasks/Retrieval/ara/SadeemQuestionRetrieval.py +++ b/mteb/tasks/Retrieval/ara/SadeemQuestionRetrieval.py @@ -44,13 +44,13 @@ def load_data(self, **kwargs): if self.data_loaded: return - query_list = datasets.load_dataset(**self.metadata_dict["dataset"])["queries"] + query_list = datasets.load_dataset(**self.metadata.dataset)["queries"] queries = {row["query-id"]: row["text"] for row in query_list} - corpus_list = datasets.load_dataset(**self.metadata_dict["dataset"])["corpus"] + corpus_list = datasets.load_dataset(**self.metadata.dataset)["corpus"] corpus = {row["corpus-id"]: {"text": row["text"]} for row in corpus_list} - qrels_list = datasets.load_dataset(**self.metadata_dict["dataset"])["qrels"] + qrels_list = datasets.load_dataset(**self.metadata.dataset)["qrels"] qrels = {row["query-id"]: {row["corpus-id"]: 1} for row in qrels_list} self.corpus = {self._EVAL_SPLIT: corpus} diff --git a/mteb/tasks/Retrieval/code/COIRCodeSearchNetRetrieval.py b/mteb/tasks/Retrieval/code/COIRCodeSearchNetRetrieval.py index 29858026a6..315b749aac 100644 --- a/mteb/tasks/Retrieval/code/COIRCodeSearchNetRetrieval.py +++ b/mteb/tasks/Retrieval/code/COIRCodeSearchNetRetrieval.py @@ -105,11 +105,11 @@ def load_data(self, **kwargs): self.corpus, self.queries, self.relevant_docs = ( _load_code_search_code_retrieval( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.hf_subsets, - splits=self.metadata_dict["eval_splits"], + splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + 
revision=self.metadata.dataset["revision"], ) ) diff --git a/mteb/tasks/Retrieval/code/CodeEditSearchRetrieval.py b/mteb/tasks/Retrieval/code/CodeEditSearchRetrieval.py index e3175fa324..2e6100c9a6 100644 --- a/mteb/tasks/Retrieval/code/CodeEditSearchRetrieval.py +++ b/mteb/tasks/Retrieval/code/CodeEditSearchRetrieval.py @@ -58,7 +58,7 @@ def load_data(self, **kwargs): data = datasets.load_dataset( split=self._EVAL_SPLIT, data_dir=lang, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) for row in data: lang_subs[lang].append(row) diff --git a/mteb/tasks/Retrieval/code/CodeSearchNetCCRetrieval.py b/mteb/tasks/Retrieval/code/CodeSearchNetCCRetrieval.py index 3f5ca2e028..974c34a16a 100644 --- a/mteb/tasks/Retrieval/code/CodeSearchNetCCRetrieval.py +++ b/mteb/tasks/Retrieval/code/CodeSearchNetCCRetrieval.py @@ -112,11 +112,11 @@ def load_data(self, **kwargs): self.corpus, self.queries, self.relevant_docs = ( _load_code_search_code_retrieval( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.hf_subsets, - splits=self.metadata_dict["eval_splits"], + splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) ) diff --git a/mteb/tasks/Retrieval/code/CodeSearchNetRetrieval.py b/mteb/tasks/Retrieval/code/CodeSearchNetRetrieval.py index ddcef675f5..60c6e5b4d9 100644 --- a/mteb/tasks/Retrieval/code/CodeSearchNetRetrieval.py +++ b/mteb/tasks/Retrieval/code/CodeSearchNetRetrieval.py @@ -43,7 +43,7 @@ def load_data(self, **kwargs): split=self._EVAL_SPLIT, trust_remote_code=True, streaming=True, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) data = data.shuffle(seed=42) diff --git a/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py b/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py index 111eb986ed..eab0dd6835 100644 --- a/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py +++ 
b/mteb/tasks/Retrieval/deu/GerDaLIRRetrieval.py @@ -53,17 +53,17 @@ def load_data(self, **kwargs): query_rows = datasets.load_dataset( name="queries", split=self._EVAL_SPLIT, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) corpus_rows = datasets.load_dataset( name="corpus", split=self._EVAL_SPLIT, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) qrels_rows = datasets.load_dataset( name="qrels", split=self._EVAL_SPLIT, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.queries = { diff --git a/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py b/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py index 19c90dc52e..f1e82d738a 100644 --- a/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py +++ b/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py @@ -63,7 +63,7 @@ def load_data(self, **kwargs): data = datasets.load_dataset( split=self._EVAL_SPLIT, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) corpus = {} queries = {} diff --git a/mteb/tasks/Retrieval/deu/GermanGovServiceRetrieval.py b/mteb/tasks/Retrieval/deu/GermanGovServiceRetrieval.py index 10604f42a8..a91639de24 100644 --- a/mteb/tasks/Retrieval/deu/GermanGovServiceRetrieval.py +++ b/mteb/tasks/Retrieval/deu/GermanGovServiceRetrieval.py @@ -56,10 +56,10 @@ def load_data(self, **kwargs): return dataset = datasets.load_dataset( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], split=_EVAL_SPLIT, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) corpus = {} queries = {} diff --git a/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py b/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py index dec5b4e033..6946513dd6 100644 --- a/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py +++ b/mteb/tasks/Retrieval/deu/GermanQuADRetrieval.py @@ -64,6 +64,6 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = load_retrieval_data( - 
self.metadata_dict["dataset"]["path"], self.metadata_dict["eval_splits"] + self.metadata.dataset["path"], self.metadata.eval_splits ) self.data_loaded = True diff --git a/mteb/tasks/Retrieval/ell/GreekCivicsQA.py b/mteb/tasks/Retrieval/ell/GreekCivicsQA.py index 2f9860052b..d34afe125a 100644 --- a/mteb/tasks/Retrieval/ell/GreekCivicsQA.py +++ b/mteb/tasks/Retrieval/ell/GreekCivicsQA.py @@ -38,8 +38,8 @@ def load_data(self, **kwargs): if self.data_loaded: return # fetch both subsets of the dataset - eval_split = self.metadata_dict["eval_splits"][0] - data_raw = datasets.load_dataset(**self.metadata_dict["dataset"])[eval_split] + eval_split = self.metadata.eval_splits[0] + data_raw = datasets.load_dataset(**self.metadata.dataset)[eval_split] queries = {eval_split: {}} corpus = {eval_split: {}} diff --git a/mteb/tasks/Retrieval/eng/BrightRetrieval.py b/mteb/tasks/Retrieval/eng/BrightRetrieval.py index 37256918d2..a5c03cdd6b 100644 --- a/mteb/tasks/Retrieval/eng/BrightRetrieval.py +++ b/mteb/tasks/Retrieval/eng/BrightRetrieval.py @@ -130,10 +130,10 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = self.load_bright_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], domains=DOMAINS, - eval_splits=self.metadata_dict["eval_splits"], + eval_splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True diff --git a/mteb/tasks/Retrieval/eng/HagridRetrieval.py b/mteb/tasks/Retrieval/eng/HagridRetrieval.py index 546bf99126..83aa83a262 100644 --- a/mteb/tasks/Retrieval/eng/HagridRetrieval.py +++ b/mteb/tasks/Retrieval/eng/HagridRetrieval.py @@ -52,7 +52,7 @@ def load_data(self, **kwargs): data = datasets.load_dataset( "miracl/hagrid", split=self.metadata.eval_splits[0], - revision=self.metadata_dict["dataset"].get("revision", None), + 
revision=self.metadata.dataset.get("revision", None), trust_remote_code=self.metadata.dataset["trust_remote_code"], ) proc_data = self.preprocess_data(data) diff --git a/mteb/tasks/Retrieval/eng/LEMBNarrativeQARetrieval.py b/mteb/tasks/Retrieval/eng/LEMBNarrativeQARetrieval.py index 3d45290d71..54e90ea8b6 100644 --- a/mteb/tasks/Retrieval/eng/LEMBNarrativeQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBNarrativeQARetrieval.py @@ -63,17 +63,17 @@ def load_data(self, **kwargs): if self.data_loaded: return - query_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + query_list = datasets.load_dataset(**self.metadata.dataset)[ "queries" ] # dict_keys(['qid', 'text']) queries = {row["qid"]: row["text"] for row in query_list} - corpus_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + corpus_list = datasets.load_dataset(**self.metadata.dataset)[ "corpus" ] # dict_keys(['doc_id', 'text']) corpus = {row["doc_id"]: {"text": row["text"]} for row in corpus_list} - qrels_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + qrels_list = datasets.load_dataset(**self.metadata.dataset)[ "qrels" ] # dict_keys(['qid', 'doc_id']) qrels = {row["qid"]: {row["doc_id"]: 1} for row in qrels_list} diff --git a/mteb/tasks/Retrieval/eng/LEMBNeedleRetrieval.py b/mteb/tasks/Retrieval/eng/LEMBNeedleRetrieval.py index c467843d01..a590ca79b6 100644 --- a/mteb/tasks/Retrieval/eng/LEMBNeedleRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBNeedleRetrieval.py @@ -61,7 +61,7 @@ def load_data(self, **kwargs): for split in self._EVAL_SPLIT: context_length = int(split.split("_")[1]) - query_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + query_list = datasets.load_dataset(**self.metadata.dataset)[ "queries" ] # dict_keys(['qid', 'text']) query_list = query_list.filter( @@ -69,7 +69,7 @@ def load_data(self, **kwargs): ) queries = {row["qid"]: row["text"] for row in query_list} - corpus_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + 
corpus_list = datasets.load_dataset(**self.metadata.dataset)[ "corpus" ] # dict_keys(['doc_id', 'text']) corpus_list = corpus_list.filter( @@ -77,7 +77,7 @@ def load_data(self, **kwargs): ) corpus = {row["doc_id"]: {"text": row["text"]} for row in corpus_list} - qrels_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + qrels_list = datasets.load_dataset(**self.metadata.dataset)[ "qrels" ] # dict_keys(['qid', 'doc_id']) qrels_list = qrels_list.filter( diff --git a/mteb/tasks/Retrieval/eng/LEMBPasskeyRetrieval.py b/mteb/tasks/Retrieval/eng/LEMBPasskeyRetrieval.py index f3c9b96485..0408738bed 100644 --- a/mteb/tasks/Retrieval/eng/LEMBPasskeyRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBPasskeyRetrieval.py @@ -61,7 +61,7 @@ def load_data(self, **kwargs): for split in self._EVAL_SPLIT: context_length = int(split.split("_")[1]) - query_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + query_list = datasets.load_dataset(**self.metadata.dataset)[ "queries" ] # dict_keys(['qid', 'text']) query_list = query_list.filter( @@ -69,7 +69,7 @@ def load_data(self, **kwargs): ) queries = {row["qid"]: row["text"] for row in query_list} - corpus_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + corpus_list = datasets.load_dataset(**self.metadata.dataset)[ "corpus" ] # dict_keys(['doc_id', 'text']) corpus_list = corpus_list.filter( @@ -77,7 +77,7 @@ def load_data(self, **kwargs): ) corpus = {row["doc_id"]: {"text": row["text"]} for row in corpus_list} - qrels_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + qrels_list = datasets.load_dataset(**self.metadata.dataset)[ "qrels" ] # dict_keys(['qid', 'doc_id']) qrels_list = qrels_list.filter( diff --git a/mteb/tasks/Retrieval/eng/LEMBQMSumRetrieval.py b/mteb/tasks/Retrieval/eng/LEMBQMSumRetrieval.py index c302e4758a..d569d278bb 100644 --- a/mteb/tasks/Retrieval/eng/LEMBQMSumRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBQMSumRetrieval.py @@ -72,17 +72,17 @@ def load_data(self, 
**kwargs): if self.data_loaded: return - query_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + query_list = datasets.load_dataset(**self.metadata.dataset)[ "queries" ] # dict_keys(['qid', 'text']) queries = {row["qid"]: row["text"] for row in query_list} - corpus_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + corpus_list = datasets.load_dataset(**self.metadata.dataset)[ "corpus" ] # dict_keys(['doc_id', 'text']) corpus = {row["doc_id"]: {"text": row["text"]} for row in corpus_list} - qrels_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + qrels_list = datasets.load_dataset(**self.metadata.dataset)[ "qrels" ] # dict_keys(['qid', 'doc_id']) qrels = {row["qid"]: {row["doc_id"]: 1} for row in qrels_list} diff --git a/mteb/tasks/Retrieval/eng/LEMBSummScreenFDRetrieval.py b/mteb/tasks/Retrieval/eng/LEMBSummScreenFDRetrieval.py index c2c6b6db03..bd5dff199d 100644 --- a/mteb/tasks/Retrieval/eng/LEMBSummScreenFDRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBSummScreenFDRetrieval.py @@ -59,17 +59,17 @@ def load_data(self, **kwargs): if self.data_loaded: return - query_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + query_list = datasets.load_dataset(**self.metadata.dataset)[ "queries" ] # dict_keys(['qid', 'text']) queries = {row["qid"]: row["text"] for row in query_list} - corpus_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + corpus_list = datasets.load_dataset(**self.metadata.dataset)[ "corpus" ] # dict_keys(['doc_id', 'text']) corpus = {row["doc_id"]: {"text": row["text"]} for row in corpus_list} - qrels_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + qrels_list = datasets.load_dataset(**self.metadata.dataset)[ "qrels" ] # dict_keys(['qid', 'doc_id']) qrels = {row["qid"]: {row["doc_id"]: 1} for row in qrels_list} diff --git a/mteb/tasks/Retrieval/eng/LEMBWikimQARetrieval.py b/mteb/tasks/Retrieval/eng/LEMBWikimQARetrieval.py index 04e8b3bb86..aa931d99d3 100644 --- 
a/mteb/tasks/Retrieval/eng/LEMBWikimQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/LEMBWikimQARetrieval.py @@ -47,17 +47,17 @@ def load_data(self, **kwargs): if self.data_loaded: return - query_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + query_list = datasets.load_dataset(**self.metadata.dataset)[ "queries" ] # dict_keys(['qid', 'text']) queries = {row["qid"]: row["text"] for row in query_list} - corpus_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + corpus_list = datasets.load_dataset(**self.metadata.dataset)[ "corpus" ] # dict_keys(['doc_id', 'text']) corpus = {row["doc_id"]: {"text": row["text"]} for row in corpus_list} - qrels_list = datasets.load_dataset(**self.metadata_dict["dataset"])[ + qrels_list = datasets.load_dataset(**self.metadata.dataset)[ "qrels" ] # dict_keys(['qid', 'doc_id']) qrels = {row["qid"]: {row["doc_id"]: 1} for row in qrels_list} diff --git a/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py b/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py index 2c823e85dd..af986761d4 100644 --- a/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py +++ b/mteb/tasks/Retrieval/eng/LitSearchRetrieval.py @@ -46,7 +46,7 @@ def load_data(self, **kwargs): if self.data_loaded: return self.corpus, self.queries, self.relevant_docs = {}, {}, {} - dataset_path = self.metadata_dict["dataset"]["path"] + dataset_path = self.metadata.dataset["path"] query_ds = datasets.load_dataset(dataset_path, "query") diff --git a/mteb/tasks/Retrieval/eng/MLQuestions.py b/mteb/tasks/Retrieval/eng/MLQuestions.py index 6b594be445..e5eae5de58 100644 --- a/mteb/tasks/Retrieval/eng/MLQuestions.py +++ b/mteb/tasks/Retrieval/eng/MLQuestions.py @@ -62,12 +62,12 @@ def load_data(self, **kwargs): if self.data_loaded: return self.corpus, self.queries, self.relevant_docs = {}, {}, {} - dataset_path = self.metadata_dict["dataset"]["path"] - revision = self.metadata_dict["dataset"].get("revision", None) + dataset_path = self.metadata.dataset["path"] + revision = 
self.metadata.dataset.get("revision", None) download_dir = snapshot_download( repo_id=dataset_path, repo_type="dataset", revision=revision ) - for split in kwargs.get("eval_splits", self.metadata_dict["eval_splits"]): + for split in kwargs.get("eval_splits", self.metadata.eval_splits): corpus, queries, qrels = self._load_data_for_split(download_dir, split) self.corpus[split], self.queries[split], self.relevant_docs[split] = ( corpus, diff --git a/mteb/tasks/Retrieval/eng/NarrativeQARetrieval.py b/mteb/tasks/Retrieval/eng/NarrativeQARetrieval.py index d973ec45ae..335e832de9 100644 --- a/mteb/tasks/Retrieval/eng/NarrativeQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/NarrativeQARetrieval.py @@ -50,7 +50,7 @@ def load_data(self, **kwargs): data = datasets.load_dataset( split=self._EVAL_SPLIT, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.queries = { self._EVAL_SPLIT: { diff --git a/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py b/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py index 3640881df0..6578eb9e77 100644 --- a/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/TopiOCQARetrieval.py @@ -57,8 +57,8 @@ def load_data(self, **kwargs): if self.data_loaded: return self.corpus, self.queries, self.relevant_docs = {}, {}, {} - dataset_path = self.metadata_dict["dataset"]["path"] - for split in kwargs.get("eval_splits", self.metadata_dict["eval_splits"]): + dataset_path = self.metadata.dataset["path"] + for split in kwargs.get("eval_splits", self.metadata.eval_splits): corpus, queries, qrels = self._load_data_for_split(dataset_path, split) self.corpus[split], self.queries[split], self.relevant_docs[split] = ( corpus, @@ -69,7 +69,7 @@ def load_data(self, **kwargs): self.data_loaded = True def _load_data_for_split(self, dataset_path, split): - revision = self.metadata_dict["dataset"].get("revision", None) + revision = self.metadata.dataset.get("revision", None) ds = load_dataset( dataset_path, split=split, diff --git 
a/mteb/tasks/Retrieval/fra/AlloprofRetrieval.py b/mteb/tasks/Retrieval/fra/AlloprofRetrieval.py index ada02b511b..fcb37c031d 100644 --- a/mteb/tasks/Retrieval/fra/AlloprofRetrieval.py +++ b/mteb/tasks/Retrieval/fra/AlloprofRetrieval.py @@ -48,13 +48,13 @@ def load_data(self, **kwargs): # fetch both subsets of the dataset corpus_raw = datasets.load_dataset( name="documents", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) queries_raw = datasets.load_dataset( name="queries", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) - eval_split = self.metadata_dict["eval_splits"][0] + eval_split = self.metadata.eval_splits[0] self.queries = { eval_split: {str(q["id"]): q["text"] for q in queries_raw[eval_split]} } diff --git a/mteb/tasks/Retrieval/fra/BSARDRetrieval.py b/mteb/tasks/Retrieval/fra/BSARDRetrieval.py index 93509c51fc..438c86d628 100644 --- a/mteb/tasks/Retrieval/fra/BSARDRetrieval.py +++ b/mteb/tasks/Retrieval/fra/BSARDRetrieval.py @@ -53,12 +53,12 @@ def load_data(self, **kwargs): corpus_raw = datasets.load_dataset( name="corpus", split="corpus", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) queries_raw = datasets.load_dataset( name="questions", split=self.metadata.eval_splits[0], - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.queries = { diff --git a/mteb/tasks/Retrieval/fra/FQuADRetrieval.py b/mteb/tasks/Retrieval/fra/FQuADRetrieval.py index 20a54b8232..7edb1a2d3e 100644 --- a/mteb/tasks/Retrieval/fra/FQuADRetrieval.py +++ b/mteb/tasks/Retrieval/fra/FQuADRetrieval.py @@ -56,7 +56,7 @@ def load_data(self, **kwargs): if self.data_loaded: return dataset_raw = datasets.load_dataset( - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) # set valid_hasAns and test_hasAns as the validation and test splits (only queries with answers) @@ -73,12 +73,12 @@ def load_data(self, **kwargs): eval_split: { str(i): q["question"] for i, q in enumerate(dataset_raw[eval_split]) } - for eval_split in 
self.metadata_dict["eval_splits"] + for eval_split in self.metadata.eval_splits } self.corpus = { eval_split: {str(row["title"]): row for row in dataset_raw[eval_split]} - for eval_split in self.metadata_dict["eval_splits"] + for eval_split in self.metadata.eval_splits } self.relevant_docs = { @@ -86,7 +86,7 @@ def load_data(self, **kwargs): str(i): {str(q["title"]): 1} for i, q in enumerate(dataset_raw[eval_split]) } - for eval_split in self.metadata_dict["eval_splits"] + for eval_split in self.metadata.eval_splits } self.data_loaded = True diff --git a/mteb/tasks/Retrieval/fra/SyntecRetrieval.py b/mteb/tasks/Retrieval/fra/SyntecRetrieval.py index 4e5dd52c51..9f3f0f0e04 100644 --- a/mteb/tasks/Retrieval/fra/SyntecRetrieval.py +++ b/mteb/tasks/Retrieval/fra/SyntecRetrieval.py @@ -47,14 +47,14 @@ def load_data(self, **kwargs): # fetch both subsets of the dataset corpus_raw = datasets.load_dataset( name="documents", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) queries_raw = datasets.load_dataset( name="queries", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) - eval_split = self.metadata_dict["eval_splits"][0] + eval_split = self.metadata.eval_splits[0] self.queries = { eval_split: { str(i): q["Question"] for i, q in enumerate(queries_raw[eval_split]) diff --git a/mteb/tasks/Retrieval/jpn/JaGovFaqsRetrieval.py b/mteb/tasks/Retrieval/jpn/JaGovFaqsRetrieval.py index 3960ab6f19..4a7a877c1a 100644 --- a/mteb/tasks/Retrieval/jpn/JaGovFaqsRetrieval.py +++ b/mteb/tasks/Retrieval/jpn/JaGovFaqsRetrieval.py @@ -44,7 +44,7 @@ def load_data(self, **kwargs): query_list = datasets.load_dataset( name="jagovfaqs_22k-query", split=_EVAL_SPLIT, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) # Limit the dataset size to make sure the task does not take too long to run, sample the dataset to 2048 queries @@ -59,7 +59,7 @@ def load_data(self, **kwargs): qrels[str(row_id)] = {str(row["relevant_docs"][0]): 1} corpus_list = 
datasets.load_dataset( - name="jagovfaqs_22k-corpus", split="corpus", **self.metadata_dict["dataset"] + name="jagovfaqs_22k-corpus", split="corpus", **self.metadata.dataset ) corpus = {str(row["docid"]): {"text": row["text"]} for row in corpus_list} diff --git a/mteb/tasks/Retrieval/jpn/JaQuADRetrieval.py b/mteb/tasks/Retrieval/jpn/JaQuADRetrieval.py index 07fb165632..0ad54e5653 100644 --- a/mteb/tasks/Retrieval/jpn/JaQuADRetrieval.py +++ b/mteb/tasks/Retrieval/jpn/JaQuADRetrieval.py @@ -43,8 +43,8 @@ def load_data(self, **kwargs): if self.data_loaded: return - split = self.metadata_dict["eval_splits"][0] - ds = load_dataset(**self.metadata_dict["dataset"], split=split) + split = self.metadata.eval_splits[0] + ds = load_dataset(**self.metadata.dataset, split=split) ds = ds.shuffle(seed=42) max_samples = min(2048, len(ds)) ds = ds.select( diff --git a/mteb/tasks/Retrieval/jpn/NLPJournalAbsIntroRetrieval.py b/mteb/tasks/Retrieval/jpn/NLPJournalAbsIntroRetrieval.py index d7b0a60adf..9553cdb84b 100644 --- a/mteb/tasks/Retrieval/jpn/NLPJournalAbsIntroRetrieval.py +++ b/mteb/tasks/Retrieval/jpn/NLPJournalAbsIntroRetrieval.py @@ -41,7 +41,7 @@ def load_data(self, **kwargs): query_list = datasets.load_dataset( name="nlp_journal_abs_intro-query", split=_EVAL_SPLIT, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) queries = {} @@ -53,7 +53,7 @@ def load_data(self, **kwargs): corpus_list = datasets.load_dataset( name="nlp_journal_abs_intro-corpus", split="corpus", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) corpus = {str(row["docid"]): {"text": row["text"]} for row in corpus_list} diff --git a/mteb/tasks/Retrieval/jpn/NLPJournalTitleAbsRetrieval.py b/mteb/tasks/Retrieval/jpn/NLPJournalTitleAbsRetrieval.py index 0a7be8965b..9926062e59 100644 --- a/mteb/tasks/Retrieval/jpn/NLPJournalTitleAbsRetrieval.py +++ b/mteb/tasks/Retrieval/jpn/NLPJournalTitleAbsRetrieval.py @@ -41,7 +41,7 @@ def load_data(self, **kwargs): query_list = 
datasets.load_dataset( name="nlp_journal_title_abs-query", split=_EVAL_SPLIT, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) queries = {} @@ -53,7 +53,7 @@ def load_data(self, **kwargs): corpus_list = datasets.load_dataset( name="nlp_journal_title_abs-corpus", split="corpus", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) corpus = {str(row["docid"]): {"text": row["text"]} for row in corpus_list} diff --git a/mteb/tasks/Retrieval/jpn/NLPJournalTitleIntroRetrieval.py b/mteb/tasks/Retrieval/jpn/NLPJournalTitleIntroRetrieval.py index dc4507adca..0ee9e607a9 100644 --- a/mteb/tasks/Retrieval/jpn/NLPJournalTitleIntroRetrieval.py +++ b/mteb/tasks/Retrieval/jpn/NLPJournalTitleIntroRetrieval.py @@ -41,7 +41,7 @@ def load_data(self, **kwargs): query_list = datasets.load_dataset( name="nlp_journal_title_intro-query", split=_EVAL_SPLIT, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) queries = {} @@ -53,7 +53,7 @@ def load_data(self, **kwargs): corpus_list = datasets.load_dataset( name="nlp_journal_title_intro-corpus", split="corpus", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) corpus = {str(row["docid"]): {"text": row["text"]} for row in corpus_list} diff --git a/mteb/tasks/Retrieval/kat/GeorgianFAQRetrieval.py b/mteb/tasks/Retrieval/kat/GeorgianFAQRetrieval.py index f870e999c9..c34b52b9ac 100644 --- a/mteb/tasks/Retrieval/kat/GeorgianFAQRetrieval.py +++ b/mteb/tasks/Retrieval/kat/GeorgianFAQRetrieval.py @@ -45,10 +45,10 @@ def load_data(self, **kwargs): relevant_docs = {_EVAL_SPLIT: {}} data = load_dataset( - self.metadata_dict["dataset"]["path"], + self.metadata.dataset["path"], split=_EVAL_SPLIT, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) question_ids = { question: _id for _id, question in enumerate(set(data["question"])) diff --git a/mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py 
b/mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py index 6e97786a77..9f935d1a9a 100644 --- a/mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py +++ b/mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py @@ -64,8 +64,8 @@ class CUREv1Retrieval(MultilingualTask, AbsTaskRetrieval): def _load_corpus(self, split: str, cache_dir: str | None = None): ds = load_dataset( - path=self.metadata_dict["dataset"]["path"], - revision=self.metadata_dict["dataset"]["revision"], + path=self.metadata.dataset["path"], + revision=self.metadata.dataset["revision"], name="corpus", split=split, cache_dir=cache_dir, @@ -79,8 +79,8 @@ def _load_corpus(self, split: str, cache_dir: str | None = None): def _load_qrels(self, split: str, cache_dir: str | None = None): ds = load_dataset( - path=self.metadata_dict["dataset"]["path"], - revision=self.metadata_dict["dataset"]["revision"], + path=self.metadata.dataset["path"], + revision=self.metadata.dataset["revision"], name="qrels", split=split, cache_dir=cache_dir, @@ -100,8 +100,8 @@ def _load_qrels(self, split: str, cache_dir: str | None = None): def _load_queries(self, split: str, language: str, cache_dir: str | None = None): ds = load_dataset( - path=self.metadata_dict["dataset"]["path"], - revision=self.metadata_dict["dataset"]["revision"], + path=self.metadata.dataset["path"], + revision=self.metadata.dataset["revision"], name=f"queries-{language}", split=split, cache_dir=cache_dir, diff --git a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py index 4ca7c5e495..3c7551e951 100644 --- a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py +++ b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py @@ -80,7 +80,7 @@ def load_data(self, **kwargs): lang_pair = _build_lang_pair(langs) dataset_raw[lang_pair] = datasets.load_dataset( name=hf_subset, - **self.metadata_dict["dataset"], + 
**self.metadata.dataset, )[split] queries[lang_pair] = {} diff --git a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py index f5c0262308..e846d5c83b 100644 --- a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py +++ b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py @@ -80,7 +80,7 @@ def load_data(self, **kwargs): lang_pair = _build_lang_pair(langs) dataset_raw[lang_pair] = datasets.load_dataset( name=hf_subset, - **self.metadata_dict["dataset"], + **self.metadata.dataset, )[split] queries[lang_pair] = {} diff --git a/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py b/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py index 2b21177297..beec172071 100644 --- a/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py @@ -143,11 +143,11 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = _load_miracl_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.hf_subsets, - splits=self.metadata_dict["eval_splits"], + splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True @@ -329,11 +329,11 @@ def load_data(self, **kwargs): self.corpus, self.queries, self.relevant_docs = ( _load_miracl_data_hard_negatives( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.hf_subsets, - splits=self.metadata_dict["eval_splits"], + splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) ) diff --git a/mteb/tasks/Retrieval/multilingual/MLQARetrieval.py 
b/mteb/tasks/Retrieval/multilingual/MLQARetrieval.py index c03f280b22..b8fd1a6ff1 100644 --- a/mteb/tasks/Retrieval/multilingual/MLQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MLQARetrieval.py @@ -127,7 +127,7 @@ def load_data(self, **kwargs): _dataset_raw[lang_pair] = datasets.load_dataset( name=hf_subset, - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.queries[lang_pair] = {} diff --git a/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py b/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py index 3a44ba4e09..b8a2eec9af 100644 --- a/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py @@ -107,12 +107,12 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = _load_mintaka_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.metadata.eval_langs, - split=self.metadata_dict["eval_splits"][0], + split=self.metadata.eval_splits[0], cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], - trust_remote_code=self.metadata_dict["dataset"]["trust_remote_code"], + revision=self.metadata.dataset["revision"], + trust_remote_code=self.metadata.dataset["trust_remote_code"], ) self.data_loaded = True diff --git a/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py b/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py index 0b65d3b8f8..7fe6c764d0 100644 --- a/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MrTidyRetrieval.py @@ -120,11 +120,11 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = _load_data_retrieval( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.hf_subsets, - splits=self.metadata_dict["eval_splits"], + splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - 
revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True diff --git a/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py b/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py index 025a34ef6a..998895e894 100644 --- a/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py @@ -108,10 +108,10 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = load_mldr_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.metadata.eval_langs, - eval_splits=self.metadata_dict["eval_splits"], + eval_splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True diff --git a/mteb/tasks/Retrieval/multilingual/NeuCLIR2022Retrieval.py b/mteb/tasks/Retrieval/multilingual/NeuCLIR2022Retrieval.py index 6c48a6731d..94c7bf39fb 100644 --- a/mteb/tasks/Retrieval/multilingual/NeuCLIR2022Retrieval.py +++ b/mteb/tasks/Retrieval/multilingual/NeuCLIR2022Retrieval.py @@ -93,11 +93,11 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = load_neuclir_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.metadata.eval_langs, - eval_splits=self.metadata_dict["eval_splits"], + eval_splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True @@ -207,11 +207,11 @@ def load_data(self, **kwargs): self.corpus, self.queries, self.relevant_docs = ( load_neuclir_data_hard_negatives( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.metadata.eval_langs, - 
eval_splits=self.metadata_dict["eval_splits"], + eval_splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) ) self.data_loaded = True diff --git a/mteb/tasks/Retrieval/multilingual/NeuCLIR2023Retrieval.py b/mteb/tasks/Retrieval/multilingual/NeuCLIR2023Retrieval.py index 88432333cc..5f0626146e 100644 --- a/mteb/tasks/Retrieval/multilingual/NeuCLIR2023Retrieval.py +++ b/mteb/tasks/Retrieval/multilingual/NeuCLIR2023Retrieval.py @@ -94,11 +94,11 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = load_neuclir_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.metadata.eval_langs, - eval_splits=self.metadata_dict["eval_splits"], + eval_splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True @@ -210,11 +210,11 @@ def load_data(self, **kwargs): self.corpus, self.queries, self.relevant_docs = ( load_neuclir_data_hard_negatives( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.metadata.eval_langs, - eval_splits=self.metadata_dict["eval_splits"], + eval_splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) ) self.data_loaded = True diff --git a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py index 6f7d188b7b..bf5e786a74 100644 --- a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py @@ -102,11 +102,11 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = 
_load_publichealthqa_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.hf_subsets, - split=self.metadata_dict["eval_splits"][0], + split=self.metadata.eval_splits[0], cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True diff --git a/mteb/tasks/Retrieval/multilingual/StatcanDialogueDatasetRetrieval.py b/mteb/tasks/Retrieval/multilingual/StatcanDialogueDatasetRetrieval.py index ab7e178c82..98e8a3aad7 100644 --- a/mteb/tasks/Retrieval/multilingual/StatcanDialogueDatasetRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/StatcanDialogueDatasetRetrieval.py @@ -107,11 +107,11 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = _load_statcan_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=list(_LANGS.keys()), - splits=self.metadata_dict["eval_splits"], + splits=self.metadata.eval_splits, cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True diff --git a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py index 01d240eb9d..bb0a5b63b1 100644 --- a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py @@ -102,11 +102,11 @@ def load_data(self, **kwargs): return self.corpus, self.queries, self.relevant_docs = _load_xmarket_data( - path=self.metadata_dict["dataset"]["path"], + path=self.metadata.dataset["path"], langs=self.metadata.eval_langs, - split=self.metadata_dict["eval_splits"][0], + split=self.metadata.eval_splits[0], cache_dir=kwargs.get("cache_dir", None), - revision=self.metadata_dict["dataset"]["revision"], + revision=self.metadata.dataset["revision"], ) self.data_loaded = True diff --git 
a/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py b/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py index 72cbbd6dab..b53049ac90 100644 --- a/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py @@ -98,9 +98,9 @@ def load_data(self, **kwargs): if self.data_loaded: return - path = self.metadata_dict["dataset"]["path"] - revision = self.metadata_dict["dataset"]["revision"] - eval_splits = self.metadata_dict["eval_splits"] + path = self.metadata.dataset["path"] + revision = self.metadata.dataset["revision"] + eval_splits = self.metadata.eval_splits dataset = _load_dataset_csv(path, revision, eval_splits) self.queries, self.corpus, self.relevant_docs = {}, {}, {} diff --git a/mteb/tasks/Retrieval/multilingual/XQuADRetrieval.py b/mteb/tasks/Retrieval/multilingual/XQuADRetrieval.py index 4d952896e3..ff2591ae7e 100644 --- a/mteb/tasks/Retrieval/multilingual/XQuADRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XQuADRetrieval.py @@ -76,9 +76,9 @@ def load_data(self, **kwargs): relevant_docs = {lang: {split: {}} for lang in self.hf_subsets} for lang in self.hf_subsets: - data = datasets.load_dataset( - name=f"xquad.{lang}", **self.metadata_dict["dataset"] - )[split] + data = datasets.load_dataset(name=f"xquad.{lang}", **self.metadata.dataset)[ + split + ] data = data.filter(lambda x: x["answers"]["text"] != "") question_ids = { diff --git a/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py b/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py index 0b26fd1079..e10c96b5a9 100644 --- a/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py +++ b/mteb/tasks/Retrieval/slk/SlovakSumRetrieval.py @@ -48,10 +48,10 @@ def load_data(self, **kwargs): if self.data_loaded: return self.corpus, self.queries, self.relevant_docs = {}, {}, {} - dataset_path = self.metadata_dict["dataset"]["path"] + dataset_path = self.metadata.dataset["path"] n_sample = 600 - for split in kwargs.get("eval_splits", self.metadata_dict["eval_splits"]): + for 
split in kwargs.get("eval_splits", self.metadata.eval_splits): split_ds = datasets.load_dataset( dataset_path, split=f"{split}[:{n_sample}]" ) diff --git a/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py b/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py index e136bd82f2..4cc37f6892 100644 --- a/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py +++ b/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2P.py @@ -62,17 +62,17 @@ def load_data(self, **kwargs): query_rows = datasets.load_dataset( name="queries", split="test", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) corpus_rows = datasets.load_dataset( name="corpus.documents", split="test", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) qrels_rows = datasets.load_dataset( name="qrels.s2p", split="test", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.queries = {"test": {row["_id"]: row["text"] for row in query_rows}} diff --git a/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2S.py b/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2S.py index 86b45f1f4c..3bedff74be 100644 --- a/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2S.py +++ b/mteb/tasks/Retrieval/spa/SpanishPassageRetrievalS2S.py @@ -62,17 +62,17 @@ def load_data(self, **kwargs): query_rows = datasets.load_dataset( name="queries", split="test", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) corpus_rows = datasets.load_dataset( name="corpus.sentences", split="test", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) qrels_rows = datasets.load_dataset( name="qrels.s2s", split="test", - **self.metadata_dict["dataset"], + **self.metadata.dataset, ) self.queries = {"test": {row["_id"]: row["text"] for row in query_rows}} diff --git a/mteb/tasks/Retrieval/tur/TurHistQuad.py b/mteb/tasks/Retrieval/tur/TurHistQuad.py index e7aa10ac96..02d61335a6 100644 --- a/mteb/tasks/Retrieval/tur/TurHistQuad.py +++ b/mteb/tasks/Retrieval/tur/TurHistQuad.py @@ -53,14 
+53,14 @@ def load_data(self, **kwargs) -> None: if self.data_loaded: return - self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) + self.dataset = datasets.load_dataset(**self.metadata.dataset) self.corpus = {} self.relevant_docs = {} self.queries = {} text2id = {} - for split in self.metadata_dict["eval_splits"]: + for split in self.metadata.eval_splits: ds: datasets.Dataset = self.dataset[split] # type: ignore ds = ds.shuffle(seed=42) max_samples = min(1024, len(ds)) diff --git a/mteb/tasks/Retrieval/vie/VieQuADRetrieval.py b/mteb/tasks/Retrieval/vie/VieQuADRetrieval.py index 07ec5aba8b..6560d2ec92 100644 --- a/mteb/tasks/Retrieval/vie/VieQuADRetrieval.py +++ b/mteb/tasks/Retrieval/vie/VieQuADRetrieval.py @@ -58,8 +58,8 @@ def load_data(self, **kwargs): seed = 42 random.seed(seed) - split = self.metadata_dict["eval_splits"][0] - ds = load_dataset(**self.metadata_dict["dataset"], split=split) + split = self.metadata.eval_splits[0] + ds = load_dataset(**self.metadata.dataset, split=split) ds = ds.shuffle(seed=seed) titles, questions, contexts, answers = [], [], [], [] diff --git a/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py b/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py index 34add4378e..19a798b7c5 100644 --- a/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py +++ b/mteb/tasks/STS/deu/GermanSTSBenchmarkSTS.py @@ -6,6 +6,9 @@ class GermanSTSBenchmarkSTS(AbsTaskSTS): + min_score = 0 + max_score = 5 + metadata = TaskMetadata( name="GermanSTSBenchmark", dataset={ @@ -35,10 +38,3 @@ class GermanSTSBenchmarkSTS(AbsTaskSTS): url={https://github.com/PhilipMay/stsb-multi-mt} }""", ) - - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict diff --git a/mteb/tasks/STS/eng/BiossesSTS.py b/mteb/tasks/STS/eng/BiossesSTS.py index ce54e37789..0198e0b4da 100644 --- a/mteb/tasks/STS/eng/BiossesSTS.py +++ b/mteb/tasks/STS/eng/BiossesSTS.py @@ 
-44,9 +44,5 @@ class BiossesSTS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/eng/STS12STS.py b/mteb/tasks/STS/eng/STS12STS.py index b222b42c66..d7d64fa916 100644 --- a/mteb/tasks/STS/eng/STS12STS.py +++ b/mteb/tasks/STS/eng/STS12STS.py @@ -6,6 +6,9 @@ class STS12STS(AbsTaskSTS): + min_score = 0 + max_score = 5 + metadata = TaskMetadata( name="STS12", dataset={ @@ -41,10 +44,3 @@ class STS12STS(AbsTaskSTS): series = {SemEval '12} }""", ) - - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict diff --git a/mteb/tasks/STS/eng/STS13STS.py b/mteb/tasks/STS/eng/STS13STS.py index 415eafbc23..0e70023c84 100644 --- a/mteb/tasks/STS/eng/STS13STS.py +++ b/mteb/tasks/STS/eng/STS13STS.py @@ -6,6 +6,9 @@ class STS13STS(AbsTaskSTS): + min_score = 0 + max_score = 5 + metadata = TaskMetadata( name="STS13", dataset={ @@ -35,10 +38,3 @@ class STS13STS(AbsTaskSTS): url={https://api.semanticscholar.org/CorpusID:10241043} }""", ) - - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict diff --git a/mteb/tasks/STS/eng/STS14STS.py b/mteb/tasks/STS/eng/STS14STS.py index 933cc124da..d686c4b50e 100644 --- a/mteb/tasks/STS/eng/STS14STS.py +++ b/mteb/tasks/STS/eng/STS14STS.py @@ -47,9 +47,5 @@ class STS14STS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/eng/STS15STS.py b/mteb/tasks/STS/eng/STS15STS.py 
index 99e81aa90f..dd1c49f1f0 100644 --- a/mteb/tasks/STS/eng/STS15STS.py +++ b/mteb/tasks/STS/eng/STS15STS.py @@ -45,9 +45,5 @@ class STS15STS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/eng/STS16STS.py b/mteb/tasks/STS/eng/STS16STS.py index 94c978d4fc..0077aca0dd 100644 --- a/mteb/tasks/STS/eng/STS16STS.py +++ b/mteb/tasks/STS/eng/STS16STS.py @@ -51,9 +51,5 @@ class STS16STS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/eng/STSBenchmarkSTS.py b/mteb/tasks/STS/eng/STSBenchmarkSTS.py index 099fba6773..1771e41725 100644 --- a/mteb/tasks/STS/eng/STSBenchmarkSTS.py +++ b/mteb/tasks/STS/eng/STSBenchmarkSTS.py @@ -6,6 +6,9 @@ class STSBenchmarkSTS(AbsTaskSTS): + min_score = 0 + max_score = 5 + metadata = TaskMetadata( name="STSBenchmark", dataset={ @@ -35,9 +38,5 @@ class STSBenchmarkSTS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/eng/SickrSTS.py b/mteb/tasks/STS/eng/SickrSTS.py index 1c93fff578..cde715c8b1 100644 --- a/mteb/tasks/STS/eng/SickrSTS.py +++ b/mteb/tasks/STS/eng/SickrSTS.py @@ -55,9 +55,5 @@ class SickrSTS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/fao/FaroeseSTS.py 
b/mteb/tasks/STS/fao/FaroeseSTS.py index 156485321a..dce9f0ce27 100644 --- a/mteb/tasks/STS/fao/FaroeseSTS.py +++ b/mteb/tasks/STS/fao/FaroeseSTS.py @@ -43,12 +43,8 @@ class FaroeseSTS(AbsTaskSTS): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 def dataset_transform(self): self.dataset = self.dataset.rename_column("label", "score") diff --git a/mteb/tasks/STS/fin/FinParaSTS.py b/mteb/tasks/STS/fin/FinParaSTS.py index 6ed513ade8..4697aa84e5 100644 --- a/mteb/tasks/STS/fin/FinParaSTS.py +++ b/mteb/tasks/STS/fin/FinParaSTS.py @@ -58,12 +58,8 @@ class FinParaSTS(AbsTaskSTS): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 2 - metadata_dict["max_score"] = 4 - return metadata_dict + min_score = 2 + max_score = 4 def dataset_transform(self): self.dataset = self.dataset.shuffle(seed=self.seed) diff --git a/mteb/tasks/STS/fra/SickFrSTS.py b/mteb/tasks/STS/fra/SickFrSTS.py index 241aa60163..a627d6243a 100644 --- a/mteb/tasks/STS/fra/SickFrSTS.py +++ b/mteb/tasks/STS/fra/SickFrSTS.py @@ -30,12 +30,8 @@ class SickFrSTS(AbsTaskSTS): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 def dataset_transform(self): self.dataset = self.dataset.rename_columns( diff --git a/mteb/tasks/STS/jpn/JSICK.py b/mteb/tasks/STS/jpn/JSICK.py index 554a3abf1d..2666071a7b 100644 --- a/mteb/tasks/STS/jpn/JSICK.py +++ b/mteb/tasks/STS/jpn/JSICK.py @@ -41,12 +41,8 @@ class JSICK(AbsTaskSTS): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 1 - 
metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 1 + max_score = 5 def dataset_transform(self) -> None: self.dataset = self.dataset.rename_column("label", "score") diff --git a/mteb/tasks/STS/jpn/JSTS.py b/mteb/tasks/STS/jpn/JSTS.py index bdd031c865..7bb2906e11 100644 --- a/mteb/tasks/STS/jpn/JSTS.py +++ b/mteb/tasks/STS/jpn/JSTS.py @@ -57,9 +57,5 @@ class JSTS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/kor/KlueSTS.py b/mteb/tasks/STS/kor/KlueSTS.py index af55fb5bc0..4b8b0e35cc 100644 --- a/mteb/tasks/STS/kor/KlueSTS.py +++ b/mteb/tasks/STS/kor/KlueSTS.py @@ -38,12 +38,8 @@ class KlueSTS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 def dataset_transform(self): # In the case of KLUE STS, score value is nested within the `labels` field. 
diff --git a/mteb/tasks/STS/kor/KorSTS.py b/mteb/tasks/STS/kor/KorSTS.py index 6ab1437bb1..b4cfc1b707 100644 --- a/mteb/tasks/STS/kor/KorSTS.py +++ b/mteb/tasks/STS/kor/KorSTS.py @@ -35,9 +35,5 @@ class KorSTS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py b/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py index 0f37f78a80..2c2f12f294 100644 --- a/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/IndicCrosslingualSTS.py @@ -65,9 +65,5 @@ class IndicCrosslingualSTS(AbsTaskSTS, MultilingualTask): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py b/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py index 0e7928fe8b..793357e2eb 100644 --- a/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py @@ -67,9 +67,5 @@ class STS17Crosslingual(AbsTaskSTS, MultilingualTask): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py b/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py index 0e294aeb5a..05bd05dd39 100644 --- a/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py @@ -79,12 +79,8 @@ class STS22CrosslingualSTSv2(AbsTaskSTS, MultilingualTask): }""", ) - @property - def metadata_dict(self) -> 
dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 1 - metadata_dict["max_score"] = 4 - return metadata_dict + min_score = 1 + max_score = 4 class STS22CrosslingualSTS(AbsTaskSTS, MultilingualTask): @@ -141,9 +137,5 @@ class STS22CrosslingualSTS(AbsTaskSTS, MultilingualTask): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 1 - metadata_dict["max_score"] = 4 - return metadata_dict + min_score = 1 + max_score = 4 diff --git a/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py b/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py index eaf5ff1afb..ad15498247 100644 --- a/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py +++ b/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py @@ -54,12 +54,8 @@ class STSBenchmarkMultilingualSTS(AbsTaskSTS, MultilingualTask): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 def dataset_transform(self) -> None: for lang, subset in self.dataset.items(): diff --git a/mteb/tasks/STS/multilingual/SemRel24STS.py b/mteb/tasks/STS/multilingual/SemRel24STS.py index ea503eb1b6..905f5cb332 100644 --- a/mteb/tasks/STS/multilingual/SemRel24STS.py +++ b/mteb/tasks/STS/multilingual/SemRel24STS.py @@ -65,12 +65,8 @@ class SemRel24STS(AbsTaskSTS, MultilingualTask): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = 0 + max_score = 1 def dataset_transform(self) -> None: for lang, subset in self.dataset.items(): diff --git a/mteb/tasks/STS/pol/PolishSTS.py b/mteb/tasks/STS/pol/PolishSTS.py index 9115f37996..64dcad0b60 100644 --- a/mteb/tasks/STS/pol/PolishSTS.py +++ 
b/mteb/tasks/STS/pol/PolishSTS.py @@ -59,12 +59,8 @@ class SickrPLSTS(AbsTaskSTS): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 1 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 1 + max_score = 5 class CdscrSTS(AbsTaskSTS): @@ -109,9 +105,5 @@ class CdscrSTS(AbsTaskSTS): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 1 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 1 + max_score = 5 diff --git a/mteb/tasks/STS/por/Assin2STS.py b/mteb/tasks/STS/por/Assin2STS.py index e96ae97c34..c79675e5fc 100644 --- a/mteb/tasks/STS/por/Assin2STS.py +++ b/mteb/tasks/STS/por/Assin2STS.py @@ -36,12 +36,8 @@ class Assin2STS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 1 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 1 + max_score = 5 def dataset_transform(self) -> None: self.dataset = self.dataset.rename_columns( diff --git a/mteb/tasks/STS/por/SickBrSTS.py b/mteb/tasks/STS/por/SickBrSTS.py index 5298ab5437..c3149d02cc 100644 --- a/mteb/tasks/STS/por/SickBrSTS.py +++ b/mteb/tasks/STS/por/SickBrSTS.py @@ -52,12 +52,8 @@ class SickBrSTS(AbsTaskSTS): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 1 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 1 + max_score = 5 def dataset_transform(self): self.dataset = self.stratified_subsampling( diff --git a/mteb/tasks/STS/ron/RonSTS.py b/mteb/tasks/STS/ron/RonSTS.py index 4941cba3e6..9c6331ce4c 100644 --- a/mteb/tasks/STS/ron/RonSTS.py +++ b/mteb/tasks/STS/ron/RonSTS.py @@ -38,9 +38,5 @@ class RonSTS(AbsTaskSTS): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = 
super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/rus/RUParaPhraserSTS.py b/mteb/tasks/STS/rus/RUParaPhraserSTS.py index 9174f2f661..961de39a1c 100644 --- a/mteb/tasks/STS/rus/RUParaPhraserSTS.py +++ b/mteb/tasks/STS/rus/RUParaPhraserSTS.py @@ -53,12 +53,8 @@ class RUParaPhraserSTS(AbsTaskSTS): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = -1 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = -1 + max_score = 1 def dataset_transform(self): self.dataset = self.dataset.rename_columns( diff --git a/mteb/tasks/STS/rus/RuSTSBenchmarkSTS.py b/mteb/tasks/STS/rus/RuSTSBenchmarkSTS.py index eca26691fa..06a50bc939 100644 --- a/mteb/tasks/STS/rus/RuSTSBenchmarkSTS.py +++ b/mteb/tasks/STS/rus/RuSTSBenchmarkSTS.py @@ -36,9 +36,5 @@ class RuSTSBenchmarkSTS(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/STS/spa/STSES.py b/mteb/tasks/STS/spa/STSES.py index 8bdbf227a2..40c2dcf7f1 100644 --- a/mteb/tasks/STS/spa/STSES.py +++ b/mteb/tasks/STS/spa/STSES.py @@ -49,12 +49,8 @@ class STSES(AbsTaskSTS): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 def dataset_transform(self): data = self.dataset[_EVAL_SPLIT] diff --git a/mteb/tasks/STS/zho/CMTEBSTS.py b/mteb/tasks/STS/zho/CMTEBSTS.py index e428e24156..dbb1193e3a 100644 --- a/mteb/tasks/STS/zho/CMTEBSTS.py +++ b/mteb/tasks/STS/zho/CMTEBSTS.py @@ -49,12 +49,8 @@ class ATEC(AbsTaskSTS): }""", ) - @property - def 
metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = 0 + max_score = 1 class BQ(AbsTaskSTS): @@ -90,12 +86,8 @@ class BQ(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = 0 + max_score = 1 class LCQMC(AbsTaskSTS): @@ -131,12 +123,8 @@ class LCQMC(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = 0 + max_score = 1 class PAWSX(AbsTaskSTS): @@ -172,12 +160,8 @@ class PAWSX(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = 0 + max_score = 1 class STSB(AbsTaskSTS): @@ -213,12 +197,8 @@ class STSB(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 class AFQMC(AbsTaskSTS): @@ -265,12 +245,8 @@ class AFQMC(AbsTaskSTS): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = 0 + max_score = 1 class QBQTC(AbsTaskSTS): @@ -298,9 +274,5 @@ class QBQTC(AbsTaskSTS): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 2 - return metadata_dict + min_score = 0 + max_score = 2 diff --git 
a/mteb/tasks/Summarization/eng/SummEvalSummarization.py b/mteb/tasks/Summarization/eng/SummEvalSummarization.py index 8f64d1bbf5..d217b78af6 100644 --- a/mteb/tasks/Summarization/eng/SummEvalSummarization.py +++ b/mteb/tasks/Summarization/eng/SummEvalSummarization.py @@ -40,12 +40,8 @@ class SummEvalSummarization(AbsTaskSummarization): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 class SummEvalSummarizationv2(AbsTaskSummarization): @@ -78,9 +74,5 @@ class SummEvalSummarizationv2(AbsTaskSummarization): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/mteb/tasks/Summarization/fra/SummEvalFrSummarization.py b/mteb/tasks/Summarization/fra/SummEvalFrSummarization.py index 660f03502e..fb1efe01a6 100644 --- a/mteb/tasks/Summarization/fra/SummEvalFrSummarization.py +++ b/mteb/tasks/Summarization/fra/SummEvalFrSummarization.py @@ -39,13 +39,8 @@ class SummEvalFrSummarization(AbsTaskSummarization): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - - return metadata_dict + min_score = 0 + max_score = 5 class SummEvalFrSummarizationv2(AbsTaskSummarization): @@ -78,10 +73,5 @@ class SummEvalFrSummarizationv2(AbsTaskSummarization): }""", ) - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 5 - - return metadata_dict + min_score = 0 + max_score = 5 diff --git a/scripts/data/create_task_table.py b/scripts/data/create_task_table.py index aa9b5f6ccc..e5b292a08a 100644 --- 
a/scripts/data/create_task_table.py +++ b/scripts/data/create_task_table.py @@ -138,7 +138,7 @@ def get_ds_stats(hf_hub_name): for task in MTEB().tasks: print("Task: ", task) if "dataset" in task.metadata_dict: - hub_name = hub_url = task.metadata_dict["dataset"]["path"] + hub_name = hub_url = task.metadata.dataset["path"] ds_stats = get_ds_stats(hub_name.split("/")[-1]) elif "beir_name" in task.metadata_dict: hub_name = hub_url = "BeIR/" + task.metadata_dict.get("beir_name") @@ -146,12 +146,12 @@ def get_ds_stats(hf_hub_name): if "cqadupstack" in hub_name: hub_url = "BeIR/cqadupstack-qrels" TABLE_STRING += "\n" + ONE_LINE.format( - f"[{task.metadata_dict['name']}]({task.metadata_dict['reference']})", + f"[{task.metadata.name}]({task.metadata.reference})", f"[{hub_name}](https://huggingface.co/datasets/{hub_url})", - task.metadata_dict["description"], - task.metadata_dict["type"], - task.metadata_dict["category"], - len(task.metadata_dict["eval_langs"]), + task.metadata.description, + task.metadata.type, + task.metadata.category, + len(task.metadata.eval_langs), *ds_stats, ) diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index 142b4b42ad..ab193aab29 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -851,12 +851,8 @@ def load_data(self, **kwargs): ) self.data_loaded = True - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = 0 + max_score = 1 class MockMultilingualSTSTask(AbsTaskSTS, MultilingualTask): @@ -946,12 +942,8 @@ def load_data(self, **kwargs): ) self.data_loaded = True - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = 0 + max_score = 1 class MockSummarizationTask(AbsTaskSummarization): 
@@ -1010,12 +1002,8 @@ def load_data(self, **kwargs): ) self.data_loaded = True - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = 0 + max_score = 1 class MockMultilingualSummarizationTask(AbsTaskSummarization, MultilingualTask): @@ -1118,12 +1106,8 @@ def load_data(self, **kwargs): ) self.data_loaded = True - @property - def metadata_dict(self) -> dict[str, str]: - metadata_dict = super().metadata_dict - metadata_dict["min_score"] = 0 - metadata_dict["max_score"] = 1 - return metadata_dict + min_score = 0 + max_score = 1 class MockRerankingTask(AbsTaskReranking): From cc829e5bacfe514da4a950ed4849cba6a7687160 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Fri, 10 Jan 2025 17:13:28 +0300 Subject: [PATCH 27/40] lint --- mteb/evaluation/evaluators/model_classes.py | 9 +++++---- mteb/leaderboard/app.py | 18 ++++++++---------- mteb/models/sentence_transformers_models.py | 2 -- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/mteb/evaluation/evaluators/model_classes.py b/mteb/evaluation/evaluators/model_classes.py index 7e66f22e65..ba5618bb05 100644 --- a/mteb/evaluation/evaluators/model_classes.py +++ b/mteb/evaluation/evaluators/model_classes.py @@ -448,10 +448,11 @@ def search_cross_encoder( if hasattr(self.model, "predict"): # can't take instructions, so add them here - queries_in_pair = [ - f"{q} {i}".strip() - for i, q in zip(instructions_in_pair, queries_in_pair) - ] + if instructions_in_pair[0] is not None: + queries_in_pair = [ + f"{q} {i}".strip() + for i, q in zip(instructions_in_pair, queries_in_pair) + ] scores = self.model.predict(list(zip(queries_in_pair, corpus_in_pair))) # type: ignore else: # may use the instructions in a unique way, so give them also diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index ba336f8ea5..d1383cf1a7 
100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -5,7 +5,6 @@ import logging import tempfile import time -from collections import defaultdict from pathlib import Path from urllib.parse import urlencode @@ -17,7 +16,6 @@ from mteb.caching import json_cache from mteb.leaderboard.figures import performance_size_plot, radar_chart from mteb.leaderboard.table import scores_to_tables -from mteb.models.overview import get_model_meta logger = logging.getLogger(__name__) @@ -143,28 +141,28 @@ def update_task_info(task_names: str) -> gr.DataFrame: ) lang_select = gr.Dropdown( all_results.languages, - value=list(sorted(default_results.languages)), + value=sorted(default_results.languages), multiselect=True, label="Language", info="Select languages to include.", ) type_select = gr.Dropdown( all_results.task_types, - value=list(sorted(default_results.task_types)), + value=sorted(default_results.task_types), multiselect=True, label="Task Type", info="Select task types to include.", ) domain_select = gr.Dropdown( all_results.domains, - value=list(sorted(default_results.domains)), + value=sorted(default_results.domains), multiselect=True, label="Domain", info="Select domains to include.", ) task_select = gr.Dropdown( all_results.task_names, - value=list(sorted(default_results.task_names)), + value=sorted(default_results.task_names), allow_custom_value=True, multiselect=True, label="Task", @@ -330,16 +328,16 @@ def on_benchmark_select(benchmark_name): benchmark = mteb.get_benchmark(benchmark_name) languages = [task.languages for task in benchmark.tasks if task.languages] languages = set(itertools.chain.from_iterable(languages)) - languages = list(sorted(languages)) + languages = sorted(languages) domains = [ task.metadata.domains for task in benchmark.tasks if task.metadata.domains ] domains = set(itertools.chain.from_iterable(domains)) types = {task.metadata.type for task in benchmark.tasks if task.metadata.type} languages, domains, types = ( - 
list(sorted(languages)), - list(sorted(domains)), - list(sorted(types)), + sorted(languages), + sorted(domains), + sorted(types), ) elapsed = time.time() - start_time benchmark_results = all_benchmark_results[benchmark_name] diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index fda4c96681..4e0ad6420f 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -323,7 +323,6 @@ training_datasets={"jinaai/negation-dataset": ["train"]}, ) - all_MiniLM_L12_v2 = ModelMeta( name="sentence-transformers/all-MiniLM-L12-v2", languages=["eng-Latn"], @@ -373,7 +372,6 @@ "embedding-data/PAQ_pairs": ["train"], "embedding-data/WikiAnswers": ["train"], }, - ) contriever = ModelMeta( From 4247e22b9eaf7a5af0419d116e00ee66ced786ee Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Sat, 11 Jan 2025 21:03:56 +0500 Subject: [PATCH 28/40] [v2] Remove memory usage (#1751) remove memory usage --- mteb/model_meta.py | 2 - mteb/models/arctic_models.py | 9 --- mteb/models/bge_models.py | 3 - mteb/models/bm25.py | 1 - mteb/models/cohere_models.py | 4 -- mteb/models/e5_instruct.py | 2 - mteb/models/e5_models.py | 9 --- mteb/models/google_models.py | 3 - mteb/models/gritlm_models.py | 2 - mteb/models/gte_models.py | 3 - mteb/models/ibm_granite_models.py | 4 -- mteb/models/jasper_models.py | 1 - mteb/models/linq_models.py | 1 - mteb/models/llm2vec_models.py | 8 --- mteb/models/misc_models.py | 63 --------------------- mteb/models/mxbai_models.py | 1 - mteb/models/no_instruct_sentence_models.py | 1 - mteb/models/nomic_models.py | 5 -- mteb/models/nvidia_models.py | 2 - mteb/models/openai_models.py | 3 - mteb/models/promptriever_models.py | 4 -- mteb/models/repllama_models.py | 2 - mteb/models/ru_sentence_models.py | 12 ---- mteb/models/salesforce_models.py | 3 - mteb/models/sentence_transformers_models.py | 13 ----- mteb/models/voyage_models.py | 9 --- 26 files changed, 170 deletions(-) diff 
--git a/mteb/model_meta.py b/mteb/model_meta.py index f9ffee4427..982c92cbea 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -59,7 +59,6 @@ class ModelMeta(BaseModel): name: The name of the model, ideally the name on huggingface. n_parameters: The number of parameters in the model, e.g. 7_000_000 for a 7M parameter model. Can be None if the the number of parameters is not known (e.g. for proprietary models) or if the loader returns a SentenceTransformer model from which it can be derived. - memory_usage: The amount of memory the model uses in GB. Can be None if the memory usage is not known (e.g. for proprietary models). max_tokens: The maximum number of tokens the model can handle. Can be None if the maximum number of tokens is not known (e.g. for proprietary models). embed_dim: The dimension of the embeddings produced by the model. Currently all models are assumed to produce fixed-size embeddings. @@ -92,7 +91,6 @@ class ModelMeta(BaseModel): languages: list[ISO_LANGUAGE_SCRIPT] | None loader: Callable[..., Encoder] | None = None n_parameters: int | None = None - memory_usage: float | None = None max_tokens: float | None = None embed_dim: int | None = None license: str | None = None diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py index e6ad5b1614..502b2c2576 100644 --- a/mteb/models/arctic_models.py +++ b/mteb/models/arctic_models.py @@ -98,7 +98,6 @@ open_weights=True, framework=["Sentence Transformers", "PyTorch"], n_parameters=109_000_000, - memory_usage=None, max_tokens=512, embed_dim=768, license="apache-2.0", @@ -132,7 +131,6 @@ open_weights=True, framework=["Sentence Transformers", "PyTorch"], n_parameters=22_600_000, - memory_usage=None, max_tokens=512, embed_dim=384, license="apache-2.0", @@ -157,7 +155,6 @@ open_weights=True, framework=["Sentence Transformers", "PyTorch"], n_parameters=32_200_000, - memory_usage=None, max_tokens=512, embed_dim=384, license="apache-2.0", @@ -182,7 +179,6 @@ open_weights=True, 
framework=["Sentence Transformers", "PyTorch"], n_parameters=109_000_000, - memory_usage=None, max_tokens=512, embed_dim=768, license="apache-2.0", @@ -207,7 +203,6 @@ open_weights=True, framework=["Sentence Transformers", "PyTorch"], n_parameters=137_000_000, - memory_usage=None, max_tokens=2048, embed_dim=768, license="apache-2.0", @@ -231,7 +226,6 @@ open_weights=True, framework=["Sentence Transformers", "PyTorch"], n_parameters=335_000_000, - memory_usage=None, max_tokens=512, embed_dim=1024, license="apache-2.0", @@ -258,7 +252,6 @@ open_weights=True, framework=["Sentence Transformers", "PyTorch"], n_parameters=109_000_000, - memory_usage=None, max_tokens=512, embed_dim=768, license="apache-2.0", @@ -283,7 +276,6 @@ open_weights=True, framework=["Sentence Transformers", "PyTorch"], n_parameters=305_000_000, - memory_usage=None, max_tokens=8192, embed_dim=768, license="apache-2.0", @@ -307,7 +299,6 @@ open_weights=True, framework=["Sentence Transformers", "PyTorch"], n_parameters=568_000_000, - memory_usage=None, max_tokens=8192, embed_dim=1024, license="apache-2.0", diff --git a/mteb/models/bge_models.py b/mteb/models/bge_models.py index d136ccd834..a84659769d 100644 --- a/mteb/models/bge_models.py +++ b/mteb/models/bge_models.py @@ -27,7 +27,6 @@ revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a", release_date="2023-09-12", # initial commit of hf model. n_parameters=24_000_000, - memory_usage=None, embed_dim=512, license="mit", max_tokens=512, @@ -51,7 +50,6 @@ revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a", release_date="2023-09-11", # initial commit of hf model. n_parameters=438_000_000, - memory_usage=None, embed_dim=768, license="mit", max_tokens=512, @@ -75,7 +73,6 @@ revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09", release_date="2023-09-12", # initial commit of hf model. 
n_parameters=1_340_000_000, - memory_usage=None, embed_dim=1024, license="mit", max_tokens=512, diff --git a/mteb/models/bm25.py b/mteb/models/bm25.py index 4231752702..117e0d145e 100644 --- a/mteb/models/bm25.py +++ b/mteb/models/bm25.py @@ -131,7 +131,6 @@ def encode(self, texts: list[str], **kwargs): revision="0_1_10", release_date="2024-07-10", ## release of version 0.1.10 n_parameters=None, - memory_usage=None, embed_dim=None, license=None, max_tokens=None, diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index 43a797342d..b5b887f8bb 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -227,7 +227,6 @@ def encode( revision="1", release_date="2023-11-02", n_parameters=None, - memory_usage=None, max_tokens=None, embed_dim=512, reference="https://cohere.com/blog/introducing-embed-v3", @@ -250,7 +249,6 @@ def encode( revision="1", release_date="2023-11-02", n_parameters=None, - memory_usage=None, max_tokens=512, embed_dim=1024, license=None, @@ -272,7 +270,6 @@ def encode( reference="https://cohere.com/blog/introducing-embed-v3", release_date="2023-11-02", n_parameters=None, - memory_usage=None, max_tokens=512, embed_dim=384, license=None, @@ -294,7 +291,6 @@ def encode( revision="1", release_date="2023-11-02", n_parameters=None, - memory_usage=None, max_tokens=512, embed_dim=384, license=None, diff --git a/mteb/models/e5_instruct.py b/mteb/models/e5_instruct.py index 312b7c671a..cbdc7c7e9d 100644 --- a/mteb/models/e5_instruct.py +++ b/mteb/models/e5_instruct.py @@ -36,7 +36,6 @@ use_instructions=True, reference="https://huggingface.co/intfloat/multilingual-e5-large-instruct", n_parameters=560_000_000, - memory_usage=None, embed_dim=1024, license="mit", max_tokens=514, @@ -71,7 +70,6 @@ use_instructions=True, reference="https://huggingface.co/intfloat/e5-mistral-7b-instruct", n_parameters=7_111_000_000, - memory_usage=None, embed_dim=4096, license="mit", max_tokens=32768, diff --git a/mteb/models/e5_models.py 
b/mteb/models/e5_models.py index 83f6dec08d..99d83f7f48 100644 --- a/mteb/models/e5_models.py +++ b/mteb/models/e5_models.py @@ -144,7 +144,6 @@ revision="fd1525a9fd15316a2d503bf26ab031a61d056e98", release_date=E5_PAPER_RELEASE_DATE, n_parameters=118_000_000, - memory_usage=None, embed_dim=384, license="mit", max_tokens=512, @@ -167,7 +166,6 @@ revision="d13f1b27baf31030b7fd040960d60d909913633f", release_date=E5_PAPER_RELEASE_DATE, n_parameters=278_000_000, - memory_usage=None, embed_dim=768, license="mit", max_tokens=514, @@ -191,7 +189,6 @@ revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb", release_date=E5_PAPER_RELEASE_DATE, n_parameters=560_000_000, - memory_usage=None, embed_dim=1024, license="mit", max_tokens=514, @@ -214,7 +211,6 @@ revision="dca8b1a9dae0d4575df2bf423a5edb485a431236", release_date=E5_PAPER_RELEASE_DATE, n_parameters=33_000_000, - memory_usage=None, embed_dim=384, license="mit", max_tokens=512, @@ -238,7 +234,6 @@ revision="e272f3049e853b47cb5ca3952268c6662abda68f", release_date=E5_PAPER_RELEASE_DATE, n_parameters=33_000_000, - memory_usage=None, embed_dim=384, license="mit", max_tokens=512, @@ -262,7 +257,6 @@ revision="1c644c92ad3ba1efdad3f1451a637716616a20e8", release_date=E5_PAPER_RELEASE_DATE, n_parameters=109_000_000, - memory_usage=None, embed_dim=768, license="mit", max_tokens=512, @@ -288,7 +282,6 @@ revision="b322e09026e4ea05f42beadf4d661fb4e101d311", release_date=E5_PAPER_RELEASE_DATE, n_parameters=335_000_000, - memory_usage=None, embed_dim=1024, license="mit", max_tokens=514, @@ -314,7 +307,6 @@ revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81", release_date="2022-12-26", n_parameters=335_000_000, - memory_usage=None, embed_dim=1024, license="apache-2.0", max_tokens=512, @@ -340,7 +332,6 @@ revision="b533fe4636f4a2507c08ddab40644d20b0006d6a", release_date="2022-12-26", n_parameters=109_000_000, - memory_usage=None, embed_dim=768, license="apache-2.0", max_tokens=512, diff --git a/mteb/models/google_models.py 
b/mteb/models/google_models.py index 4fcd21ae6e..d70a5b210f 100644 --- a/mteb/models/google_models.py +++ b/mteb/models/google_models.py @@ -145,7 +145,6 @@ def encode( revision="1", # revision is intended for implementation release_date="2024-05-14", n_parameters=None, - memory_usage=None, max_tokens=2048, embed_dim=768, license=None, @@ -166,7 +165,6 @@ def encode( revision="1", # revision is intended for implementation release_date="2024-11-18", n_parameters=None, - memory_usage=None, max_tokens=2048, embed_dim=768, license=None, @@ -187,7 +185,6 @@ def encode( revision="1", # revision is intended for implementation release_date="2024-05-14", n_parameters=None, - memory_usage=None, max_tokens=2048, embed_dim=768, license=None, diff --git a/mteb/models/gritlm_models.py b/mteb/models/gritlm_models.py index ccdcda01d5..a7be0889d0 100644 --- a/mteb/models/gritlm_models.py +++ b/mteb/models/gritlm_models.py @@ -43,7 +43,6 @@ def gritlm_instruction(instruction: str = "", prompt_type=None) -> str: release_date="2024-02-15", training_datasets={"GritLM/tulu2": ["train"]}, n_parameters=7_240_000_000, - memory_usage=None, embed_dim=4096, license="apache-2.0", max_tokens=4096, @@ -68,7 +67,6 @@ def gritlm_instruction(instruction: str = "", prompt_type=None) -> str: revision="7f089b13e3345510281733ca1e6ff871b5b4bc76", release_date="2024-02-15", n_parameters=57_920_000_000, - memory_usage=None, embed_dim=4096, license="apache-2.0", max_tokens=4096, diff --git a/mteb/models/gte_models.py b/mteb/models/gte_models.py index 80f00618c9..456774a018 100644 --- a/mteb/models/gte_models.py +++ b/mteb/models/gte_models.py @@ -48,7 +48,6 @@ def instruction_template( revision="e26182b2122f4435e8b3ebecbf363990f409b45b", release_date="2024-06-15", # initial commit of hf model. 
n_parameters=7_613_000_000, - memory_usage=None, embed_dim=3584, license="apache-2.0", reference="https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct", @@ -77,7 +76,6 @@ def instruction_template( revision="07d27e5226328010336563bc1b564a5e3436a298", release_date="2024-04-20", # initial commit of hf model. n_parameters=7_720_000_000, - memory_usage=None, embed_dim=4096, license="apache-2.0", max_tokens=32768, @@ -106,7 +104,6 @@ def instruction_template( revision="c6c1b92f4a3e1b92b326ad29dd3c8433457df8dd", release_date="2024-07-29", # initial commit of hf model. n_parameters=1_780_000_000, - memory_usage=None, embed_dim=8960, license="apache-2.0", max_tokens=131072, diff --git a/mteb/models/ibm_granite_models.py b/mteb/models/ibm_granite_models.py index c2443de233..394ed2d5f5 100644 --- a/mteb/models/ibm_granite_models.py +++ b/mteb/models/ibm_granite_models.py @@ -33,7 +33,6 @@ revision="47db56afe692f731540413c67dd818ff492277e7", release_date="2024-12-18", n_parameters=107_000_000, - memory_usage=None, embed_dim=384, license="apache-2.0", max_tokens=512, @@ -56,7 +55,6 @@ revision="84e3546b88b0cb69f8078608a1df558020bcbf1f", release_date="2024-12-18", n_parameters=278_000_000, - memory_usage=None, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -79,7 +77,6 @@ revision="eddbb57470f896b5f8e2bfcb823d8f0e2d2024a5", release_date="2024-12-18", n_parameters=30_000_000, - memory_usage=None, embed_dim=384, license="apache-2.0", max_tokens=512, @@ -102,7 +99,6 @@ revision="e48d3a5b47eaa18e3fe07d4676e187fd80f32730", release_date="2024-12-18", n_parameters=125_000_000, - memory_usage=None, embed_dim=768, license="apache-2.0", max_tokens=512, diff --git a/mteb/models/jasper_models.py b/mteb/models/jasper_models.py index 60fa4f6975..970b487ea9 100644 --- a/mteb/models/jasper_models.py +++ b/mteb/models/jasper_models.py @@ -80,7 +80,6 @@ def encode( revision="d6330ce98f8a0d741e781df845904c9484f00efa", release_date="2024-12-11", # first commit 
n_parameters=1_999_000_000, - memory_usage=None, max_tokens=131072, embed_dim=8960, license="apache-2.0", diff --git a/mteb/models/linq_models.py b/mteb/models/linq_models.py index 4babbf75cf..e67ec7dec5 100644 --- a/mteb/models/linq_models.py +++ b/mteb/models/linq_models.py @@ -32,7 +32,6 @@ def instruction_template( revision="0c1a0b0589177079acc552433cad51d7c9132379", release_date="2024-05-29", # initial commit of hf model. n_parameters=7_110_000_000, - memory_usage=None, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, diff --git a/mteb/models/llm2vec_models.py b/mteb/models/llm2vec_models.py index ecee6795bc..cf85c591c8 100644 --- a/mteb/models/llm2vec_models.py +++ b/mteb/models/llm2vec_models.py @@ -104,7 +104,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="baa8ebf04a1c2500e61288e7dad65e8ae42601a7", # TODO: Not sure what to put here as a model is made of two peft repos, each with a different revision release_date="2024-04-09", n_parameters=7_505_000_000, - memory_usage=None, max_tokens=8192, embed_dim=4096, license="mit", @@ -129,7 +128,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="1cb7b735326d13a8541db8f57f35da5373f5e9c6", release_date="2024-04-09", n_parameters=7_505_000_000, - memory_usage=None, max_tokens=8192, embed_dim=4096, license="mit", @@ -155,7 +153,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="0ae69bdd5816105778b971c3138e8f8a18eaa3ae", release_date="2024-04-09", n_parameters=7_111_000_000, - memory_usage=None, max_tokens=32768, embed_dim=4096, license="mit", @@ -180,7 +177,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="2c055a5d77126c0d3dc6cd8ffa30e2908f4f45f8", release_date="2024-04-09", n_parameters=7_111_000_000, - memory_usage=None, max_tokens=32768, embed_dim=4096, license="mit", @@ -205,7 +201,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="2c055a5d77126c0d3dc6cd8ffa30e2908f4f45f8", release_date="2024-04-09", n_parameters=7_111_000_000, - memory_usage=None, 
max_tokens=32768, embed_dim=4096, license="mit", @@ -230,7 +225,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="a76944871d169ebe7c97eb921764cd063afed785", release_date="2024-04-09", n_parameters=7_111_000_000, - memory_usage=None, max_tokens=32768, embed_dim=4096, license="mit", @@ -255,7 +249,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="a5943d406c6b016fef3f07906aac183cf1a0b47d", release_date="2024-04-09", n_parameters=7_111_000_000, - memory_usage=None, max_tokens=32768, embed_dim=4096, license="mit", @@ -280,7 +273,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="a5943d406c6b016fef3f07906aac183cf1a0b47d", release_date="2024-04-09", n_parameters=7_111_000_000, - memory_usage=None, max_tokens=32768, embed_dim=4096, license="mit", diff --git a/mteb/models/misc_models.py b/mteb/models/misc_models.py index d05461af17..488a5c8f06 100644 --- a/mteb/models/misc_models.py +++ b/mteb/models/misc_models.py @@ -13,7 +13,6 @@ languages=["eng_Latn"], loader=None, n_parameters=7110660096, - memory_usage=None, max_tokens=32768.0, embed_dim=None, license="mit", @@ -35,7 +34,6 @@ languages=[], loader=None, n_parameters=278043648, - memory_usage=None, max_tokens=514.0, embed_dim=768, license=None, @@ -57,7 +55,6 @@ languages=None, loader=None, n_parameters=494032768, - memory_usage=None, max_tokens=131072.0, embed_dim=896, license="mit", @@ -79,7 +76,6 @@ languages=None, loader=None, n_parameters=494032768, - memory_usage=None, max_tokens=131072.0, embed_dim=896, license="mit", @@ -101,7 +97,6 @@ languages=["eng_Latn"], loader=None, n_parameters=None, - memory_usage=None, max_tokens=None, embed_dim=768, license="apache-2.0", @@ -156,7 +151,6 @@ languages=[], loader=None, n_parameters=2506172416, - memory_usage=None, max_tokens=8192.0, embed_dim=2048, license=None, @@ -178,7 +172,6 @@ languages=["eng_Latn"], loader=None, n_parameters=7241732096, - memory_usage=None, max_tokens=32768.0, embed_dim=None, license="apache-2.0", @@ -205,7 +198,6 @@ 
trust_remote_code=True, ), n_parameters=278043648, - memory_usage=None, max_tokens=514.0, embed_dim=768, license="apache-2.0", @@ -232,7 +224,6 @@ trust_remote_code=True, ), n_parameters=559890432, - memory_usage=None, max_tokens=514.0, embed_dim=1024, license="apache-2.0", @@ -259,7 +250,6 @@ trust_remote_code=True, ), n_parameters=117653760, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -281,7 +271,6 @@ languages=None, loader=None, n_parameters=17389824, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -303,7 +292,6 @@ languages=None, loader=None, n_parameters=22713216, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -325,7 +313,6 @@ languages=None, loader=None, n_parameters=15615360, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -347,7 +334,6 @@ languages=None, loader=None, n_parameters=15615360, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -369,7 +355,6 @@ languages=None, loader=None, n_parameters=17389824, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -391,7 +376,6 @@ languages=None, loader=None, n_parameters=17389824, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -413,7 +397,6 @@ languages=None, loader=None, n_parameters=19164288, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -435,7 +418,6 @@ languages=["fra_Latn"], loader=None, n_parameters=559890432, - memory_usage=None, max_tokens=514.0, embed_dim=1024, license="mit", @@ -457,7 +439,6 @@ languages=["ara_Arab"], loader=None, n_parameters=135193344, - memory_usage=None, max_tokens=512.0, embed_dim=768, license="apache-2.0", @@ -479,7 +460,6 @@ languages=["ara_Arab"], loader=None, n_parameters=117653760, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -501,7 +481,6 @@ languages=["ara_Arab"], loader=None, n_parameters=278043648, - memory_usage=None, max_tokens=514.0, 
embed_dim=768, license="apache-2.0", @@ -523,7 +502,6 @@ languages=["ara_Arab"], loader=None, n_parameters=470926848, - memory_usage=None, max_tokens=512.0, embed_dim=768, license="apache-2.0", @@ -545,7 +523,6 @@ languages=["ara_Arab"], loader=None, n_parameters=109486464, - memory_usage=None, max_tokens=514.0, embed_dim=768, license="apache-2.0", @@ -567,7 +544,6 @@ languages=["ara_Arab"], loader=None, n_parameters=162841344, - memory_usage=None, max_tokens=512.0, embed_dim=768, license="apache-2.0", @@ -589,7 +565,6 @@ languages=None, loader=None, n_parameters=None, - memory_usage=None, max_tokens=512.0, embed_dim=1024, license="apache-2.0", @@ -611,7 +586,6 @@ languages=None, loader=None, n_parameters=None, - memory_usage=None, max_tokens=514.0, embed_dim=768, license=None, @@ -633,7 +607,6 @@ languages=None, loader=None, n_parameters=567754752, - memory_usage=None, max_tokens=8194.0, embed_dim=1024, license=None, @@ -655,7 +628,6 @@ languages=None, loader=None, n_parameters=1279887360, - memory_usage=None, max_tokens=2048.0, embed_dim=2048, license=None, @@ -677,7 +649,6 @@ languages=None, loader=None, n_parameters=1279887360, - memory_usage=None, max_tokens=2048.0, embed_dim=2048, license=None, @@ -699,7 +670,6 @@ languages=["fra_Latn", "eng_Latn"], loader=None, n_parameters=1279887360, - memory_usage=None, max_tokens=2048.0, embed_dim=2048, license="mit", @@ -721,7 +691,6 @@ languages=["eng_Latn"], loader=None, n_parameters=109482752, - memory_usage=None, max_tokens=512.0, embed_dim=768, license="mit", @@ -743,7 +712,6 @@ languages=["eng_Latn"], loader=None, n_parameters=335142400, - memory_usage=None, max_tokens=512.0, embed_dim=1024, license="mit", @@ -765,7 +733,6 @@ languages=["eng_Latn"], loader=None, n_parameters=33360512, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -787,7 +754,6 @@ languages=["pol_Latn"], loader=None, n_parameters=103705344, - memory_usage=None, max_tokens=512.0, embed_dim=768, license="gpl-3.0", @@ -809,7 
+775,6 @@ languages=["pol_Latn"], loader=None, n_parameters=None, - memory_usage=None, max_tokens=514.0, embed_dim=768, license="lgpl", @@ -831,7 +796,6 @@ languages=["pol_Latn"], loader=None, n_parameters=278043648, - memory_usage=None, max_tokens=514.0, embed_dim=768, license="apache-2.0", @@ -853,7 +817,6 @@ languages=["eng_Latn"], loader=None, n_parameters=None, - memory_usage=None, max_tokens=4096.0, embed_dim=None, license="mit", @@ -875,7 +838,6 @@ languages=["pol_Latn"], loader=None, n_parameters=559890432, - memory_usage=None, max_tokens=514.0, embed_dim=1024, license="apache-2.0", @@ -897,7 +859,6 @@ languages=["pol_Latn"], loader=None, n_parameters=117653760, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -919,7 +880,6 @@ languages=["pol_Latn"], loader=None, n_parameters=124442880, - memory_usage=None, max_tokens=514.0, embed_dim=768, license="apache-2.0", @@ -941,7 +901,6 @@ languages=["pol_Latn"], loader=None, n_parameters=434961408, - memory_usage=None, max_tokens=514.0, embed_dim=1024, license="apache-2.0", @@ -1009,7 +968,6 @@ ], loader=None, n_parameters=None, - memory_usage=None, max_tokens=None, embed_dim=None, license="bigscience-bloom-rail-1.0", @@ -1077,7 +1035,6 @@ ], loader=None, n_parameters=None, - memory_usage=None, max_tokens=None, embed_dim=None, license="bigscience-bloom-rail-1.0", @@ -1145,7 +1102,6 @@ ], loader=None, n_parameters=None, - memory_usage=None, max_tokens=None, embed_dim=None, license="bigscience-bloom-rail-1.0", @@ -1213,7 +1169,6 @@ ], loader=None, n_parameters=None, - memory_usage=None, max_tokens=None, embed_dim=None, license="bigscience-bloom-rail-1.0", @@ -1235,7 +1190,6 @@ languages=["eng_Latn"], loader=None, n_parameters=109482240, - memory_usage=None, max_tokens=512.0, embed_dim=768, license="mit", @@ -1257,7 +1211,6 @@ languages=["eng_Latn"], loader=None, n_parameters=22713216, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -1279,7 +1232,6 @@ 
languages=["eng_Latn"], loader=None, n_parameters=335141888, - memory_usage=None, max_tokens=512.0, embed_dim=1024, license="mit", @@ -1301,7 +1253,6 @@ languages=["eng_Latn"], loader=None, n_parameters=33360000, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -1323,7 +1274,6 @@ languages=None, loader=None, n_parameters=None, - memory_usage=None, max_tokens=None, embed_dim=4096, license=None, @@ -1345,7 +1295,6 @@ languages=["deu_Latn"], loader=None, n_parameters=335736320, - memory_usage=None, max_tokens=512.0, embed_dim=1024, license=None, @@ -1367,7 +1316,6 @@ languages=["eng_Latn"], loader=None, n_parameters=33360000, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -1395,7 +1343,6 @@ languages=["eng_Latn"], loader=None, n_parameters=33360000, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="mit", @@ -1417,7 +1364,6 @@ languages=["eng_Latn"], loader=None, n_parameters=22713216, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -1439,7 +1385,6 @@ languages=None, loader=None, n_parameters=None, - memory_usage=None, max_tokens=514.0, embed_dim=768, license=None, @@ -1461,7 +1406,6 @@ languages=["rus_Cyrl"], loader=None, n_parameters=359026688, - memory_usage=None, max_tokens=8194.0, embed_dim=1024, license="apache-2.0", @@ -1494,7 +1438,6 @@ languages=["eng_Latn"], loader=None, n_parameters=None, - memory_usage=None, max_tokens=512.0, embed_dim=None, license="mit", @@ -1516,7 +1459,6 @@ languages=None, loader=None, n_parameters=98688000, - memory_usage=None, max_tokens=512.0, embed_dim=1024, license=None, @@ -1538,7 +1480,6 @@ languages=["ara_Arab", "eng_Latn"], loader=None, n_parameters=559890432, - memory_usage=None, max_tokens=514.0, embed_dim=1024, license="apache-2.0", @@ -1570,7 +1511,6 @@ release_date="2024-09-04", languages=["zho_Hans", "eng_Latn"], n_parameters=2724880896, - memory_usage=None, max_tokens=512.0, embed_dim=2304, license=None, @@ -1602,7 +1542,6 
@@ ], loader=None, n_parameters=117654272, - memory_usage=None, max_tokens=512.0, embed_dim=384, license="apache-2.0", @@ -1624,7 +1563,6 @@ languages=["ara_Arab", "eng_Latn"], loader=None, n_parameters=135193344, - memory_usage=None, max_tokens=512.0, embed_dim=768, license="apache-2.0", @@ -1646,7 +1584,6 @@ languages=["eng_Latn"], loader=None, n_parameters=7110660096, - memory_usage=None, max_tokens=32768.0, embed_dim=4096, license="mit", diff --git a/mteb/models/mxbai_models.py b/mteb/models/mxbai_models.py index 5507bab8c0..f9c8a013f5 100644 --- a/mteb/models/mxbai_models.py +++ b/mteb/models/mxbai_models.py @@ -19,7 +19,6 @@ revision="990580e27d329c7408b3741ecff85876e128e203", release_date="2024-03-07", # initial commit of hf model. n_parameters=335_000_000, - memory_usage=None, max_tokens=512, embed_dim=1024, license="apache-2.0", diff --git a/mteb/models/no_instruct_sentence_models.py b/mteb/models/no_instruct_sentence_models.py index 019cfe7e04..4924e316f9 100644 --- a/mteb/models/no_instruct_sentence_models.py +++ b/mteb/models/no_instruct_sentence_models.py @@ -90,7 +90,6 @@ def encode( # type: ignore revision="b38747000553d8268915c95a55fc87e707c9aadd", release_date="2024-05-01", # first commit n_parameters=33_400_000, - memory_usage=None, max_tokens=512, embed_dim=384, license="mit", diff --git a/mteb/models/nomic_models.py b/mteb/models/nomic_models.py index 3e743500dd..f8c9cf0c7e 100644 --- a/mteb/models/nomic_models.py +++ b/mteb/models/nomic_models.py @@ -129,7 +129,6 @@ def encode( # type: ignore release_date="2024-02-10", # first commit citation=NOMIC_CITATION, n_parameters=137_000_000, - memory_usage=None, max_tokens=8192, embed_dim=768, license="apache-2.0", @@ -155,7 +154,6 @@ def encode( # type: ignore revision="0759316f275aa0cb93a5b830973843ca66babcf5", release_date="2024-01-31", # first commit n_parameters=None, - memory_usage=None, max_tokens=8192, embed_dim=768, license="apache-2.0", @@ -182,7 +180,6 @@ def encode( # type: ignore 
revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f", release_date="2024-01-15", # first commit n_parameters=None, - memory_usage=None, max_tokens=8192, embed_dim=768, license="apache-2.0", @@ -209,7 +206,6 @@ def encode( # type: ignore revision="b53d557b15ae63852847c222d336c1609eced93c", release_date="2024-01-15", # first commit n_parameters=None, - memory_usage=None, max_tokens=8192, embed_dim=768, license="apache-2.0", @@ -237,7 +233,6 @@ def encode( # type: ignore revision="5960f1566fb7cb1adf1eb6e816639cf4646d9b12", release_date="2024-12-29", n_parameters=149_000_000, - memory_usage=None, max_tokens=8192, embed_dim=768, license="apache-2.0", diff --git a/mteb/models/nvidia_models.py b/mteb/models/nvidia_models.py index 72274b41de..08b1072cc2 100644 --- a/mteb/models/nvidia_models.py +++ b/mteb/models/nvidia_models.py @@ -84,7 +84,6 @@ def encode( revision="7604d305b621f14095a1aa23d351674c2859553a", release_date="2024-09-09", # initial commit of hf model. n_parameters=7_850_000_000, - memory_usage=None, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, @@ -106,7 +105,6 @@ def encode( revision="570834afd5fef5bf3a3c2311a2b6e0a66f6f4f2c", release_date="2024-09-13", # initial commit of hf model. 
n_parameters=7_850_000_000, - memory_usage=None, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index aecacf549a..463fdb81b7 100644 --- a/mteb/models/openai_models.py +++ b/mteb/models/openai_models.py @@ -130,7 +130,6 @@ def _to_numpy(self, embedding_response) -> np.ndarray: embed_dim=1536, open_weights=False, n_parameters=None, - memory_usage=None, license=None, reference="https://openai.com/index/new-embedding-models-and-api-updates/", similarity_fn_name="cosine", @@ -155,7 +154,6 @@ def _to_numpy(self, embedding_response) -> np.ndarray: framework=["API"], use_instructions=False, n_parameters=None, - memory_usage=None, ) text_embedding_ada_002 = ModelMeta( name="openai/text-embedding-ada-002", @@ -175,5 +173,4 @@ def _to_numpy(self, embedding_response) -> np.ndarray: framework=["API"], use_instructions=False, n_parameters=None, - memory_usage=None, ) diff --git a/mteb/models/promptriever_models.py b/mteb/models/promptriever_models.py index 1a58397d74..4bced3a59d 100644 --- a/mteb/models/promptriever_models.py +++ b/mteb/models/promptriever_models.py @@ -69,7 +69,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="01c7f73d771dfac7d292323805ebc428287df4f9-30b14e3813c0fa45facfd01a594580c3fe5ecf23", # base-peft revision release_date="2024-09-15", n_parameters=7_000_000, - memory_usage=None, max_tokens=4096, embed_dim=4096, license="apache-2.0", @@ -96,7 +95,6 @@ def loader_inner(**kwargs: Any) -> Encoder: training_datasets={"samaya-ai/msmarco-w-instructions": ["train"]}, release_date="2024-09-15", n_parameters=8_000_000, - memory_usage=None, max_tokens=8192, embed_dim=4096, license="apache-2.0", @@ -122,7 +120,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="5206a32e0bd3067aef1ce90f5528ade7d866253f-8b677258615625122c2eb7329292b8c402612c21", # base-peft revision release_date="2024-09-15", n_parameters=8_000_000, - memory_usage=None, max_tokens=8192, embed_dim=4096, 
training_datasets={"samaya-ai/msmarco-w-instructions": ["train"]}, @@ -149,7 +146,6 @@ def loader_inner(**kwargs: Any) -> Encoder: release_date="2024-09-15", n_parameters=7_000_000, training_datasets={"samaya-ai/msmarco-w-instructions": ["train"]}, - memory_usage=None, max_tokens=4096, embed_dim=4096, license="apache-2.0", diff --git a/mteb/models/repllama_models.py b/mteb/models/repllama_models.py index 49855865cf..3df7199084 100644 --- a/mteb/models/repllama_models.py +++ b/mteb/models/repllama_models.py @@ -151,7 +151,6 @@ def loader_inner(**kwargs: Any) -> Encoder: release_date="2023-10-11", training_datasets={"Tevatron/msmarco-passage-aug": ["train"]}, n_parameters=7_000_000, - memory_usage=None, max_tokens=4096, embed_dim=4096, license="apache-2.0", @@ -178,7 +177,6 @@ def loader_inner(**kwargs: Any) -> Encoder: revision="01c7f73d771dfac7d292323805ebc428287df4f9-ad5c1d0938a1e02954bcafb4d811ba2f34052e71", # base-peft revision release_date="2024-09-15", n_parameters=7_000_000, - memory_usage=None, max_tokens=4096, embed_dim=4096, license="apache-2.0", diff --git a/mteb/models/ru_sentence_models.py b/mteb/models/ru_sentence_models.py index faa36fa160..16146b212a 100644 --- a/mteb/models/ru_sentence_models.py +++ b/mteb/models/ru_sentence_models.py @@ -13,7 +13,6 @@ revision="dad72b8f77c5eef6995dd3e4691b758ba56b90c3", release_date="2021-10-28", n_parameters=29_400_000, - memory_usage=None, embed_dim=312, license="mit", max_tokens=2048, @@ -30,7 +29,6 @@ revision="5441c5ea8026d4f6d7505ec004845409f1259fb1", release_date="2021-05-24", n_parameters=29_400_000, - memory_usage=None, embed_dim=312, license="mit", max_tokens=2048, @@ -47,7 +45,6 @@ revision="af977d5dfa46a3635e29bf0ef383f2df2a08d47a", release_date="2020-11-20", n_parameters=427_000_000, - memory_usage=None, embed_dim=1024, license="mit", max_tokens=512, # best guess @@ -64,7 +61,6 @@ revision="05300876c2b83f46d3ddd422a7f17e45cf633bb0", release_date="2021-05-18", n_parameters=427_000_000, - 
memory_usage=None, embed_dim=1024, license="Not specified", max_tokens=512, # best guess @@ -87,7 +83,6 @@ revision="436a489a2087d61aa670b3496a9915f84e46c861", release_date="2024-06-10", n_parameters=427_000_000, - memory_usage=None, embed_dim=1024, license="Not specified", max_tokens=512, # best guess @@ -128,7 +123,6 @@ revision="bdd30b0e19757e6940c92c7aff19e8fc0a60dff4", release_date="2023-02-07", n_parameters=124_000_000, - memory_usage=None, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -145,7 +139,6 @@ revision="4036cab694767a299f2b9e6492909664d9414229", release_date="2020-03-04", n_parameters=1280_000_000, - memory_usage=None, embed_dim=768, license="Not specified", max_tokens=512, # best guess @@ -171,7 +164,6 @@ revision="e348066b4a7279b97138038299bddc6580a9169a", release_date="2022-06-28", n_parameters=107_000_000, - memory_usage=None, embed_dim=768, license="Not specified", max_tokens=512, @@ -198,7 +190,6 @@ revision="78b5122d6365337dd4114281b0d08cd1edbb3bc8", release_date="2020-03-04", n_parameters=107_000_000, - memory_usage=None, embed_dim=768, license="Not specified", max_tokens=512, @@ -215,7 +206,6 @@ revision="cf0714e606d4af551e14ad69a7929cd6b0da7f7e", release_date="2021-06-10", n_parameters=129_000_000, - memory_usage=None, embed_dim=768, license="Not specified", max_tokens=512, @@ -232,7 +222,6 @@ revision="8ce0cf757446ce9bb2d5f5a4ac8103c7a1049054", release_date="2024-06-21", n_parameters=129_000_000, - memory_usage=None, embed_dim=312, license="mit", max_tokens=512, @@ -250,7 +239,6 @@ revision="1940b046c6b5e125df11722b899130329d0a46da", release_date="2024-06-27", n_parameters=129_000_000, - memory_usage=None, embed_dim=312, license="mit", max_tokens=512, diff --git a/mteb/models/salesforce_models.py b/mteb/models/salesforce_models.py index d9810c7281..4dec2fa286 100644 --- a/mteb/models/salesforce_models.py +++ b/mteb/models/salesforce_models.py @@ -32,7 +32,6 @@ def instruction_template( 
revision="91762139d94ed4371a9fa31db5551272e0b83818", release_date="2024-06-14", # initial commit of hf model. n_parameters=7_110_000_000, - memory_usage=None, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, @@ -67,7 +66,6 @@ def instruction_template( revision="938c560d1c236aa563b2dbdf084f28ab28bccb11", release_date="2024-01-24", # initial commit of hf model. n_parameters=7_110_000_000, - memory_usage=None, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, @@ -95,7 +93,6 @@ def instruction_template( revision="938c560d1c236aa563b2dbdf084f28ab28bccb11", release_date="2024-01-24", # initial commit of hf model. n_parameters=7_110_000_000, - memory_usage=None, embed_dim=4096, license="cc-by-nc-4.0", max_tokens=32768, diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index 4e0ad6420f..64fb4f5605 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -78,7 +78,6 @@ revision="8b3219a92973c328a8e22fadcfa821b5dc75636a", release_date="2021-08-30", n_parameters=22_700_000, - memory_usage=None, embed_dim=384, license="apache-2.0", max_tokens=256, @@ -121,7 +120,6 @@ revision="bf3bf13ab40c3157080a7ab344c831b9ad18b5eb", release_date="2019-11-01", # release date of paper n_parameters=118_000_000, - memory_usage=None, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -141,7 +139,6 @@ revision="79f2382ceacceacdf38563d7c5d16b9ff8d725d6", release_date="2019-11-01", # release date of paper n_parameters=278_000_000, - memory_usage=None, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -161,7 +158,6 @@ revision="e34fab64a3011d2176c99545a93d5cbddc9a91b7", release_date="2019-11-01", # release date of paper n_parameters=471_000_000, - memory_usage=None, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -189,7 +185,6 @@ revision="b207367332321f8e44f96e224ef15bc607f4dbf0", release_date="2021-08-30", n_parameters=22_700_000, - memory_usage=None, 
embed_dim=384, license="apache-2.0", max_tokens=512, @@ -208,7 +203,6 @@ revision="9a3225965996d404b775526de6dbfe85d3368642", release_date="2021-08-30", n_parameters=109_000_000, - memory_usage=None, embed_dim=768, license="apache-2.0", max_tokens=384, @@ -250,7 +244,6 @@ revision="6e85f575bc273f1fd840a658067d0157933c83f0", release_date="2023-09-27", n_parameters=137_000_000, - memory_usage=None, embed_dim=768, license="apache-2.0", max_tokens=8192, @@ -270,7 +263,6 @@ revision="796cff318cdd4e5fbe8b7303a1ef8cbec36996ef", release_date="2023-09-27", n_parameters=32_700_000, - memory_usage=None, embed_dim=512, license="apache-2.0", max_tokens=8192, @@ -290,7 +282,6 @@ revision="aa0645035294a8c0607ce5bb700aba982cdff32c", release_date="2023-07-07", n_parameters=110_000_000, - memory_usage=None, embed_dim=768, license="apache-2.0", max_tokens=512, @@ -310,7 +301,6 @@ revision="c1fed70aa4823a640f1a7150a276e4d3b08dce08", release_date="2023-07-07", n_parameters=35_000_000, - memory_usage=None, embed_dim=512, license="apache-2.0", max_tokens=512, @@ -330,7 +320,6 @@ revision="364dd28d28dcd3359b537f3cf1f5348ba679da62", release_date="2021-08-30", n_parameters=33_400_000, - memory_usage=None, embed_dim=384, license="apache-2.0", max_tokens=256, @@ -381,7 +370,6 @@ revision="abe8c1493371369031bcb1e02acb754cf4e162fa", release_date="2022-06-25", # release date of model on HF n_parameters=150_000_000, - memory_usage=None, embed_dim=768, license=None, max_tokens=512, @@ -406,7 +394,6 @@ revision="98f70f14cdf12d7ea217ed2fd4e808b0195f1e7e", release_date="2024-11-10", n_parameters=272_000_000, - memory_usage=None, embed_dim=1024, license="apache-2.0", max_tokens=2048, diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index 70f61e2c52..02078e4cde 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -151,7 +151,6 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, - memory_usage=None, license=None, 
reference="https://blog.voyageai.com/2024/05/05/voyage-large-2-instruct-instruction-tuned-and-rank-1-on-mteb/", similarity_fn_name="cosine", @@ -173,7 +172,6 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, - memory_usage=None, license=None, reference="https://blog.voyageai.com/2024/06/03/domain-specific-embeddings-finance-edition-voyage-finance-2/", similarity_fn_name="cosine", @@ -195,7 +193,6 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, - memory_usage=None, license=None, reference="https://blog.voyageai.com/2024/04/15/domain-specific-embeddings-and-retrieval-legal-edition-voyage-law-2/", similarity_fn_name="cosine", @@ -217,7 +214,6 @@ def _batched_encode( embed_dim=1536, open_weights=False, n_parameters=None, - memory_usage=None, license=None, reference="https://blog.voyageai.com/2024/01/23/voyage-code-2-elevate-your-code-retrieval/", similarity_fn_name="cosine", @@ -239,7 +235,6 @@ def _batched_encode( embed_dim=1536, open_weights=False, n_parameters=None, - memory_usage=None, license=None, reference="https://blog.voyageai.com/2023/10/29/voyage-embeddings/", similarity_fn_name="cosine", @@ -261,7 +256,6 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, - memory_usage=None, license=None, reference="https://blog.voyageai.com/2023/10/29/voyage-embeddings/", similarity_fn_name="cosine", @@ -282,7 +276,6 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, - memory_usage=None, license=None, reference="https://blog.voyageai.com/2024/06/10/voyage-multilingual-2-multilingual-embedding-model/", similarity_fn_name="cosine", @@ -304,7 +297,6 @@ def _batched_encode( embed_dim=1024, open_weights=False, n_parameters=None, - memory_usage=None, license=None, reference="https://blog.voyageai.com/2024/09/18/voyage-3/", similarity_fn_name="cosine", @@ -326,7 +318,6 @@ def _batched_encode( embed_dim=512, open_weights=False, n_parameters=None, - memory_usage=None, 
license=None, reference="https://blog.voyageai.com/2024/09/18/voyage-3/", similarity_fn_name="cosine", From 2b41cb4123a8b75eb43d00881d042050ada10aa8 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Sun, 12 Jan 2025 03:36:37 +0500 Subject: [PATCH 29/40] [v2] fix contriever (add similarity_fn_name to ST wrapper) (#1749) * add dotwrapper * lint * make cleaner * add similarity_fn * update to similarity_fn_name * lint * fix name parameter --- mteb/models/sentence_transformer_wrapper.py | 6 +++++- mteb/models/sentence_transformers_models.py | 11 +++++++++++ mteb/models/wrapper.py | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index e580ef8959..8c133e125c 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -21,6 +21,7 @@ def __init__( model: str | SentenceTransformer | CrossEncoder, revision: str | None = None, model_prompts: dict[str, str] | None = None, + similarity_fn_name: str | None = None, **kwargs, ) -> None: """Wrapper for SentenceTransformer models. @@ -32,6 +33,7 @@ def __init__( First priority is given to the composed prompt of task name + prompt type (query or passage), then to the specific task prompt, then to the composed prompt of task type + prompt type, then to the specific task type prompt, and finally to the specific prompt type. + similarity_fn_name: A similarity function to use. **kwargs: Additional arguments to pass to the SentenceTransformer model. 
""" if isinstance(model, str): @@ -59,7 +61,9 @@ def __init__( if isinstance(self.model, CrossEncoder): self.predict = self._predict - if hasattr(self.model, "similarity"): + if similarity_fn_name: + self.similarity = self.get_similarity_function(similarity_fn_name) + elif hasattr(self.model, "similarity"): self.similarity = self.model.similarity def encode( diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index 64fb4f5605..d5d487078d 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -2,7 +2,12 @@ from __future__ import annotations +from functools import partial + from mteb.model_meta import ModelMeta +from mteb.models.sentence_transformer_wrapper import ( + SentenceTransformerWrapper, +) paraphrase_langs = [ "ara_Arab", @@ -364,6 +369,12 @@ ) contriever = ModelMeta( + loader=partial( + SentenceTransformerWrapper, + model="facebook/contriever-msmarco", + revision="abe8c1493371369031bcb1e02acb754cf4e162fa", + similarity_fn_name="dot", + ), name="facebook/contriever-msmarco", languages=["eng-Latn"], open_weights=True, diff --git a/mteb/models/wrapper.py b/mteb/models/wrapper.py index 956071d3dc..76b31ba529 100644 --- a/mteb/models/wrapper.py +++ b/mteb/models/wrapper.py @@ -3,9 +3,12 @@ import logging from typing import Callable, get_args +import numpy as np + import mteb from mteb.abstasks.TaskMetadata import TASK_TYPE from mteb.encoder_interface import PromptType +from mteb.evaluation.evaluators.utils import cos_sim, dot_score logger = logging.getLogger(__name__) @@ -64,6 +67,18 @@ def get_prompt_name( ) return None + @staticmethod + def get_similarity_function( + similarity_fn_name: str, + ) -> Callable[[np.ndarray, np.ndarray], np.ndarray]: + if similarity_fn_name == "cosine": + return cos_sim + if similarity_fn_name == "dot": + return dot_score + raise ValueError( + "Invalid similarity function. 
Should be one of ['cosine', 'dot']" + ) + @staticmethod def validate_task_to_prompt_name( task_to_prompt_name: dict[str, str] | None, From 91871fec8a6f46ddad9e8c01a3e4fdc09fb61408 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Sun, 12 Jan 2025 12:38:46 +0500 Subject: [PATCH 30/40] [v2] Refactor evaluators and Abstasks (#1707) * refactor evaluators and tasks * remove slow/fast loading * fix imports * fix summ evaluator * fix evaluator * make classification parent class for AbsTaskMultilabelClassification * fix descriptive stat * fix import * add typehint * remove clustering_downsample * fix tests * remove prints * remove all inits * fix all abstasks * fix tests * remove changes in DeprecatedSummarizationEvaluator * lint * update typehint --- mteb/abstasks/AbsTask.py | 21 +- mteb/abstasks/AbsTaskBitextMining.py | 11 +- mteb/abstasks/AbsTaskClassification.py | 148 ++++++------- mteb/abstasks/AbsTaskClustering.py | 6 - mteb/abstasks/AbsTaskClusteringFast.py | 50 +---- .../AbsTaskMultilabelClassification.py | 198 ++---------------- mteb/abstasks/AbsTaskPairClassification.py | 6 - mteb/abstasks/AbsTaskReranking.py | 3 - mteb/abstasks/AbsTaskRetrieval.py | 10 +- mteb/abstasks/AbsTaskSTS.py | 6 - mteb/abstasks/AbsTaskSpeedTask.py | 6 - mteb/abstasks/AbsTaskSummarization.py | 19 +- mteb/abstasks/MultiSubsetLoader.py | 47 ----- mteb/abstasks/MultilingualTask.py | 47 ++++- .../AmazonCounterfactualClassification.json | 75 +++++-- .../CEDRClassification.json | 4 +- mteb/evaluation/MTEB.py | 14 +- .../evaluators/BitextMiningEvaluator.py | 14 +- .../evaluators/ClassificationEvaluator.py | 97 ++++----- .../evaluators/ClusteringEvaluator.py | 4 - mteb/evaluation/evaluators/Evaluator.py | 10 +- mteb/evaluation/evaluators/STSEvaluator.py | 5 - .../evaluators/SummarizationEvaluator.py | 30 +-- mteb/overview.py | 13 +- .../Clustering/eng/ArxivClusteringP2P.py | 29 ++- ...CrossLingualSemanticDiscriminationWMT19.py | 5 +- ...CrossLingualSemanticDiscriminationWMT21.py | 4 +- 
tests/test_benchmark/mock_tasks.py | 40 +++- tests/test_benchmark/test_benchmark.py | 2 +- tests/test_tasks/test_all_abstasks.py | 5 +- 30 files changed, 363 insertions(+), 566 deletions(-) delete mode 100644 mteb/abstasks/MultiSubsetLoader.py diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index 4774be995c..da599c1efc 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -11,6 +11,7 @@ import numpy as np import torch import tqdm +import transformers from datasets import Dataset, DatasetDict from sklearn.preprocessing import MultiLabelBinarizer @@ -25,6 +26,13 @@ # ^ e.g {'main_score': 0.5, 'hf_subset': 'en-de', 'languages': ['eng-Latn', 'deu-Latn']} +def set_seed(seed: int) -> tuple[random.Random, np.random.Generator]: + torch.manual_seed(seed) + np.random.seed(seed) + transformers.set_seed(seed) + return random.Random(seed), np.random.default_rng(seed) + + def _multilabel_subsampling( dataset_dict: DatasetDict, seed: int, @@ -63,14 +71,14 @@ class AbsTask(ABC): and Dataset is a datasets.Dataset objedct. "hf subset" is the data subset on Huggingface typically used to denote the language e.g. datasets.load_dataset("data", "en"). If the dataset does not have a subset this is simply "default". abstask_prompt: The potential prompt of the abstask - superseeded_by: Denotes the task that this task is superseeded by. Used to issue warning to users of outdated datasets, while maintaining + superseded_by: Denotes the task that this task is superseeded by. Used to issue warning to users of outdated datasets, while maintaining reproducibility of existing benchmarks. 
""" metadata: TaskMetadata abstask_prompt: str | None = None _eval_splits: list[str] | None = None - superseded_by: None | str = None + superseded_by: str | None = None dataset: dict[HFSubset, DatasetDict] | None = None # type: ignore data_loaded: bool = False is_multilingual: bool = False @@ -85,10 +93,7 @@ def __init__(self, seed: int = 42, **kwargs: Any): self.save_suffix = kwargs.get("save_suffix", "") self.seed = seed - random.seed(self.seed) - np.random.seed(self.seed) - torch.manual_seed(self.seed) - torch.cuda.manual_seed_all(self.seed) + self.rng_state, self.np_rng = set_seed(seed) def check_if_dataset_is_superseded(self): """Check if the dataset is superseded by a newer version""" @@ -146,6 +151,7 @@ def evaluate( scores[hf_subset] = self._evaluate_subset( model, data_split, encode_kwargs=encode_kwargs, **kwargs ) + self._add_main_score(scores[hf_subset]) return scores @abstractmethod @@ -329,6 +335,9 @@ def filter_languages( self.hf_subsets = subsets_to_keep return self + def _add_main_score(self, scores: dict[HFSubset, ScoresDict]) -> None: + scores["main_score"] = scores[self.metadata.main_score] + def _upload_dataset_to_hub(self, repo_name: str, fields: list[str]) -> None: if self.is_multilingual: for config in self.metadata.eval_langs: diff --git a/mteb/abstasks/AbsTaskBitextMining.py b/mteb/abstasks/AbsTaskBitextMining.py index 635f0a67b1..19d380f3fb 100644 --- a/mteb/abstasks/AbsTaskBitextMining.py +++ b/mteb/abstasks/AbsTaskBitextMining.py @@ -61,9 +61,6 @@ class AbsTaskBitextMining(AbsTask): parallel_subsets = False abstask_prompt = "Retrieve parallel sentences." - def __init__(self, **kwargs): - super().__init__(**kwargs) - def evaluate( self, model: Encoder, @@ -94,7 +91,7 @@ def evaluate( else: for hf_subet in hf_subsets: logger.info( - f"\nTask: {self.metadata.name}, split: {split}, subset: {hf_subet}. Running..." + f"Task: {self.metadata.name}, split: {split}, subset: {hf_subet}. Running..." 
) if hf_subet not in self.dataset and hf_subet == "default": @@ -103,8 +100,7 @@ def evaluate( data_split = self.dataset[hf_subet][split] scores[hf_subet] = self._evaluate_subset( model, - data_split, # type: ignore - subsets=["sentence1", "sentence2"], + data_split, encode_kwargs=encode_kwargs, **kwargs, ) @@ -142,9 +138,6 @@ def _evaluate_subset( self._add_main_score(metrics) return metrics - def _add_main_score(self, scores) -> None: - scores["main_score"] = scores[self.metadata.main_score] - def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> BitextDescriptiveStatistics: diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py index bb5f6dc27e..afa9ad6a99 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskClassification.py @@ -5,12 +5,11 @@ from typing import Any import numpy as np +from datasets import Dataset, DatasetDict from mteb.encoder_interface import Encoder from ..evaluation.evaluators import ( - kNNClassificationEvaluator, - kNNClassificationEvaluatorPytorch, logRegClassificationEvaluator, ) from ..load_results.task_results import HFSubset, ScoresDict @@ -24,17 +23,20 @@ class ClassificationDescriptiveStatistics(DescriptiveStatistics): """Descriptive statistics for Classification Attributes: - num_samples: number of samples in the dataset. - number_of_characters: Total number of symbols in the dataset. - number_texts_intersect_with_train: Number of texts in the train split - - min_text_length: Minimum length of text - average_text_length: Average length of text - max_text_length: Maximum length of text - unique_text: Number of unique texts - - unique_labels: Number of unique labels - labels: dict of label frequencies + num_samples: number of samples in the dataset. + number_of_characters: Total number of symbols in the dataset. 
+ number_texts_intersect_with_train: Number of texts in the train split + + min_text_length: Minimum length of text + average_text_length: Average length of text + max_text_length: Maximum length of text + unique_texts: Number of unique texts + + min_labels_per_text: Minimum number of labels per text + average_label_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text + unique_labels: Number of unique labels + labels: dict of label frequencies """ num_samples: int @@ -44,14 +46,17 @@ class ClassificationDescriptiveStatistics(DescriptiveStatistics): min_text_length: int average_text_length: float max_text_length: int - unique_text: int + unique_texts: int + min_labels_per_text: int + average_label_per_text: float + max_labels_per_text: int unique_labels: int labels: dict[str, dict[str, int]] class AbsTaskClassification(AbsTask): - """Abstract class for kNN classification tasks + """Abstract class for classification tasks The similarity is computed between pairs and the results are ranked. self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It @@ -64,40 +69,21 @@ class AbsTaskClassification(AbsTask): """ + evaluator = logRegClassificationEvaluator abstask_prompt = "Classify user passages." 
samples_per_label: int = 8 n_experiments: int = 10 - - def __init__( - self, - method: str = "logReg", - n_experiments: int | None = None, - k: int = 3, - **kwargs, - ): - super().__init__(**kwargs) - self.method = method - - # Bootstrap parameters - self.n_experiments: int = ( # type: ignore - n_experiments if n_experiments is not None else self.n_experiments - ) - - # kNN parameters - self.k = k - - def _add_main_score(self, scores: dict[HFSubset, ScoresDict]) -> None: - scores["main_score"] = scores[self.metadata.main_score] + k: int = 3 + train_split = "train" def evaluate( self, - model, - eval_split: str = "test", - train_split: str = "train", + model: Encoder, + split: str = "test", subsets_to_run: list[HFSubset] | None = None, *, encode_kwargs: dict[str, Any] = {}, - **kwargs, + **kwargs: Any, ) -> dict[HFSubset, ScoresDict]: if not self.data_loaded: self.load_data() @@ -109,7 +95,7 @@ def evaluate( for hf_subset in hf_subsets: logger.info( - f"\nTask: {self.metadata.name}, split: {eval_split}, subset: {hf_subset}. Running..." + f"Task: {self.metadata.name}, split: {split}, subset: {hf_subset}. Running..." 
) if hf_subset not in self.dataset and hf_subset == "default": @@ -119,8 +105,7 @@ def evaluate( scores[hf_subset] = self._evaluate_subset( model, ds, - eval_split, - train_split, + eval_split_name=split, encode_kwargs=encode_kwargs, **kwargs, ) @@ -131,14 +116,13 @@ def evaluate( def _evaluate_subset( self, model: Encoder, - dataset, - eval_split: str = "test", - train_split: str = "train", + dataset: DatasetDict | Dataset, + eval_split_name: str, encode_kwargs: dict[str, Any] = {}, **kwargs, ) -> ScoresDict: - train_split = dataset[train_split] - eval_split = dataset[eval_split] + train_split = dataset[self.train_split] + eval_split = dataset[eval_split_name] params = {"k": self.k} params.update(kwargs) @@ -159,40 +143,17 @@ def _evaluate_subset( idxs, ) - if self.method == "kNN": - evaluator = kNNClassificationEvaluator( - X_sampled, - y_sampled, - eval_split["text"], # type: ignore - eval_split["label"], # type: ignore - task_name=self.metadata.name, - encode_kwargs=encode_kwargs, - **params, - ) - elif self.method == "kNN-pytorch": - evaluator = kNNClassificationEvaluatorPytorch( - X_sampled, - y_sampled, - eval_split["text"], # type: ignore - eval_split["label"], # type: ignore - task_name=self.metadata.name, - encode_kwargs=encode_kwargs, - **params, - ) - elif self.method == "logReg": - evaluator = logRegClassificationEvaluator( - X_sampled, - y_sampled, - eval_split["text"], # type: ignore - eval_split["label"], # type: ignore - task_name=self.metadata.name, - encode_kwargs=encode_kwargs, - **params, - ) - else: - raise ValueError(f"Method {self.method} not supported") - - scores_exp, test_cache = evaluator(model, test_cache=test_cache) + evaluator = self.evaluator( + X_sampled, + y_sampled, + eval_split["text"], # type: ignore + eval_split["label"], # type: ignore + task_name=self.metadata.name, + **params, + ) + scores_exp, test_cache = evaluator( + model, encode_kwargs=encode_kwargs, test_cache=test_cache + ) scores.append(scores_exp) avg_scores: 
dict[str, Any] = { @@ -242,7 +203,18 @@ def _calculate_metrics_from_split( text_len = [len(t) for t in text] total_text_len = sum(text_len) - label_count = Counter(label) + if isinstance(label[0], int): + label_len = [1] * len(label) + total_label_len = len(label) + total_labels = label + else: + # multilabel classification + label_len = [len(l) for l in label] + total_label_len = sum(label_len) + total_labels = [] + for l in label: + total_labels.extend(l if len(l) > 0 else [None]) + label_count = Counter(total_labels) num_texts_in_train = ( len(set(text) & set(train_text)) if split != "train" else None ) @@ -253,10 +225,16 @@ def _calculate_metrics_from_split( min_text_length=min(text_len), average_text_length=total_text_len / len(text), max_text_length=max(text_len), - unique_text=len(set(text)), + unique_texts=len(set(text)), + min_labels_per_text=min(label_len), + average_label_per_text=total_label_len / len(label), + max_labels_per_text=max(label_len), unique_labels=len(label_count), labels={ - str(label): {"count": count} for label, count in label_count.items() + str(label): { + "count": value, + } + for label, value in label_count.items() }, ) diff --git a/mteb/abstasks/AbsTaskClustering.py b/mteb/abstasks/AbsTaskClustering.py index e4ea7b0a02..d68cc74712 100644 --- a/mteb/abstasks/AbsTaskClustering.py +++ b/mteb/abstasks/AbsTaskClustering.py @@ -64,12 +64,6 @@ class AbsTaskClustering(AbsTask): abstask_prompt = "Identify categories in user passages." 
- def __init__(self, **kwargs): - super().__init__(**kwargs) - - def _add_main_score(self, scores) -> None: - scores["main_score"] = scores[self.metadata.main_score] - def _evaluate_subset( self, model: Encoder, diff --git a/mteb/abstasks/AbsTaskClusteringFast.py b/mteb/abstasks/AbsTaskClusteringFast.py index ca4ef58457..5682270637 100644 --- a/mteb/abstasks/AbsTaskClusteringFast.py +++ b/mteb/abstasks/AbsTaskClusteringFast.py @@ -38,8 +38,6 @@ def evaluate_clustering_bootstrapped( The bootstrapping is done by sampling N samples from the corpus and clustering them. It is done without replacement to get a diverse set of samples. """ - n_embeddings = embeddings.shape[0] - v_measures = defaultdict(list) if max_depth is not None: max_depth = min(max_depth, max(map(len, labels))) @@ -143,17 +141,6 @@ class AbsTaskClusteringFast(AbsTask): max_depth = None abstask_prompt = "Identify categories in user passages." - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def _add_main_score(self, scores): - if self.metadata.main_score in scores: - scores["main_score"] = scores[self.metadata.main_score] - else: - logger.warning( - f"main score {self.metadata.main_score} not found in scores {scores.keys()}" - ) - def _evaluate_subset( self, model: Encoder, @@ -162,8 +149,6 @@ def _evaluate_subset( encode_kwargs: dict[str, Any] = {}, **kwargs: Any, ) -> dict[str, float | dict[str, list[float]]]: - rng_state = random.Random(self.seed) - if ( self.max_document_to_embed is not None and self.max_fraction_of_documents_to_embed is not None @@ -186,7 +171,7 @@ def _evaluate_subset( max_documents_to_embed = self.max_document_to_embed max_documents_to_embed = min(len(dataset), max_documents_to_embed) # type: ignore - example_indices = rng_state.sample( + example_indices = self.rng_state.sample( range(len(dataset)), k=max_documents_to_embed ) downsampled_dataset = dataset.select(example_indices) # type: ignore @@ -210,7 +195,7 @@ def _evaluate_subset( 
cluster_size=self.max_documents_per_cluster, kmean_batch_size=self.k_mean_batch_size, max_depth=self.max_depth, - rng_state=rng_state, + rng_state=self.rng_state, ) v_measures = list(itertools.chain.from_iterable(all_v_scores.values())) @@ -272,37 +257,6 @@ def _push_dataset_to_hub(self, repo_name: str) -> None: self._upload_dataset_to_hub(repo_name, ["sentences", "labels"]) -def clustering_downsample( - dataset: DatasetDict, seed: int, max_samples_in_cluster: int = 2048 -) -> DatasetDict: - """In cases where it is not possible to convert the dataset to a fast version, we can downsample the dataset to speed up the evaluation. - - This might be necessary when the clusters in the dataset is not sampled from the same distribution. - """ - rng_state = random.Random(seed) - - ds = {} - for split in dataset: - _docs = [] - _labels = [] - - n_clusters = len(dataset[split]) - - for i in range(n_clusters): - labels = dataset[split]["labels"][i] - sentences = dataset[split]["sentences"][i] - - n_sample = min(max_samples_in_cluster, len(sentences)) - - # sample n_sample from each cluster - idxs = rng_state.sample(range(len(sentences)), n_sample) - _docs.append([sentences[idx] for idx in idxs]) - _labels.append([labels[idx] for idx in idxs]) - - ds[split] = Dataset.from_dict({"sentences": _docs, "labels": _labels}) - return DatasetDict(ds) - - def convert_to_fast( dataset: DatasetDict, seed: int, max_size: int = 100_000 ) -> DatasetDict: diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py index 156fdd5e09..d6b53d9f93 100644 --- a/mteb/abstasks/AbsTaskMultilabelClassification.py +++ b/mteb/abstasks/AbsTaskMultilabelClassification.py @@ -2,10 +2,11 @@ import itertools import logging -from collections import Counter, defaultdict +from collections import defaultdict from typing import Any import numpy as np +from datasets import Dataset, DatasetDict from sklearn.base import ClassifierMixin, clone from sklearn.metrics 
import f1_score, label_ranking_average_precision_score from sklearn.model_selection import train_test_split @@ -14,9 +15,8 @@ from mteb.encoder_interface import Encoder -from ..load_results.task_results import HFSubset, ScoresDict -from .AbsTask import AbsTask -from .TaskMetadata import DescriptiveStatistics +from ..load_results.task_results import ScoresDict +from .AbsTaskClassification import AbsTaskClassification logger = logging.getLogger(__name__) @@ -27,57 +27,21 @@ def evaluate_classifier( embeddings_test: np.ndarray, y_test: np.ndarray, classifier: ClassifierMixin, -): - scores = {} +) -> dict[str, float]: classifier = clone(classifier) classifier.fit(embeddings_train, y_train) y_pred = classifier.predict(embeddings_test) accuracy = classifier.score(embeddings_test, y_test) f1 = f1_score(y_test, y_pred, average="macro") - scores["accuracy"] = accuracy - scores["f1"] = f1 lrap = label_ranking_average_precision_score(y_test, y_pred) - scores["lrap"] = lrap - return scores + return { + "accuracy": accuracy, + "f1": f1, + "lrap": lrap, + } -class MultilabelClassificationDescriptiveStatistics(DescriptiveStatistics): - """Descriptive statistics for MultilabelClassification - - Attributes: - num_samples: number of samples in the dataset. - number_of_characters: Total number of symbols in the dataset. 
- number_texts_intersect_with_train: Number of texts in the train split - - min_text_length: Minimum length of text - average_text_length: Average length of text - max_text_length: Maximum length of text - unique_texts: Number of unique texts - - min_labels_per_text: Minimum number of labels per text - average_label_per_text: Average number of labels per text - max_labels_per_text: Maximum number of labels per text - unique_labels: Number of unique labels - labels: dict of label frequencies - """ - - num_samples: int - number_of_characters: int - number_texts_intersect_with_train: int | None - - min_text_length: int - average_text_length: float - max_text_length: int - unique_texts: int - - min_labels_per_text: int - average_label_per_text: float - max_labels_per_text: int - unique_labels: int - labels: dict[str, dict[str, int]] - - -class AbsTaskMultilabelClassification(AbsTask): +class AbsTaskMultilabelClassification(AbsTaskClassification): """Abstract class for multioutput classification tasks The similarity is computed between pairs and the results are ranked. @@ -90,90 +54,19 @@ class AbsTaskMultilabelClassification(AbsTask): """ - classifier = KNeighborsClassifier(n_neighbors=5) - abstask_prompt = "Classify user passages." - samples_per_label: int = 8 - - def __init__( - self, - n_experiments=None, - batch_size=32, - **kwargs, - ): - super().__init__(**kwargs) - self.batch_size = batch_size - - # Bootstrap parameters - self.n_experiments = n_experiments or getattr(self, "n_experiments", 10) - - # Run metadata validation by instantiating addressing the attribute - # This is quite hacky. Ideally, this would be done in the constructor of - # each concrete task, but then we have to duplicate the __init__ method's - # interface. 
- if hasattr(self, "metadata"): - self.metadata - - def _add_main_score(self, scores): - scores["main_score"] = scores[self.metadata.main_score] - - def evaluate( - self, - model: Encoder, - eval_split: str = "test", - train_split: str = "train", - subsets_to_run: list[HFSubset] | None = None, - *, - encode_kwargs: dict[str, Any] = {}, - **kwargs: Any, - ) -> dict[HFSubset, ScoresDict]: - if not self.data_loaded: - self.load_data() - - scores = {} - hf_subsets = list(self.dataset) if self.is_multilingual else ["default"] - # If subsets_to_run is specified, filter the hf_subsets accordingly - if subsets_to_run is not None: - hf_subsets = [s for s in hf_subsets if s in subsets_to_run] - - for hf_subset in hf_subsets: - logger.info( - f"\nTask: {self.metadata.name}, split: {eval_split}, subset: {hf_subset}. Running..." - ) - - if hf_subset not in self.dataset and hf_subset == "default": - ds = self.dataset - else: - ds = self.dataset[hf_subset] - scores[hf_subset] = self._evaluate_subset( - model, - ds, - eval_split, - train_split, - encode_kwargs=encode_kwargs, - **kwargs, - ) - self._add_main_score(scores[hf_subset]) - - return scores + evaluator = KNeighborsClassifier(n_neighbors=5) def _evaluate_subset( self, model: Encoder, - dataset, - eval_split: str = "test", - train_split: str = "train", + dataset: DatasetDict | Dataset, + eval_split_name: str, *, encode_kwargs: dict[str, Any] = {}, **kwargs: Any, ) -> ScoresDict: - train_split = dataset[train_split] - eval_split = dataset[eval_split] - params = { - "classifier_type": type(self.classifier).__name__, - "classifier_params": self.classifier.get_params(), - "batch_size": self.batch_size, - } - params.update(kwargs) + train_split = dataset[self.train_split] + eval_split = dataset[eval_split_name] scores = [] # Bootstrap sample indices from training set for each experiment @@ -222,7 +115,7 @@ def _evaluate_subset( y_train = train_split.select(sample_indices)["label"] y_train = binarizer.transform(y_train) 
scores_exp = evaluate_classifier( - X_train, y_train, X_test, y_test, self.classifier + X_train, y_train, X_test, y_test, self.evaluator ) scores.append(scores_exp) @@ -246,60 +139,3 @@ def _undersample_data_indices(self, y, samples_per_label, idxs=None): for label in y[i]: label_counter[label] += 1 return sample_indices, idxs - - def _calculate_metrics_from_split( - self, split: str, hf_subset: str | None = None, compute_overall: bool = False - ) -> MultilabelClassificationDescriptiveStatistics: - train_text = [] - if hf_subset: - text = self.dataset[hf_subset][split]["text"] - label = self.dataset[hf_subset][split]["label"] - if split != "train": - train_text = self.dataset[hf_subset]["train"]["text"] - elif compute_overall: - text = [] - label = [] - for hf_subset in self.metadata.eval_langs: - text.extend(self.dataset[hf_subset][split]["text"]) - label.extend(self.dataset[hf_subset][split]["label"]) - if split != "train": - train_text.extend(self.dataset[hf_subset]["train"]["text"]) - else: - text = self.dataset[split]["text"] - label = self.dataset[split]["label"] - if split != "train": - train_text = self.dataset["train"]["text"] - - text_len = [len(t) for t in text] - total_text_len = sum(text_len) - label_len = [len(l) for l in label] - total_label_len = sum(label_len) - total_labels = [] - for l in label: - total_labels.extend(l if len(l) > 0 else [None]) - label_count = Counter(total_labels) - num_texts_in_train = ( - len(set(text) & set(train_text)) if split != "train" else None - ) - return MultilabelClassificationDescriptiveStatistics( - num_samples=len(text), - number_of_characters=total_text_len, - number_texts_intersect_with_train=num_texts_in_train, - min_text_length=min(text_len), - average_text_length=total_text_len / len(text), - max_text_length=max(text_len), - unique_texts=len(set(text)), - min_labels_per_text=min(label_len), - average_label_per_text=total_label_len / len(label), - max_labels_per_text=max(label_len), - 
unique_labels=len(label_count), - labels={ - str(label): { - "count": value, - } - for label, value in label_count.items() - }, - ) - - def _push_dataset_to_hub(self, repo_name: str) -> None: - self._upload_dataset_to_hub(repo_name, ["text", "label"]) diff --git a/mteb/abstasks/AbsTaskPairClassification.py b/mteb/abstasks/AbsTaskPairClassification.py index 0ac388c4e5..4c3be485ea 100644 --- a/mteb/abstasks/AbsTaskPairClassification.py +++ b/mteb/abstasks/AbsTaskPairClassification.py @@ -67,12 +67,6 @@ class AbsTaskPairClassification(AbsTask): abstask_prompt = "Retrieve text that are semantically similar to the given text." - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def _add_main_score(self, scores: ScoresDict) -> None: - scores["main_score"] = scores[self.metadata.main_score] - def _evaluate_subset( self, model: Encoder, diff --git a/mteb/abstasks/AbsTaskReranking.py b/mteb/abstasks/AbsTaskReranking.py index 76ee7df74a..5467d3b326 100644 --- a/mteb/abstasks/AbsTaskReranking.py +++ b/mteb/abstasks/AbsTaskReranking.py @@ -33,9 +33,6 @@ class AbsTaskReranking(AbsTaskRetrieval): """Abstract class for re-ranking experiments. This is mostly the same as the RetrievalEvaluator, but here to adapt the old format to the new format. TODO: update these tasks to the new format and delete this class.""" - def __init__(self, **kwargs): - super(AbsTaskRetrieval, self).__init__(**kwargs) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index b709c046f2..50e446cf28 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -127,11 +127,8 @@ class AbsTaskRetrieval(AbsTask): ignore_identical_ids: bool = False abstask_prompt = "Retrieve text based on user query." 
- def __init__(self, **kwargs): - self.top_ranked = None - self.instructions = None - # there could be multiple options, so do this even if multilingual - super(AbsTaskRetrieval, self).__init__(**kwargs) # noqa + instructions = None + top_ranked = None def load_data(self, **kwargs): if self.data_loaded: @@ -378,9 +375,6 @@ def _evaluate_subset( return scores - def _add_main_score(self, scores: ScoresDict) -> None: - scores["main_score"] = scores[self.metadata.main_score] - def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> RetrievalDescriptiveStatistics: diff --git a/mteb/abstasks/AbsTaskSTS.py b/mteb/abstasks/AbsTaskSTS.py index 2f29f01cd4..428cc57dca 100644 --- a/mteb/abstasks/AbsTaskSTS.py +++ b/mteb/abstasks/AbsTaskSTS.py @@ -64,9 +64,6 @@ class AbsTaskSTS(AbsTask): min_score: int max_score: int - def __init__(self, **kwargs): - super().__init__(**kwargs) - def _evaluate_subset( self, model, data_split, *, encode_kwargs: dict[str, Any] = {}, **kwargs ) -> ScoresDict: @@ -86,9 +83,6 @@ def normalize(x): self._add_main_score(scores) return scores - def _add_main_score(self, scores: ScoresDict) -> None: - scores["main_score"] = scores[self.metadata.main_score] - def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> STSDescriptiveStatistics: diff --git a/mteb/abstasks/AbsTaskSpeedTask.py b/mteb/abstasks/AbsTaskSpeedTask.py index 31f6bdb943..a726d40dbf 100644 --- a/mteb/abstasks/AbsTaskSpeedTask.py +++ b/mteb/abstasks/AbsTaskSpeedTask.py @@ -24,9 +24,6 @@ class AbsTaskSpeedTask(AbsTask): num_loops = 7 device = "cpu" - def __init__(self, **kwargs): - super().__init__(**kwargs) - def load_data(self, **kwargs): """Reads the text 'The Ugly Duckling' as the `test` split with a `text` column.""" if self.data_loaded: @@ -106,9 +103,6 @@ def _evaluate_subset(self, model: Encoder, data_split, **kwargs) -> ScoresDict: self._add_main_score(scores) 
return scores - def _add_main_score(self, scores) -> None: - scores["main_score"] = scores[self.metadata.main_score] - def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> dict[str, float]: diff --git a/mteb/abstasks/AbsTaskSummarization.py b/mteb/abstasks/AbsTaskSummarization.py index 0ed06ba068..a8ff3cdc90 100644 --- a/mteb/abstasks/AbsTaskSummarization.py +++ b/mteb/abstasks/AbsTaskSummarization.py @@ -4,6 +4,7 @@ from typing import Any import numpy as np +from datasets import Dataset from mteb.encoder_interface import Encoder from mteb.load_results.task_results import ScoresDict @@ -78,22 +79,25 @@ class AbsTaskSummarization(AbsTask): min_score: int max_score: int - evalutor = SummarizationEvaluator abstask_prompt = ( "Given a news summary, retrieve other semantically similar summaries." ) - - def __init__(self, **kwargs): - super().__init__(**kwargs) + # SummEval has DeprecatedSummarizationEvaluator + evaluator = SummarizationEvaluator def _evaluate_subset( - self, model: Encoder, data_split, *, encode_kwargs: dict[str, Any], **kwargs + self, + model: Encoder, + data_split: Dataset, + *, + encode_kwargs: dict[str, Any], + **kwargs, ) -> ScoresDict: normalized_scores = [ (np.array(x) - self.min_score) / (self.max_score - self.min_score) for x in data_split["relevance"] ] - evaluator = self.evalutor( + evaluator = self.evaluator( machine_summaries=data_split["machine_summaries"], human_summaries=data_split["human_summaries"], texts=data_split["text"], @@ -105,9 +109,6 @@ def _evaluate_subset( self._add_main_score(scores) return scores - def _add_main_score(self, scores: ScoresDict) -> None: - scores["main_score"] = scores[self.metadata.main_score] - def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> SummarizationDescriptiveStatistics: diff --git a/mteb/abstasks/MultiSubsetLoader.py b/mteb/abstasks/MultiSubsetLoader.py deleted file 
mode 100644 index 0b0ab98084..0000000000 --- a/mteb/abstasks/MultiSubsetLoader.py +++ /dev/null @@ -1,47 +0,0 @@ -from __future__ import annotations - -import datasets - - -class MultiSubsetLoader: - def load_data(self, **kwargs): - """Load dataset containing multiple subsets from HuggingFace hub""" - if self.data_loaded: - return - - if hasattr(self, "fast_loading") and self.fast_loading: - self.fast_load() - else: - self.slow_load() - - self.dataset_transform() - self.data_loaded = True - - def fast_load(self, **kwargs): - """Load all subsets at once, then group by language with Polars. Using fast loading has two requirements: - - Each row in the dataset should have a 'lang' feature giving the corresponding language/language pair - - The datasets must have a 'default' config that loads all the subsets of the dataset (see https://huggingface.co/docs/datasets/en/repository_structure#configurations) - """ - self.dataset = {} - merged_dataset = datasets.load_dataset( - **self.metadata.dataset - ) # load "default" subset - for split in merged_dataset.keys(): - df_split = merged_dataset[split].to_polars() - df_grouped = dict(df_split.group_by(["lang"])) - for lang in set(df_split["lang"].unique()) & set(self.hf_subsets): - self.dataset.setdefault(lang, {}) - self.dataset[lang][split] = datasets.Dataset.from_polars( - df_grouped[(lang,)].drop("lang") - ) # Remove lang column and convert back to HF datasets, not strictly necessary but better for compatibility - for lang, subset in self.dataset.items(): - self.dataset[lang] = datasets.DatasetDict(subset) - - def slow_load(self, **kwargs): - """Load each subsets iteratively""" - self.dataset = {} - for lang in self.hf_subsets: - self.dataset[lang] = datasets.load_dataset( - name=lang, - **self.metadata.dataset, - ) diff --git a/mteb/abstasks/MultilingualTask.py b/mteb/abstasks/MultilingualTask.py index 3fd007df6d..42280f370f 100644 --- a/mteb/abstasks/MultilingualTask.py +++ b/mteb/abstasks/MultilingualTask.py @@ -1,10 
+1,11 @@ from __future__ import annotations +import datasets + from .AbsTask import AbsTask -from .MultiSubsetLoader import MultiSubsetLoader -class MultilingualTask(MultiSubsetLoader, AbsTask): +class MultilingualTask(AbsTask): def __init__(self, hf_subsets: list[str] | None = None, **kwargs): super().__init__(**kwargs) if isinstance(hf_subsets, list): @@ -18,3 +19,45 @@ def __init__(self, hf_subsets: list[str] | None = None, **kwargs): else: self.hf_subsets = self.metadata.eval_langs self.is_multilingual = True + + def load_data(self, **kwargs): + """Load dataset containing multiple subsets from HuggingFace hub""" + if self.data_loaded: + return + + if hasattr(self, "fast_loading") and self.fast_loading: + self.fast_load() + else: + self.slow_load() + + self.dataset_transform() + self.data_loaded = True + + def fast_load(self, **kwargs): + """Load all subsets at once, then group by language with Polars. Using fast loading has two requirements: + - Each row in the dataset should have a 'lang' feature giving the corresponding language/language pair + - The datasets must have a 'default' config that loads all the subsets of the dataset (see https://huggingface.co/docs/datasets/en/repository_structure#configurations) + """ + self.dataset = {} + merged_dataset = datasets.load_dataset( + **self.metadata.dataset + ) # load "default" subset + for split in merged_dataset.keys(): + df_split = merged_dataset[split].to_polars() + df_grouped = dict(df_split.group_by(["lang"])) + for lang in set(df_split["lang"].unique()) & set(self.hf_subsets): + self.dataset.setdefault(lang, {}) + self.dataset[lang][split] = datasets.Dataset.from_polars( + df_grouped[(lang,)].drop("lang") + ) # Remove lang column and convert back to HF datasets, not strictly necessary but better for compatibility + for lang, subset in self.dataset.items(): + self.dataset[lang] = datasets.DatasetDict(subset) + + def slow_load(self, **kwargs): + """Load each subsets iteratively""" + self.dataset = {} + for 
lang in self.hf_subsets: + self.dataset[lang] = datasets.load_dataset( + name=lang, + **self.metadata.dataset, + ) diff --git a/mteb/descriptive_stats/Classification/AmazonCounterfactualClassification.json b/mteb/descriptive_stats/Classification/AmazonCounterfactualClassification.json index 176d0d00cf..e572fdde2c 100644 --- a/mteb/descriptive_stats/Classification/AmazonCounterfactualClassification.json +++ b/mteb/descriptive_stats/Classification/AmazonCounterfactualClassification.json @@ -6,7 +6,10 @@ "min_text_length": 9, "average_text_length": 94.74495602690119, "max_text_length": 525, - "unique_text": 1903, + "unique_texts": 1903, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { @@ -24,7 +27,10 @@ "min_text_length": 31, "average_text_length": 102.14414414414415, "max_text_length": 370, - "unique_text": 666, + "unique_texts": 666, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { @@ -42,7 +48,10 @@ "min_text_length": 36, "average_text_length": 109.20298507462687, "max_text_length": 470, - "unique_text": 335, + "unique_texts": 335, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { @@ -60,7 +69,10 @@ "min_text_length": 22, "average_text_length": 125.00214592274678, "max_text_length": 525, - "unique_text": 466, + "unique_texts": 466, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { @@ -78,7 +90,10 @@ "min_text_length": 9, "average_text_length": 43.51931330472103, "max_text_length": 191, - "unique_text": 464, + "unique_texts": 464, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { @@ -98,7 +113,10 @@ "min_text_length": 6, "average_text_length": 93.37706611570248, "max_text_length": 
568, - "unique_text": 3779, + "unique_texts": 3779, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "1": { @@ -116,7 +134,10 @@ "min_text_length": 6, "average_text_length": 101.47226386806597, "max_text_length": 420, - "unique_text": 1333, + "unique_texts": 1333, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "1": { @@ -134,7 +155,10 @@ "min_text_length": 32, "average_text_length": 106.14626865671642, "max_text_length": 541, - "unique_text": 670, + "unique_texts": 670, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { @@ -152,7 +176,10 @@ "min_text_length": 23, "average_text_length": 123.58886509635974, "max_text_length": 568, - "unique_text": 933, + "unique_texts": 933, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { @@ -170,7 +197,10 @@ "min_text_length": 6, "average_text_length": 42.44325481798715, "max_text_length": 165, - "unique_text": 934, + "unique_texts": 934, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { @@ -190,7 +220,10 @@ "min_text_length": 6, "average_text_length": 93.08924110603841, "max_text_length": 572, - "unique_text": 19945, + "unique_texts": 19945, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { @@ -208,7 +241,10 @@ "min_text_length": 6, "average_text_length": 102.10175, "max_text_length": 541, - "unique_text": 7998, + "unique_texts": 7998, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { @@ -226,7 +262,10 @@ "min_text_length": 33, "average_text_length": 107.30039820806371, "max_text_length": 514, - 
"unique_text": 4018, + "unique_texts": 4018, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "1": { @@ -244,7 +283,10 @@ "min_text_length": 19, "average_text_length": 120.44482142857143, "max_text_length": 572, - "unique_text": 5587, + "unique_texts": 5587, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "1": { @@ -262,7 +304,10 @@ "min_text_length": 8, "average_text_length": 42.662142857142854, "max_text_length": 190, - "unique_text": 5530, + "unique_texts": 5530, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": { "0": { diff --git a/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json b/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json index d5d91adf50..605c3b717b 100644 --- a/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json +++ b/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json @@ -2,7 +2,7 @@ "test": { "num_samples": 1882, "number_of_characters": 171649, - "number_texts_in_train": 7, + "number_texts_intersect_with_train": 7, "min_text_length": 6, "average_text_length": 91.20563230605738, "max_text_length": 220, @@ -35,7 +35,7 @@ "train": { "num_samples": 7528, "number_of_characters": 697322, - "number_texts_in_train": null, + "number_texts_intersect_with_train": null, "min_text_length": 5, "average_text_length": 92.63044633368757, "max_text_length": 280, diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index ab317cadbd..bb920dbb45 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -22,6 +22,7 @@ from mteb.models import model_meta_from_sentence_transformers from ..abstasks.AbsTask import AbsTask +from ..abstasks.AbsTaskMultilabelClassification import AbsTaskMultilabelClassification from ..abstasks.AbsTaskReranking import 
AbsTaskReranking from ..load_results.task_results import TaskResult from ..models.sentence_transformer_wrapper import SentenceTransformerWrapper @@ -224,14 +225,17 @@ def print_selected_tasks(self): def select_tasks(self, **kwargs): """Select the tasks to be evaluated.""" # Get all existing tasks - # reranking subclasses retrieval to share methods, but is an abstract task - tasks_categories_cls = list(AbsTask.__subclasses__()) + [AbsTaskReranking] + # reranking and multiclassClassification subclasses retrieval to share methods, but is an abstract task + tasks_categories_cls = list(AbsTask.__subclasses__()) + [ + AbsTaskReranking, + AbsTaskMultilabelClassification, + ] all_task_classes = [] for cat_cls in tasks_categories_cls: for cls in cat_cls.__subclasses__(): - if ( - cat_cls.__name__.startswith("AbsTask") - and cls.__name__ != "AbsTaskReranking" + if cat_cls.__name__.startswith("AbsTask") and cls.__name__ not in ( + "AbsTaskReranking", + "AbsTaskMultilabelClassification", ): task = cls(hf_subsets=self._task_langs, **kwargs) all_task_classes.append(task) diff --git a/mteb/evaluation/evaluators/BitextMiningEvaluator.py b/mteb/evaluation/evaluators/BitextMiningEvaluator.py index 1c03d3bb57..10f75f55dc 100644 --- a/mteb/evaluation/evaluators/BitextMiningEvaluator.py +++ b/mteb/evaluation/evaluators/BitextMiningEvaluator.py @@ -3,6 +3,7 @@ import logging from typing import Any +import numpy as np import torch import tqdm from datasets import Dataset @@ -31,6 +32,7 @@ def __init__( self.pairs = pair_columns self.n = len(sentences) self.sentences = sentences + # TODO used only by BUCC self.gold = ( list(zip(range(self.n), range(self.n))) if "gold" not in sentences @@ -78,10 +80,10 @@ def compute_metrics(self, model: Encoder, encode_kwargs: dict[str, Any] = {}): def _compute_metrics( self, - embeddings1, - embeddings2, + embeddings1: np.ndarray, + embeddings2: np.ndarray, model: Encoder, - ): + ) -> dict[str, float]: # Find nearest neighbors logger.info("Finding 
nearest neighbors...") nearest_neighbors = self._similarity_search( @@ -111,13 +113,13 @@ def _compute_metrics( def _similarity_search( self, - query_embeddings, - corpus_embeddings, + query_embeddings: np.ndarray, + corpus_embeddings: np.ndarray, model: Encoder, query_chunk_size: int = 100, corpus_chunk_size: int = 500000, top_k: int = 10, - ): + ) -> list[list[dict[str, float]]]: """This function performs a cosine similarity search between a list of query embeddings and a list of corpus embeddings. It can be used for Information Retrieval / Semantic Search for corpora up to about 1 Million entries. diff --git a/mteb/evaluation/evaluators/ClassificationEvaluator.py b/mteb/evaluation/evaluators/ClassificationEvaluator.py index 955f269de7..2a67e1a202 100644 --- a/mteb/evaluation/evaluators/ClassificationEvaluator.py +++ b/mteb/evaluation/evaluators/ClassificationEvaluator.py @@ -28,36 +28,31 @@ def dot_distance(a: np.ndarray, b: np.ndarray) -> float: class kNNClassificationEvaluator(Evaluator): def __init__( self, - sentences_train, - y_train, - sentences_test, - y_test, - task_name: str | None = None, + sentences_train: list[str], + y_train: list[int], + sentences_test: list[str], + y_test: list[int], + task_name: str, k: int = 1, - encode_kwargs: dict[str, Any] = {}, - limit: int | None = None, **kwargs, ): super().__init__(**kwargs) - if limit is not None: - sentences_train = sentences_train[:limit] - y_train = y_train[:limit] - sentences_test = sentences_test[:limit] - y_test = y_test[:limit] self.sentences_train = sentences_train self.y_train = y_train self.sentences_test = sentences_test self.y_test = y_test self.task_name = task_name - self.encode_kwargs = encode_kwargs - - if "batch_size" not in self.encode_kwargs: - self.encode_kwargs["batch_size"] = 32 self.k = k - def __call__(self, model, test_cache=None): + def __call__( + self, + model: Encoder, + *, + encode_kwargs: dict[str, Any] = {}, + test_cache: np.ndarray | None = None, + ) -> tuple[dict[str, 
float], Any]: scores = {} max_accuracy = 0 max_f1 = 0 @@ -65,13 +60,13 @@ def __call__(self, model, test_cache=None): X_train = model.encode( self.sentences_train, task_name=self.task_name, - **self.encode_kwargs, + **encode_kwargs, ) if test_cache is None: X_test = model.encode( self.sentences_test, task_name=self.task_name, - **self.encode_kwargs, + **encode_kwargs, ) test_cache = X_test else: @@ -101,22 +96,15 @@ def __call__(self, model, test_cache=None): class kNNClassificationEvaluatorPytorch(Evaluator): def __init__( self, - sentences_train, - y_train, - sentences_test, - y_test, + sentences_train: list[str], + y_train: list[int], + sentences_test: list[str], + y_test: list[int], task_name: str, k: int = 1, - encode_kwargs: dict[str, Any] = {}, - limit: int | None = None, **kwargs: Any, ): super().__init__(**kwargs) - if limit is not None: - sentences_train = sentences_train[:limit] - y_train = y_train[:limit] - sentences_test = sentences_test[:limit] - y_test = y_test[:limit] self.sentences_train = sentences_train self.y_train = y_train @@ -124,14 +112,15 @@ def __init__( self.y_test = y_test self.task_name = task_name - self.encode_kwargs = encode_kwargs - - if "batch_size" not in self.encode_kwargs: - self.encode_kwargs["batch_size"] = 32 - self.k = k - def __call__(self, model: Encoder, test_cache=None): + def __call__( + self, + model: Encoder, + *, + encode_kwargs: dict[str, Any] = {}, + test_cache: np.ndarray | None = None, + ) -> tuple[dict[str, float], Any]: scores = {} max_accuracy = 0 max_f1 = 0 @@ -139,14 +128,14 @@ def __call__(self, model: Encoder, test_cache=None): X_train = model.encode( self.sentences_train, task_name=self.task_name, - **self.encode_kwargs, + **encode_kwargs, ) if test_cache is None: X_test = model.encode( self.sentences_test, task_name=self.task_name, - **self.encode_kwargs, + **encode_kwargs, ) test_cache = X_test else: @@ -251,27 +240,15 @@ def _dot_score(a: Tensor, b: Tensor): class 
logRegClassificationEvaluator(Evaluator): def __init__( self, - sentences_train, - y_train, - sentences_test, - y_test, + sentences_train: list[str], + y_train: list[int], + sentences_test: list[str], + y_test: list[int], task_name: str, max_iter: int = 100, - encode_kwargs: dict[str, Any] = {}, - limit: int | None = None, **kwargs, ): super().__init__(**kwargs) - self.encode_kwargs = encode_kwargs - - if "batch_size" not in self.encode_kwargs: - self.encode_kwargs["batch_size"] = 32 - - if limit is not None: - sentences_train = sentences_train[:limit] - y_train = y_train[:limit] - sentences_test = sentences_test[:limit] - y_test = y_test[:limit] self.sentences_train = sentences_train self.y_train = y_train self.sentences_test = sentences_test @@ -280,7 +257,13 @@ def __init__( self.max_iter = max_iter self.task_name = task_name - def __call__(self, model, test_cache=None): + def __call__( + self, + model: Encoder, + *, + encode_kwargs: dict[str, Any] = {}, + test_cache: np.ndarray | None = None, + ) -> tuple[dict[str, float], Any]: scores = {} clf = LogisticRegression( random_state=self.seed, @@ -291,13 +274,13 @@ def __call__(self, model, test_cache=None): X_train = model.encode( self.sentences_train, task_name=self.task_name, - **self.encode_kwargs, + **encode_kwargs, ) if test_cache is None: X_test = model.encode( self.sentences_test, task_name=self.task_name, - **self.encode_kwargs, + **encode_kwargs, ) test_cache = X_test else: diff --git a/mteb/evaluation/evaluators/ClusteringEvaluator.py b/mteb/evaluation/evaluators/ClusteringEvaluator.py index b0a21e4469..6035970f53 100644 --- a/mteb/evaluation/evaluators/ClusteringEvaluator.py +++ b/mteb/evaluation/evaluators/ClusteringEvaluator.py @@ -21,13 +21,9 @@ def __init__( labels, task_name: str | None = None, clustering_batch_size: int = 500, - limit: int | None = None, **kwargs, ): super().__init__(**kwargs) - if limit is not None: - sentences = sentences[:limit] - labels = labels[:limit] self.sentences = 
sentences self.labels = labels self.clustering_batch_size = clustering_batch_size diff --git a/mteb/evaluation/evaluators/Evaluator.py b/mteb/evaluation/evaluators/Evaluator.py index d53e6a57b2..d3f0cb2509 100644 --- a/mteb/evaluation/evaluators/Evaluator.py +++ b/mteb/evaluation/evaluators/Evaluator.py @@ -1,12 +1,9 @@ from __future__ import annotations -import random from abc import ABC, abstractmethod from typing import Any -import numpy as np -import torch - +from mteb.abstasks.AbsTask import set_seed from mteb.encoder_interface import Encoder @@ -17,10 +14,7 @@ class Evaluator(ABC): def __init__(self, seed: int = 42, **kwargs: Any): self.seed = seed - random.seed(self.seed) - np.random.seed(self.seed) - torch.manual_seed(self.seed) - torch.cuda.manual_seed_all(self.seed) + self.rng_state, self.np_rng = set_seed(seed) @abstractmethod def __call__(self, model: Encoder, *, encode_kwargs: dict[str, Any] = {}): diff --git a/mteb/evaluation/evaluators/STSEvaluator.py b/mteb/evaluation/evaluators/STSEvaluator.py index 2f6e8e4a46..75df316155 100644 --- a/mteb/evaluation/evaluators/STSEvaluator.py +++ b/mteb/evaluation/evaluators/STSEvaluator.py @@ -25,14 +25,9 @@ def __init__( sentences2, gold_scores, task_name: str | None = None, - limit: int | None = None, **kwargs, ): super().__init__(**kwargs) - if limit is not None: - sentences1 = sentences1[:limit] - sentences2 = sentences2[:limit] - gold_scores = gold_scores[:limit] self.sentences1 = sentences1 self.sentences2 = sentences2 self.gold_scores = gold_scores diff --git a/mteb/evaluation/evaluators/SummarizationEvaluator.py b/mteb/evaluation/evaluators/SummarizationEvaluator.py index df077fd44a..fe8af37b7c 100644 --- a/mteb/evaluation/evaluators/SummarizationEvaluator.py +++ b/mteb/evaluation/evaluators/SummarizationEvaluator.py @@ -26,24 +26,24 @@ class SummarizationEvaluator(Evaluator): def __init__( self, - task_name: str | None = None, - human_summaries=None, - machine_summaries=None, - texts=None, - 
gold_scores=None, - limit: int | None = None, + human_summaries: list[list[str]], + machine_summaries: list[list[str]], + texts: list[str], + gold_scores: list[list[float]], + task_name: str, **kwargs, ): - # human_summaries shape: (None, num_human_summaries) - # machine_summaries shape: (None, num_machine_summaries) - # gold scores shape: (None, num_machine_summaries) - # texts: (None,) + """Summarization Evaluator + + Args: + human_summaries: shape: (-1, num_human_summaries) + machine_summaries: shape: (-1, num_machine_summaries) + texts: shape: (-1,) + gold_scores: shape: (-1, num_machine_summaries) + task_name: Name of the task + **kwargs: Additional arguments to pass to the Evaluator + """ super().__init__(**kwargs) - if limit is not None: - human_summaries = human_summaries[:limit] - machine_summaries = machine_summaries[:limit] - gold_scores = gold_scores[:limit] - texts = texts[:limit] self.human_summaries = human_summaries self.machine_summaries = machine_summaries self.texts = texts diff --git a/mteb/overview.py b/mteb/overview.py index 77bc06b3fa..64e8802563 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -8,7 +8,7 @@ import pandas as pd -from mteb.abstasks import AbsTask, AbsTaskReranking +from mteb.abstasks import AbsTask, AbsTaskMultilabelClassification, AbsTaskReranking from mteb.abstasks.TaskMetadata import TASK_CATEGORY, TASK_DOMAIN, TASK_TYPE from mteb.languages import ( ISO_TO_LANGUAGE, @@ -26,13 +26,16 @@ def create_task_list() -> list[type[AbsTask]]: # reranking subclasses retrieval to share methods, but is an abstract task - tasks_categories_cls = list(AbsTask.__subclasses__()) + [AbsTaskReranking] + tasks_categories_cls = list(AbsTask.__subclasses__()) + [ + AbsTaskReranking, + AbsTaskMultilabelClassification, + ] tasks = [] for cat_cls in tasks_categories_cls: for cls in cat_cls.__subclasses__(): - if ( - cat_cls.__name__.startswith("AbsTask") - and cls.__name__ != "AbsTaskReranking" + if cat_cls.__name__.startswith("AbsTask") and 
cls.__name__ not in ( + "AbsTaskReranking", + "AbsTaskMultilabelClassification", ): tasks.append(cls) return tasks diff --git a/mteb/tasks/Clustering/eng/ArxivClusteringP2P.py b/mteb/tasks/Clustering/eng/ArxivClusteringP2P.py index 72f831599c..91af6090d2 100644 --- a/mteb/tasks/Clustering/eng/ArxivClusteringP2P.py +++ b/mteb/tasks/Clustering/eng/ArxivClusteringP2P.py @@ -1,7 +1,10 @@ from __future__ import annotations +import random + +from datasets import Dataset, DatasetDict + from mteb.abstasks.AbsTaskClustering import AbsTaskClustering -from mteb.abstasks.AbsTaskClusteringFast import clustering_downsample from mteb.abstasks.TaskMetadata import TaskMetadata @@ -79,5 +82,25 @@ class ArxivClusteringP2PFast(AbsTaskClustering): ) def dataset_transform(self): - ds = clustering_downsample(self.dataset, self.seed) - self.dataset = ds + rng_state = random.Random(self.seed) + + ds = {} + for split in self.dataset: + _docs = [] + _labels = [] + + n_clusters = len(self.dataset[split]) + + for i in range(n_clusters): + labels = self.dataset[split]["labels"][i] + sentences = self.dataset[split]["sentences"][i] + + n_sample = min(2048, len(sentences)) + + # sample n_sample from each cluster + idxs = rng_state.sample(range(len(sentences)), n_sample) + _docs.append([sentences[idx] for idx in idxs]) + _labels.append([labels[idx] for idx in idxs]) + + ds[split] = Dataset.from_dict({"sentences": _docs, "labels": _labels}) + self.dataset = DatasetDict(ds) diff --git a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py index 3c7551e951..07097468e8 100644 --- a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py +++ b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py @@ -54,10 +54,7 @@ class CrossLingualSemanticDiscriminationWMT19(AbsTaskRetrieval, MultilingualTask sample_creation="LM-generated and verified", 
bibtex_citation="preprint_coming", ) - - def __init__(self, **kwargs): - self.num_of_distractors = 4 - super().__init__(**kwargs) + num_of_distractors = 4 def load_data(self, **kwargs): """Generic data loader function for original clsd datasets with the format shown in "hf_dataset_link". diff --git a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py index e846d5c83b..682bdbe7c1 100644 --- a/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py +++ b/mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py @@ -55,9 +55,7 @@ class CrossLingualSemanticDiscriminationWMT21(AbsTaskRetrieval, MultilingualTask bibtex_citation="preprint_coming", ) - def __init__(self, **kwargs): - self.num_of_distractors = 4 - super().__init__(**kwargs) + num_of_distractors = 4 def load_data(self, **kwargs): """Generic data loader function for original clsd datasets with the format shown in "hf_dataset_link". 
diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index ab193aab29..cedf393210 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -55,7 +55,10 @@ class MockClassificationTask(AbsTaskClassification): "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, - "unique_text": 2, + "unique_texts": 2, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, @@ -66,7 +69,10 @@ class MockClassificationTask(AbsTaskClassification): "min_text_length": 23, "average_text_length": 26.5, "max_text_length": 30, - "unique_text": 2, + "unique_texts": 2, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, @@ -113,7 +119,10 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, - "unique_text": 2, + "unique_texts": 2, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": {"0": {"count": 2}, "1": {"count": 2}}, "hf_subset_descriptive_stats": { @@ -124,7 +133,10 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, - "unique_text": 2, + "unique_texts": 2, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, @@ -135,7 +147,10 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "min_text_length": 23, "average_text_length": 26.0, "max_text_length": 29, - "unique_text": 2, + "unique_texts": 2, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, 
"unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, @@ -148,7 +163,10 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "min_text_length": 23, "average_text_length": 26.5, "max_text_length": 30, - "unique_text": 2, + "unique_texts": 2, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": {"0": {"count": 2}, "1": {"count": 2}}, "hf_subset_descriptive_stats": { @@ -159,7 +177,10 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "min_text_length": 23, "average_text_length": 26.5, "max_text_length": 30, - "unique_text": 2, + "unique_texts": 2, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, @@ -170,7 +191,10 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "min_text_length": 23, "average_text_length": 26.5, "max_text_length": 30, - "unique_text": 2, + "unique_texts": 2, + "min_labels_per_text": 1, + "average_label_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index b654bd62ea..e84f0e63b6 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -138,7 +138,7 @@ def test_encode_kwargs_passed_to_all_encodes(task_name: str | AbsTask): my_encode_kwargs = {"no_one_uses_this_args": "but_its_here"} class MockEncoderWithKwargs(mteb.Encoder): - def encode(self, sentences, prompt_name: str | None = None, **kwargs): + def encode(self, sentences, task_name: str | None = None, **kwargs): assert "no_one_uses_this_args" in kwargs assert ( my_encode_kwargs["no_one_uses_this_args"] diff --git a/tests/test_tasks/test_all_abstasks.py b/tests/test_tasks/test_all_abstasks.py index 
ddece7dbc4..84d5a521ca 100644 --- a/tests/test_tasks/test_all_abstasks.py +++ b/tests/test_tasks/test_all_abstasks.py @@ -9,11 +9,10 @@ import mteb from mteb import MTEB -from mteb.abstasks import AbsTask +from mteb.abstasks import AbsTask, MultilingualTask from mteb.abstasks.AbsTaskReranking import AbsTaskReranking from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval from mteb.abstasks.AbsTaskSpeedTask import AbsTaskSpeedTask -from mteb.abstasks.MultiSubsetLoader import MultiSubsetLoader from mteb.overview import TASKS_REGISTRY from ..test_benchmark.task_grid import MOCK_TASK_TEST_GRID_AS_STRING @@ -35,8 +34,8 @@ def test_load_data( if ( isinstance(task, AbsTaskRetrieval) or isinstance(task, AbsTaskReranking) - or isinstance(task, MultiSubsetLoader) or isinstance(task, AbsTaskSpeedTask) + or isinstance(task, MultilingualTask) ): pytest.skip() with patch.object(task, "dataset_transform") as mock_dataset_transform: From f73e7acc37d0873f8d689b2cbc191ca21735bd4a Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Sun, 12 Jan 2025 10:45:58 +0300 Subject: [PATCH 31/40] openai remove memory usage --- mteb/models/openai_models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index 75eade15d6..00f53c80e8 100644 --- a/mteb/models/openai_models.py +++ b/mteb/models/openai_models.py @@ -157,7 +157,6 @@ def _to_numpy(self, embedding_response) -> np.ndarray: framework=["API"], use_instructions=False, n_parameters=None, - memory_usage=None, public_training_data=False, # assumed public_training_code=False, # assumed training_datasets=None, @@ -180,7 +179,6 @@ def _to_numpy(self, embedding_response) -> np.ndarray: framework=["API"], use_instructions=False, n_parameters=None, - memory_usage=None, public_training_data=False, # assumed public_training_code=False, # assumed training_datasets=None, From d946ad4d279ebd39dd32620a66bef287e006b405 Mon Sep 17 00:00:00 2001 
From: Sam <40773225+sam-hey@users.noreply.github.com> Date: Sun, 12 Jan 2025 16:05:04 +0100 Subject: [PATCH 32/40] fix: [v2] _run_eval() for case: co2_tracker False & add test (#1774) fix: _run_eval no co tracking Co-authored-by: sam021313 <40773225+sam021313@users.noreply.github.com> --- mteb/evaluation/MTEB.py | 1 - tests/test_benchmark/test_benchmark.py | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index bb920dbb45..0c07ff34db 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -589,7 +589,6 @@ def run( task, model, split, - output_folder, subsets_to_run=subsets_to_run, encode_kwargs=encode_kwargs, **kwargs, diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index e84f0e63b6..0c8521578d 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -67,6 +67,24 @@ def test_benchmark_encoders_on_task(task: str | AbsTask, model: mteb.Encoder): eval.run(model, output_folder="tests/results", overwrite_results=True) +@pytest.mark.parametrize("task", [MockMultilingualRetrievalTask]) +@pytest.mark.parametrize( + "model", + [MockSentenceTransformer()], +) +def test_run_eval_without_co2_tracking(task: str | AbsTask, model: mteb.Encoder): + """Test that a task can be fetched and run without CO2 tracking""" + if isinstance(task, str): + tasks = mteb.get_tasks(tasks=[task]) + else: + tasks = [task] + + eval = mteb.MTEB(tasks=tasks) + eval.run( + model, output_folder="tests/results", overwrite_results=True, co2_tracker=False + ) + + @pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID[:1]) @pytest.mark.parametrize("model", [MockNumpyEncoder()]) def test_reload_results(task: str | AbsTask, model: mteb.Encoder, tmp_path: Path): From 81a272e1a7c760dbfc40a599aa0833e653c793d7 Mon Sep 17 00:00:00 2001 From: Orion Weller <31665361+orionw@users.noreply.github.com> Date: Mon, 13 Jan 2025 09:11:40 
-0500 Subject: [PATCH 33/40] Fix RepLLaMA-based models and Instructions for Cross-Encoders (#1733) * fix promptriever; repllama; cross_encoders. Need to fix instruction tasks * update info * interface and fixes * update interface * minor --------- Co-authored-by: oweller2 Co-authored-by: oweller2 --- mteb/cross_encoder_interface.py | 48 +++++++++++++++++++ mteb/encoder_interface.py | 20 ++++++++ mteb/evaluation/evaluators/model_classes.py | 31 ++++++------ mteb/models/promptriever_models.py | 40 +++++++++------- mteb/models/repllama_models.py | 18 +++++-- mteb/models/sentence_transformer_wrapper.py | 13 ++++- .../InstructionRetrieval/eng/InstructIR.py | 2 +- 7 files changed, 135 insertions(+), 37 deletions(-) create mode 100644 mteb/cross_encoder_interface.py diff --git a/mteb/cross_encoder_interface.py b/mteb/cross_encoder_interface.py new file mode 100644 index 0000000000..072754f0b8 --- /dev/null +++ b/mteb/cross_encoder_interface.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any, Protocol, Union, runtime_checkable + +import numpy as np +import torch + +Corpus = Union[list[dict[str, str]], dict[str, list[str]]] + + +@runtime_checkable +class CrossEncoder(Protocol): + """The interface for a cross-encoder in MTEB. + + In general the interface is kept aligned with sentence-transformers interface. In cases where exceptions occurs these are handled within MTEB. + """ + + def __init__(self, device: str | None = None, **kwargs: Any) -> None: + """The initialization function for the cross-encoder. Used when calling it from the mteb run CLI. + + Args: + device: The device to use for prediction. Can be ignored if the encoder is not using a device (e.g. for API) + **kwargs: Additional arguments to pass to the cross-encoder. 
+ """ + + def predict( + self, + queries: Sequence[str], + passages: Sequence[str], + *, + task_name: str | None = None, + instruction: str | None = None, + **kwargs: Any, + ) -> np.ndarray | torch.Tensor: + """Predicts relevance scores for query-passage pairs. Note that, unlike the encoder, the cross-encoder can compare across queries and passages. + + Args: + queries: The queries to score. + passages: The passages to score. + task_name: The name of the task to score. + instruction: Optional instruction text to combine with the query. + **kwargs: Additional arguments to pass to the cross-encoder. + + Returns: + The predicted relevance scores for each query-passage pair. + """ + ... diff --git a/mteb/encoder_interface.py b/mteb/encoder_interface.py index fb4b71ddf7..94a93cf342 100644 --- a/mteb/encoder_interface.py +++ b/mteb/encoder_interface.py @@ -61,6 +61,26 @@ def encode( ... +class EncoderWithQueryInstructionFormatting(Protocol): + """Optional protocol for encoders that support combining queries with instructions in a model-specific way. If not implemented, MTEB will use the default query instruction formatting ({query} {instruction}).""" + + def combine_query_and_instruction( + self, + query: str, + instruction: str, + ) -> str: + """Combines a query with an instruction. + + Args: + query: The query text to combine. + instruction: The instruction text to combine with the query. + + Returns: + The combined query and instruction text. + """ + ... + + class EncoderWithSimilarity(Encoder, Protocol): """Besides the required functions in the Encoder interface, the encoder can additionally specify its own similiarity functions. 
diff --git a/mteb/evaluation/evaluators/model_classes.py b/mteb/evaluation/evaluators/model_classes.py index ba5618bb05..b05de30d7f 100644 --- a/mteb/evaluation/evaluators/model_classes.py +++ b/mteb/evaluation/evaluators/model_classes.py @@ -76,6 +76,15 @@ def __init__( # custom functions can be used by extending the DenseRetrievalExactSearch class self.predict = self.model.predict + if hasattr(self.model, "combine_query_and_instruction"): + self.combine_query_and_instruction = ( + self.model.combine_query_and_instruction + ) + else: + self.combine_query_and_instruction = ( + lambda query, instruction: f"{query.strip()} {instruction}".strip() + ) + def search( self, corpus: dict[str, dict[str, str]], @@ -110,7 +119,9 @@ def search( new_queries = [] for q_idx, qid in enumerate(query_ids): new_queries.append( - f"{queries[q_idx].strip()} {instructions[qid]}".strip() + self.combine_query_and_instruction( + queries[q_idx], instructions[qid] + ) ) queries = new_queries @@ -446,19 +457,11 @@ def search_cross_encoder( len(queries_in_pair) == len(corpus_in_pair) == len(instructions_in_pair) ) - if hasattr(self.model, "predict"): - # can't take instructions, so add them here - if instructions_in_pair[0] is not None: - queries_in_pair = [ - f"{q} {i}".strip() - for i, q in zip(instructions_in_pair, queries_in_pair) - ] - scores = self.model.predict(list(zip(queries_in_pair, corpus_in_pair))) # type: ignore - else: - # may use the instructions in a unique way, so give them also - scores = self.model.predict( # type: ignore - list(zip(queries_in_pair, corpus_in_pair, instructions_in_pair)) - ) + # cross-encoders may use the instructions in a unique way + # due to the many ways of combining query+instruct+doc, so let them decide + scores = self.model.predict( # type: ignore + list(zip(queries_in_pair, corpus_in_pair, instructions_in_pair)) + ) for i, score in enumerate(scores): results[query_ids[i]][corpus_ids[i]] = float(score) diff --git a/mteb/models/promptriever_models.py 
b/mteb/models/promptriever_models.py index 4bced3a59d..803a5ab89f 100644 --- a/mteb/models/promptriever_models.py +++ b/mteb/models/promptriever_models.py @@ -6,10 +6,10 @@ import numpy as np import torch -from mteb.encoder_interface import Encoder +from mteb.encoder_interface import Encoder, PromptType from mteb.model_meta import ModelMeta -from .repllama_models import RepLLaMAWrapper +from .repllama_models import RepLLaMAWrapper, model_prompts from .wrapper import Wrapper logger = logging.getLogger(__name__) @@ -19,18 +19,18 @@ class PromptrieverWrapper(RepLLaMAWrapper, Wrapper): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - def encode_queries(self, queries: list[str], **kwargs: Any) -> np.ndarray: - queries = [f"query: {query}" for query in queries] - if "instruction" in kwargs: - end_punct_list = [ - "?" if query.strip()[-1] not in ["?", ".", "!"] else "" - for query in queries - ] - queries = [ - f"{query}{end_punct_list[i]} {kwargs['instruction']}" - for i, query in enumerate(queries) - ] - return self.encode(queries, **kwargs) + def encode( + self, + sentences: list[str], + *, + task_name: str, + prompt_type: PromptType | None = None, + **kwargs: Any, + ) -> np.ndarray: + kwargs["is_promptriever"] = True + return super().encode( + sentences, task_name=task_name, prompt_type=prompt_type, **kwargs + ) def _loader(wrapper: type[PromptrieverWrapper], **kwargs) -> Callable[..., Encoder]: @@ -57,11 +57,12 @@ def loader_inner(**kwargs: Any) -> Encoder: promptriever_llama2 = ModelMeta( loader=_loader( - RepLLaMAWrapper, + PromptrieverWrapper, base_model_name_or_path="meta-llama/Llama-2-7b-hf", peft_model_name_or_path="samaya-ai/promptriever-llama2-7b-v1", device_map="auto", torch_dtype=torch.bfloat16, + model_prompts=model_prompts, ), name="samaya-ai/promptriever-llama2-7b-v1", languages=["eng_Latn"], @@ -82,11 +83,12 @@ def loader_inner(**kwargs: Any) -> Encoder: promptriever_llama3 = ModelMeta( loader=_loader( - RepLLaMAWrapper, + 
PromptrieverWrapper, base_model_name_or_path="meta-llama/Meta-Llama-3.1-8B", peft_model_name_or_path="samaya-ai/promptriever-llama3.1-8b-v1", device_map="auto", torch_dtype=torch.bfloat16, + model_prompts=model_prompts, ), name="samaya-ai/promptriever-llama3.1-8b-v1", languages=["eng_Latn"], @@ -108,11 +110,12 @@ def loader_inner(**kwargs: Any) -> Encoder: promptriever_llama3_instruct = ModelMeta( loader=_loader( - RepLLaMAWrapper, + PromptrieverWrapper, base_model_name_or_path="meta-llama/Meta-Llama-3.1-8B-Instruct", peft_model_name_or_path="samaya-ai/promptriever-llama3.1-8b-instruct-v1", device_map="auto", torch_dtype=torch.bfloat16, + model_prompts=model_prompts, ), name="samaya-ai/promptriever-llama3.1-8b-instruct-v1", languages=["eng_Latn"], @@ -133,11 +136,12 @@ def loader_inner(**kwargs: Any) -> Encoder: promptriever_mistral_v1 = ModelMeta( loader=_loader( - RepLLaMAWrapper, + PromptrieverWrapper, base_model_name_or_path="mistralai/Mistral-7B-v0.1", peft_model_name_or_path="samaya-ai/promptriever-mistral-v0.1-7b-v1", device_map="auto", torch_dtype=torch.bfloat16, + model_prompts=model_prompts, ), name="samaya-ai/promptriever-mistral-v0.1-7b-v1", languages=["eng_Latn"], diff --git a/mteb/models/repllama_models.py b/mteb/models/repllama_models.py index 3df7199084..e132115d86 100644 --- a/mteb/models/repllama_models.py +++ b/mteb/models/repllama_models.py @@ -75,19 +75,31 @@ def create_batch_dict(self, tokenizer, input_texts): return_tensors="pt", ) + def combine_query_and_instruction(self, query, instruction): + end_punct = "?" 
if query.strip()[-1] not in ["?", ".", "!"] else "" + return f"{query}{end_punct} {instruction}".strip() + def encode( self, sentences: list[str], *, task_name: str, prompt_type: PromptType | None = None, - **kwargs: Any, # noqa + **kwargs, ) -> np.ndarray: batch_size = 16 if "batch_size" not in kwargs else kwargs.pop("batch_size") all_embeddings = [] - prompt = self.get_prompt_name(self.model_prompts, task_name, prompt_type) + prompt_name = self.get_prompt_name(self.model_prompts, task_name, prompt_type) + prompt = self.model_prompts.get(prompt_name) + if prompt: - sentences = [f"{prompt}{sentence}".strip() for sentence in sentences] + if prompt_type == "queries": + sentences = [ + f"{prompt}{sentence.strip()}".strip() for sentence in sentences + ] + else: + sentences = [f"{prompt}{sentence}".strip() for sentence in sentences] + for i in tqdm.tqdm(range(0, len(sentences), batch_size)): batch_texts = sentences[i : i + batch_size] diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index 1eb19eb46b..9ec25a9896 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -59,7 +59,7 @@ def __init__( self.model_prompts = self.validate_task_to_prompt_name(model_prompts) if isinstance(self.model, CrossEncoder): - self.predict = self._predict + self.predict = self.handle_instructions_predict if similarity_fn_name: self.similarity = self.get_similarity_function(similarity_fn_name) @@ -129,3 +129,14 @@ def _predict( convert_to_numpy=True, **kwargs, ) + + def handle_instructions_predict(self, sentences, **kwargs): + # unzip the queries, corpus, and instruction so we can add the instructions to the queries + # as ST models can't take an arg for instructions + queries, corpus, instructions = list(zip(*sentences)) + # combine the queries and instructions + queries_with_instructions = [ + f"{query.strip()} {instruction}".strip() if instruction else query + for query, instruction 
in zip(queries, instructions) + ] + return self._predict(list(zip(queries_with_instructions, corpus)), **kwargs) diff --git a/mteb/tasks/InstructionRetrieval/eng/InstructIR.py b/mteb/tasks/InstructionRetrieval/eng/InstructIR.py index 1e2f40cd25..bb1cbcd748 100644 --- a/mteb/tasks/InstructionRetrieval/eng/InstructIR.py +++ b/mteb/tasks/InstructionRetrieval/eng/InstructIR.py @@ -8,7 +8,7 @@ class InstructIR(AbsTaskRetrieval): metadata = TaskMetadata( name="InstructIR", - description='A benchmark specifically designed to evaluate the instruction following ability in information retrieval models. Our approach focuses on user-aligned instructions tailored to each query instance, reflecting the diverse characteristics inherent in real-world search scenarios. NOTE: scores on this may differ unless you include instruction first, then "[SEP]" and then the query.', + description='A benchmark specifically designed to evaluate the instruction following ability in information retrieval models. Our approach focuses on user-aligned instructions tailored to each query instance, reflecting the diverse characteristics inherent in real-world search scenarios. 
**NOTE**: scores on this may differ unless you include instruction first, then "[SEP]" and then the query via redefining `combine_query_and_instruction` in your model.', reference="https://github.com/kaistAI/InstructIR/tree/main", dataset={ "path": "mteb/InstructIR-mteb", From 296b9eaa56f9843b63e0056db3bae1da3e895603 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Mon, 13 Jan 2025 22:17:46 +0300 Subject: [PATCH 34/40] lint --- mteb/benchmarks/benchmarks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 417bd91d25..268fc748cf 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -6,7 +6,6 @@ from pydantic import AnyUrl, BeforeValidator, TypeAdapter -from mteb import get_tasks from mteb.abstasks.AbsTask import AbsTask from mteb.load_results.benchmark_results import BenchmarkResults from mteb.load_results.load_results import load_results From 54018c799b2fa9ea30fbbd06557318a6ffedc85b Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Wed, 15 Jan 2025 21:29:05 +0500 Subject: [PATCH 35/40] [v2] Remove deprecated parameters from `MTEB` and cli (#1773) * remove deprecated parameters * remove _task_langs * lint * fixes * fixes * fixes * fix all abs tasks * change to get tasks * try to fix * final fixes * back to tuple * update args description --- mteb/cli.py | 21 +-- mteb/evaluation/MTEB.py | 203 +++---------------------- mteb/overview.py | 2 +- tests/test_benchmark/task_grid.py | 52 +++---- tests/test_benchmark/test_benchmark.py | 4 +- tests/test_overview.py | 5 - tests/test_reproducible_workflow.py | 4 +- tests/test_tasks/test_all_abstasks.py | 9 +- tests/test_tasks/test_mteb_rerank.py | 9 +- 9 files changed, 56 insertions(+), 253 deletions(-) diff --git a/mteb/cli.py b/mteb/cli.py index 3c6c821f52..c552394e49 100644 --- a/mteb/cli.py +++ b/mteb/cli.py @@ -374,26 +374,7 @@ def main(): add_create_meta_parser(subparsers) args = 
parser.parse_args() - - # If no subcommand is provided, default to run with a deprecation warning - if not hasattr(args, "func"): - logger.warning( - "Using `mteb` without a subcommand is deprecated. Use `mteb run` instead.", - DeprecationWarning, - ) - # Set default arguments for 'run' if no subcommand is provided - default_args = parser.parse_args( - ["run"] - + list(map(str, args._get_args())) - + [ - f"--{k}" if v is None else f"--{k}={v}" - for k, v in vars(args).items() - if k != "func" - ] - ) - default_args.func(default_args) - else: - args.func(args) + args.func(args) if __name__ == "__main__": diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index 0c07ff34db..3c94f24785 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -5,28 +5,29 @@ import os import traceback from collections.abc import Iterable -from copy import copy, deepcopy +from copy import deepcopy from datetime import datetime from itertools import chain from pathlib import Path from time import time -from typing import Any +from typing import TYPE_CHECKING, Any import datasets from codecarbon import EmissionsTracker from sentence_transformers import CrossEncoder, SentenceTransformer +import mteb from mteb.abstasks.AbsTask import ScoresDict from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta from mteb.models import model_meta_from_sentence_transformers from ..abstasks.AbsTask import AbsTask -from ..abstasks.AbsTaskMultilabelClassification import AbsTaskMultilabelClassification -from ..abstasks.AbsTaskReranking import AbsTaskReranking from ..load_results.task_results import TaskResult from ..models.sentence_transformer_wrapper import SentenceTransformerWrapper -from . 
import LangMapping + +if TYPE_CHECKING: + from mteb.benchmarks import Benchmark logger = logging.getLogger(__name__) @@ -34,124 +35,41 @@ class MTEB: def __init__( self, - tasks: Iterable[str | AbsTask] | None = None, + tasks: Iterable[AbsTask | Benchmark], *, - task_types: list[str] | None = None, - task_categories: list[str] | None = None, - task_langs: list[str] | None = None, - version=None, err_logs_path: str = "error_logs.txt", - **kwargs, ): """Create an Evaluation pipeline, based on the provided tasks. Args: - tasks: List of tasks to be evaluated. - task_types: Will be deprecated we recommend that you use `mteb.get_tasks()` to filter tasks. List of task types (Clustering, Retrieval..) to be - evaluated. If None, all tasks will be evaluated - task_categories: Will be deprecated we recommend that you use `mteb.get_tasks()` to filter tasks. List of task categories (s2s, p2p..) to be - evaluated. If None, all tasks will be evaluated - task_langs: Will be deprecated we recommend that you use `mteb.get_tasks()` to filter tasks. List of languages to be evaluated. if None, all - languages will be evaluated. ["eng-Latn", "deu_Latn"] will evaluate on all tasks with these languages. - version: Will be deprecated. Version of the benchmark to use. If None, latest is used + tasks: List of tasks or benchmarks to be evaluated, e.g. tasks returned by + `mteb.get_tasks(["task1","task2"]) or `mteb.get_benchmark("MTEB(eng, classic)"). err_logs_path: Path to save error logs. 
- kwargs: Additional arguments to be passed to the tasks """ from mteb.benchmarks import Benchmark - self.deprecation_warning( - task_types, task_categories, task_langs, tasks, version - ) - - if tasks is not None: - self._tasks = tasks - if isinstance(tasks[0], Benchmark): - self.benchmarks = tasks - self._tasks = list(chain.from_iterable(tasks)) - assert ( - task_types is None and task_categories is None - ), "Cannot specify both `tasks` and `task_types`/`task_categories`" - else: - self._task_types = task_types - self._task_categories = task_categories - self._tasks = None - - self._task_langs = task_langs if task_langs is not None else [] - if isinstance(self._task_langs, str): - self._task_langs = [self._task_langs] + self.tasks = list(tasks) + if len(self.tasks) > 0 and isinstance(self.tasks[0], Benchmark): + self.benchmarks = tasks + self.tasks = list(chain.from_iterable(self.tasks)) - self._extend_lang_code() - self._extend_lang_pairs() # add all possible pairs - - self._version = version self.err_logs_path = err_logs_path - self.last_evaluated_splits = {} - self.select_tasks(**kwargs) - - def deprecation_warning( - self, task_types, task_categories, task_langs, tasks, version - ): - if task_types is not None: - logger.warning( - "The `task_types` argument is deprecated and will be removed in the next release. " - + "Please use `tasks = mteb.get_tasks(... task_types = [...])` to filter tasks instead." - ) - if task_categories is not None: - logger.warning( - "The `task_categories` argument is deprecated and will be removed in the next release. " - + "Please use `tasks = mteb.get_tasks(... categories = [...])` to filter tasks instead." - ) - if task_langs is not None: - logger.warning( - "The `task_langs` argument is deprecated and will be removed in the next release. " - + "Please use `tasks = mteb.get_tasks(... languages = [...])` to filter tasks instead. " - + "Note that this uses 3 letter language codes (ISO 639-3)." 
- ) - if version is not None: - logger.warning( - "The `version` argument is deprecated and will be removed in the next release." - ) - task_contains_strings = any(isinstance(x, str) for x in tasks or []) - if task_contains_strings: - logger.warning( - "Passing task names as strings is deprecated and will be removed in the next release. " - + "Please use `tasks = mteb.get_tasks(tasks=[...])` method to get tasks instead." - ) - @property def available_tasks(self): - return [x.metadata.name for x in self.tasks_cls] + return [x.metadata.name for x in self.tasks] @property def available_task_types(self): # sort the task types - return sorted({x.metadata.type for x in self.tasks_cls}) + return sorted({x.metadata.type for x in self.tasks}) @property def available_task_categories(self): - return {x.metadata.category for x in self.tasks_cls} - - def _extend_lang_code(self): - # add all possible language codes - for lang in set(self._task_langs): - if lang in LangMapping.LANG_MAPPING: - self._task_langs += LangMapping.LANG_MAPPING[lang] - - def _extend_lang_pairs(self): - # add all possible language pairs - langs = set(self._task_langs) - for x in langs: - if "-" not in x: - for y in langs: - if "-" not in y: - pair = f"{x}-{y}" - if pair not in langs: - self._task_langs.append(pair) - return - - def _display_tasks(self, task_list, name=None): + return {x.metadata.category for x in self.tasks} + + def _display_tasks(self, task_list: Iterable[AbsTask], name: str | None = None): from rich.console import Console # disable logging for other ranks @@ -215,80 +133,14 @@ def mteb_benchmarks(self): @classmethod def mteb_tasks(cls): """Get all tasks available in the MTEB.""" - instance = cls() - instance._display_tasks(instance.tasks_cls, name="MTEB tasks") + tasks = mteb.get_tasks() + instance = cls(tasks) + instance._display_tasks(tasks, name="MTEB tasks") def print_selected_tasks(self): """Print the selected tasks.""" self._display_tasks(self.tasks, name="Selected tasks") - def 
select_tasks(self, **kwargs): - """Select the tasks to be evaluated.""" - # Get all existing tasks - # reranking and multiclassClassification subclasses retrieval to share methods, but is an abstract task - tasks_categories_cls = list(AbsTask.__subclasses__()) + [ - AbsTaskReranking, - AbsTaskMultilabelClassification, - ] - all_task_classes = [] - for cat_cls in tasks_categories_cls: - for cls in cat_cls.__subclasses__(): - if cat_cls.__name__.startswith("AbsTask") and cls.__name__ not in ( - "AbsTaskReranking", - "AbsTaskMultilabelClassification", - ): - task = cls(hf_subsets=self._task_langs, **kwargs) - all_task_classes.append(task) - - self.tasks_cls = all_task_classes - - # If `task_list` is specified, select list of tasks - if self._tasks is not None: - self.tasks = list( - filter(lambda x: (x.metadata.name in self._tasks), self.tasks_cls) - ) - if len(self.tasks) != len(self._tasks): - tasks_known = {x.metadata.name for x in self.tasks_cls} - tasks_unknown = { - x for x in self._tasks if isinstance(x, str) - } - tasks_known - if tasks_unknown: - unknown_str, known_str = ( - ",".join(sorted(tasks_unknown)), - ",".join(sorted(tasks_known)), - ) - logger.warning( - f"WARNING: Unknown tasks: {unknown_str}. Known tasks: {known_str}." 
- ) - # add task if subclass of mteb.tasks - self.tasks.extend([x for x in self._tasks if isinstance(x, AbsTask)]) - return - - # Otherwise use filters to select tasks - filtered_tasks = filter( - lambda x: (self._task_types is None) - or (x.metadata.type in self._task_types), - self.tasks_cls, - ) - filtered_tasks = filter( - lambda x: (self._task_categories is None) - or (x.metadata.category in self._task_categories), - filtered_tasks, - ) - filtered_tasks = filter( - lambda x: (self._version is None) or (x.metadata.version >= self._version), - filtered_tasks, - ) - # keep only tasks with at least one language in the filter - filtered_tasks = filter( - lambda x: (not self._task_langs) - or (len(set(x.metadata.eval_langs) & set(self._task_langs)) > 0), - filtered_tasks, - ) - - # Get final list of tasks - self.tasks = list(filtered_tasks) - def load_tasks_data(self): """Load datasets for the selected tasks.""" logger.info(f"\n\n## Loading datasets for {len(self.tasks)} tasks") @@ -416,13 +268,6 @@ def run( Returns: A list of TaskResult objects, one for each task evaluated. """ - if "batch_size" in kwargs: - logger.warning( - "The `batch_size` argument is deprecated and will be removed in the next release. " - + "Please use `encode_kwargs = {'batch_size': ...}` to set the batch size instead." - ) - encode_kwargs["batch_size"] = kwargs["batch_size"] - # update logging to account for different levels of Verbosity (similar to the command line) if verbosity == 0: @@ -455,8 +300,8 @@ def run( self.print_selected_tasks() evaluation_results = [] - original_tasks = ( - self.tasks.copy() + original_tasks = deepcopy( + self.tasks ) # save them in case we re-use the object (e.g. for reranking) # To evaluate missing splits, we keep track of the task name and the corresponding splits. 
@@ -665,7 +510,7 @@ def create_model_meta(model: Encoder) -> ModelMeta: ) # create a copy of the meta to avoid modifying the original object - meta = copy(meta) + meta = deepcopy(meta) meta.revision = meta.revision or "no_revision_available" meta.name = meta.name or "no_model_name_available" diff --git a/mteb/overview.py b/mteb/overview.py index 5846993b02..39d96041bd 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -124,7 +124,7 @@ def __repr__(self) -> str: return "MTEBTasks" + super().__repr__() @staticmethod - def _extract_property_from_task(task, property): + def _extract_property_from_task(task, property: str): if hasattr(task.metadata, property): return getattr(task.metadata, property) elif hasattr(task, property): diff --git a/tests/test_benchmark/task_grid.py b/tests/test_benchmark/task_grid.py index 8ae310555f..3ad484b6ff 100644 --- a/tests/test_benchmark/task_grid.py +++ b/tests/test_benchmark/task_grid.py @@ -2,14 +2,8 @@ from __future__ import annotations +import mteb from mteb.abstasks import AbsTask -from mteb.tasks.BitextMining.dan.BornholmskBitextMining import BornholmBitextMining -from mteb.tasks.Classification.multilingual.IndicSentimentClassification import ( - IndicSentimentClassification, -) -from mteb.tasks.Clustering.eng.TwentyNewsgroupsClustering import ( - TwentyNewsgroupsClusteringFast, -) from .mock_tasks import ( MockBitextMiningTask, @@ -39,31 +33,25 @@ MockSummarizationTask, ) -twenty_news = TwentyNewsgroupsClusteringFast() - -# downsample to speed up tests -twenty_news.max_document_to_embed = 1000 -twenty_news.n_clusters = 2 -twenty_news.max_fraction_of_documents_to_embed = None - -TASK_TEST_GRID = [ - BornholmBitextMining(), # bitext mining + just supplying a task class instead of a string - IndicSentimentClassification( # multi subset loader - hf_subsets=["as"], # we only load one subset here to speed up tests - n_experiments=2, # to speed up the test - ), - "TwentyNewsgroupsClustering", # clustering and string instead of 
class - twenty_news, # fast clustering - "Banking77Classification", # classification - "SciDocsRR", # reranking - "FarsTail", # pair classification - "TwitterHjerneRetrieval", # retrieval - "BrazilianToxicTweetsClassification", # multilabel classification - "FaroeseSTS", # STS - "SummEval", # summarization - "Core17InstructionRetrieval", # instruction reranking - "InstructIR", # instruction retrieval -] +TASK_TEST_GRID = ( + mteb.get_tasks( + tasks=[ + "BornholmBitextMining", # bitext mining + just supplying a task class instead of a string + "TwentyNewsgroupsClustering", # clustering and string instead of class + "TwentyNewsgroupsClustering.v2", # fast clustering + "Banking77Classification", # classification + "SciDocsRR", # reranking + "FarsTail", # pair classification + "TwitterHjerneRetrieval", # retrieval + "BrazilianToxicTweetsClassification", # multilabel classification + "FaroeseSTS", # STS + "SummEval", # summarization + "Core17InstructionRetrieval", # instruction reranking + "InstructIR", # instruction retrieval + ] + ) + + mteb.get_tasks(tasks=["IndicSentimentClassification"], languages=["asm-Beng"]) +) TASK_TEST_GRID_AS_STRING = [ t.metadata.name if isinstance(t, AbsTask) else t for t in TASK_TEST_GRID diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index 0c8521578d..1393d46f12 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -67,7 +67,7 @@ def test_benchmark_encoders_on_task(task: str | AbsTask, model: mteb.Encoder): eval.run(model, output_folder="tests/results", overwrite_results=True) -@pytest.mark.parametrize("task", [MockMultilingualRetrievalTask]) +@pytest.mark.parametrize("task", [MockMultilingualRetrievalTask()]) @pytest.mark.parametrize( "model", [MockSentenceTransformer()], @@ -188,7 +188,7 @@ def test_run_using_benchmark(model: mteb.Encoder): name="test_bench", tasks=mteb.get_tasks(tasks=["STS12", "SummEval"]) ) - eval = mteb.MTEB(tasks=bench) + 
eval = mteb.MTEB(tasks=[bench]) eval.run( model, output_folder="tests/results", overwrite_results=True ) # we just want to test that it runs diff --git a/tests/test_overview.py b/tests/test_overview.py index 127e54f279..6136af1ea5 100644 --- a/tests/test_overview.py +++ b/tests/test_overview.py @@ -98,8 +98,3 @@ def test_MTEBTasks( # check for header of a table n_langs = len(tasks) assert len(tasks.to_markdown().split("\n")) - 3 == n_langs - - -def test_all_tasks_fetch(): - """Test that all tasks can be fetched""" - mteb.MTEB.mteb_tasks() diff --git a/tests/test_reproducible_workflow.py b/tests/test_reproducible_workflow.py index 566864a112..1c7536076e 100644 --- a/tests/test_reproducible_workflow.py +++ b/tests/test_reproducible_workflow.py @@ -36,7 +36,7 @@ def test_reproducibility_workflow(task_name: str, model_name: str, model_revisio @pytest.mark.parametrize( "task_name", TASK_TEST_GRID - + [ + + ( "BitextMining", "Classification", "MultilabelClassification", @@ -49,7 +49,7 @@ def test_reproducibility_workflow(task_name: str, model_name: str, model_revisio "InstructionRetrieval", "InstructionReranking", "Speed", - ], + ), ) def test_validate_task_to_prompt_name(task_name: str | AbsTask): if isinstance(task_name, AbsTask): diff --git a/tests/test_tasks/test_all_abstasks.py b/tests/test_tasks/test_all_abstasks.py index af66133273..91a7b95070 100644 --- a/tests/test_tasks/test_all_abstasks.py +++ b/tests/test_tasks/test_all_abstasks.py @@ -8,20 +8,17 @@ import pytest import mteb -from mteb import MTEB from mteb.abstasks import AbsTask, MultilingualTask from mteb.abstasks.AbsTaskReranking import AbsTaskReranking from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval from mteb.abstasks.AbsTaskSpeedTask import AbsTaskSpeedTask -from mteb.overview import TASKS_REGISTRY +from mteb.overview import TASKS_REGISTRY, get_tasks from ..test_benchmark.task_grid import MOCK_TASK_TEST_GRID_AS_STRING logging.basicConfig(level=logging.INFO) -tasks = [ - t for t in 
MTEB().tasks_cls if t.metadata.name not in MOCK_TASK_TEST_GRID_AS_STRING -] +tasks = [t for t in get_tasks() if t.metadata.name not in MOCK_TASK_TEST_GRID_AS_STRING] @pytest.mark.parametrize("task", tasks) @@ -84,7 +81,7 @@ async def check_datasets_are_available_on_hf(tasks): def test_dataset_availability(): """Checks if the datasets are available on Hugging Face using both their name and revision.""" - tasks = MTEB().tasks_cls + tasks = get_tasks() tasks = [ t for t in tasks diff --git a/tests/test_tasks/test_mteb_rerank.py b/tests/test_tasks/test_mteb_rerank.py index c540bb41ee..565b00e22f 100644 --- a/tests/test_tasks/test_mteb_rerank.py +++ b/tests/test_tasks/test_mteb_rerank.py @@ -6,6 +6,7 @@ from sentence_transformers import CrossEncoder, SentenceTransformer +import mteb from mteb import MTEB from mteb.model_meta import ModelMeta @@ -318,11 +319,7 @@ def test_mteb_rerank(tmp_path: Path): "1395", ] model = CrossEncoder("cross-encoder/ms-marco-TinyBERT-L-2-v2") - eval = MTEB( - tasks=[ - "SciFact", - ] - ) + eval = MTEB(tasks=mteb.get_tasks(["SciFact"])) # create fake first stage results tmp_file = tmp_path / "tmp.json" with open(tmp_file, "w") as f: @@ -374,7 +371,7 @@ def test_reranker_same_ndcg1(): revision=ce_revision, release_date="2021-04-15", ) - eval = MTEB(tasks=["SciFact"]) + eval = MTEB(tasks=mteb.get_tasks(["SciFact"])) eval.run( de, output_folder="tests/results/stage1", From 3a5aa0c1e5d57507841205a8708c9dbc21557991 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Thu, 16 Jan 2025 01:39:12 +0500 Subject: [PATCH 36/40] [v2] remove metadata_dict (#1820) * remove metadata_dict * Update mteb/overview.py Co-authored-by: Isaac Chung --------- Co-authored-by: Isaac Chung --- mteb/abstasks/AbsTask.py | 4 ---- mteb/overview.py | 4 +--- scripts/data/create_task_table.py | 11 +++-------- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index 399157757d..e8876f3ff2 100644 --- 
a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -278,10 +278,6 @@ def _calculate_metrics_from_split( ) -> DescriptiveStatistics: raise NotImplementedError - @property - def metadata_dict(self) -> dict[str, Any]: - return dict(self.metadata) - @property def languages(self) -> list[str]: """Returns the languages of the task""" diff --git a/mteb/overview.py b/mteb/overview.py index 39d96041bd..31bc5130e8 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -124,13 +124,11 @@ def __repr__(self) -> str: return "MTEBTasks" + super().__repr__() @staticmethod - def _extract_property_from_task(task, property: str): + def _extract_property_from_task(task: AbsTask, property: str): if hasattr(task.metadata, property): return getattr(task.metadata, property) elif hasattr(task, property): return getattr(task, property) - elif property in task.metadata_dict: - return task.metadata_dict[property] else: raise KeyError("Property neither in Task attribute or in task metadata.") diff --git a/scripts/data/create_task_table.py b/scripts/data/create_task_table.py index e5b292a08a..e15edb4820 100644 --- a/scripts/data/create_task_table.py +++ b/scripts/data/create_task_table.py @@ -137,14 +137,9 @@ def get_ds_stats(hf_hub_name): # Select all tasks for task in MTEB().tasks: print("Task: ", task) - if "dataset" in task.metadata_dict: - hub_name = hub_url = task.metadata.dataset["path"] - ds_stats = get_ds_stats(hub_name.split("/")[-1]) - elif "beir_name" in task.metadata_dict: - hub_name = hub_url = "BeIR/" + task.metadata_dict.get("beir_name") - ds_stats = get_ds_stats_beir("/".join(hub_name.split("/")[1:])) - if "cqadupstack" in hub_name: - hub_url = "BeIR/cqadupstack-qrels" + hub_name = hub_url = task.metadata.dataset["path"] + ds_stats = get_ds_stats(hub_name.split("/")[-1]) + TABLE_STRING += "\n" + ONE_LINE.format( f"[{task.metadata.name}]({task.metadata.reference})", f"[{hub_name}](https://huggingface.co/datasets/{hub_url})", From 
ce5cb3e859322d77f32019434f3d7cf9d72b02f5 Mon Sep 17 00:00:00 2001 From: Sam <40773225+sam-hey@users.noreply.github.com> Date: Fri, 17 Jan 2025 14:22:58 +0100 Subject: [PATCH 37/40] [v2] add similarity_fn in ModelMeta (#1759) * add dotwrapper * lint * make cleaner * add poc similarity_fn in ModelMeta * ref: rename EvaluationFunction to ScoringFunction Co-authored-by: Isaac Chung * make cos_sim default * Revert "make cleaner" This reverts commit 7d1e949f555066b08134ccacd89690e92554af30. * Revert "add dotwrapper" This reverts commit d71718b1bb6b0fc0cf378cea3b16528091fdd8d7. * lint * fix: _run_eval no co tracking * fix: bm25s * add enum to models * add mapping st sim fn name to mteb sim fn name * fix model meta use new fn for sim operators * add max_sim * fix: colbert & rm similarity_fn_name * ci: skip AfriSentiLID for now (#1785) * skip AfriSentiLID for now * skip relevant test case instead --------- Co-authored-by: Isaac Chung * test: add test for bm25s and ColBERT * lint * feat: add mapping for max_sim from pylate https://github.com/lightonai/pylate/issues/77 * test: bm25s skip * fix: MaxSim as max_sim match pylate & rm Enum in models * rm enum * update tests skip --------- Co-authored-by: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Co-authored-by: sam021313 <40773225+sam021313@users.noreply.github.com> Co-authored-by: Isaac Chung Co-authored-by: Isaac Chung --- .../evaluators/RetrievalEvaluator.py | 1 + mteb/evaluation/evaluators/model_classes.py | 16 +++++-- mteb/evaluation/evaluators/utils.py | 28 ++++++++++++ mteb/model_meta.py | 16 ++++++- mteb/models/colbert_models.py | 7 ++- mteb/models/overview.py | 9 ++-- mteb/models/sentence_transformer_wrapper.py | 6 +-- mteb/models/wrapper.py | 15 ------- tests/test_benchmark/test_models.py | 44 +++++++++++++++++++ 9 files changed, 113 insertions(+), 29 deletions(-) create mode 100644 tests/test_benchmark/test_models.py diff --git a/mteb/evaluation/evaluators/RetrievalEvaluator.py 
b/mteb/evaluation/evaluators/RetrievalEvaluator.py index be2f5af1f0..9e088aacdf 100644 --- a/mteb/evaluation/evaluators/RetrievalEvaluator.py +++ b/mteb/evaluation/evaluators/RetrievalEvaluator.py @@ -82,6 +82,7 @@ def __call__( self.top_k, task_name=self.task_name, # type: ignore instructions=instructions, + score_function="bm25", **kwargs, ) else: diff --git a/mteb/evaluation/evaluators/model_classes.py b/mteb/evaluation/evaluators/model_classes.py index b05de30d7f..b2d2c54be8 100644 --- a/mteb/evaluation/evaluators/model_classes.py +++ b/mteb/evaluation/evaluators/model_classes.py @@ -332,9 +332,19 @@ def _full_corpus_search( query_embeddings = torch.as_tensor(query_embeddings).to(device) sub_corpus_embeddings = torch.as_tensor(sub_corpus_embeddings).to(device) - score_function = ( - self.model.similarity if hasattr(self.model, "similarity") else cos_sim - ) + if hasattr(self.model.model, "mteb_model_meta") or hasattr( + self.model, "similarity" + ): + score_function = ( + self.model.similarity + if hasattr(self.model, "similarity") + else self.model.model.mteb_model_meta.get_similarity_function() + ) + else: + logger.warning( + "The model does not provide `mteb_model_meta`; defaulting to the cosine similarity function." + ) + score_function = cos_sim with torch.inference_mode(): scores = score_function(query_embeddings, sub_corpus_embeddings) diff --git a/mteb/evaluation/evaluators/utils.py b/mteb/evaluation/evaluators/utils.py index e01e0ec463..14ca673ce9 100644 --- a/mteb/evaluation/evaluators/utils.py +++ b/mteb/evaluation/evaluators/utils.py @@ -70,6 +70,34 @@ def _cos_sim_core(a_tensor, b_tensor): return _cos_sim_core(a, b) +def max_sim(a: np.ndarray, b: np.ndarray) -> np.ndarray: + """Computes the max-similarity max_sim(a[i], b[j]) for all i and j. 
+ Works with a Tensor of the shape (batch_size, num_tokens, token_dim) + + Return: + Matrix with res[i][j] = max_sim(a[i], b[j]) + """ # noqa: D402 + if not isinstance(a, torch.Tensor): + a = torch.tensor(a, dtype=torch.float32) + + if not isinstance(b, torch.Tensor): + b = torch.tensor(b, dtype=torch.float32) + + if len(a.shape) == 2: + a = a.unsqueeze(0) + + if len(b.shape) == 2: + b = b.unsqueeze(0) + + scores = torch.einsum( + "ash,bth->abst", + a, + b, + ) + + return scores.max(axis=-1).values.sum(axis=-1) + + def dot_score(a: torch.Tensor, b: torch.Tensor): """Computes the dot-product dot_prod(a[i], b[j]) for all i and j. :return: Matrix with res[i][j] = dot_prod(a[i], b[j]) diff --git a/mteb/model_meta.py b/mteb/model_meta.py index 1754ab4bbb..bb063e7ba3 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -4,11 +4,13 @@ from functools import partial from typing import TYPE_CHECKING, Any, Callable, Literal +import numpy as np from pydantic import BaseModel, ConfigDict from mteb.abstasks.AbsTask import AbsTask from mteb.abstasks.TaskMetadata import STR_DATE, STR_URL from mteb.encoder_interface import Encoder +from mteb.evaluation.evaluators.utils import cos_sim, dot_score, max_sim from .languages import ISO_LANGUAGE_SCRIPT @@ -30,7 +32,6 @@ "PyLate", "ColBERT", ] -DISTANCE_METRICS = Literal["cosine", "max_sim", "dot"] def sentence_transformers_loader( @@ -51,6 +52,9 @@ def get_loader_name( return loader.__name__ +DISTANCE_METRICS = Literal["cosine", "MaxSim", "dot"] + + class ModelMeta(BaseModel): """The model metadata object. 
@@ -106,6 +110,16 @@ class ModelMeta(BaseModel): superseded_by: str | None = None citation: str | None = None + def get_similarity_function(self) -> Callable[[np.ndarray, np.ndarray], np.ndarray]: + if self.similarity_fn_name == "cosine": + return cos_sim + elif self.similarity_fn_name == "dot": + return dot_score + elif self.similarity_fn_name == "MaxSim": + return max_sim + elif self.similarity_fn_name is None: + raise ValueError("Similarity function not specified.") + def to_dict(self): dict_repr = self.model_dump() loader = dict_repr.pop("loader", None) diff --git a/mteb/models/colbert_models.py b/mteb/models/colbert_models.py index 8753791bff..6ce7ca6fb9 100644 --- a/mteb/models/colbert_models.py +++ b/mteb/models/colbert_models.py @@ -100,10 +100,13 @@ def encode( ) logger.info(f"Encoding {len(sentences)} sentences.") + if "request_qid" in kwargs: + kwargs.pop("request_qid") pred = self.model.encode( sentences, prompt_name=prompt_name, is_query=True if prompt_type == PromptType.query else False, + convert_to_tensor=True, **kwargs, ) @@ -158,7 +161,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: max_tokens=180, # Reduced for Benchmarking - see ColBERT paper embed_dim=None, # Bag of Embeddings (128) for each token license="mit", - similarity_fn_name="max_sim", + similarity_fn_name="MaxSim", framework=["PyLate", "ColBERT"], reference="https://huggingface.co/colbert-ir/colbertv2.0", use_instructions=False, @@ -209,7 +212,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: max_tokens=8192, embed_dim=None, # Bag of Embeddings (128) for each token license="cc-by-nc-4.0", - similarity_fn_name="max_sim", + similarity_fn_name="MaxSim", framework=["PyLate", "ColBERT"], reference="https://huggingface.co/jinaai/jina-colbert-v2", use_instructions=False, diff --git a/mteb/models/overview.py b/mteb/models/overview.py index 4e19bed19c..e9774cacd9 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -157,9 +157,12 @@ 
def get_model(model_name: str, revision: str | None = None, **kwargs: Any) -> En model = meta.load_model(**kwargs) # If revision not available in the modelmeta, try to extract it from sentence-transformers - if meta.revision is None and isinstance(model, SentenceTransformer): - _meta = model_meta_from_sentence_transformers(model) - meta.revision = _meta.revision if _meta.revision else meta.revision + if isinstance(model.model, SentenceTransformer): + _meta = model_meta_from_sentence_transformers(model.model) + if meta.revision is None: + meta.revision = _meta.revision if _meta.revision else meta.revision + if not meta.similarity_fn_name: + meta.similarity_fn_name = _meta.similarity_fn_name model.mteb_model_meta = meta # type: ignore return model diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index 9ec25a9896..bb47467838 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -21,7 +21,6 @@ def __init__( model: str | SentenceTransformer | CrossEncoder, revision: str | None = None, model_prompts: dict[str, str] | None = None, - similarity_fn_name: str | None = None, **kwargs, ) -> None: """Wrapper for SentenceTransformer models. @@ -33,7 +32,6 @@ def __init__( First priority is given to the composed prompt of task name + prompt type (query or passage), then to the specific task prompt, then to the composed prompt of task type + prompt type, then to the specific task type prompt, and finally to the specific prompt type. - similarity_fn_name: A similarity function to use. **kwargs: Additional arguments to pass to the SentenceTransformer model. 
""" if isinstance(model, str): @@ -61,9 +59,7 @@ def __init__( if isinstance(self.model, CrossEncoder): self.predict = self.handle_instructions_predict - if similarity_fn_name: - self.similarity = self.get_similarity_function(similarity_fn_name) - elif hasattr(self.model, "similarity") and callable(self.model.similarity): + if hasattr(self.model, "similarity") and callable(self.model.similarity): self.similarity = self.model.similarity def encode( diff --git a/mteb/models/wrapper.py b/mteb/models/wrapper.py index 76b31ba529..956071d3dc 100644 --- a/mteb/models/wrapper.py +++ b/mteb/models/wrapper.py @@ -3,12 +3,9 @@ import logging from typing import Callable, get_args -import numpy as np - import mteb from mteb.abstasks.TaskMetadata import TASK_TYPE from mteb.encoder_interface import PromptType -from mteb.evaluation.evaluators.utils import cos_sim, dot_score logger = logging.getLogger(__name__) @@ -67,18 +64,6 @@ def get_prompt_name( ) return None - @staticmethod - def get_similarity_function( - similarity_fn_name: str, - ) -> Callable[[np.ndarray, np.ndarray], np.ndarray]: - if similarity_fn_name == "cosine": - return cos_sim - if similarity_fn_name == "dot": - return dot_score - raise ValueError( - "Invalid similarity function. 
Should be one of ['cosine', 'dot']" - ) - @staticmethod def validate_task_to_prompt_name( task_to_prompt_name: dict[str, str] | None, diff --git a/tests/test_benchmark/test_models.py b/tests/test_benchmark/test_models.py new file mode 100644 index 0000000000..ee5bed091b --- /dev/null +++ b/tests/test_benchmark/test_models.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import pytest + +import mteb +from mteb import MTEB +from mteb.abstasks import AbsTask + +from .mock_tasks import MockRetrievalTask + + +@pytest.mark.parametrize("model", ["colbert-ir/colbertv2.0"]) +@pytest.mark.parametrize("task", [MockRetrievalTask()]) +def test_colbert_model_e2e(task: AbsTask, model: str): + pytest.importorskip("pylate", reason="pylate not installed") + eval_splits = ["test"] + model = mteb.get_model(model) + evaluation = MTEB(tasks=[task]) + + results = evaluation.run( + model, + eval_splits=eval_splits, + corpus_chunk_size=500, + ) + result = results[0] + + assert result.scores["test"][0]["ndcg_at_1"] == 1.0 + + +def test_bm25s_e2e(): + # fails for dataset smaller then 1000 + pytest.importorskip("bm25s", reason="bm25s not installed") + pytest.importorskip("Stemmer", reason="PyStemmer not installed") + + model = mteb.get_model("bm25s") + tasks = mteb.get_tasks(tasks=["NFCorpus"]) + eval_splits = ["test"] + + evaluation = MTEB(tasks=tasks) + + results = evaluation.run(model, eval_splits=eval_splits) + result = results[0] + + assert result.scores["test"][0]["ndcg_at_1"] == 0.42879 From 77f7c839e5ae6cc92c643719f3eb75ded27f9649 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:07:11 +0300 Subject: [PATCH 38/40] fix merge --- mteb/models/arctic_models.py | 25 +++++ mteb/models/bge_models.py | 1 - mteb/models/colbert_models.py | 4 +- mteb/models/e5_models.py | 2 +- mteb/models/salesforce_models.py | 27 ----- mteb/models/sentence_transformers_models.py | 111 +------------------- 6 files changed, 31 
insertions(+), 139 deletions(-) diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py index 1d99e31f40..b7217d1ef9 100644 --- a/mteb/models/arctic_models.py +++ b/mteb/models/arctic_models.py @@ -115,6 +115,31 @@ primaryClass={cs.LG}, url={https://arxiv.org/abs/2407.18887}, }""", + public_training_code=None, + training_datasets={ + # source: https://arxiv.org/pdf/2405.05374 + # splits not specified to assuming everything + # in MTEB + "NQ": ["test"], + "NQHardNegatives": ["test"], + "HotPotQA": ["test"], + "HotPotQAHardNegatives": ["test"], + "HotPotQA-PL": ["test"], # translated from hotpotQA (not trained on) + "FEVER": ["test"], + "FEVERHardNegatives": ["test"], + # not in MTEB + # trained on stack exchange (title-body) + # "stackexchange": [], + # potentially means that: + # "StackExchangeClusteringP2P": ["test"], + # "StackExchangeClusteringP2P.v2": ["test"], + # "StackExchangeClustering": ["test"], + # "StackExchangeClustering.v2": ["test"], + # not in MTEB + # "paq": [], + # "s2orc": [], + # "other": [], # undisclosed including webdata + }, # also use synthetic ) diff --git a/mteb/models/bge_models.py b/mteb/models/bge_models.py index 7958548060..79d220588a 100644 --- a/mteb/models/bge_models.py +++ b/mteb/models/bge_models.py @@ -431,7 +431,6 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=True, citation=BGE_15_CITATION, - public_training_data=True, # https://data.baai.ac.cn/details/BAAI-MTP public_training_code=None, # seemingly released (at least for some models, but the link is broken training_datasets=bge_training_data, ) diff --git a/mteb/models/colbert_models.py b/mteb/models/colbert_models.py index 0a8c0e4a57..f4baca3586 100644 --- a/mteb/models/colbert_models.py +++ b/mteb/models/colbert_models.py @@ -161,7 +161,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: max_tokens=180, # Reduced for Benchmarking - see ColBERT paper embed_dim=None, # Bag of Embeddings (128) for each token 
license="mit", - similarity_fn_name="MaxSim", + similarity_fn_name="max_sim", framework=["PyLate", "ColBERT"], reference="https://huggingface.co/colbert-ir/colbertv2.0", use_instructions=False, @@ -213,7 +213,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: max_tokens=8192, embed_dim=None, # Bag of Embeddings (128) for each token license="cc-by-nc-4.0", - similarity_fn_name="MaxSim", + similarity_fn_name="max_sim", framework=["PyLate", "ColBERT"], reference="https://huggingface.co/jinaai/jina-colbert-v2", use_instructions=False, diff --git a/mteb/models/e5_models.py b/mteb/models/e5_models.py index 651ca81529..fe265f6f41 100644 --- a/mteb/models/e5_models.py +++ b/mteb/models/e5_models.py @@ -165,7 +165,7 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=True, citation=MULTILINGUAL_E5_CITATION, - public_training_code=False, # couldn't find + public_training_code=None, # couldn't find training_datasets=E5_TRAINING_DATA, ) diff --git a/mteb/models/salesforce_models.py b/mteb/models/salesforce_models.py index a8968648f5..fd54871663 100644 --- a/mteb/models/salesforce_models.py +++ b/mteb/models/salesforce_models.py @@ -61,33 +61,6 @@ def instruction_template( ) -SFR_Embedding_Mistral = ModelMeta( - loader=partial( # type: ignore - instruct_wrapper, - model_name_or_path="Salesforce/SFR-Embedding-Mistral", - instruction_template=instruction_template, - attn="cccc", - pooling_method="lasttoken", - mode="embedding", - torch_dtype="auto", - normalized=True, - ), - name="Salesforce/SFR-Embedding-Mistral", - languages=["eng_Latn"], - open_weights=True, - revision="938c560d1c236aa563b2dbdf084f28ab28bccb11", - release_date="2024-01-24", # initial commit of hf model. 
- n_parameters=7_110_000_000, - embed_dim=4096, - license="cc-by-nc-4.0", - max_tokens=32768, - reference="https://huggingface.co/Salesforce/SFR-Embedding-Mistral", - similarity_fn_name="cosine", - framework=["Sentence Transformers", "PyTorch"], - use_instructions=True, -) - - SFR_Embedding_Mistral = ModelMeta( loader=partial( # type: ignore instruct_wrapper, diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index 4f500fd516..63be6e925c 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -272,8 +272,7 @@ use_instructions=False, superseded_by=None, adapted_from=None, - public_training_code=False, # does sentence transformer count? - public_training_data=True, + public_training_code=None, # does sentence transformer count? training_datasets={ # source: frontmatter in readme # trained on stack exchange, unsure if sources match @@ -309,112 +308,6 @@ citation=SBERT_CITATION, ) -jina_embeddings_v2_base_en = ModelMeta( - name="jinaai/jina-embeddings-v2-base-en", - languages=["eng-Latn"], - open_weights=True, - revision="6e85f575bc273f1fd840a658067d0157933c83f0", - release_date="2023-09-27", - n_parameters=137_000_000, - embed_dim=768, - license="apache-2.0", - max_tokens=8192, - reference="https://huggingface.co/jinaai/jina-embeddings-v2-base-en", - similarity_fn_name="cosine", - framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, - superseded_by=None, - adapted_from=None, - training_datasets={"allenai/c4": ["train"]}, -) - -jina_embeddings_v2_small_en = ModelMeta( - name="jinaai/jina-embeddings-v2-small-en", - languages=["eng-Latn"], - open_weights=True, - revision="796cff318cdd4e5fbe8b7303a1ef8cbec36996ef", - release_date="2023-09-27", - n_parameters=32_700_000, - embed_dim=512, - license="apache-2.0", - max_tokens=8192, - reference="https://huggingface.co/jinaai/jina-embeddings-v2-small-en", - similarity_fn_name="cosine", - 
framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, - superseded_by=None, - adapted_from=None, - training_datasets={"jinaai/negation-dataset": ["train"]}, -) - -jina_embedding_b_en_v1 = ModelMeta( - name="jinaai/jina-embedding-b-en-v1", - languages=["eng-Latn"], - open_weights=True, - revision="aa0645035294a8c0607ce5bb700aba982cdff32c", - release_date="2023-07-07", - n_parameters=110_000_000, - embed_dim=768, - license="apache-2.0", - max_tokens=512, - reference="https://huggingface.co/jinaai/jina-embedding-b-en-v1", - similarity_fn_name="cosine", - framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, - superseded_by="jinaai/jina-embeddings-v2-base-en", - adapted_from=None, - training_datasets={"jinaai/negation-dataset": ["train"]}, -) - -jina_embedding_s_en_v1 = ModelMeta( - name="jinaai/jina-embedding-s-en-v1", - languages=["eng-Latn"], - open_weights=True, - revision="c1fed70aa4823a640f1a7150a276e4d3b08dce08", - release_date="2023-07-07", - n_parameters=35_000_000, - embed_dim=512, - license="apache-2.0", - max_tokens=512, - reference="https://huggingface.co/jinaai/jina-embedding-s-en-v1", - similarity_fn_name="cosine", - framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, - superseded_by="jinaai/jina-embeddings-v2-small-en", - adapted_from=None, - training_datasets={"jinaai/negation-dataset": ["train"]}, -) - -all_MiniLM_L12_v2 = ModelMeta( - name="sentence-transformers/all-MiniLM-L12-v2", - languages=["eng-Latn"], - open_weights=True, - revision="364dd28d28dcd3359b537f3cf1f5348ba679da62", - release_date="2021-08-30", - n_parameters=33_400_000, - embed_dim=384, - license="apache-2.0", - max_tokens=256, - reference="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2", - similarity_fn_name="cosine", - framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, - superseded_by=None, - adapted_from=None, - citation="""@misc{feng2022languageagnosticbertsentenceembedding, - 
title={Language-agnostic BERT Sentence Embedding}, - author={Fangxiaoyu Feng and Yinfei Yang and Daniel Cer and Naveen Arivazhagan and Wei Wang}, - year={2022}, - eprint={2007.01852}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2007.01852}, - }""", - training_datasets=sent_trf_training_dataset, - public_training_code=None, -) - - contriever = ModelMeta( loader=partial( SentenceTransformerWrapper, @@ -443,6 +336,8 @@ url = {https://arxiv.org/abs/2112.09118}, doi = {10.48550/ARXIV.2112.09118}, }""", + public_training_code=None, + training_datasets=None, ) microllama_text_embedding = ModelMeta( From 6da8a13f58d01a9049201cab44b9add97aaf9955 Mon Sep 17 00:00:00 2001 From: Sam <40773225+sam-hey@users.noreply.github.com> Date: Tue, 21 Jan 2025 14:22:43 +0100 Subject: [PATCH 39/40] [v2] ci: run bm25 and ColBERT test in ci (#1829) * update install for tests * use tmp dir for tests * ref: use tmp_path for output_folder * ref: clean up tests * skip test for pylate python < 3.10 * fix: tests * fix: tests * fix: model meta CrossEncoder * test: model meta * update path test * lint * Update mteb/models/overview.py Co-authored-by: Roman Solomatin * use as_posix() * add more asserts & get embeding_dim for st * fix: MaxSim add test fix ModelMeta * fix: colbert test py 3.9 & add revision * ref: _get_model_meta --------- Co-authored-by: sam021313 <40773225+sam021313@users.noreply.github.com> Co-authored-by: Roman Solomatin --- Makefile | 2 +- mteb/evaluation/MTEB.py | 15 +++- mteb/model_meta.py | 4 +- mteb/models/__init__.py | 2 + mteb/models/colbert_models.py | 4 +- mteb/models/overview.py | 53 +++++++++++++- tests/test_benchmark/test_benchmark.py | 57 ++++++++------- ...est_benchmark_integration_with_datasets.py | 5 +- ...k_integration_with_sentencetransformers.py | 7 +- tests/test_benchmark/test_models.py | 13 +++- tests/test_cli.py | 17 +++-- tests/test_model_meta/test_model_meta.py | 73 +++++++++++++++++++ tests/test_reproducible_workflow.py | 7 
+- tests/test_tasks/test_mteb_rerank.py | 28 +++---- 14 files changed, 221 insertions(+), 66 deletions(-) create mode 100644 tests/test_model_meta/test_model_meta.py diff --git a/Makefile b/Makefile index 3c68c9e0dc..7d8ca4d74f 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ install: install-for-tests: @echo "--- 🚀 Installing project dependencies for test ---" @echo "This ensures that the project is not installed in editable mode" - pip install ".[dev,speedtask]" + pip install ".[dev,speedtask,bm25s,pylate]" lint: @echo "--- 🧹 Running linters ---" diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index 3c94f24785..377f8b72eb 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -20,7 +20,10 @@ from mteb.abstasks.AbsTask import ScoresDict from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta -from mteb.models import model_meta_from_sentence_transformers +from mteb.models import ( + model_meta_from_cross_encoder, + model_meta_from_sentence_transformers, +) from ..abstasks.AbsTask import AbsTask from ..load_results.task_results import TaskResult @@ -495,7 +498,7 @@ def create_model_meta(model: Encoder) -> ModelMeta: meta = model.mteb_model_meta # type: ignore else: try: - meta = model_meta_from_sentence_transformers(model) # type: ignore + meta = MTEB._get_model_meta(model) except AttributeError: logger.warning( "Could not find model metadata. 
Please set the model.mteb_model_meta attribute or if you are using " @@ -597,3 +600,11 @@ def _get_missing_evaluations( missing_evaluations[split]["missing_subsets"] = missing_subsets return missing_evaluations + + @staticmethod + def _get_model_meta(model: Encoder) -> ModelMeta: + if isinstance(model, CrossEncoder): + meta = model_meta_from_cross_encoder(model) + else: + meta = model_meta_from_sentence_transformers(model) + return meta diff --git a/mteb/model_meta.py b/mteb/model_meta.py index fee525cba1..eed74c5b49 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -32,7 +32,7 @@ "PyLate", "ColBERT", ] -DISTANCE_METRICS = Literal["cosine", "max_sim", "dot"] +DISTANCE_METRICS = Literal["cosine", "MaxSim", "dot"] def sentence_transformers_loader( @@ -111,7 +111,7 @@ def get_similarity_function(self) -> Callable[[np.ndarray, np.ndarray], np.ndarr return cos_sim elif self.similarity_fn_name == "dot": return dot_score - elif self.similarity_fn_name == "max_sim": + elif self.similarity_fn_name == "MaxSim": return max_sim elif self.similarity_fn_name is None: raise ValueError("Similarity function not specified.") diff --git a/mteb/models/__init__.py b/mteb/models/__init__.py index 1c70b528ce..1389e23982 100644 --- a/mteb/models/__init__.py +++ b/mteb/models/__init__.py @@ -6,6 +6,7 @@ get_model, get_model_meta, get_model_metas, + model_meta_from_cross_encoder, model_meta_from_sentence_transformers, ) from mteb.models.sentence_transformer_wrapper import SentenceTransformerWrapper @@ -17,5 +18,6 @@ "get_model_meta", "get_model_metas", "model_meta_from_sentence_transformers", + "model_meta_from_cross_encoder", "SentenceTransformerWrapper", ] diff --git a/mteb/models/colbert_models.py b/mteb/models/colbert_models.py index f4baca3586..0a8c0e4a57 100644 --- a/mteb/models/colbert_models.py +++ b/mteb/models/colbert_models.py @@ -161,7 +161,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: max_tokens=180, # Reduced for Benchmarking - see ColBERT 
paper embed_dim=None, # Bag of Embeddings (128) for each token license="mit", - similarity_fn_name="max_sim", + similarity_fn_name="MaxSim", framework=["PyLate", "ColBERT"], reference="https://huggingface.co/colbert-ir/colbertv2.0", use_instructions=False, @@ -213,7 +213,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: max_tokens=8192, embed_dim=None, # Bag of Embeddings (128) for each token license="cc-by-nc-4.0", - similarity_fn_name="max_sim", + similarity_fn_name="MaxSim", framework=["PyLate", "ColBERT"], reference="https://huggingface.co/jinaai/jina-colbert-v2", use_instructions=False, diff --git a/mteb/models/overview.py b/mteb/models/overview.py index 93eaa9ab5a..c72fe2ed89 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -6,7 +6,7 @@ from typing import Any from huggingface_hub import ModelCard -from sentence_transformers import SentenceTransformer +from sentence_transformers import CrossEncoder, SentenceTransformer from mteb.abstasks.AbsTask import AbsTask from mteb.encoder_interface import Encoder @@ -172,6 +172,11 @@ def get_model(model_name: str, revision: str | None = None, **kwargs: Any) -> En if not meta.similarity_fn_name: meta.similarity_fn_name = _meta.similarity_fn_name + elif isinstance(model, CrossEncoder): + _meta = model_meta_from_cross_encoder(model.model) + if meta.revision is None: + meta.revision = _meta.revision if _meta.revision else meta.revision + model.mteb_model_meta = meta # type: ignore return model @@ -251,6 +256,49 @@ def model_meta_from_hf_hub(model_name: str) -> ModelMeta: ) +def model_meta_from_cross_encoder(model: CrossEncoder) -> ModelMeta: + try: + name = model.model.name_or_path + + meta = ModelMeta( + name=name, + revision=model.config._commit_hash, + release_date=None, + languages=None, + framework=["Sentence Transformers"], + similarity_fn_name=None, + n_parameters=None, + max_tokens=None, + embed_dim=None, + license=None, + open_weights=True, + public_training_code=None, + 
use_instructions=None, + training_datasets=None, + ) + except AttributeError as e: + logger.warning( + f"Failed to extract metadata from model: {e}. Upgrading to sentence-transformers v3.0.0 or above is recommended." + ) + meta = ModelMeta( + name=None, + revision=None, + languages=None, + release_date=None, + n_parameters=None, + max_tokens=None, + embed_dim=None, + license=None, + open_weights=True, + public_training_code=None, + similarity_fn_name=None, + use_instructions=None, + training_datasets=None, + framework=[], + ) + return meta + + def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMeta: try: name = ( @@ -263,6 +311,7 @@ def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMe if isinstance(model.model_card_data.language, str) else model.model_card_data.language ) + embeddings_dim = model.get_sentence_embedding_dimension() meta = ModelMeta( name=name, revision=model.model_card_data.base_model_revision, @@ -272,7 +321,7 @@ def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMe similarity_fn_name=model.similarity_fn_name, n_parameters=None, max_tokens=None, - embed_dim=None, + embed_dim=embeddings_dim, license=None, open_weights=True, public_training_code=None, diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index 1393d46f12..37a226f737 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -41,7 +41,7 @@ def test_mulitple_mteb_tasks(tasks: list[AbsTask], model: mteb.Encoder, tmp_path: Path): """Test that multiple tasks can be run""" eval = mteb.MTEB(tasks=tasks) - eval.run(model, output_folder=str(tmp_path), overwrite_results=True) + eval.run(model, output_folder=tmp_path.as_posix(), overwrite_results=True) # ensure that we can generate a readme from the output folder generate_readme(tmp_path) @@ -56,7 +56,9 @@ def test_mulitple_mteb_tasks(tasks: list[AbsTask], model: mteb.Encoder, tmp_path 
MockTorchbf16Encoder(), ], ) -def test_benchmark_encoders_on_task(task: str | AbsTask, model: mteb.Encoder): +def test_benchmark_encoders_on_task( + task: str | AbsTask, model: mteb.Encoder, tmp_path: Path +): """Test that a task can be fetched and run using a variety of encoders""" if isinstance(task, str): tasks = mteb.get_tasks(tasks=[task]) @@ -64,7 +66,7 @@ def test_benchmark_encoders_on_task(task: str | AbsTask, model: mteb.Encoder): tasks = [task] eval = mteb.MTEB(tasks=tasks) - eval.run(model, output_folder="tests/results", overwrite_results=True) + eval.run(model, output_folder=tmp_path.as_posix()) @pytest.mark.parametrize("task", [MockMultilingualRetrievalTask()]) @@ -72,7 +74,9 @@ def test_benchmark_encoders_on_task(task: str | AbsTask, model: mteb.Encoder): "model", [MockSentenceTransformer()], ) -def test_run_eval_without_co2_tracking(task: str | AbsTask, model: mteb.Encoder): +def test_run_eval_without_co2_tracking( + task: str | AbsTask, model: mteb.Encoder, tmp_path: Path +): """Test that a task can be fetched and run without CO2 tracking""" if isinstance(task, str): tasks = mteb.get_tasks(tasks=[task]) @@ -80,9 +84,7 @@ def test_run_eval_without_co2_tracking(task: str | AbsTask, model: mteb.Encoder) tasks = [task] eval = mteb.MTEB(tasks=tasks) - eval.run( - model, output_folder="tests/results", overwrite_results=True, co2_tracker=False - ) + eval.run(model, output_folder=tmp_path.as_posix(), co2_tracker=False) @pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID[:1]) @@ -95,20 +97,22 @@ def test_reload_results(task: str | AbsTask, model: mteb.Encoder, tmp_path: Path tasks = [task] eval = mteb.MTEB(tasks=tasks) - results = eval.run(model, output_folder=str(tmp_path), overwrite_results=True) + results = eval.run(model, output_folder=tmp_path.as_posix(), overwrite_results=True) assert isinstance(results, list) assert isinstance(results[0], mteb.TaskResult) # reload the results - results = eval.run(model, output_folder=str(tmp_path), 
overwrite_results=False) + results = eval.run( + model, output_folder=tmp_path.as_posix(), overwrite_results=False + ) assert isinstance(results, list) assert isinstance(results[0], mteb.TaskResult) @pytest.mark.parametrize("task_name", MOCK_TASK_TEST_GRID) -def test_prompt_name_passed_to_all_encodes(task_name: str | AbsTask): +def test_prompt_name_passed_to_all_encodes(task_name: str | AbsTask, tmp_path: Path): """Test that all tasks correctly pass down the prompt_name to the encoder which supports it, and that the encoder which does not support it does not receive it. """ @@ -141,17 +145,17 @@ def encode(self, sentences, **kwargs): eval.run( model, - output_folder="tests/results", + output_folder=tmp_path.as_posix(), overwrite_results=True, ) # Test that the task_name is not passed down to the encoder model = EncoderWithoutInstructions("average_word_embeddings_levy_dependency") assert model.prompts == {}, "The encoder should not have any prompts" - eval.run(model, output_folder="tests/results", overwrite_results=True) + eval.run(model, output_folder=tmp_path.as_posix(), overwrite_results=True) @pytest.mark.parametrize("task_name", MOCK_TASK_TEST_GRID) -def test_encode_kwargs_passed_to_all_encodes(task_name: str | AbsTask): +def test_encode_kwargs_passed_to_all_encodes(task_name: str | AbsTask, tmp_path: Path): """Test that all tasks correctly pass down the encode_kwargs to the encoder.""" my_encode_kwargs = {"no_one_uses_this_args": "but_its_here"} @@ -175,14 +179,14 @@ def encode(self, sentences, task_name: str | None = None, **kwargs): model = MockEncoderWithKwargs() eval.run( model, - output_folder="tests/results", + output_folder=tmp_path.as_posix(), overwrite_results=True, encode_kwargs=my_encode_kwargs, ) @pytest.mark.parametrize("model", [MockNumpyEncoder()]) -def test_run_using_benchmark(model: mteb.Encoder): +def test_run_using_benchmark(model: mteb.Encoder, tmp_path: Path): """Test that a benchmark object can be run using the MTEB class.""" bench = 
Benchmark( name="test_bench", tasks=mteb.get_tasks(tasks=["STS12", "SummEval"]) @@ -190,12 +194,12 @@ def test_run_using_benchmark(model: mteb.Encoder): eval = mteb.MTEB(tasks=[bench]) eval.run( - model, output_folder="tests/results", overwrite_results=True + model, output_folder=tmp_path.as_posix(), overwrite_results=True ) # we just want to test that it runs @pytest.mark.parametrize("model", [MockNumpyEncoder()]) -def test_run_using_list_of_benchmark(model: mteb.Encoder): +def test_run_using_list_of_benchmark(model: mteb.Encoder, tmp_path: Path): """Test that a list of benchmark objects can be run using the MTEB class.""" bench = [ Benchmark(name="test_bench", tasks=mteb.get_tasks(tasks=["STS12", "SummEval"])) @@ -203,7 +207,7 @@ def test_run_using_list_of_benchmark(model: mteb.Encoder): eval = mteb.MTEB(tasks=bench) eval.run( - model, output_folder="tests/results", overwrite_results=True + model, output_folder=tmp_path.as_posix() ) # we just want to test that it runs @@ -229,7 +233,7 @@ def test_get_benchmark(name): @pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID) @pytest.mark.parametrize("is_task_name", [True, False]) def test_prompt_name_passed_to_all_encodes_with_prompts( - task: AbsTask | str, is_task_name: bool + task: AbsTask | str, is_task_name: bool, tmp_path: Path ): """Test that all tasks and task_types correctly pass down the prompt_name to the encoder with prompts.""" _task_name = task.metadata.name if isinstance(task, AbsTask) else task @@ -258,8 +262,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): ) eval.run( model, - output_folder="tests/results", - overwrite_results=True, + output_folder=tmp_path.as_posix(), ) class MockEncoderWithExistingPrompts(mteb.Encoder): @@ -275,7 +278,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): model = MockSentenceTransformerWrapper(MockEncoderWithExistingPrompts()) eval.run( model, - output_folder="tests/results", + output_folder=tmp_path.as_posix(), 
overwrite_results=True, ) @@ -292,7 +295,9 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): ], ) @pytest.mark.parametrize("is_task_name", [True, False]) -def test_model_query_passage_prompts_task_type(task: AbsTask | str, is_task_name: bool): +def test_model_query_passage_prompts_task_type( + task: AbsTask | str, is_task_name: bool, tmp_path: Path +): """Test that the model with prompts is correctly called.""" tasks = [task] @@ -331,8 +336,7 @@ def encode(self, sentences, prompt_name: str | None = None, *args, **kwargs): eval.run( model, model_prompts=prompt_list, - output_folder="tests/results", - overwrite_results=True, + output_folder=tmp_path.as_posix(), ) model = MockSentenceTransformerWrapper( MockSentenceEncoderWithPrompts(), model_prompts=prompt_list @@ -341,6 +345,5 @@ def encode(self, sentences, prompt_name: str | None = None, *args, **kwargs): eval.run( model, model_prompts=prompt_list, - output_folder="tests/results", - overwrite_results=True, + output_folder=tmp_path.as_posix(), ) diff --git a/tests/test_benchmark/test_benchmark_integration_with_datasets.py b/tests/test_benchmark/test_benchmark_integration_with_datasets.py index 81d4c6b676..8288680c3c 100644 --- a/tests/test_benchmark/test_benchmark_integration_with_datasets.py +++ b/tests/test_benchmark/test_benchmark_integration_with_datasets.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +from pathlib import Path import pytest @@ -18,7 +19,7 @@ @pytest.mark.parametrize("task", TASK_TEST_GRID) @pytest.mark.parametrize("model", [MockNumpyEncoder()]) -def test_benchmark_datasets(task: str | AbsTask, model: mteb.Encoder): +def test_benchmark_datasets(task: str | AbsTask, model: mteb.Encoder, tmp_path: Path): """Test that a task can be fetched and run""" eval = MTEB(tasks=[task]) - eval.run(model, output_folder="tests/results", overwrite_results=True) + eval.run(model, output_folder=tmp_path.as_posix(), overwrite_results=True) diff --git 
a/tests/test_benchmark/test_benchmark_integration_with_sentencetransformers.py b/tests/test_benchmark/test_benchmark_integration_with_sentencetransformers.py index 4ca0056cd7..e79515be56 100644 --- a/tests/test_benchmark/test_benchmark_integration_with_sentencetransformers.py +++ b/tests/test_benchmark/test_benchmark_integration_with_sentencetransformers.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +from pathlib import Path import pytest from sentence_transformers import SentenceTransformer @@ -22,9 +23,11 @@ "average_word_embeddings_levy_dependency", ], ) -def test_benchmark_sentence_transformer(task: str | AbsTask, model_name: str): +def test_benchmark_sentence_transformer( + task: str | AbsTask, model_name: str, tmp_path: Path +): """Test that a task can be fetched and run""" if isinstance(model_name, str): model = SentenceTransformer(model_name) eval = MTEB(tasks=[task]) - eval.run(model, output_folder="tests/results", overwrite_results=True) + eval.run(model, output_folder=tmp_path.as_posix(), overwrite_results=True) diff --git a/tests/test_benchmark/test_models.py b/tests/test_benchmark/test_models.py index ee5bed091b..5d6cc1a022 100644 --- a/tests/test_benchmark/test_models.py +++ b/tests/test_benchmark/test_models.py @@ -1,5 +1,8 @@ from __future__ import annotations +import sys +from pathlib import Path + import pytest import mteb @@ -9,9 +12,10 @@ from .mock_tasks import MockRetrievalTask +@pytest.mark.skipif(sys.version_info < (3, 10), reason="Requires Python 3.10 or higher") @pytest.mark.parametrize("model", ["colbert-ir/colbertv2.0"]) @pytest.mark.parametrize("task", [MockRetrievalTask()]) -def test_colbert_model_e2e(task: AbsTask, model: str): +def test_colbert_model_e2e(task: AbsTask, model: str, tmp_path: Path): pytest.importorskip("pylate", reason="pylate not installed") eval_splits = ["test"] model = mteb.get_model(model) @@ -21,13 +25,14 @@ def test_colbert_model_e2e(task: AbsTask, model: str): model, 
eval_splits=eval_splits, corpus_chunk_size=500, + output_folder=tmp_path.as_posix(), ) result = results[0] assert result.scores["test"][0]["ndcg_at_1"] == 1.0 -def test_bm25s_e2e(): +def test_bm25s_e2e(tmp_path: Path): # fails for dataset smaller then 1000 pytest.importorskip("bm25s", reason="bm25s not installed") pytest.importorskip("Stemmer", reason="PyStemmer not installed") @@ -38,7 +43,9 @@ def test_bm25s_e2e(): evaluation = MTEB(tasks=tasks) - results = evaluation.run(model, eval_splits=eval_splits) + results = evaluation.run( + model, eval_splits=eval_splits, output_folder=tmp_path.as_posix() + ) result = results[0] assert result.scores["test"][0]["ndcg_at_1"] == 0.42879 diff --git a/tests/test_cli.py b/tests/test_cli.py index 7c71528f0d..fc4a468112 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -50,12 +50,13 @@ def test_run_task( model_name: str, task_name: str, model_revision: str, + tmp_path: Path, ): args = Namespace( model=model_name, tasks=[task_name], model_revision=model_revision, - output_folder="tests/results/test_model", + output_folder=tmp_path.as_posix(), verbosity=3, device=None, categories=None, @@ -71,9 +72,7 @@ def test_run_task( run(args) model_name_as_path = model_name.replace("/", "__").replace(" ", "_") - results_path = Path( - f"tests/results/test_model/{model_name_as_path}/{model_revision}" - ) + results_path = tmp_path / model_name_as_path / model_revision assert results_path.exists(), "Output folder not created" assert "model_meta.json" in [ f.name for f in list(results_path.glob("*.json")) @@ -122,7 +121,7 @@ def test_create_meta(): ), f"Value for {key} does not match" # ensure that the command line interface works as well - command = f"{sys.executable} -m mteb create_meta --results_folder {results} --output_path {output_path} --overwrite" + command = f"{sys.executable} -m mteb create_meta --results_folder {results.as_posix()} --output_path {output_path.as_posix()} --overwrite" result = subprocess.run(command, shell=True, 
capture_output=True, text=True) assert result.returncode == 0, "Command failed" @@ -134,14 +133,16 @@ def test_create_meta(): ("model_card_without_frontmatter.md", "model_card_gold_without_frontmatter.md"), ], ) -def test_create_meta_from_existing(existing_readme_name: str, gold_readme_name: str): +def test_create_meta_from_existing( + existing_readme_name: str, gold_readme_name: str, tmp_path: Path +): """Test create_meta function directly as well as through the command line interface""" test_folder = Path(__file__).parent output_folder = test_folder / "create_meta" results = ( output_folder / "all-MiniLM-L6-v2" / "8b3219a92973c328a8e22fadcfa821b5dc75636a" ) - output_path = output_folder / "model_card.md" + output_path = tmp_path / "model_card.md" existing_readme = output_folder / existing_readme_name args = Namespace( @@ -183,7 +184,7 @@ def test_create_meta_from_existing(existing_readme_name: str, gold_readme_name: ), f"Value for {key} does not match" assert readme_output == gold_readme # ensure that the command line interface works as well - command = f"{sys.executable} -m mteb create_meta --results_folder {results} --output_path {output_path} --from_existing {existing_readme} --overwrite" + command = f"{sys.executable} -m mteb create_meta --results_folder {results.as_posix()} --output_path {output_path.as_posix()} --from_existing {existing_readme.as_posix()} --overwrite" result = subprocess.run(command, shell=True, capture_output=True, text=True) assert result.returncode == 0, "Command failed" diff --git a/tests/test_model_meta/test_model_meta.py b/tests/test_model_meta/test_model_meta.py new file mode 100644 index 0000000000..2d23bc66cb --- /dev/null +++ b/tests/test_model_meta/test_model_meta.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest +from sentence_transformers import CrossEncoder, SentenceTransformer + +from mteb import MTEB +from mteb.abstasks import AbsTask +from 
tests.test_benchmark.mock_tasks import MockRetrievalTask + + +def test_create_model_meta_from_sentence_transformers(): + model_name = "sentence-transformers/average_word_embeddings_levy_dependency" + revision = "6d9c09a789ad5dd126b476323fccfeeafcd90509" + model = SentenceTransformer(model_name, revision=revision) + + meta = MTEB.create_model_meta(model) + + assert meta.similarity_fn_name == "cosine" + assert meta.embed_dim == model.get_sentence_embedding_dimension() + assert type(meta.framework) is list + assert meta.framework[0] == "Sentence Transformers" + assert meta.name == model_name + assert meta.revision == revision + + +def test_create_model_meta_from_cross_encoder(): + model_name = "cross-encoder/ms-marco-TinyBERT-L-2-v2" + revision = "841d331b6f34b15d6ac0ab366ae3a3b36eeac691" + model = CrossEncoder(model_name, revision=revision) + + meta = MTEB.create_model_meta(model) + + assert meta.name == model_name + assert meta.revision == revision + + return meta + + +@pytest.mark.parametrize("task", [MockRetrievalTask()]) +def test_output_folder_model_meta(task: AbsTask, tmp_path: Path): + mteb = MTEB(tasks=[task]) + model_name = "cross-encoder/ms-marco-TinyBERT-L-2-v2" + model = CrossEncoder(model_name) + meta = mteb.create_model_meta(model) + output_path = mteb.create_output_folder( + model_meta=meta, output_folder=tmp_path.as_posix() + ) + + output_path = Path(output_path) + assert output_path.exists() + assert output_path.is_dir() + assert output_path.name == model.config._commit_hash + assert output_path.parent.name == "cross-encoder__ms-marco-TinyBERT-L-2-v2" + assert output_path.parent.parent == tmp_path + + +@pytest.mark.skipif(sys.version_info < (3, 10), reason="Requires Python 3.10 or higher") +def test_model_meta_colbert(): + model_name = "colbert-ir/colbertv2.0" + colbert_model = pytest.importorskip("pylate.models", reason="pylate not installed") + revision = "c1e84128e85ef755c096a95bdb06b47793b13acf" + model = colbert_model.ColBERT(model_name, 
revision=revision) + + meta = MTEB.create_model_meta(model) + + # assert meta.similarity_fn_name == "MaxSim" test with new release of pylate + assert type(meta.framework) is list + assert meta.framework[0] == "Sentence Transformers" + assert meta.name == model_name + assert meta.revision == revision diff --git a/tests/test_reproducible_workflow.py b/tests/test_reproducible_workflow.py index 1c7536076e..1973072bab 100644 --- a/tests/test_reproducible_workflow.py +++ b/tests/test_reproducible_workflow.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +from pathlib import Path import pytest @@ -18,7 +19,9 @@ @pytest.mark.parametrize("task_name", ["BornholmBitextMining"]) @pytest.mark.parametrize("model_name", ["sentence-transformers/all-MiniLM-L6-v2"]) @pytest.mark.parametrize("model_revision", ["8b3219a92973c328a8e22fadcfa821b5dc75636a"]) -def test_reproducibility_workflow(task_name: str, model_name: str, model_revision: str): +def test_reproducibility_workflow( + task_name: str, model_name: str, model_revision: str, tmp_path: Path +): """Test that a model and a task can be fetched and run in a reproducible fashion.""" model_meta = mteb.get_model_meta(model_name, revision=model_revision) task = mteb.get_task(task_name) @@ -30,7 +33,7 @@ def test_reproducibility_workflow(task_name: str, model_name: str, model_revisio assert isinstance(model, Encoder) eval = MTEB(tasks=[task]) - eval.run(model, output_folder="tests/results", overwrite_results=True) + eval.run(model, output_folder=tmp_path.as_posix(), overwrite_results=True) @pytest.mark.parametrize( diff --git a/tests/test_tasks/test_mteb_rerank.py b/tests/test_tasks/test_mteb_rerank.py index effd76829d..4a535bebbd 100644 --- a/tests/test_tasks/test_mteb_rerank.py +++ b/tests/test_tasks/test_mteb_rerank.py @@ -339,17 +339,16 @@ def test_mteb_rerank(tmp_path: Path): eval.run( model, # type: ignore - output_folder="tests/results", + output_folder=tmp_path.as_posix(), overwrite_results=True, 
eval_splits=["test"], top_k=2, previous_results=tmp_file, save_predictions=True, ) - tmp_file.unlink() # read in the results - with open("tests/results/SciFact_default_predictions.json") as f: + with (tmp_path / "SciFact_default_predictions.json").open() as f: results = json.load(f) # check that only the top two results are re-orderd @@ -358,7 +357,7 @@ def test_mteb_rerank(tmp_path: Path): assert "18670" in results["1"] -def test_reranker_same_ndcg1(): +def test_reranker_same_ndcg1(tmp_path: Path): de_name = "average_word_embeddings_komninos" revision = "21eec43590414cb8e3a6f654857abed0483ae36e" de = SentenceTransformer(de_name, revision=revision) @@ -382,32 +381,35 @@ def test_reranker_same_ndcg1(): framework=["Sentence Transformers", "PyTorch"], ) eval = MTEB(tasks=mteb.get_tasks(["SciFact"])) + stage1_path = tmp_path / "stage1" eval.run( de, - output_folder="tests/results/stage1", + output_folder=stage1_path.as_posix(), overwrite_results=True, save_predictions=True, eval_splits=["test"], ) + stage2_path = tmp_path / "stage2" eval.run( ce, # type: ignore - output_folder="tests/results/stage2", + output_folder=stage2_path.as_posix(), overwrite_results=True, - previous_results="tests/results/stage1/SciFact_default_predictions.json", + previous_results=(stage1_path / "SciFact_default_predictions.json"), save_predictions=False, eval_splits=["test"], top_k=1, # don't allow it to rerank more than 1 so we can check for top_1 being the same ) # read in stage 1 and stage two and check ndcg@1 is the same - with open( - f"tests/results/stage1/sentence-transformers__{de_name}/{revision}/SciFact.json" - ) as f: + with ( + stage1_path / f"sentence-transformers__{de_name}/{revision}/SciFact.json" + ).open() as f: stage1 = json.load(f) - with open( - f"tests/results/stage2/cross-encoder__ms-marco-TinyBERT-L-2-v2/{ce_revision}/SciFact.json" - ) as f: + with ( + stage2_path + / f"cross-encoder__ms-marco-TinyBERT-L-2-v2/{ce_revision}/SciFact.json" + ).open() as f: stage2 = 
json.load(f) assert ( From f1d418c88df9ce0d2d46f6a536c3133e49aa4907 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Fri, 24 Jan 2025 19:50:19 +0300 Subject: [PATCH 40/40] [v2] Update v2 again (#1864) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Add reported annotation and re-added public_training_data (#1846) * fix: Add additional dataset annotations * fix: readded public training data * update voyage annotations * 1.29.11 Automatically generated by python-semantic-release * fix: Leaderboard Refinements (#1849) * Added better descriptions to benchmarks and removed beta tags * Fixed zero-shot filtering on app loading * Added zero-shot definition in an accordion * NaN values are now filled with blank * Added type hints to filter_models * 1.29.12 Automatically generated by python-semantic-release * fix: Fixed leaderboard search bar (#1852) Fixed leaderboard search bar * 1.29.13 Automatically generated by python-semantic-release * fix: Hotfixed public_training_data type annotation (#1857) Fixed public_training_data flag type to include boolean, as this is how all models are annotated * fix: Fix zeta alpha mistral (#1736) * fix zeta alpha mistral * update use_instructions * update training datasets * Update mteb/models/e5_instruct.py Co-authored-by: Kenneth Enevoldsen * update float * Update mteb/models/e5_instruct.py --------- Co-authored-by: Kenneth Enevoldsen * Add more annotations (#1833) * apply additions from #1794 * add annotations for rumodels * add nomic training data * fix metadata * update rest of model meta * fix bge reranker * 1.29.14 Automatically generated by python-semantic-release * fix: Adding missing model meta (#1856) * Added CDE models * Added bge-en-icl * Updated CDE to bge_full_data * Fixed public_training_data flag type to include boolean, as this is how all models are annotated * Added public training data link instead of bool to CDE and BGE * Added GME models * Changed Torch to PyTorch * 
Added metadata on LENS models * Added ember_v1 * Added metadata for amazon titan * Removed GME implementation * 1.29.15 Automatically generated by python-semantic-release * fix: Added correct training data annotation to LENS (#1859) Added correct training data annotation to LENS * 1.29.16 Automatically generated by python-semantic-release * lint * fix meta * fix meta * fix empty model meta * lint --------- Co-authored-by: Kenneth Enevoldsen Co-authored-by: github-actions Co-authored-by: Márton Kardos --- mteb/benchmarks/benchmarks.py | 69 +++++-- mteb/leaderboard/app.py | 154 ++++++++------ mteb/leaderboard/table.py | 13 +- mteb/model_meta.py | 4 +- mteb/models/arctic_models.py | 23 ++- mteb/models/bge_models.py | 211 +++++++++++--------- mteb/models/bm25.py | 1 + mteb/models/cde_models.py | 54 +++++ mteb/models/cohere_models.py | 12 +- mteb/models/colbert_models.py | 12 +- mteb/models/e5_instruct.py | 87 ++++++++ mteb/models/e5_models.py | 25 ++- mteb/models/gme_models.py | 62 ++++++ mteb/models/google_models.py | 9 +- mteb/models/gritlm_models.py | 18 +- mteb/models/gte_models.py | 9 +- mteb/models/ibm_granite_models.py | 71 ++++++- mteb/models/inf_models.py | 1 + mteb/models/jasper_models.py | 1 + mteb/models/jina_models.py | 23 ++- mteb/models/lens_models.py | 45 +++++ mteb/models/linq_models.py | 1 + mteb/models/llm2vec_models.py | 8 + mteb/models/misc_models.py | 131 ++++++++++-- mteb/models/model2vec_models.py | 9 +- mteb/models/moka_models.py | 9 +- mteb/models/mxbai_models.py | 1 + mteb/models/no_instruct_sentence_models.py | 1 + mteb/models/nomic_models.py | 101 +++++++++- mteb/models/nvidia_models.py | 2 + mteb/models/openai_models.py | 9 +- mteb/models/overview.py | 81 +++----- mteb/models/piccolo_models.py | 2 + mteb/models/promptriever_models.py | 4 + mteb/models/repllama_models.py | 2 + mteb/models/rerankers_custom.py | 6 +- mteb/models/rerankers_monot5_based.py | 14 ++ mteb/models/ru_sentence_models.py | 155 +++++++++++--- 
mteb/models/salesforce_models.py | 39 ++-- mteb/models/sentence_transformers_models.py | 44 +--- mteb/models/stella_models.py | 8 + mteb/models/text2vec_models.py | 9 +- mteb/models/uae_models.py | 1 + mteb/models/voyage_models.py | 104 +++++++++- pyproject.toml | 2 +- scripts/generate_metadata.py | 1 + tests/test_tasks/test_mteb_rerank.py | 1 + 47 files changed, 1264 insertions(+), 385 deletions(-) create mode 100644 mteb/models/cde_models.py create mode 100644 mteb/models/gme_models.py create mode 100644 mteb/models/lens_models.py diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 36641507ac..50e2b45cc5 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -71,7 +71,7 @@ def load_results( MTEB_EN = Benchmark( - name="MTEB(eng, beta)", + name="MTEB(eng)", tasks=MTEBTasks( get_tasks( tasks=[ @@ -128,7 +128,13 @@ def load_results( get_task("STS22.v2", eval_splits=["test"], hf_subsets=["en"]), ), ), - description="English benchmarks from MTEB", + description="""The new English Massive Text Embedding Benchmark. +This benchmark was created to account for the fact that many models have now been finetuned +to tasks in the original MTEB, and contains tasks that are not as frequently used for model training. +This way the new benchmark and leaderboard can give our users a more realistic expectation of models' generalization performance. + +The original MTEB leaderboard is available under the [MTEB(eng, classic)](http://mteb-leaderboard-2-demo.hf.space/?benchmark_name=MTEB%28eng%2C+classic%29) tab. + """, citation="", contacts=["KennethEnevoldsen", "Muennighoff"], ) @@ -216,7 +222,12 @@ def load_results( get_task("STS22", eval_splits=["test"], hf_subsets=["en"]), ) ), - description="The original English benchmark by Muennighoff et al., (2023).", + description="""The original English benchmark by Muennighoff et al., (2023). 
+This page is an adaptation of the [old MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard). + +> We recommend that you use [MTEB(eng)](http://mteb-leaderboard-2-demo.hf.space/?benchmark_name=MTEB%28eng%29) instead, +as many models have been tuned on MTEB(eng, classic) datasets, and MTEB(eng) might give a more accurate representation of models' generalization performance. + """, citation="""@inproceedings{muennighoff-etal-2023-mteb, title = "{MTEB}: Massive Text Embedding Benchmark", author = "Muennighoff, Niklas and @@ -275,7 +286,7 @@ def load_results( "STS22", ], ), - description="Main Russian benchmarks from MTEB", + description="A Russian version of the Massive Text Embedding Benchmark with a number of novel Russian tasks in all task categories of the original MTEB.", reference="https://aclanthology.org/2023.eacl-main.148/", citation="""@misc{snegirev2024russianfocusedembeddersexplorationrumteb, title={The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design}, @@ -324,8 +335,8 @@ def load_results( "LegalQuAD", ] ), - description="Legal benchmarks from MTEB.", - reference="https://aclanthology.org/2023.eacl-main.148/", + description="A benchmark of retrieval tasks in the legal domain.", + reference=None, citation=None, ) @@ -365,7 +376,10 @@ def load_results( "Tatoeba", ] ), - description="BitextMining benchmark from MINERS", + description="""Bitext Mining texts from the MINERS benchmark, a benchmark designed to evaluate the + ability of multilingual LMs in semantic retrieval tasks, + including bitext mining and classification via retrieval-augmented contexts. 
+ """, reference="https://arxiv.org/pdf/2406.07424", citation=""" @article{winata2024miners, @@ -533,7 +547,7 @@ def load_results( ) + (get_task("STS22", eval_splits=["test"], hf_subsets=["fr"]),) ), - description="Main French benchmarks from MTEB", + description="MTEB-French, a French expansion of the original benchmark with high-quality native French datasets.", reference="https://arxiv.org/abs/2405.20468", citation="""@misc{ciancone2024mtebfrenchresourcesfrenchsentence, title={MTEB-French: Resources for French Sentence Embedding Evaluation and Analysis}, @@ -581,7 +595,7 @@ def load_results( "STS22", ], ), - description="Main German benchmarks from MTEB", + description="A benchmark for text-embedding performance in German.", reference="https://arxiv.org/html/2401.02709v1", citation="""@misc{wehrli2024germantextembeddingclustering, title={German Text Embedding Clustering Benchmark}, @@ -613,7 +627,7 @@ def load_results( "KorSTS", ], ), - description="Main Korean benchmarks from MTEB", + description="A benchmark and leaderboard for evaluation of text embedding in Korean.", reference=None, citation=None, ) @@ -650,7 +664,11 @@ def load_results( ) + (get_task("STS22", eval_splits=["test"], hf_subsets=["pl"]),), ), - description="Main Polish benchmarks from MTEB", + description="""Polish Massive Text Embedding Benchmark (PL-MTEB), a comprehensive benchmark for text embeddings in Polish. The PL-MTEB consists of 28 diverse NLP +tasks from 5 task types. With tasks adapted based on previously used datasets by the Polish +NLP community. 
In addition, a new PLSC (Polish Library of Science Corpus) dataset was created +consisting of titles and abstracts of scientific publications in Polish, which was used as the basis for +two novel clustering tasks.""", # Rephrased from the abstract reference="https://arxiv.org/abs/2405.10138", citation="""@article{poswiata2024plmteb, title={PL-MTEB: Polish Massive Text Embedding Benchmark}, @@ -695,14 +713,14 @@ def load_results( "typescript", ], ), - description="Main code benchmarks from MTEB", + description="A massive code embedding benchmark covering retrieval tasks in a miriad of popular programming languages.", reference=None, citation=None, ) MTEB_multilingual = Benchmark( - name="MTEB(Multilingual, beta)", + name="MTEB(Multilingual)", tasks=get_tasks( tasks=[ "BornholmBitextMining", @@ -840,7 +858,7 @@ def load_results( "MIRACLRetrievalHardNegatives", ], ), - description="The Multilingual benchmarks from MMTEB. Currently under development.", + description="A large-scale multilingual expansion of MTEB, driven mainly by highly-curated community contributions covering 250+ languages.", reference=None, citation=None, contacts=["KennethEnevoldsen", "isaac-chung"], @@ -875,7 +893,7 @@ def load_results( "ESCIReranking", ], ), - description="Main Japanese benchmarks from MTEB", + description="JMTEB is a benchmark for evaluating Japanese text embedding models.", reference="https://github.com/sbintuitions/JMTEB", citation=None, ) @@ -915,7 +933,7 @@ def load_results( ] MTEB_INDIC = Benchmark( - name="MTEB(Indic, beta)", + name="MTEB(Indic)", tasks=get_tasks( tasks=[ # Bitext @@ -952,7 +970,7 @@ def load_results( languages=indic_languages, exclusive_language_filter=True, ), - description="Main Indic benchmark from MMTEB", + description="A regional geopolitical text embedding benchmark targetting embedding performance on Indic languages.", reference=None, citation=None, contacts=["KennethEnevoldsen", "isaac-chung"], @@ -1003,7 +1021,7 @@ def load_results( ] MTEB_EU = 
Benchmark( - name="MTEB(Europe, beta)", + name="MTEB(Europe)", tasks=get_tasks( tasks=[ "BornholmBitextMining", @@ -1084,7 +1102,7 @@ def load_results( languages=eu_languages, exclusive_language_filter=True, ), - description="Main European benchmark from MMTEB", + description="A regional geopolitical text embedding benchmark targetting embedding performance on European languages.", reference=None, citation=None, contacts=["KennethEnevoldsen", "isaac-chung"], @@ -1102,7 +1120,10 @@ def load_results( "LEMBWikimQARetrieval", ], ), - description="The main benchmark for evaluating long document retrieval.", + description="""LongEmbed is a benchmark oriented at exploring models' performance on long-context retrieval. + The benchmark comprises two synthetic tasks and four carefully chosen real-world tasks, + featuring documents of varying length and dispersed target information. + """, # Pieced together from paper abstract. reference="https://arxiv.org/abs/2404.12096v2", citation="""@article{zhu2024longembed, title={LongEmbed: Extending Embedding Models for Long Context Retrieval}, @@ -1117,7 +1138,13 @@ def load_results( tasks=get_tasks( tasks=["BrightRetrieval"], ), - description="A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval.", + description="""BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval. + BRIGHT is the first text retrieval + benchmark that requires intensive reasoning to retrieve relevant documents with + a dataset consisting of 1,384 real-world queries spanning diverse domains, such as + economics, psychology, mathematics, and coding. These queries are drawn from + naturally occurring and carefully curated human data. 
+ """, reference="https://brightbenchmark.github.io/", citation="""@article{su2024bright, title={Bright: A realistic and challenging benchmark for reasoning-intensive retrieval}, diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index cb806e4671..5ee5a6b9da 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -6,6 +6,7 @@ import tempfile import time from pathlib import Path +from typing import Literal from urllib.parse import urlencode import gradio as gr @@ -48,9 +49,12 @@ def produce_benchmark_link(benchmark_name: str, request: gr.Request) -> str: return md +DEFAULT_BENCHMARK_NAME = "MTEB(Multilingual)" + + def set_benchmark_on_load(request: gr.Request): query_params = request.query_params - return query_params.get("benchmark_name", "MTEB(Multilingual, beta)") + return query_params.get("benchmark_name", DEFAULT_BENCHMARK_NAME) def download_table(table: pd.DataFrame) -> Path: @@ -117,23 +121,75 @@ def update_task_info(task_names: str) -> gr.DataFrame: return gr.DataFrame(df, datatype=["markdown"] + ["str"] * (len(df.columns) - 1)) +# Model sizes in million parameters +MIN_MODEL_SIZE, MAX_MODEL_SIZE = 0, 10_000 + + +def filter_models( + model_names, + task_select, + availability, + compatibility, + instructions, + model_size, + zero_shot_setting, +): + lower, upper = model_size + # Setting to None, when the user doesn't specify anything + if (lower == MIN_MODEL_SIZE) and (upper == MAX_MODEL_SIZE): + lower, upper = None, None + else: + # Multiplying by millions + lower = lower * 1e6 + upper = upper * 1e6 + model_metas = mteb.get_model_metas( + model_names=model_names, + open_weights=availability, + use_instructions=instructions, + frameworks=compatibility, + n_parameters_range=(lower, upper), + ) + tasks = mteb.get_tasks(tasks=task_select) + models_to_keep = set() + for model_meta in model_metas: + is_model_zero_shot = model_meta.is_zero_shot_on(tasks) + if is_model_zero_shot is None: + if zero_shot_setting == "hard": + continue + elif 
not is_model_zero_shot: + if zero_shot_setting != "off": + continue + models_to_keep.add(model_meta.name) + return list(models_to_keep) + + logger.info("Loading all benchmark results") all_results = load_results() -# Model sizes in million parameters -min_model_size, max_model_size = 0, 10_000 - benchmarks = mteb.get_benchmarks() all_benchmark_results = { benchmark.name: benchmark.load_results(base_results=all_results) for benchmark in benchmarks } -default_benchmark = mteb.get_benchmark("MTEB(Multilingual, beta)") +default_benchmark = mteb.get_benchmark(DEFAULT_BENCHMARK_NAME) default_results = all_benchmark_results[default_benchmark.name] logger.info("Benchmark results loaded") default_scores = default_results.get_scores(format="long") -summary_table, per_task_table = scores_to_tables(default_scores) +all_models = list({entry["model_name"] for entry in default_scores}) +filtered_models = filter_models( + all_models, + default_results.task_names, + availability=None, + compatibility=[], + instructions=None, + model_size=(MIN_MODEL_SIZE, MAX_MODEL_SIZE), + zero_shot_setting="soft", +) + +summary_table, per_task_table = scores_to_tables( + [entry for entry in default_scores if entry["model_name"] in filtered_models] +) benchmark_select = gr.Dropdown( [bench.name for bench in benchmarks], @@ -207,7 +263,7 @@ def update_task_info(task_names: str) -> gr.DataFrame: with gr.Row(): searchbar = gr.Textbox( label="Search Models", - info="Search models by name (RegEx sensitive. Separate queries with `|`)", + info="Press Enter to search.\nSearch models by name (RegEx sensitive. 
Separate queries with `|`)", interactive=True, ) compatibility = gr.CheckboxGroup( @@ -258,14 +314,14 @@ def update_task_info(task_names: str) -> gr.DataFrame: interactive=True, ) model_size = RangeSlider( - minimum=min_model_size, - maximum=max_model_size, - value=(min_model_size, max_model_size), + minimum=MIN_MODEL_SIZE, + maximum=MAX_MODEL_SIZE, + value=(MIN_MODEL_SIZE, MAX_MODEL_SIZE), label="Model Size (#M Parameters)", interactive=True, ) scores = gr.State(default_scores) - models = gr.State(list({entry["model_name"] for entry in default_scores})) + models = gr.State(filtered_models) with gr.Row(): with gr.Column(): description = gr.Markdown( @@ -295,6 +351,10 @@ def update_task_info(task_names: str) -> gr.DataFrame: """ ) summary_table.render() + download_summary = gr.DownloadButton("Download Table") + download_summary.click( + download_table, inputs=[summary_table], outputs=[download_summary] + ) with gr.Accordion( "What do aggregate measures (Rank(Borda), Mean(Task), etc.) mean?", open=False, @@ -308,10 +368,19 @@ def update_task_info(task_names: str) -> gr.DataFrame: **Mean(TaskType)**: This is a weighted average across different task categories, such as classification or retrieval. It is computed by first computing the average by task category and then computing the average on each category. Similar to the Mean(Task) this measure is continuous and tends to overvalue tasks with higher variance. This score also prefers models that perform well across all task categories. """ ) - download_summary = gr.DownloadButton("Download Table") - download_summary.click( - download_table, inputs=[summary_table], outputs=[download_summary] - ) + with gr.Accordion( + "What does zero-shot mean?", + open=False, + ): + gr.Markdown( + """ +A model is considered zero-shot if it is not trained on any splits of the datasets used to derive the tasks. 
+E.g., if a model is trained on Natural Questions, it cannot be considered zero-shot on benchmarks containing the task “NQ” which is derived from Natural Questions. +This definition creates a few edge cases. For instance, multiple models are typically trained on Wikipedia title and body pairs, but we do not define this as leakage on, e.g., “WikipediaRetrievalMultilingual” and “WikiClusteringP2P” as these datasets are not based on title-body pairs. +Distilled, further fine-tunes or in other ways, derivative models inherit the datasets of their parent models. +Based on community feedback and research findings, This definition could change in the future. + """ + ) with gr.Tab("Performance per task"): per_task_table.render() download_per_task = gr.DownloadButton("Download Table") @@ -405,51 +474,14 @@ def update_task_list(benchmark_name, type_select, domain_select, lang_select): outputs=[task_select], ) - def filter_models( - model_names, - task_select, - availability, - compatibility, - instructions, - model_size, - zero_shot_setting, - ): - lower, upper = model_size - # Setting to None, when the user doesn't specify anything - if (lower == min_model_size) and (upper == max_model_size): - lower, upper = None, None - else: - # Multiplying by millions - lower = lower * 1e6 - upper = upper * 1e6 - model_metas = mteb.get_model_metas( - model_names=model_names, - open_weights=availability, - use_instructions=instructions, - frameworks=compatibility, - n_parameters_range=(lower, upper), - ) - tasks = mteb.get_tasks(tasks=task_select) - models_to_keep = set() - for model_meta in model_metas: - is_model_zero_shot = model_meta.is_zero_shot_on(tasks) - if is_model_zero_shot is None: - if zero_shot_setting == "hard": - continue - elif not is_model_zero_shot: - if zero_shot_setting != "off": - continue - models_to_keep.add(model_meta.name) - return list(models_to_keep) - def update_models( - scores, - tasks, - availability, - compatibility, - instructions, - model_size, - 
zero_shot, + scores: list[dict], + tasks: list[str], + availability: bool | None, + compatibility: list[str], + instructions: bool | None, + model_size: tuple[int, int], + zero_shot: Literal["hard", "soft", "off"], ): start_time = time.time() model_names = list({entry["model_name"] for entry in scores}) @@ -544,7 +576,7 @@ def update_models( ], outputs=[models], ) - zero_shot.input( + zero_shot.change( update_models, inputs=[ scores, @@ -594,7 +626,7 @@ def update_tables( inputs=[scores, searchbar, task_select, models], outputs=[summary_table, per_task_table], ) - searchbar.input( + searchbar.submit( update_tables, inputs=[scores, searchbar, task_select, models], outputs=[summary_table, per_task_table], diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py index 041df47094..ef28392cf7 100644 --- a/mteb/leaderboard/table.py +++ b/mteb/leaderboard/table.py @@ -142,6 +142,11 @@ def scores_to_tables( names = per_task.index.get_level_values("model_name") names = pd.Series(names, index=per_task.index) to_remove |= ~names.str.contains(search_query, regex=True) + if to_remove.all(): + no_results_frame = pd.DataFrame( + {"No results": ["You can try relaxing your criteria"]} + ) + return gr.DataFrame(no_results_frame), gr.DataFrame(no_results_frame) models_to_remove = list(per_task[to_remove].index) typed_mean = mean_per_type.mean(skipna=False, axis=1) overall_mean = per_task.mean(skipna=False, axis=1) @@ -218,7 +223,11 @@ def scores_to_tables( joint_table[score_columns] = joint_table[score_columns].map(format_scores) joint_table_style = ( joint_table.style.format( - {**{column: "{:.2f}" for column in score_columns}, "Rank (Borda)": "{:.0f}"} + { + **{column: "{:.2f}" for column in score_columns}, + "Rank (Borda)": "{:.0f}", + }, + na_rep="", ) .highlight_min("Rank (Borda)", props="font-weight: bold") .highlight_max(subset=score_columns, props="font-weight: bold") @@ -226,7 +235,7 @@ def scores_to_tables( task_score_columns = 
per_task.select_dtypes("number").columns per_task[task_score_columns] *= 100 per_task_style = per_task.style.format( - "{:.2f}", subset=task_score_columns + "{:.2f}", subset=task_score_columns, na_rep="" ).highlight_max(subset=task_score_columns, props="font-weight: bold") return ( gr.DataFrame( diff --git a/mteb/model_meta.py b/mteb/model_meta.py index eed74c5b49..b0dbccf24e 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -68,7 +68,8 @@ class ModelMeta(BaseModel): release_date: The date the model's revision was released. license: The license under which the model is released. Required if open_weights is True. open_weights: Whether the model is open source or proprietary. - public_training_code: Whether the code used to train the model is publicly available. + public_training_code: A link to the publicly available training code. If none it is assumed that the training code is not publicly available. + public_training_data: A link to the publicly available training data. If none it is assumed that the training data is not publicly available. similarity_fn_name: The distance metric used by the model. framework: The framework the model is implemented in, can be a list of frameworks e.g. `["Sentence Transformers", "PyTorch"]`. reference: A URL to the model's page on huggingface or another source. 
@@ -97,6 +98,7 @@ class ModelMeta(BaseModel): license: str | None open_weights: bool | None public_training_code: str | None + public_training_data: str | bool | None framework: list[FRAMEWORKS] reference: STR_URL | None = None similarity_fn_name: DISTANCE_METRICS | None diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py index b7217d1ef9..dd3cd1c8df 100644 --- a/mteb/models/arctic_models.py +++ b/mteb/models/arctic_models.py @@ -116,6 +116,7 @@ url={https://arxiv.org/abs/2407.18887}, }""", public_training_code=None, + public_training_data=None, training_datasets={ # source: https://arxiv.org/pdf/2405.05374 # splits not specified to assuming everything @@ -164,7 +165,8 @@ use_instructions=True, adapted_from="sentence-transformers/all-MiniLM-L6-v2", superseded_by=None, - public_training_code=None, # couldn't find + public_training_code=None, + public_training_data=None, training_datasets={ # source: https://arxiv.org/pdf/2405.05374 # splits not specified to assuming everything @@ -213,7 +215,8 @@ use_instructions=True, adapted_from="intfloat/e5-small-unsupervised", superseded_by=None, - public_training_code=None, # couldn't find + public_training_code=None, + public_training_data=None, # couldn't find training_datasets={ # source: https://arxiv.org/pdf/2405.05374 # splits not specified to assuming everything @@ -262,7 +265,8 @@ use_instructions=True, adapted_from="intfloat/e5-base-unsupervised", superseded_by="Snowflake/snowflake-arctic-embed-m-v1.5", - public_training_code=None, # couldn't find + public_training_code=None, + public_training_data=None, # couldn't find training_datasets={ # source: https://arxiv.org/pdf/2405.05374 # splits not specified to assuming everything @@ -311,7 +315,8 @@ use_instructions=True, adapted_from="nomic-ai/nomic-embed-text-v1-unsupervised", superseded_by="Snowflake/snowflake-arctic-embed-m-v2.0", - public_training_code=None, # couldn't find + public_training_code=None, + public_training_data=None, # couldn't 
find training_datasets={ # source: https://arxiv.org/pdf/2405.05374 # splits not specified to assuming everything @@ -360,7 +365,8 @@ use_instructions=True, adapted_from="intfloat/e5-base-unsupervised", superseded_by="Snowflake/snowflake-arctic-embed-l-v2.0", - public_training_code=None, # couldn't find + public_training_code=None, + public_training_data=None, # couldn't find training_datasets={ # source: https://arxiv.org/pdf/2405.05374 # splits not specified to assuming everything @@ -412,6 +418,7 @@ adapted_from=None, superseded_by="Snowflake/snowflake-arctic-embed-m-v2.0", public_training_code=None, + public_training_data=None, training_datasets=None, ) @@ -437,7 +444,8 @@ use_instructions=True, adapted_from="Alibaba-NLP/gte-multilingual-base", superseded_by=None, - public_training_code=None, # couldn't find + public_training_code=None, + public_training_data=None, # couldn't find training_datasets={ # source: https://arxiv.org/pdf/2405.05374 # splits not specified to assuming everything @@ -485,7 +493,8 @@ use_instructions=True, adapted_from="BAAI/bge-m3-retromae", superseded_by=None, - public_training_code=None, # couldn't find + public_training_code=None, + public_training_data=None, # couldn't find training_datasets={ # source: https://arxiv.org/pdf/2405.05374 # splits not specified to assuming everything diff --git a/mteb/models/bge_models.py b/mteb/models/bge_models.py index 79d220588a..91ff256bb8 100644 --- a/mteb/models/bge_models.py +++ b/mteb/models/bge_models.py @@ -4,6 +4,8 @@ from mteb.model_meta import ModelMeta, sentence_transformers_loader +from .e5_instruct import E5_MISTRAL_TRAINING_DATA + model_prompts = {"query": "Represent this sentence for searching relevant passages: "} BGE_15_CITATION = """@misc{bge_embedding, title={C-Pack: Packaged Resources To Advance General Chinese Embedding}, @@ -15,8 +17,8 @@ }""" model_prompts_zh = {"query": "为这个句子生成表示以用于检索相关文章:"} -bge_m_training_data = { - # source: https://arxiv.org/pdf/2402.03216 
+bge_m3_training_data = { + # source: https://arxiv.org/abs/2402.03216 "MIRACLRetrieval": ["train"], "MIRACLRetrievalHardNegatives": ["train"], "MIRACLReranking": ["train"], @@ -36,6 +38,28 @@ "HotpotQA": ["train"], "HotpotQA-PL": ["train"], # translation not trained on "HotpotQAHardNegatives": ["train"], + "T2Retrieval": ["train"], + "DuReader": ["train"], + "MMarcoReranking": ["train"], + "CodeSearchNet": ["train"], + # not in mteb + # "s2orc" + # Wikipedia + # "xP3" + # "mC4" + # "CC-News" + # "MTP" + # "NLLB" + # "CCMatrix" + # TriviaQA + # COL-IEE + # PubMedQA + # SQuAD + # SimCSE + # mMARCO-ZH + # LawGPT + # NLI-zh2, LeCaRDv2, + # NLI, MultiLongDoc (their syntetic) # + synthetic data } @@ -97,38 +121,6 @@ # "s2orc": [], # (title, abstract) (title, citation title) (abstract, citation abstract) } -bgem3_training_data = { - # source https://arxiv.org/abs/2402.03216 - "T2Retrieval": ["train"], - "DuReader": ["train"], - "MMarcoReranking": ["train"], - "CMedQAv2-reranking": ["train"], - "HotpotQA": ["train"], - "NQ": ["train"], - "MSMARCO": ["train"], - "MrTidyRetrieval": ["train"], - "MIRACLRetrieval": ["train"], - "CodeSearchNet": ["train"], - # not in mteb - # "s2orc" - # Wikipedia - # "xP3" - # "mC4" - # "CC-News" - # "MTP" - # "NLLB" - # "CCMatrix" - # TriviaQA - # COL-IEE - # PubMedQA - # SQuAD - # SimCSE - # mMARCO-ZH - # LawGPT - # NLI-zh2, LeCaRDv2, - # NLI, MultiLongDoc (their syntetic) -} - # https://huggingface.co/BAAI/bge-m3/discussions/29 bgem3_languages = [ "afr_Latn", # af @@ -306,59 +298,6 @@ "zho_Hans", # zh ] -bge_m_training_data = { - # source: https://arxiv.org/pdf/2402.03216 - "MIRACLRetrieval": ["train"], - "MIRACLRetrievalHardNegatives": ["train"], - "MIRACLReranking": ["train"], - "LeCaRDv2": ["train"], - "CMedQAv1-reranking": ["train"], - "CMedQAv2-reranking": ["train"], - "MrTidyRetrieval": ["train"], - "T2Reranking": ["train"], - "MSMARCO": ["train"], - "MSMARCOHardNegatives": ["train"], - "NanoMSMARCORetrieval": ["train"], - 
"MSMARCO-PL": ["train"], # translation not trained on - "NQ": ["train"], - "NQHardNegatives": ["train"], - "NanoNQRetrieval": ["train"], - "NQ-PL": ["train"], # translation not trained on - "HotpotQA": ["train"], - "HotpotQA-PL": ["train"], # translation not trained on - "HotpotQAHardNegatives": ["train"], - # + synthetic data -} - -bge_training_data = { - # source: https://data.baai.ac.cn/details/BAAI-MTP - "NQ": ["test"], - "NQHardNegatives": ["test"], - "AmazonReviewsClassification": [ - "validation", - "test", - ], # assumed from: amazon_reviews_multi - "MLQARetrieval": [ - "validation", - "test", - ], # assumed from mlqa (question, context) - # not in mteb - # Dataset Pairs - # wudao (title, passage) - # cmrc2018 (query, context) - # dureader (query, context) - # simclue (sentence_a, sentence_b) - # csl (title, abstract) - # amazon_reviews_multi (title, body) - # wiki_atomic_edits (base_sentence, edited_sentence) - # mlqa (question, context) - # xlsum (title, summary) (title, text) - # "sentence-transformers data": [], # https://huggingface.co/datasets/sentence-transformers/embedding-training-data # TODO check this further - # "wikipedia": [], # title + section title, passage - # "reddit": [], # title, body - # "stackexchange": [], # (title, upvoted answer) (title+body, upvoted answer) - # "s2orc": [], # (title, abstract) (title, citation title) (abstract, citation abstract) -} bge_small_en_v1_5 = ModelMeta( loader=partial( # type: ignore @@ -380,7 +319,8 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, - public_training_code=None, # seemingly released (at least for some models, but the link is broken + public_training_code=None, + public_training_data="https://data.baai.ac.cn/details/BAAI-MTP", training_datasets=bge_training_data, citation=BGE_15_CITATION, ) @@ -406,6 +346,7 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=True, public_training_code=None, # seemingly released (at least 
for some models, but the link is broken + public_training_data="https://data.baai.ac.cn/details/BAAI-MTP", training_datasets=bge_training_data, citation=BGE_15_CITATION, ) @@ -432,6 +373,7 @@ use_instructions=True, citation=BGE_15_CITATION, public_training_code=None, # seemingly released (at least for some models, but the link is broken + public_training_data="https://data.baai.ac.cn/details/BAAI-MTP", training_datasets=bge_training_data, ) @@ -455,7 +397,8 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, - public_training_code=None, # seemingly released (at least for some models, but the link is broken + public_training_code=None, + public_training_data=None, training_datasets=bge_chinese_training_data, ) @@ -479,7 +422,8 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, - public_training_code=None, # seemingly released (at least for some models, but the link is broken + public_training_code=None, + public_training_data=None, training_datasets=bge_chinese_training_data, ) @@ -503,7 +447,8 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, - public_training_code=None, # seemingly released (at least for some models, but the link is broken + public_training_code=None, + public_training_data=None, training_datasets=bge_chinese_training_data, ) @@ -527,7 +472,8 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=False, public_training_code=None, - training_datasets=bgem3_training_data, + public_training_data="https://huggingface.co/datasets/cfli/bge-full-data", + training_datasets=bge_m3_training_data, ) @@ -560,5 +506,86 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=False, public_training_code=None, + public_training_data=None, training_datasets=None, # not disclosed ) + +# Contents of cfli/bge-full-data +bge_full_data = { + # source: https://arxiv.org/pdf/2409.15700 + # 
Charles Goodhart is turning back and forth + # in his grave as I'm annotating this + # |Retrieval| + # ELI5 + # SQuaD + # TriviaQA + # QuoraDuplicateQuestions + "HotpotQA": ["train"], + "FEVER": ["train"], + "MSMARCO": ["train"], + "NQ": ["train"], + "ArguAna": ["train"], + "FiQA2018": ["train"], + # |Reranking| + "SciDocsReranking": ["train"], + "StackOverflowDupQuestions": ["train"], + # |Classification| + "AmazonReviewsClassification": ["train"], + "AmazonCounterfactualClassification": ["train"], + "Banking77Classification": ["train"], + "EmotionClassification": ["train"], + "TweetSentimentExtractionClassification": ["train"], + "MTOPIntentClassification": ["train"], + "ImdbClassification": ["train"], + "ToxicConversationsClassification": ["train"], + # |Clustering| + "ArxivClusteringS2S": ["train"], + "ArxivClusteringP2P": ["train"], + "BiorxivClusteringS2S": ["train"], + "BiorxivClusteringP2P": ["train"], + "MedrxivClusteringS2S": ["train"], + "MedrxivClusteringP2P": ["train"], + "BiorxivClusteringS2S.v2": ["train"], + "BiorxivClusteringP2P.v2": ["train"], + "MedrxivClusteringS2S.v2": ["train"], + "MedrxivClusteringP2P.v2": ["train"], + "RedditClusteringP2P": ["train"], + "RedditClustering": ["train"], + "RedditClustering.v2": ["train"], + "TwentyNewsgroupsClustering": ["train"], + "TwentyNewsgroupsClustering.v2": ["train"], + # |STS| + "STS22": ["train"], + "STS22.v2": ["train"], + "STSBenchmark": ["train"], +} + +bge_en_icl = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="BAAI/bge-en-icl", + revision="971c7e1445cc86656ca0bd85ed770b8675a40bb5", + ), + name="BAAI/bge-en-icl", + languages=[ + "eng_Latn", + ], + open_weights=True, + revision="971c7e1445cc86656ca0bd85ed770b8675a40bb5", + release_date="2024-07-25", # initial commit of hf model. 
+ n_parameters=7.11 * 1e9, + embed_dim=4096, + license="apache-2", + max_tokens=32768, + reference="https://huggingface.co/BAAI/bge-en-icl", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + public_training_code="https://github.com/FlagOpen/FlagEmbedding", + public_training_data="https://huggingface.co/datasets/cfli/bge-full-data", + training_datasets={ + **E5_MISTRAL_TRAINING_DATA, + **bge_full_data, + }, + adapted_from="intfloat/e5-mistral-7b-instruct", +) diff --git a/mteb/models/bm25.py b/mteb/models/bm25.py index ea56fd432b..6e3d3747d9 100644 --- a/mteb/models/bm25.py +++ b/mteb/models/bm25.py @@ -139,5 +139,6 @@ def encode(self, texts: list[str], **kwargs): framework=[], use_instructions=False, public_training_code="https://github.com/xhluca/bm25s", + public_training_data=None, training_datasets=None, ) diff --git a/mteb/models/cde_models.py b/mteb/models/cde_models.py new file mode 100644 index 0000000000..78870ef129 --- /dev/null +++ b/mteb/models/cde_models.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import logging + +from mteb.model_meta import ModelMeta + +from .bge_models import bge_full_data + +logger = logging.getLogger(__name__) + + +cde_small_v1 = ModelMeta( + loader=None, # I will leave this at None for now, + name="jxm/cde-small-v1", + languages=["eng_Latn"], + open_weights=True, + revision="8d5736163718a8b65cd787b75ed61020d18bad3c", + release_date="2024-09-24", + n_parameters=int(281 * 1e6), # Though the second-stage model is only 140M + max_tokens=512, + embed_dim=768, + license="mit", + similarity_fn_name="cosine", + framework=["Sentence Transformers"], + reference="https://huggingface.co/jxm/cde-small-v1", + use_instructions=True, + adapted_from="nomic-ai/nomic-bert-2048", + superseded_by="jxm/cde-small-v2", + training_datasets=bge_full_data, + public_training_code="https://github.com/jxmorris12/cde", + 
public_training_data="https://huggingface.co/datasets/cfli/bge-full-data", +) + +cde_small_v2 = ModelMeta( + loader=None, # I will leave this at None for now, + name="jxm/cde-small-v2", + languages=["eng_Latn"], + open_weights=True, + revision="a7e5882ad52c27ea2831fc8258f24379c25cb459", + release_date="2025-01-13", + n_parameters=int(306 * 1e6), # Though the second-stage model is only 140M + max_tokens=512, + embed_dim=768, + license="mit", + similarity_fn_name="cosine", + framework=["Sentence Transformers"], + reference="https://huggingface.co/jxm/cde-small-v1", + use_instructions=True, + adapted_from="answerdotai/ModernBERT-base", + superseded_by="jxm/cde-small-v2", + training_datasets=bge_full_data, + public_training_code="https://github.com/jxmorris12/cde", + public_training_data="https://huggingface.co/datasets/cfli/bge-full-data", +) diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index 8718a2e2a3..60ff63ee81 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -234,7 +234,8 @@ def encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - public_training_code=None, # assumed + public_training_code=None, + public_training_data=None, # assumed training_datasets=None, ) @@ -257,7 +258,8 @@ def encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - public_training_code=None, # assumed + public_training_code=None, + public_training_data=None, # assumed training_datasets=None, ) @@ -280,7 +282,8 @@ def encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - public_training_code=None, # assumed + public_training_code=None, + public_training_data=None, # assumed training_datasets=None, ) @@ -303,6 +306,7 @@ def encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - public_training_code=None, # assumed + public_training_code=None, + public_training_data=None, # assumed training_datasets=None, ) diff --git 
a/mteb/models/colbert_models.py b/mteb/models/colbert_models.py index 0a8c0e4a57..6c29510855 100644 --- a/mteb/models/colbert_models.py +++ b/mteb/models/colbert_models.py @@ -156,6 +156,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: open_weights=True, revision="c1e84128e85ef755c096a95bdb06b47793b13acf", public_training_code=None, + public_training_data=None, release_date="2024-09-21", n_parameters=110 * 1e6, max_tokens=180, # Reduced for Benchmarking - see ColBERT paper @@ -167,7 +168,9 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: use_instructions=False, adapted_from=None, superseded_by=None, - training_datasets=None, + training_datasets={ + "MSMARCO": ["train"], # dev? + }, ) @@ -208,6 +211,7 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: open_weights=True, revision="4cf816e5e2b03167b132a3c847a9ecd48ba708e1", public_training_code=None, + public_training_data=None, release_date="2024-08-16", n_parameters=559 * 1e6, max_tokens=8192, @@ -219,5 +223,9 @@ def similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: use_instructions=False, adapted_from=None, superseded_by=None, - training_datasets=None, + training_datasets={ + "MSMARCO": ["train"], + "DuRetrieval": [], + "MIRACL": ["train"], + }, ) diff --git a/mteb/models/e5_instruct.py b/mteb/models/e5_instruct.py index 1d457652a7..58afc17976 100644 --- a/mteb/models/e5_instruct.py +++ b/mteb/models/e5_instruct.py @@ -15,6 +15,16 @@ E5_INSTRUCTION = "Instruct: {instruction}\nQuery: " +E5_MISTRAL_TRAINING_DATA = { + **E5_TRAINING_DATA, + "FEVER": ["train"], + "FEVERHardNegatives": ["train"], + "FEVER-PL": ["train"], # translation not trained on + "HotpotQA": ["train"], + "HotpotQAHardNegatives": ["train"], + "HotpotQA-PL": ["train"], # translation not trained on +} + e5_instruct = ModelMeta( loader=partial( # type: ignore instruct_wrapper, @@ -46,6 +56,7 @@ year={2024} }""", public_training_code=None, + public_training_data=None, 
training_datasets=E5_TRAINING_DATA, ) @@ -91,5 +102,81 @@ } """, public_training_code=None, + public_training_data=None, training_datasets=E5_TRAINING_DATA, ) + +zeta_alpha_ai__Zeta_Alpha_E5_Mistral = ModelMeta( + loader=partial( # type: ignore + instruct_wrapper, + model_name_or_path="zeta-alpha-ai/Zeta-Alpha-E5-Mistral", + instruction_template=E5_INSTRUCTION, + attn="cccc", + pooling_method="lasttoken", + mode="embedding", + torch_dtype=torch.bfloat16, + # The ST script does not normalize while the HF one does so unclear what to do + # https://huggingface.co/intfloat/e5-mistral-7b-instruct#transformers + normalized=True, + ), + name="zeta-alpha-ai/Zeta-Alpha-E5-Mistral", + revision="c791d37474fa6a5c72eb3a2522be346bc21fbfc3", + release_date="2024-08-30", + languages=["eng_Latn"], + n_parameters=7110660096, + max_tokens=32768.0, + embed_dim=4096, + license="mit", + open_weights=True, + public_training_data=None, + public_training_code=None, + framework=["PyTorch"], + reference="https://huggingface.co/zeta-alpha-ai/Zeta-Alpha-E5-Mistral", + similarity_fn_name="cosine", + use_instructions=True, + training_datasets={ + # copied from e5 + # source: https://arxiv.org/pdf/2212.03533 + "NQ": ["test"], + "NQHardNegatives": ["test"], + "MSMARCO": ["train"], # dev? 
+ # source: https://www.zeta-alpha.com/post/fine-tuning-an-llm-for-state-of-the-art-retrieval-zeta-alpha-s-top-10-submission-to-the-the-mteb-be + # "Arguana", + # "FEVER", + # "FIQA", + # "HotPotQA", + # "MsMarco (passage)", + # "NFCorpus", + # "SciFact", + # "NLI", + # "SQuad", + # "StackExchange", + # "TriviaQA", + # "SciRep", + # "SciRepEval" + # mteb + # https://huggingface.co/datasets/mteb/raw_arxiv + # "ArxivClusteringS2S": ["train"], + # "ArxivClusteringP2P": ["train"], + # https://huggingface.co/datasets/mteb/raw_biorxiv + # "BiorxivClusteringS2S": ["train"], + # "BiorxivClusteringP2P": ["train"], + # https://huggingface.co/datasets/mteb/raw_medrxiv + # "MedrxivClusteringS2S": ["train"], + # "MedrxivClusteringP2P": ["train"], + # as their train datasets + "AmazonCounterfactualClassification": ["train"], + "AmazonReviewsClassification": ["train"], + "Banking77Classification": ["train"], + "EmotionClassification": ["train"], + "MTOPIntentClassification": ["train"], + "ToxicConversationsClassification": ["train"], + "TweetSentimentExtractionClassification": ["train"], + "ImdbClassification": ["train"], + "STS12": ["train"], + "STS22": ["train"], + "STSBenchmark": ["train"], + }, + adapted_from="intfloat/e5-mistral-7b-instruct", + superseded_by=None, +) diff --git a/mteb/models/e5_models.py b/mteb/models/e5_models.py index fe265f6f41..4c3c3d4790 100644 --- a/mteb/models/e5_models.py +++ b/mteb/models/e5_models.py @@ -144,6 +144,16 @@ "NQ-PL": ["train"], # translation not trained on } +ME5_TRAINING_DATA = { + **E5_TRAINING_DATA, + "FEVER": ["train"], + "FEVERHardNegatives": ["train"], + "FEVER-PL": ["train"], # translation not trained on + "HotpotQA": ["train"], + "HotpotQAHardNegatives": ["train"], + "HotpotQA-PL": ["train"], # translation not trained on +} + e5_mult_small = ModelMeta( loader=partial( # type: ignore sentence_transformers_loader, @@ -166,7 +176,8 @@ use_instructions=True, citation=MULTILINGUAL_E5_CITATION, public_training_code=None, # couldn't 
find - training_datasets=E5_TRAINING_DATA, + training_datasets=ME5_TRAINING_DATA, + public_training_data=None, ) e5_mult_base = ModelMeta( @@ -189,7 +200,8 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=True, public_training_code=None, - training_datasets=E5_TRAINING_DATA, + public_training_data=None, + training_datasets=ME5_TRAINING_DATA, citation=MULTILINGUAL_E5_CITATION, ) @@ -214,7 +226,8 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=True, public_training_code=None, - training_datasets=E5_TRAINING_DATA, + public_training_data=None, + training_datasets=ME5_TRAINING_DATA, citation=MULTILINGUAL_E5_CITATION, ) @@ -238,6 +251,7 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=True, public_training_code=None, + public_training_data=None, training_datasets=E5_TRAINING_DATA, citation=E5_CITATION, ) @@ -263,6 +277,7 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=True, public_training_code=None, + public_training_data=None, training_datasets=E5_TRAINING_DATA, citation=E5_CITATION, ) @@ -291,6 +306,7 @@ adapted_from=None, citation=E5_CITATION, public_training_code=None, + public_training_data=None, training_datasets=E5_TRAINING_DATA, ) @@ -317,6 +333,7 @@ superseded_by=None, adapted_from=None, public_training_code=None, + public_training_data=None, training_datasets=E5_TRAINING_DATA, citation=E5_CITATION, ) @@ -344,6 +361,7 @@ superseded_by="intfloat/e5-large-v2", adapted_from=None, public_training_code=None, + public_training_data=None, training_datasets=E5_TRAINING_DATA, citation=E5_CITATION, ) @@ -371,6 +389,7 @@ superseded_by="intfloat/e5-base-v2", adapted_from=None, public_training_code=None, + public_training_data=None, training_datasets=E5_TRAINING_DATA, citation=E5_CITATION, ) diff --git a/mteb/models/gme_models.py b/mteb/models/gme_models.py new file mode 100644 index 0000000000..804dfbc84d --- /dev/null +++ b/mteb/models/gme_models.py @@ -0,0 +1,62 @@ +from __future__ import 
annotations + +import logging + +from mteb.model_meta import ModelMeta + +logger = logging.getLogger(__name__) + + +gme_qwen2_vl_2b_instruct = ModelMeta( + loader=None, + name="Alibaba-NLP/gme-Qwen2-VL-2B-Instruct", + languages=["eng_Latn"], + open_weights=True, + revision="cfeb66885b598de483cc04eb08c7d9da534d7afe", + release_date="2024-12-21", + n_parameters=int(2.21 * 1e9), + max_tokens=32768, + embed_dim=1536, + license="mit", + similarity_fn_name="cosine", + framework=["PyTorch"], + reference="https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct", + use_instructions=True, + adapted_from=None, + superseded_by=None, + training_datasets={ + # Only annotating text data for now + # source: https://arxiv.org/pdf/2412.16855 + "MSMARCO": ["train"], + "MSMARCO.v2": ["train"], + }, + public_training_code=None, + public_training_data=None, +) + +gme_qwen2_vl_7b_instruct = ModelMeta( + loader=None, + name="Alibaba-NLP/gme-Qwen2-VL-2B-Instruct", + languages=["eng_Latn"], + open_weights=True, + revision="d42eca5a540526cfa982a349724b24b25c12a95e", + release_date="2024-12-21", + n_parameters=int(8.29 * 1e9), + max_tokens=32768, + embed_dim=3584, + license="mit", + similarity_fn_name="cosine", + framework=["PyTorch"], + reference="https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct", + use_instructions=True, + adapted_from=None, + superseded_by=None, + training_datasets={ + # Only annotating text data for now + # source: https://arxiv.org/pdf/2412.16855 + "MSMARCO": ["train"], + "MSMARCO.v2": ["train"], + }, + public_training_code=None, + public_training_data=None, +) diff --git a/mteb/models/google_models.py b/mteb/models/google_models.py index 08065f7af0..40d316fee7 100644 --- a/mteb/models/google_models.py +++ b/mteb/models/google_models.py @@ -151,7 +151,8 @@ def encode( similarity_fn_name="cosine", # assumed framework=["API"], use_instructions=True, - public_training_code=None, # assumed + public_training_code=None, + public_training_data=None, # assumed 
training_datasets=None, ) @@ -173,7 +174,8 @@ def encode( similarity_fn_name="cosine", # assumed framework=["API"], use_instructions=True, - public_training_code=None, # assumed + public_training_code=None, + public_training_data=None, # assumed training_datasets=None, ) @@ -195,6 +197,7 @@ def encode( similarity_fn_name="cosine", # assumed framework=["API"], use_instructions=True, - public_training_code=None, # assumed + public_training_code=None, + public_training_data=None, # assumed training_datasets=None, ) diff --git a/mteb/models/gritlm_models.py b/mteb/models/gritlm_models.py index ab32a6a9a6..eb23ee66bf 100644 --- a/mteb/models/gritlm_models.py +++ b/mteb/models/gritlm_models.py @@ -11,6 +11,18 @@ logger = logging.getLogger(__name__) +GRIT_LM_TRAINING_DATA = { + **E5_TRAINING_DATA, # source https://arxiv.org/pdf/2402.09906 + # also uses medi2 which contains fever and hotpotqa: + "FEVER": ["train"], + "FEVERHardNegatives": ["train"], + "FEVER-PL": ["train"], # translation not trained on + "HotpotQA": ["train"], + "HotpotQAHardNegatives": ["train"], + "HotpotQA-PL": ["train"], # translation not trained on +} + + def gritlm_instruction(instruction: str = "", prompt_type=None) -> str: return ( "<|user|>\n" + instruction + "\n<|embed|>\n" if instruction else "<|embed|>\n" @@ -50,9 +62,10 @@ def gritlm_instruction(instruction: str = "", prompt_type=None) -> str: similarity_fn_name="cosine", framework=["GritLM", "PyTorch"], use_instructions=True, - training_datasets=E5_TRAINING_DATA, # source https://arxiv.org/pdf/2402.09906 + training_datasets=GRIT_LM_TRAINING_DATA, # section 3.1 "We finetune our final models from Mistral 7B [68] and Mixtral 8x7B [69] using adaptations of E5 [160] and the Tülu 2 data public_training_code="https://github.com/ContextualAI/gritlm", + public_training_data=None, citation=GRITLM_CITATION, ) gritlm8x7b = ModelMeta( @@ -76,8 +89,9 @@ def gritlm_instruction(instruction: str = "", prompt_type=None) -> str: similarity_fn_name="cosine", 
framework=["GritLM", "PyTorch"], use_instructions=True, + training_datasets=GRIT_LM_TRAINING_DATA, citation=GRITLM_CITATION, - training_datasets=E5_TRAINING_DATA, # source https://arxiv.org/pdf/2402.09906 # section 3.1 "We finetune our final models from Mistral 7B [68] and Mixtral 8x7B [69] using adaptations of E5 [160] and the Tülu 2 data public_training_code="https://github.com/ContextualAI/gritlm", + public_training_data=None, ) diff --git a/mteb/models/gte_models.py b/mteb/models/gte_models.py index f80dc01fdd..fb3bb6db3e 100644 --- a/mteb/models/gte_models.py +++ b/mteb/models/gte_models.py @@ -56,6 +56,7 @@ def instruction_template( use_instructions=True, citation=GTE_CITATION, public_training_code=None, + public_training_data=None, training_datasets=None, max_tokens=131072, ) @@ -87,6 +88,7 @@ def instruction_template( framework=["Sentence Transformers", "PyTorch"], use_instructions=True, public_training_code=None, + public_training_data=None, training_datasets=None, ) @@ -117,6 +119,7 @@ def instruction_template( framework=["Sentence Transformers", "PyTorch"], use_instructions=True, public_training_code=None, + public_training_data=None, training_datasets=None, ) @@ -140,6 +143,7 @@ def instruction_template( framework=["Sentence Transformers", "PyTorch"], use_instructions=False, public_training_code=None, + public_training_data=None, training_datasets=None, # Not disclosed ) @@ -163,6 +167,7 @@ def instruction_template( framework=["Sentence Transformers", "PyTorch"], use_instructions=False, public_training_code=None, + public_training_data=None, training_datasets=None, # Not disclosed ) @@ -186,6 +191,7 @@ def instruction_template( framework=["Sentence Transformers", "PyTorch"], use_instructions=False, public_training_code=None, + public_training_data=None, training_datasets=None, # Not disclosed ) @@ -301,6 +307,7 @@ def instruction_template( similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, - 
public_training_code=None, # couldn't find + public_training_code=None, + public_training_data=None, # couldn't find training_datasets=gte_multi_training_data, ) diff --git a/mteb/models/ibm_granite_models.py b/mteb/models/ibm_granite_models.py index 78bad6097f..e7c3b8b022 100644 --- a/mteb/models/ibm_granite_models.py +++ b/mteb/models/ibm_granite_models.py @@ -20,6 +20,65 @@ "zho_Hans", ] +granite_training_data = { + # Multilingual MC4 + # Multilingual Webhose + # English Wikipedia + # Multilingual Wikimedia + "WikipediaRetrievalMultilingual": [], + "WikipediaRerankingMultilingual": [], + # Miracl Corpus (Title-Body) + # Stack Exchange Duplicate questions (titles) + # Stack Exchange Duplicate questions (titles) + # Stack Exchange Duplicate questions (bodies) + "StackOverflowDupQuestions": [], + "AskUbuntuDupQuestions": [], + # Stack Exchange (Title, Answer) pairs + # Stack Exchange (Title, Body) pairs + # Stack Exchange (Title, Body) pairs + # Machine Translations of Stack Exchange Duplicate questions (titles) + # Machine Translations of Stack Exchange (Title+Body, Answer) pairs + "StackExchangeClusteringP2P": [], + "StackExchangeClusteringP2P.v2": [], + "StackExchangeClustering": [], + "StackExchangeClustering.v2": [], + # SearchQA + # S2ORC (Title, Abstract) + # WikiAnswers Duplicate question pairs + # CCNews + # XSum + # SimpleWiki + # Machine Translated Cross Lingual Parallel Corpora + # SPECTER citation triplets + # Machine Translations of SPECTER citation triplets + # Natural Questions (NQ) + "NQ": ["test"], + "NQHardNegatives": ["test"], + # SQuAD2.0 + # HotpotQA + "HotPotQA": ["test"], + "HotPotQAHardNegatives": ["test"], + "HotPotQA-PL": ["test"], # translated from hotpotQA (not trained on) + # Fever + "FEVER": ["test"], + "FEVERHardNegatives": ["test"], + # PubMed + # Multilingual Miracl Triples + "MIRACLRetrieval": ["train"], + "MIRACLRetrievalHardNegatives": ["train"], + "MIRACLReranking": ["train"], + # Multilingual MrTydi Triples + 
"MrTidyRetrieval": ["train"], + # Sadeeem Question Asnwering + # DBPedia Title-Body Pairs + "DBPedia": ["train"], + # Synthetic: English Query-Wikipedia Passage + # Synthetic: English Fact Verification + # Synthetic: Multilingual Query-Wikipedia Passage + # Synthetic: Multilingual News Summaries + # IBM Internal Triples + # IBM Internal Title-Body Pairs +} granite_107m_multilingual = ModelMeta( loader=partial( # type: ignore @@ -42,8 +101,9 @@ adapted_from=None, superseded_by=None, public_training_code=None, + public_training_data=None, use_instructions=False, - training_datasets=None, + training_datasets=granite_training_data, ) granite_278m_multilingual = ModelMeta( @@ -67,8 +127,9 @@ adapted_from=None, superseded_by=None, public_training_code=None, + public_training_data=None, use_instructions=False, - training_datasets=None, + training_datasets=granite_training_data, ) granite_30m_english = ModelMeta( @@ -92,8 +153,9 @@ adapted_from=None, superseded_by=None, public_training_code=None, + public_training_data=None, use_instructions=False, - training_datasets=None, + training_datasets=granite_training_data, ) granite_125m_english = ModelMeta( @@ -117,6 +179,7 @@ adapted_from=None, superseded_by=None, public_training_code=None, + public_training_data=None, use_instructions=False, - training_datasets=None, + training_datasets=granite_training_data, ) diff --git a/mteb/models/inf_models.py b/mteb/models/inf_models.py index dc31adccd2..0d40ff3ef2 100644 --- a/mteb/models/inf_models.py +++ b/mteb/models/inf_models.py @@ -26,5 +26,6 @@ use_instructions=True, adapted_from="Alibaba-NLP/gte-Qwen2-7B-instruct", public_training_code=None, + public_training_data=None, training_datasets=None, ) diff --git a/mteb/models/jasper_models.py b/mteb/models/jasper_models.py index 1dc06d5640..dbd1615ad8 100644 --- a/mteb/models/jasper_models.py +++ b/mteb/models/jasper_models.py @@ -93,4 +93,5 @@ def encode( training_datasets=nvidia_training_datasets, # "In jasper model the teacher 
model is nvidia/NV-Embed-v2", source https://huggingface.co/infgrad/jasper_en_vision_language_v1 # "non_mteb": ["BAAI/Infinity-MM", "HuggingFaceFW/fineweb-edu"], public_training_code=None, + public_training_data=None, ) diff --git a/mteb/models/jina_models.py b/mteb/models/jina_models.py index f9b1f1b72a..41742a2ee3 100644 --- a/mteb/models/jina_models.py +++ b/mteb/models/jina_models.py @@ -222,8 +222,25 @@ def encode( framework=["Sentence Transformers", "PyTorch"], use_instructions=True, reference="https://huggingface.co/jinaai/jina-embeddings-v3", - training_datasets=None, public_training_code=None, + public_training_data=None, + training_datasets={ + # CulturaX + "STS12": [], + # "SICK": [], + # "WMT19": [], + # "MADLAD-3B": [], + # NLI + "MSMARCO": ["train"], + "MSMARCOHardNegatives": ["train"], + "NanoMSMARCORetrieval": ["train"], + "NQ": ["train"], + "NQHardNegatives": ["train"], + "NanoNQRetrieval": ["train"], + "NQ-PL": ["train"], # translation not trained on + # oasst1, oasst2 + }, + adapted_from="XLM-RoBERTa", citation=""" @misc{sturua2024jinaembeddingsv3multilingualembeddingstask, title={jina-embeddings-v3: Multilingual Embeddings With Task LoRA}, @@ -256,6 +273,7 @@ def encode( adapted_from=None, training_datasets=None, public_training_code=None, + public_training_data=None, ) jina_embeddings_v2_small_en = ModelMeta( @@ -276,6 +294,7 @@ def encode( adapted_from=None, training_datasets=None, public_training_code=None, + public_training_data=None, ) jina_embedding_b_en_v1 = ModelMeta( @@ -296,6 +315,7 @@ def encode( adapted_from=None, training_datasets=None, public_training_code=None, + public_training_data=None, ) jina_embedding_s_en_v1 = ModelMeta( @@ -316,4 +336,5 @@ def encode( adapted_from=None, training_datasets=None, public_training_code=None, + public_training_data=None, ) diff --git a/mteb/models/lens_models.py b/mteb/models/lens_models.py new file mode 100644 index 0000000000..380724e53e --- /dev/null +++ b/mteb/models/lens_models.py @@ -0,0 
+1,45 @@ +from __future__ import annotations + +from mteb.model_meta import ModelMeta + +from .bge_models import bge_full_data + +lens_d4000 = ModelMeta( + loader=None, # TODO: implement this in the future + name="yibinlei/LENS-d4000", + languages=None, + open_weights=True, + revision="e473b33364e6c48a324796fd1411d3b93670c6fe", + release_date="2025-01-17", + n_parameters=int(7.11 * 1e9), + embed_dim=4000, + license="apache-2.0", + reference="https://huggingface.co/yibinlei/LENS-d4000", + similarity_fn_name="cosine", + framework=["PyTorch"], + use_instructions=True, + public_training_code=None, + public_training_data="https://huggingface.co/datasets/cfli/bge-full-data", + training_datasets=bge_full_data, + max_tokens=32768, +) + +lens_d8000 = ModelMeta( + loader=None, # TODO: implement this in the future + name="yibinlei/LENS-d8000", + languages=None, + open_weights=True, + revision="a0b87bd91cb27b6f2f0b0fe22c28026da1d464ef", + release_date="2025-01-17", + n_parameters=int(7.11 * 1e9), + embed_dim=8000, + license="apache-2.0", + reference="https://huggingface.co/yibinlei/LENS-d8000", + similarity_fn_name="cosine", + framework=["PyTorch"], + use_instructions=True, + public_training_code=None, + public_training_data="https://huggingface.co/datasets/cfli/bge-full-data", + training_datasets=bge_full_data, + max_tokens=32768, +) diff --git a/mteb/models/linq_models.py b/mteb/models/linq_models.py index 11cfa74ed1..ead10ebf71 100644 --- a/mteb/models/linq_models.py +++ b/mteb/models/linq_models.py @@ -40,5 +40,6 @@ def instruction_template( framework=["Sentence Transformers", "PyTorch"], use_instructions=True, public_training_code=None, + public_training_data=None, training_datasets=None, ) diff --git a/mteb/models/llm2vec_models.py b/mteb/models/llm2vec_models.py index 7083534751..82186309db 100644 --- a/mteb/models/llm2vec_models.py +++ b/mteb/models/llm2vec_models.py @@ -138,6 +138,7 @@ def loader_inner(**kwargs: Any) -> Encoder: use_instructions=True, 
public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs", training_datasets=llm2vec_supervised_training_data, + public_training_data=None, citation=LLM2VEC_CITATION, ) @@ -165,6 +166,7 @@ def loader_inner(**kwargs: Any) -> Encoder: citation=LLM2VEC_CITATION, public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs", training_datasets={}, + public_training_data=None, ) @@ -192,6 +194,7 @@ def loader_inner(**kwargs: Any) -> Encoder: citation=LLM2VEC_CITATION, public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs", training_datasets=llm2vec_supervised_training_data, + public_training_data=None, ) llm2vec_mistral7b_unsupervised = ModelMeta( @@ -218,6 +221,7 @@ def loader_inner(**kwargs: Any) -> Encoder: citation=LLM2VEC_CITATION, public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs", training_datasets={}, + public_training_data=None, ) llm2vec_llama2_7b_supervised = ModelMeta( @@ -244,6 +248,7 @@ def loader_inner(**kwargs: Any) -> Encoder: citation=LLM2VEC_CITATION, public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs", training_datasets=llm2vec_supervised_training_data, + public_training_data=None, ) llm2vec_llama2_7b_unsupervised = ModelMeta( @@ -269,6 +274,7 @@ def loader_inner(**kwargs: Any) -> Encoder: use_instructions=True, public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs", training_datasets={}, + public_training_data=None, citation=LLM2VEC_CITATION, ) @@ -296,6 +302,7 @@ def loader_inner(**kwargs: Any) -> Encoder: citation=LLM2VEC_CITATION, public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs", 
training_datasets=llm2vec_supervised_training_data, + public_training_data=None, ) llm2vec_sheared_llama_unsupervised = ModelMeta( @@ -322,4 +329,5 @@ def loader_inner(**kwargs: Any) -> Encoder: citation=LLM2VEC_CITATION, public_training_code="https://github.com/McGill-NLP/llm2vec/tree/250292a307428240d801fadd85825464e71c3277/train_configs", training_datasets={}, + public_training_data=None, ) diff --git a/mteb/models/misc_models.py b/mteb/models/misc_models.py index 5233ecec6b..140d8bac74 100644 --- a/mteb/models/misc_models.py +++ b/mteb/models/misc_models.py @@ -7,7 +7,7 @@ from mteb.model_meta import ModelMeta, sentence_transformers_loader from mteb.models.e5_models import E5_TRAINING_DATA -from .bge_models import bge_m_training_data, bge_training_data +from .bge_models import bge_m3_training_data, bge_training_data from .sentence_transformers_models import sent_trf_training_dataset Haon_Chen__speed_embedding_7b_instruct = ModelMeta( @@ -22,6 +22,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/Haon-Chen/speed-embedding-7b-instruct", similarity_fn_name="cosine", @@ -42,6 +43,7 @@ license=None, open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Gameselo/STS-multilingual-mpnet-base-v2", similarity_fn_name="cosine", @@ -62,6 +64,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1", similarity_fn_name="cosine", @@ -82,6 +85,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-v1", similarity_fn_name="cosine", @@ -102,6 +106,7 @@ license="apache-2.0", open_weights=True, 
public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/Hum-Works/lodestone-base-4096-v1", similarity_fn_name="cosine", @@ -164,6 +169,7 @@ license=None, open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Jaume/gemma-2b-embeddings", similarity_fn_name="cosine", @@ -184,6 +190,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/BeastyZ/e5-R-mistral-7b", similarity_fn_name="cosine", @@ -210,6 +217,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Lajavaness/bilingual-embedding-base", similarity_fn_name="cosine", @@ -235,6 +243,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Lajavaness/bilingual-embedding-large", similarity_fn_name="cosine", @@ -260,6 +269,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Lajavaness/bilingual-embedding-small", similarity_fn_name="cosine", @@ -280,6 +290,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Mihaiii/Bulbasaur", similarity_fn_name="cosine", @@ -301,6 +312,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Mihaiii/Ivysaur", similarity_fn_name="cosine", @@ -322,6 +334,7 @@ license="mit", open_weights=True, 
public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Mihaiii/Squirtle", similarity_fn_name="cosine", @@ -343,6 +356,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Mihaiii/Venusaur", similarity_fn_name="cosine", @@ -364,6 +378,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Mihaiii/Wartortle", similarity_fn_name="cosine", @@ -385,6 +400,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Mihaiii/gte-micro", similarity_fn_name="cosine", @@ -405,6 +421,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Mihaiii/gte-micro-v4", similarity_fn_name="cosine", @@ -425,6 +442,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/OrdalieTech/Solon-embeddings-large-0.1", similarity_fn_name="cosine", @@ -445,6 +463,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka", similarity_fn_name="cosine", @@ -465,6 +484,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet", similarity_fn_name="cosine", @@ -487,6 
+507,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka", similarity_fn_name="cosine", @@ -509,6 +530,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-labse-Matryoshka", similarity_fn_name="cosine", @@ -531,6 +553,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet", similarity_fn_name="cosine", @@ -553,6 +576,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka", similarity_fn_name="cosine", @@ -573,6 +597,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/consciousAI/cai-lunaris-text-embeddings", similarity_fn_name="cosine", @@ -593,6 +618,7 @@ license=None, open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/consciousAI/cai-stellaris-text-embeddings", similarity_fn_name="cosine", @@ -613,6 +639,7 @@ license=None, open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/manu/bge-m3-custom-fr", similarity_fn_name="cosine", @@ -633,6 +660,7 @@ license=None, open_weights=True, public_training_code=None, + 
public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.2", similarity_fn_name="cosine", @@ -653,6 +681,7 @@ license=None, open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.3", similarity_fn_name="cosine", @@ -673,6 +702,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.4", similarity_fn_name="cosine", @@ -694,6 +724,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/thenlper/gte-base", similarity_fn_name="cosine", @@ -714,6 +745,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/thenlper/gte-large", similarity_fn_name="cosine", @@ -734,6 +766,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/thenlper/gte-small", similarity_fn_name="cosine", @@ -754,6 +787,7 @@ license="gpl-3.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/OrlikB/KartonBERT-USE-base-v1", similarity_fn_name="cosine", @@ -774,6 +808,7 @@ license="lgpl", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/OrlikB/st-polish-kartonberta-base-alpha-v1", similarity_fn_name="cosine", @@ -794,6 +829,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], 
reference="https://huggingface.co/sdadas/mmlw-e5-base", similarity_fn_name="cosine", @@ -814,6 +850,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/dwzhu/e5-base-4k", similarity_fn_name="cosine", @@ -834,6 +871,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/sdadas/mmlw-e5-large", similarity_fn_name="cosine", @@ -854,6 +892,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/sdadas/mmlw-e5-small", similarity_fn_name="cosine", @@ -874,6 +913,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/sdadas/mmlw-roberta-base", similarity_fn_name="cosine", @@ -894,6 +934,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/sdadas/mmlw-roberta-large", similarity_fn_name="cosine", @@ -960,6 +1001,7 @@ license="bigscience-bloom-rail-1.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/izhx/udever-bloom-1b1", similarity_fn_name="cosine", @@ -1026,6 +1068,7 @@ license="bigscience-bloom-rail-1.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/izhx/udever-bloom-3b", similarity_fn_name="cosine", @@ -1092,6 +1135,7 @@ license="bigscience-bloom-rail-1.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/izhx/udever-bloom-560m", similarity_fn_name="cosine", @@ -1158,6 +1202,7 @@ 
license="bigscience-bloom-rail-1.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/izhx/udever-bloom-7b1", similarity_fn_name="cosine", @@ -1178,6 +1223,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/avsolatorio/GIST-Embedding-v0", similarity_fn_name="cosine", @@ -1198,6 +1244,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/avsolatorio/GIST-all-MiniLM-L6-v2", similarity_fn_name="cosine", @@ -1218,6 +1265,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/avsolatorio/GIST-large-Embedding-v0", similarity_fn_name="cosine", @@ -1238,6 +1286,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/avsolatorio/GIST-small-Embedding-v0", similarity_fn_name="cosine", @@ -1258,6 +1307,7 @@ license=None, open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/bigscience/sgpt-bloom-7b1-msmarco", similarity_fn_name="cosine", @@ -1278,6 +1328,7 @@ license=None, open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/aari1995/German_Semantic_STS_V2", similarity_fn_name="cosine", @@ -1299,6 +1350,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/abhinand/MedEmbed-small-v0.1", similarity_fn_name="cosine", @@ -1325,6 +1377,7 @@ license="mit", 
open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/avsolatorio/NoInstruct-small-Embedding-v0", similarity_fn_name="cosine", @@ -1345,6 +1398,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/brahmairesearch/slx-v0.1", similarity_fn_name="cosine", @@ -1365,6 +1419,7 @@ license=None, open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/deepfile/embedder-100p", similarity_fn_name="cosine", @@ -1385,11 +1440,12 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/deepvk/USER-bge-m3", similarity_fn_name="cosine", use_instructions=None, - training_datasets=bge_m_training_data, # derived from. + training_datasets=bge_m3_training_data, # derived from. 
# not in MTEB: # "deepvk/ru-HNP": ["train"], # "deepvk/ru-WANLI": ["train"], @@ -1416,6 +1472,7 @@ license="mit", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/infgrad/stella-base-en-v2", similarity_fn_name="cosine", @@ -1436,6 +1493,7 @@ license=None, open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch"], reference="https://huggingface.co/malenia1/ternary-weight-embedding", similarity_fn_name="cosine", @@ -1456,6 +1514,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/omarelshehy/arabic-english-sts-matryoshka", similarity_fn_name="cosine", @@ -1486,6 +1545,7 @@ license=None, open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/openbmb/MiniCPM-Embedding", similarity_fn_name="cosine", @@ -1516,6 +1576,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/shibing624/text2vec-base-multilingual", similarity_fn_name="cosine", @@ -1537,6 +1598,7 @@ license="apache-2.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/silma-ai/silma-embeddding-matryoshka-v0.1", similarity_fn_name="cosine", @@ -1545,26 +1607,7 @@ adapted_from="/workspace/v3-matryoshka_aubmindlab-bert-base-arabertv02-2024-10-12_13-55-06/checkpoint-26250", superseded_by=None, ) -zeta_alpha_ai__Zeta_Alpha_E5_Mistral = ModelMeta( - name="zeta-alpha-ai/Zeta-Alpha-E5-Mistral", - revision="3e6076bdc2ff592a2f95fbc04570e51db5aa0c0c", - release_date="2024-08-30", - languages=["eng_Latn"], - loader=None, - 
n_parameters=7110660096, - max_tokens=32768.0, - embed_dim=4096, - license="mit", - open_weights=True, - public_training_code=None, - framework=["PyTorch"], - reference="https://huggingface.co/zeta-alpha-ai/Zeta-Alpha-E5-Mistral", - similarity_fn_name="cosine", - use_instructions=None, - training_datasets=None, - adapted_from="intfloat/e5-mistral-7b-instruct", - superseded_by=None, -) + sbert_chinese_general_v1 = ModelMeta( name="DMetaSoul/sbert-chinese-general-v1", revision="bd27765956bcc2fcf682de0097819947ac10037e", @@ -1577,6 +1620,7 @@ license="apache-2", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/DMetaSoul/sbert-chinese-general-v1", similarity_fn_name="cosine", @@ -1601,6 +1645,7 @@ license="apache-2", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/DMetaSoul/Dmeta-embedding-zh-small/", similarity_fn_name="cosine", @@ -1620,6 +1665,7 @@ license="not specified", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/lier007/xiaobu-embedding", similarity_fn_name="cosine", @@ -1640,6 +1686,7 @@ license="not specified", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/lier007/xiaobu-embedding-v2", similarity_fn_name="cosine", @@ -1660,6 +1707,7 @@ license="not specified", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], reference="https://huggingface.co/Classical/Yinka", similarity_fn_name="cosine", @@ -1680,6 +1728,7 @@ license="cc-by-nc-4.0", open_weights=True, public_training_code=None, + public_training_data=None, framework=["PyTorch", "Sentence Transformers"], 
reference="https://huggingface.co/Classical/Yinka", similarity_fn_name="cosine", @@ -1688,3 +1737,41 @@ training_datasets=None, # They "scraped" things from the internet, we don't know, could be leakage superseded_by=None, ) +ember_v1 = ModelMeta( + name="llmrails/ember-v1", + revision="5e5ce5904901f6ce1c353a95020f17f09e5d021d", + release_date="2023-10-10", + languages=["eng_Latn"], + n_parameters=335 * 1e6, + max_tokens=512, + embed_dim=1024, + license="mit", + open_weights=True, + public_training_code=None, + public_training_data=None, + framework=["PyTorch", "Sentence Transformers"], + reference="https://huggingface.co/llmrails/ember-v1", + similarity_fn_name="cosine", + use_instructions=None, + training_datasets=None, + superseded_by=None, +) +amazon_titan_text_embeddings_v2 = ModelMeta( + name="amazon/Titan-text-embeddings-v2", + revision="1", + release_date="2024-04-30", + languages=["eng_Latn"], + n_parameters=None, + max_tokens=None, + embed_dim=None, + license="proprietary", + open_weights=False, + public_training_code=None, + public_training_data=None, + framework=[], + reference="https://huggingface.co/amazon/Titan-text-embeddings-v2", + similarity_fn_name="cosine", + use_instructions=False, + training_datasets=None, + superseded_by=None, +) diff --git a/mteb/models/model2vec_models.py b/mteb/models/model2vec_models.py index afbf9df627..33da211c7a 100644 --- a/mteb/models/model2vec_models.py +++ b/mteb/models/model2vec_models.py @@ -75,7 +75,8 @@ def encode( adapted_from="BAAI/bge-base-en-v1.5", superseded_by=None, training_datasets=bge_training_data, # distilled - public_training_code="https://github.com/MinishLab/model2vec", # + public_training_code="https://github.com/MinishLab/model2vec", + public_training_data=None, ) @@ -101,6 +102,7 @@ def encode( superseded_by=None, training_datasets=bge_training_data, # distilled public_training_code="https://github.com/MinishLab/model2vec", + public_training_data=None, ) m2v_base_output = ModelMeta( @@ -125,6 
+127,7 @@ def encode( superseded_by=None, training_datasets=bge_training_data, # distilled public_training_code="https://github.com/MinishLab/model2vec", + public_training_data=None, ) m2v_multilingual_output = ModelMeta( @@ -149,6 +152,7 @@ def encode( superseded_by=None, training_datasets=None, public_training_code="https://github.com/MinishLab/model2vec", + public_training_data=None, ) potion_base_2m = ModelMeta( @@ -173,6 +177,7 @@ def encode( superseded_by=None, training_datasets=bge_training_data, # distilled public_training_code="https://github.com/MinishLab/model2vec", + public_training_data=None, ) potion_base_4m = ModelMeta( @@ -197,6 +202,7 @@ def encode( superseded_by=None, training_datasets=bge_training_data, # distilled public_training_code="https://github.com/MinishLab/model2vec", + public_training_data=None, ) potion_base_8m = ModelMeta( @@ -221,4 +227,5 @@ def encode( superseded_by=None, training_datasets=bge_training_data, # distilled public_training_code="https://github.com/MinishLab/model2vec", + public_training_data=None, ) diff --git a/mteb/models/moka_models.py b/mteb/models/moka_models.py index d3943d78d7..1504b40789 100644 --- a/mteb/models/moka_models.py +++ b/mteb/models/moka_models.py @@ -96,7 +96,8 @@ use_instructions=False, superseded_by=None, adapted_from=None, - public_training_code=None, # Not published + public_training_code=None, + public_training_data=None, # Not published training_datasets=m3e_dataset, ) @@ -117,7 +118,8 @@ use_instructions=False, superseded_by=None, adapted_from=None, - public_training_code=None, # Not published + public_training_code=None, + public_training_data=None, # Not published training_datasets=m3e_dataset, ) @@ -139,6 +141,7 @@ use_instructions=False, superseded_by=None, adapted_from=None, - public_training_code=None, # Not published + public_training_code=None, + public_training_data=None, # Not published training_datasets=m3e_dataset, ) diff --git a/mteb/models/mxbai_models.py 
b/mteb/models/mxbai_models.py index e0be5c9d93..c4bc7c3db8 100644 --- a/mteb/models/mxbai_models.py +++ b/mteb/models/mxbai_models.py @@ -42,5 +42,6 @@ } """, public_training_code=None, + public_training_data=None, training_datasets=None, ) diff --git a/mteb/models/no_instruct_sentence_models.py b/mteb/models/no_instruct_sentence_models.py index a0596b9bd1..9ff5cf901f 100644 --- a/mteb/models/no_instruct_sentence_models.py +++ b/mteb/models/no_instruct_sentence_models.py @@ -100,5 +100,6 @@ def encode( # type: ignore adapted_from=None, superseded_by=None, public_training_code=None, + public_training_data=None, training_datasets=None, ) diff --git a/mteb/models/nomic_models.py b/mteb/models/nomic_models.py index b2b0542543..c2d06e2f6e 100644 --- a/mteb/models/nomic_models.py +++ b/mteb/models/nomic_models.py @@ -90,6 +90,79 @@ def encode( # type: ignore return emb +nomic_training_data = { + # https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/configs/data/contrastive_pretrain.yaml + # reddit_title_body + "RedditClustering": [], + "RedditClusteringP2P": [], + "RedditClustering.v2": [], + "RedditClusteringP2P.v2": [], + # amazon_reviews + # amazonqa + "AmazonPolarityClassification": [], + "AmazonReviewsClassification": [], + "AmazonCounterfactualClassification": [], + # paq + # s2orc_citation_titles + # s2orc_title_abstract + # s2orc_abstract_citation + # s2orc_abstract_body + # wikianswers + # wikipedia + "WikipediaRetrievalMultilingual": [], + "WikipediaRerankingMultilingual": [], + # gooaq + # codesearch + "CodeSearchNetCCRetrieval": [], + "COIRCodeSearchNetRetrieval": [], + # yahoo_title_answer + # yahoo_qa + # yahoo_title_question + "YahooAnswersTopicsClassification": [], + # agnews + # ccnews + # npr + # eli5 + # cnn + # stackexchange_duplicate_questions + # stackexchange_title_body + # stackexchange_body_body + "StackExchangeClustering.v2": [], + "StackExchangeClusteringP2P.v2": [], + # sentence_compression + # 
wikihow + # altlex + # quora + "QuoraRetrieval": [], + "NanoQuoraRetrieval": [], + # simplewiki + # squad + "FQuADRetrieval": [], + # https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/configs/data/finetune_triplets.yaml + # msmaro + "MSMARCO": ["train"], + "MSMARCOHardNegatives": ["train"], + "NanoMSMARCORetrieval": ["train"], + # nq_triples + "NQ": ["train"], + "NQHardNegatives": ["train"], + "NanoNQRetrieval": ["train"], + "NQ-PL": ["train"], # translation not trained on + # nli_triplets + # reddit + # medi_wiki + # medi_stackexchange + # medi_flickr + # medi_supernli + # hotpot + "HotPotQA": ["test"], + "HotPotQAHardNegatives": ["test"], + "HotPotQA-PL": ["test"], # translated from hotpotQA (not trained on) + # fever + "FEVER": ["test"], + "FEVERHardNegatives": ["test"], +} + # https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/eval/mteb_eval/eval_mteb.py#L142-L159 model_prompts = { "Classification": "classification: ", @@ -138,8 +211,9 @@ def encode( # type: ignore use_instructions=True, adapted_from=None, superseded_by=None, - public_training_code=None, - training_datasets=None, + public_training_data=None, + public_training_code="https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/configs/train/contrastive_finetune.yaml", + training_datasets=nomic_training_data, ) nomic_embed_v1 = ModelMeta( @@ -166,8 +240,9 @@ def encode( # type: ignore citation=NOMIC_CITATION, adapted_from=None, superseded_by="nomic-ai/nomic-embed-text-v1.5", - public_training_code=None, - training_datasets=None, + public_training_code="https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/configs/train/contrastive_finetune.yaml", + training_datasets=nomic_training_data, + public_training_data=None, ) nomic_embed_v1_ablated = ModelMeta( @@ -193,8 +268,9 @@ def encode( # type: ignore 
use_instructions=True, adapted_from=None, superseded_by=None, - public_training_code=None, - training_datasets=None, + public_training_code="https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/configs/train/contrastive_finetune.yaml", + training_datasets=nomic_training_data, + public_training_data=None, ) @@ -221,8 +297,9 @@ def encode( # type: ignore use_instructions=True, adapted_from=None, superseded_by=None, - public_training_code=None, - training_datasets=None, + public_training_code="https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/configs/train/contrastive_finetune.yaml", + training_datasets=nomic_training_data, + public_training_data=None, ) nomic_modern_bert_embed = ModelMeta( @@ -248,8 +325,10 @@ def encode( # type: ignore similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, - adapted_from=None, + adapted_from="answerdotai/ModernBERT-base", + public_training_code="https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/configs/train/contrastive_pretrain_modernbert.yaml", + # https://github.com/nomic-ai/contrastors/blob/5f7b461e5a13b5636692d1c9f1141b27232fe966/src/contrastors/configs/train/contrastive_finetune_modernnomic.yaml superseded_by=None, - public_training_code=None, - training_datasets=None, + training_datasets=nomic_training_data, + public_training_data=None, ) diff --git a/mteb/models/nvidia_models.py b/mteb/models/nvidia_models.py index 1f345a62be..1997a85274 100644 --- a/mteb/models/nvidia_models.py +++ b/mteb/models/nvidia_models.py @@ -141,6 +141,7 @@ def encode( use_instructions=True, training_datasets=nvidia_training_datasets, public_training_code=None, + public_training_data=None, ) NV_embed_v1 = ModelMeta( @@ -164,4 +165,5 @@ def encode( use_instructions=True, training_datasets=nvidia_training_datasets, public_training_code=None, + 
public_training_data=None, ) diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index 863c9d7828..079e7c9361 100644 --- a/mteb/models/openai_models.py +++ b/mteb/models/openai_models.py @@ -135,7 +135,8 @@ def _to_numpy(self, embedding_response) -> np.ndarray: similarity_fn_name="cosine", framework=["API"], use_instructions=False, - public_training_code=None, # assumed + public_training_code=None, + public_training_data=None, # assumed training_datasets=None, ) text_embedding_3_large = ModelMeta( @@ -156,7 +157,8 @@ def _to_numpy(self, embedding_response) -> np.ndarray: framework=["API"], use_instructions=False, n_parameters=None, - public_training_code=None, # assumed + public_training_code=None, + public_training_data=None, # assumed training_datasets=None, license=None, similarity_fn_name=None, @@ -179,7 +181,8 @@ def _to_numpy(self, embedding_response) -> np.ndarray: framework=["API"], use_instructions=False, n_parameters=None, - public_training_code=None, # assumed + public_training_code=None, + public_training_data=None, # assumed training_datasets=None, license=None, similarity_fn_name=None, diff --git a/mteb/models/overview.py b/mteb/models/overview.py index c72fe2ed89..e23285ff68 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -15,10 +15,12 @@ arctic_models, bge_models, bm25, + cde_models, cohere_models, colbert_models, e5_instruct, e5_models, + gme_models, google_models, gritlm_models, gte_models, @@ -26,6 +28,7 @@ inf_models, jasper_models, jina_models, + lens_models, linq_models, llm2vec_models, misc_models, @@ -56,6 +59,7 @@ arctic_models, bge_models, bm25, + cde_models, cohere_models, colbert_models, e5_instruct, @@ -64,9 +68,11 @@ google_models, gritlm_models, gte_models, + gme_models, ibm_granite_models, inf_models, jina_models, + lens_models, linq_models, llm2vec_models, mxbai_models, @@ -210,6 +216,25 @@ def get_model_meta(model_name: str, revision: str | None = None) -> ModelMeta: return meta 
+empty_model_meta = ModelMeta( + name=None, + revision=None, + languages=None, + release_date=None, + n_parameters=None, + max_tokens=None, + embed_dim=None, + license=None, + open_weights=True, + public_training_code=None, + public_training_data=None, + similarity_fn_name=None, + use_instructions=None, + training_datasets=None, + framework=[], +) + + @lru_cache def model_meta_from_hf_hub(model_name: str) -> ModelMeta: try: @@ -234,26 +259,14 @@ def model_meta_from_hf_hub(model_name: str) -> ModelMeta: embed_dim=None, open_weights=True, public_training_code=None, + public_training_data=None, use_instructions=None, ) except Exception as e: logger.warning(f"Failed to extract metadata from model: {e}.") - return ModelMeta( - name=model_name, - revision=None, - languages=None, - release_date=None, - n_parameters=None, - max_tokens=None, - embed_dim=None, - license=None, - open_weights=True, - public_training_code=None, - similarity_fn_name=None, - use_instructions=None, - training_datasets=None, - framework=[], - ) + meta = empty_model_meta + meta.name = model_name + return meta def model_meta_from_cross_encoder(model: CrossEncoder) -> ModelMeta: @@ -273,6 +286,7 @@ def model_meta_from_cross_encoder(model: CrossEncoder) -> ModelMeta: license=None, open_weights=True, public_training_code=None, + public_training_data=None, use_instructions=None, training_datasets=None, ) @@ -280,22 +294,7 @@ def model_meta_from_cross_encoder(model: CrossEncoder) -> ModelMeta: logger.warning( f"Failed to extract metadata from model: {e}. Upgrading to sentence-transformers v3.0.0 or above is recommended." 
) - meta = ModelMeta( - name=None, - revision=None, - languages=None, - release_date=None, - n_parameters=None, - max_tokens=None, - embed_dim=None, - license=None, - open_weights=True, - public_training_code=None, - similarity_fn_name=None, - use_instructions=None, - training_datasets=None, - framework=[], - ) + meta = empty_model_meta return meta @@ -325,6 +324,7 @@ def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMe license=None, open_weights=True, public_training_code=None, + public_training_data=None, use_instructions=None, training_datasets=None, ) @@ -332,20 +332,5 @@ def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMe logger.warning( f"Failed to extract metadata from model: {e}. Upgrading to sentence-transformers v3.0.0 or above is recommended." ) - meta = ModelMeta( - name=None, - revision=None, - languages=None, - release_date=None, - n_parameters=None, - max_tokens=None, - embed_dim=None, - license=None, - open_weights=True, - public_training_code=None, - similarity_fn_name=None, - use_instructions=None, - training_datasets=None, - framework=[], - ) + meta = empty_model_meta return meta diff --git a/mteb/models/piccolo_models.py b/mteb/models/piccolo_models.py index bb92b55673..d51487b8ba 100644 --- a/mteb/models/piccolo_models.py +++ b/mteb/models/piccolo_models.py @@ -21,6 +21,7 @@ superseded_by=None, adapted_from=None, public_training_code=None, + public_training_data=None, training_datasets=None, # They don't specify ) @@ -42,5 +43,6 @@ superseded_by=None, adapted_from=None, public_training_code=None, + public_training_data=None, training_datasets=None, # They don't say ) diff --git a/mteb/models/promptriever_models.py b/mteb/models/promptriever_models.py index 7dc98a26a5..df2204defe 100644 --- a/mteb/models/promptriever_models.py +++ b/mteb/models/promptriever_models.py @@ -80,6 +80,7 @@ def loader_inner(**kwargs: Any) -> Encoder: use_instructions=True, citation=PROMPTRIEVER_CITATION, 
public_training_code=None, + public_training_data=None, ) promptriever_llama3 = ModelMeta( @@ -107,6 +108,7 @@ def loader_inner(**kwargs: Any) -> Encoder: use_instructions=True, citation=PROMPTRIEVER_CITATION, public_training_code=None, + public_training_data=None, ) @@ -135,6 +137,7 @@ def loader_inner(**kwargs: Any) -> Encoder: use_instructions=True, citation=PROMPTRIEVER_CITATION, public_training_code=None, + public_training_data=None, ) promptriever_mistral_v1 = ModelMeta( @@ -162,4 +165,5 @@ def loader_inner(**kwargs: Any) -> Encoder: use_instructions=True, citation=PROMPTRIEVER_CITATION, public_training_code=None, + public_training_data=None, ) diff --git a/mteb/models/repllama_models.py b/mteb/models/repllama_models.py index 8faa2c490f..ffe1f0bd87 100644 --- a/mteb/models/repllama_models.py +++ b/mteb/models/repllama_models.py @@ -172,6 +172,7 @@ def loader_inner(**kwargs: Any) -> Encoder: use_instructions=True, citation=REPLLAMA_CITATION, public_training_code=None, + public_training_data=None, ) @@ -199,5 +200,6 @@ def loader_inner(**kwargs: Any) -> Encoder: use_instructions=True, citation=REPLLAMA_CITATION, public_training_code=None, + public_training_data=None, training_datasets=None, ) diff --git a/mteb/models/rerankers_custom.py b/mteb/models/rerankers_custom.py index bedfd09604..34adea7ffd 100644 --- a/mteb/models/rerankers_custom.py +++ b/mteb/models/rerankers_custom.py @@ -11,6 +11,7 @@ from mteb.encoder_interface import Encoder from mteb.evaluation.evaluators.RetrievalEvaluator import DenseRetrievalExactSearch from mteb.model_meta import ModelMeta +from mteb.models.bge_models import bge_m3_training_data logger = logging.getLogger(__name__) @@ -209,6 +210,7 @@ def loader_inner(**kwargs: Any) -> Encoder: embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, @@ -233,6 +235,7 @@ def loader_inner(**kwargs: Any) -> Encoder: embed_dim=None, license=None, 
public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, @@ -289,9 +292,10 @@ def loader_inner(**kwargs: Any) -> Encoder: embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, - training_datasets=None, + training_datasets=bge_m3_training_data, framework=["Sentence Transformers", "PyTorch"], citation=""" @misc{li2023making, diff --git a/mteb/models/rerankers_monot5_based.py b/mteb/models/rerankers_monot5_based.py index f45addb18f..320ee4bc7d 100644 --- a/mteb/models/rerankers_monot5_based.py +++ b/mteb/models/rerankers_monot5_based.py @@ -301,6 +301,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, @@ -342,6 +343,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, @@ -365,6 +367,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, @@ -397,6 +400,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, @@ -452,6 +456,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, framework=["PyTorch"], @@ -497,6 +502,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, 
similarity_fn_name=None, use_instructions=None, framework=["PyTorch"], @@ -542,6 +548,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, framework=["PyTorch"], @@ -587,6 +594,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, framework=["PyTorch"], @@ -610,6 +618,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, @@ -651,6 +660,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, @@ -674,6 +684,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, @@ -707,6 +718,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, framework=["PyTorch"], @@ -854,6 +866,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, framework=["PyTorch"], @@ -876,6 +889,7 @@ def get_prediction_tokens(self, *args, **kwargs): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, diff --git a/mteb/models/ru_sentence_models.py b/mteb/models/ru_sentence_models.py index 
1869ce62db..683c8c5024 100644 --- a/mteb/models/ru_sentence_models.py +++ b/mteb/models/ru_sentence_models.py @@ -6,42 +6,53 @@ from mteb.model_meta import ModelMeta, sentence_transformers_loader -from .bge_models import bge_training_data +from .bge_models import bge_m3_training_data -rubert_tiny2 = ModelMeta( - name="cointegrated/rubert-tiny2", +rubert_tiny = ModelMeta( + name="cointegrated/rubert-tiny", languages=["rus_Cyrl"], open_weights=True, - revision="dad72b8f77c5eef6995dd3e4691b758ba56b90c3", - release_date="2021-10-28", + revision="5441c5ea8026d4f6d7505ec004845409f1259fb1", + release_date="2021-05-24", n_parameters=29_400_000, embed_dim=312, license="mit", max_tokens=2048, - reference="https://huggingface.co/cointegrated/rubert-tiny2", + reference="https://huggingface.co/cointegrated/rubert-tiny", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, - public_training_code=None, - training_datasets=None, + public_training_code="https://gist.github.com/avidale/7bc6350f26196918bf339c01261f5c60", + training_datasets={ + # [Yandex Translate corpus](https://translate.yandex.ru/corpus), [OPUS-100](https://huggingface.co/datasets/opus100) + "Tatoeba": ["train"], + }, + adapted_from="google-bert/bert-base-multilingual-cased", + public_training_data=None, ) -rubert_tiny = ModelMeta( - name="cointegrated/rubert-tiny", +rubert_tiny2 = ModelMeta( + name="cointegrated/rubert-tiny2", languages=["rus_Cyrl"], open_weights=True, - revision="5441c5ea8026d4f6d7505ec004845409f1259fb1", - release_date="2021-05-24", + revision="dad72b8f77c5eef6995dd3e4691b758ba56b90c3", + release_date="2021-10-28", n_parameters=29_400_000, embed_dim=312, license="mit", max_tokens=2048, - reference="https://huggingface.co/cointegrated/rubert-tiny", + reference="https://huggingface.co/cointegrated/rubert-tiny2", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, - public_training_code=None, - 
training_datasets=None, + public_training_code="https://colab.research.google.com/drive/1mSWfIQ6PIlteLVZ9DKKpcorycgLIKZLf?usp=sharing", + training_datasets={ + # https://huggingface.co/datasets/cointegrated/ru-paraphrase-NMT-Leipzig + # Wikipedia https://huggingface.co/datasets/Madjogger/JamSpell_dataset + # https://huggingface.co/datasets/imvladikon/leipzig_corpora_collection + }, + adapted_from="cointegrated/rubert-tiny", + public_training_data=None, ) sbert_large_nlu_ru = ModelMeta( @@ -59,6 +70,7 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=False, public_training_code=None, + public_training_data=None, training_datasets=None, ) @@ -77,7 +89,11 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=False, public_training_code=None, - training_datasets=None, + public_training_data=None, + training_datasets={ + # SNLI, MNLI + # https://github.com/brmson/dataset-sts + }, ) user_base_ru = ModelMeta( @@ -93,12 +109,13 @@ revision="436a489a2087d61aa670b3496a9915f84e46c861", release_date="2024-06-10", n_parameters=427_000_000, - embed_dim=1024, - license="Not specified", - max_tokens=512, # best guess - reference="https://huggingface.co/ai-forever/sbert_large_mt_nlu_ru", + embed_dim=768, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/deepvk/USER-base", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], + adapted_from="https://huggingface.co/deepvk/deberta-v1-base", use_instructions=True, citation="""@misc{deepvk2024user, title={USER: Universal Sentence Encoder for Russian}, @@ -108,13 +125,68 @@ year={2024}, } """, + training_datasets={ + "BibleNLPBitextMining": ["train"], + # https://github.com/unicamp-dl/mMARCO + # deepvk/ru-HNP + # deepvk/ru-WANLI + # MedNLI + # RCB + "TERRa": ["train"], + # Tapaco + # Opus100 + # BiblePar + # RudetoxifierDataDetox + # RuParadetox + "MIRACL": ["train"], + # MLDR + # Lenta + "MLSUMClusteringP2P": ["train"], + "MLSUMClusteringP2P.v2": 
["train"], + "MLSUMClusteringS2S": ["train"], + "MLSUMClusteringS2S.v2": ["train"], + "MrTidyRetrieval": ["train"], + # "Panorama" + # PravoIsrael + # xlsum + # Fialka-v1 + # RussianKeywords + # Gazeta + # Gsm8k-ru + # DSumRu + # SummDialogNews + }, + public_training_code=None, + public_training_data=None, +) + +user_bge_m3 = ModelMeta( + loader=partial( # type: ignore + sentence_transformers_loader, + model_name="deepvk/USER-bge-m3", + revision="0cc6cfe48e260fb0474c753087a69369e88709ae", + ), + name="deepvk/USER-bge-m3", + languages=["rus_Cyrl"], + open_weights=True, + revision="0cc6cfe48e260fb0474c753087a69369e88709ae", + release_date="2024-07-05", + n_parameters=359_026_688, + embed_dim=1024, + license="apache-2.0", + max_tokens=8194, + reference="https://huggingface.co/deepvk/USER-base", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + adapted_from="https://huggingface.co/BAAI/bge-m3", + use_instructions=False, training_datasets={ "BibleNLPBitextMining": ["train"], "MLSUMClusteringP2P": ["train"], "MLSUMClusteringP2P.v2": ["train"], "MLSUMClusteringS2S": ["train"], "MLSUMClusteringS2S.v2": ["train"], - **bge_training_data, + **bge_m3_training_data, # not MTEB: # "deepvk/ru-HNP": ["train"], # "deepvk/ru-WANLI": ["train"], @@ -132,6 +204,7 @@ # "CarlBrendt/Summ_Dialog_News": ["train"], }, public_training_code=None, + public_training_data=None, ) deberta_v1_ru = ModelMeta( @@ -148,7 +221,9 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + # Wikipedia, Books, Twitter comments, Pikabu, Proza.ru, Film subtitles, News websites, and Social corpus public_training_code=None, + public_training_data=None, training_datasets=None, ) @@ -161,12 +236,13 @@ n_parameters=1280_000_000, embed_dim=768, license="Not specified", - max_tokens=512, # best guess + max_tokens=512, reference="https://huggingface.co/DeepPavlov/rubert-base-cased", similarity_fn_name="cosine", framework=["Sentence 
Transformers", "PyTorch"], use_instructions=False, public_training_code=None, + public_training_data=None, training_datasets=None, citation="""@misc{kuratov2019adaptationdeepbidirectionalmultilingual, title={Adaptation of Deep Bidirectional Multilingual Transformers for Russian Language}, @@ -194,6 +270,7 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=False, public_training_code=None, + public_training_data=None, training_datasets=None, citation="""@misc{https://doi.org/10.48550/arxiv.2205.02340, doi = {10.48550/ARXIV.2205.02340}, @@ -222,7 +299,11 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=False, public_training_code=None, - training_datasets=None, + public_training_data=None, + training_datasets={ + # "SNLI": [], + "XNLI": ["dev"] + }, ) labse_en_ru = ModelMeta( @@ -239,8 +320,10 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, - public_training_code=None, + public_training_code="https://colab.research.google.com/drive/1dnPRn0-ugj3vZgSpyCC9sgslM2SuSfHy?usp=sharing", + public_training_data=None, training_datasets=None, + adapted_from="sentence-transformers/LaBSE", ) rubert_tiny_turbo = ModelMeta( @@ -258,8 +341,10 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=False, public_training_code=None, + public_training_data=None, training_datasets=None, # source model in unknown # Not MTEB: {"IlyaGusev/gazeta": ["train"], "zloelias/lenta-ru": ["train"]}, + adapted_from="cointegrated/rubert-tiny2", ) labse_ru_turbo = ModelMeta( @@ -276,9 +361,11 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, - training_datasets=None, # source model in unknown + training_datasets=None, # not MTEB: {"IlyaGusev/gazeta": ["train"], "zloelias/lenta-ru": ["train"]}, public_training_code=None, + adapted_from="cointegrated/LaBSE-en-ru", + public_training_data=None, ) @@ -305,8 +392,24 @@ embed_dim=1024, license="mit", 
similarity_fn_name="cosine", + adapted_from="ai-forever/ruRoberta-large", + training_datasets={ + # https://huggingface.co/ai-forever/ruRoberta-large + # https://huggingface.co/datasets/IlyaGusev/yandex_q_full + # https://huggingface.co/datasets/IlyaGusev/pikabu + # https://huggingface.co/datasets/IlyaGusev/ru_stackoverflow + # https://huggingface.co/datasets/IlyaGusev/habr + # https://huggingface.co/datasets/its5Q/habr_qna + # NewsCommentary + # MultiParaCrawl + "XNLI": [], + "XNLIV2": [], + "LanguageClassification": [], # XNLI + "MIRACLReranking": ["train"], + "MIRACLRetrieval": ["train"], + }, + public_training_data=None, public_training_code=None, - training_datasets=None, framework=["Sentence Transformers", "PyTorch"], citation="""@misc{snegirev2024russianfocusedembeddersexplorationrumteb, title={The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design}, diff --git a/mteb/models/salesforce_models.py b/mteb/models/salesforce_models.py index fd54871663..c5ba799338 100644 --- a/mteb/models/salesforce_models.py +++ b/mteb/models/salesforce_models.py @@ -6,6 +6,8 @@ from mteb.model_meta import ModelMeta from mteb.models.instruct_wrapper import instruct_wrapper +from .e5_instruct import E5_MISTRAL_TRAINING_DATA + def instruction_template( instruction: str, prompt_type: PromptType | None = None @@ -13,6 +15,19 @@ def instruction_template( return f"Instruct: {instruction}\nQuery: " if instruction else "" +SFR_TRAINING_DATA = { # inherits from e5 + **E5_MISTRAL_TRAINING_DATA, + # From previously released blogpost which now have been taken down: + "FiQA2018": ["train"], + "FiQA2018-PL": ["train"], + "FEVER": ["train"], + "FEVERHardNegatives": ["train"], + "FEVER-PL": ["train"], # translation not trained on + "HotpotQA": ["train"], + "HotpotQAHardNegatives": ["train"], + "HotpotQA-PL": ["train"], # translation not trained on +} + SFR_Embedding_2_R = ModelMeta( loader=partial( # type: ignore instruct_wrapper, @@ -41,16 +56,8 @@ def 
instruction_template( use_instructions=True, adapted_from="intfloat/e5-mistral-7b-instruct", public_training_code=None, - training_datasets={ # inherits from e5 - "MSMARCO": ["train"], - "MSMARCOHardNegatives": ["train"], - "NanoMSMARCORetrieval": ["train"], - "MSMARCO-PL": ["train"], # translation not trained on - "NQ": ["train"], - "NQHardNegatives": ["train"], - "NanoNQRetrieval": ["train"], - "NQ-PL": ["train"], # translation not trained on - }, + public_training_data=None, + training_datasets=SFR_TRAINING_DATA, citation="""@misc{SFR-embedding-2, title={SFR-Embedding-2: Advanced Text Embedding with Multi-stage Training}, author={Rui Meng*, Ye Liu*, Shafiq Rayhan Joty, Caiming Xiong, Yingbo Zhou, Semih Yavuz}, @@ -86,14 +93,6 @@ def instruction_template( framework=["Sentence Transformers", "PyTorch"], use_instructions=True, public_training_code=None, - training_datasets={ # inherits from e5 - "MSMARCO": ["train"], - "MSMARCOHardNegatives": ["train"], - "NanoMSMARCORetrieval": ["train"], - "MSMARCO-PL": ["train"], # translation not trained on - "NQ": ["train"], - "NQHardNegatives": ["train"], - "NanoNQRetrieval": ["train"], - "NQ-PL": ["train"], # translation not trained on - }, + public_training_data=None, + training_datasets=SFR_TRAINING_DATA, ) diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index 63be6e925c..73dcf8a666 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -129,6 +129,7 @@ adapted_from=None, training_datasets=sent_trf_training_dataset, public_training_code=None, + public_training_data=None, citation=SBERT_CITATION, ) @@ -151,6 +152,7 @@ training_datasets=sent_trf_training_dataset, public_training_code=None, citation=SBERT_CITATION, + public_training_data=None, ) paraphrase_multilingual_MiniLM_L12_v2 = ModelMeta( @@ -172,6 +174,7 @@ training_datasets=sent_trf_training_dataset, # assumed (probably some parallel as well) 
public_training_code=None, citation=SBERT_CITATION, + public_training_data=None, ) paraphrase_multilingual_mpnet_base_v2 = ModelMeta( @@ -204,6 +207,7 @@ # "yahoo-answers": yahoo_answers_train_dataset, # "stack-exchange": stack_exchange_train_dataset, public_training_code=None, + public_training_data=None, ) labse = ModelMeta( @@ -233,6 +237,7 @@ primaryClass={cs.CL}, url={https://arxiv.org/abs/2007.01852}, }""", + public_training_data=None, ) multi_qa_MiniLM_L6_cos_v1 = ModelMeta( @@ -253,6 +258,7 @@ adapted_from="nreimers/MiniLM-L6-H384-uncased", training_datasets=sent_trf_training_dataset, # assumed public_training_code=None, + public_training_data=None, citation=SBERT_CITATION, ) @@ -272,39 +278,9 @@ use_instructions=False, superseded_by=None, adapted_from=None, - public_training_code=None, # does sentence transformer count? - training_datasets={ - # source: frontmatter in readme - # trained on stack exchange, unsure if sources match - "StackExchangeClusteringP2P": ["test"], - "StackExchangeClusteringP2P.v2": ["test"], - "StackExchangeClustering": ["test"], - "StackExchangeClustering.v2": ["test"], - "NQ": ["test"], - "NQHardNegatives": ["test"], - "MSMARCO": ["train"], - # Non MTEB sources - # "s2orc": ["train"], - # "flax-sentence-embeddings/stackexchange_xml": ["train"], - # "ms_marco": ["train"], - # "gooaq": ["train"], - # "yahoo_answers_topics": ["train"], - # "code_search_net": ["train"], - # "search_qa": ["train"], - # "eli5": ["train"], - # "snli": ["train"], - # "multi_nli": ["train"], - # "wikihow": ["train"], - # "trivia_qa": ["train"], - # "embedding-data/sentence-compression": ["train"], - # "embedding-data/flickr30k-captions": ["train"], - # "embedding-data/altlex": ["train"], - # "embedding-data/simple-wiki": ["train"], - # "embedding-data/QQP": ["train"], - # "embedding-data/SPECTER": ["train"], - # "embedding-data/PAQ_pairs": ["train"], - # "embedding-data/WikiAnswers": ["train"], - }, + training_datasets=sent_trf_training_dataset, + 
public_training_code=None, + public_training_data=None, citation=SBERT_CITATION, ) @@ -337,6 +313,7 @@ doi = {10.48550/ARXIV.2112.09118}, }""", public_training_code=None, + public_training_data=None, training_datasets=None, ) @@ -368,4 +345,5 @@ # "sentence-transformers/natural-questions": ["train"], }, public_training_code=None, + public_training_data=None, ) diff --git a/mteb/models/stella_models.py b/mteb/models/stella_models.py index 44aa1f8604..92d5db7c8a 100644 --- a/mteb/models/stella_models.py +++ b/mteb/models/stella_models.py @@ -29,7 +29,9 @@ framework=["Sentence Transformers", "PyTorch", "GritLM"], reference="https://huggingface.co/dunzhang/stella_en_400M_v5", training_datasets=None, + # will be at https://github.com/NLPJCL/RAG-Retrieval public_training_code=None, + public_training_data=None, ) stella_en_1_5b = ModelMeta( @@ -54,8 +56,10 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch", "GritLM"], reference="https://huggingface.co/dunzhang/stella_en_1.5B_v5", + # will be at https://github.com/NLPJCL/RAG-Retrieval training_datasets=None, public_training_code=None, + public_training_data=None, ) stella_large_zh_v3_1792d = ModelMeta( @@ -75,6 +79,7 @@ superseded_by="dunzhang/stella-mrl-large-zh-v3.5-1792d", adapted_from=None, public_training_code=None, + public_training_data=None, training_datasets={ # Not in MTEB: # - infgrad/dialogue_rewrite_llm @@ -99,6 +104,7 @@ superseded_by=None, adapted_from=None, public_training_code=None, + public_training_data=None, training_datasets={ # Not in MTEB: # - infgrad/dialogue_rewrite_llm @@ -124,6 +130,7 @@ superseded_by=None, adapted_from="dunzhang/stella-large-zh-v3-1792d", public_training_code=None, + public_training_data=None, training_datasets=None, # Not specified ) @@ -144,6 +151,7 @@ superseded_by=None, adapted_from="dunzhang/stella-mrl-large-zh-v3.5-1792d", public_training_code=None, + public_training_data=None, training_datasets={ # It's a bit unclear what they have trained on to 
be honest, because they don't list all # And they also have some rather cryptic description of their training procedure, but at diff --git a/mteb/models/text2vec_models.py b/mteb/models/text2vec_models.py index 12322e69e9..86a9bcca4f 100644 --- a/mteb/models/text2vec_models.py +++ b/mteb/models/text2vec_models.py @@ -21,7 +21,8 @@ use_instructions=False, superseded_by=None, adapted_from=None, - public_training_code=None, # Couldn't find it + public_training_code=None, + public_training_data=None, # Couldn't find it training_datasets={ # source: https://huggingface.co/shibing624/text2vec-base-chinese # Not in MTEB @@ -46,7 +47,8 @@ use_instructions=False, superseded_by=None, adapted_from=None, - public_training_code=None, # Couldn't find it + public_training_code=None, + public_training_data=None, # Couldn't find it training_datasets={ # source: https://huggingface.co/shibing624/text2vec-base-chinese # Not in MTEB @@ -87,7 +89,8 @@ use_instructions=False, superseded_by=None, adapted_from="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", - public_training_code=None, # Couldn't find it + public_training_code=None, + public_training_data=None, # Couldn't find it training_datasets={ # source: https://huggingface.co/shibing624/text2vec-base-chinese # Not in MTEB diff --git a/mteb/models/uae_models.py b/mteb/models/uae_models.py index e3cdaa8436..a12a936326 100644 --- a/mteb/models/uae_models.py +++ b/mteb/models/uae_models.py @@ -91,4 +91,5 @@ def encode( "SNLI": [], }, public_training_code=None, + public_training_data=None, ) diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index a98bc041bc..a637dee36a 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -12,6 +12,11 @@ from .wrapper import Wrapper +VOYAGE_TRAINING_DATA = { + # Self-reported (message from VoyageAI member) + # synthetic data +} + def token_limit(max_tpm: int, interval: int = 60): limit_interval_start_ts = time.time() @@ -156,8 +161,9 @@ def 
_batched_encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - training_datasets=None, + training_datasets=VOYAGE_TRAINING_DATA, public_training_code=None, + public_training_data=None, ) voyage_finance_2 = ModelMeta( @@ -179,8 +185,9 @@ def _batched_encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - training_datasets=None, + training_datasets=VOYAGE_TRAINING_DATA, public_training_code=None, + public_training_data=None, ) voyage_law_2 = ModelMeta( @@ -202,8 +209,9 @@ def _batched_encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - training_datasets=None, + training_datasets=VOYAGE_TRAINING_DATA, public_training_code=None, + public_training_data=None, ) voyage_code_2 = ModelMeta( @@ -225,8 +233,9 @@ def _batched_encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - training_datasets=None, + training_datasets=VOYAGE_TRAINING_DATA, public_training_code=None, + public_training_data=None, ) voyage_large_2 = ModelMeta( @@ -248,8 +257,9 @@ def _batched_encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - training_datasets=None, + training_datasets=VOYAGE_TRAINING_DATA, public_training_code=None, + public_training_data=None, ) voyage_2 = ModelMeta( @@ -271,8 +281,9 @@ def _batched_encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - training_datasets=None, + training_datasets=VOYAGE_TRAINING_DATA, public_training_code=None, + public_training_data=None, ) voyage_multilingual_2 = ModelMeta( name="voyageai/voyage-multilingual-2", @@ -293,8 +304,9 @@ def _batched_encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - training_datasets=None, + training_datasets=VOYAGE_TRAINING_DATA, public_training_code=None, + public_training_data=None, ) voyage_3 = ModelMeta( @@ -316,8 +328,9 @@ def _batched_encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - 
training_datasets=None, + training_datasets=VOYAGE_TRAINING_DATA, public_training_code=None, + public_training_data=None, ) voyage_3_lite = ModelMeta( @@ -339,6 +352,79 @@ def _batched_encode( similarity_fn_name="cosine", framework=["API"], use_instructions=True, - training_datasets=None, + training_datasets=VOYAGE_TRAINING_DATA, + public_training_code=None, + public_training_data=None, +) + + +voyage_3_exp = ModelMeta( + name="voyageai/voyage-3-m-exp", + revision="1", + release_date=None, # not released + languages=None, # supported languages not specified + loader=partial( + VoyageWrapper, + model_name="voyage-3-m-exp", + model_prompts=model_prompts, + ), + max_tokens=32000, + embed_dim=512, + open_weights=False, + n_parameters=None, + license=None, + reference="https://huggingface.co/voyageai/voyage-3-m-exp", + similarity_fn_name="cosine", + framework=["API"], + use_instructions=True, + training_datasets={ + # MTEB(eng, classic) training data: + "ArguAna": ["train"], + "ArguAna-PL": ["train"], + "NanoArguAnaRetrieval": ["train"], + "HotpotQA": ["train"], + "HotpotQA-PL": ["train"], # translation not trained on + "HotpotQAHardNegatives": ["train"], + "MSMARCO": ["train"], + "MSMARCOHardNegatives": ["train"], + "NanoMSMARCORetrieval": ["train"], + "MSMARCO-PL": ["train"], # translation not trained on + "NQ": ["train"], + "NQHardNegatives": ["train"], + "NanoNQRetrieval": ["train"], + "NQ-PL": ["train"], # translation not trained on + "FEVER": ["train"], + "FEVERHardNegatives": ["train"], + "NanoFEVERRetrieval": ["train"], + "FiQA2018": ["train"], + "FiQA2018-PL": ["train"], # translation not trained on + "STS12": ["train"], + "STS22": ["train"], + "AmazonReviewsClassification": ["train"], + "AmazonCounterfactualClassification": ["train"], + "Banking77Classification": ["train"], + "EmotionClassification": ["train"], + "ImdbClassification": ["train"], + "MTOPIntentClassification": ["train"], + "ToxicConversationsClassification": ["train"], + 
"TweetSentimentExtractionClassification": ["train"], + "ArxivClusteringP2P": ["train"], + "ArxivClusteringP2P.v2": ["train"], + "ArxivClusteringS2S": ["train"], + "ArxivClusteringS2S.v2": ["train"], + "BiorxivClusteringP2P": ["train"], + "BiorxivClusteringP2P.v2": ["train"], + "BiorxivClusteringS2S": ["train"], + "BiorxivClusteringS2S.v2": ["train"], + "MedrxivClusteringP2P": ["train"], + "MedrxivClusteringP2P.v2": ["train"], + "MedrxivClusteringS2S": ["train"], + "MedrxivClusteringS2S.v2": ["train"], + "TwentyNewsgroupsClustering": ["train"], + "TwentyNewsgroupsClustering.v2": ["train"], + "STSBenchmark": ["train"], + "STSBenchmarkMultilingualSTS": ["train"], # translated, not trained on + }, public_training_code=None, + public_training_data=None, ) diff --git a/pyproject.toml b/pyproject.toml index 58c94a1979..f42014e3a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.29.10" +version = "1.29.16" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ diff --git a/scripts/generate_metadata.py b/scripts/generate_metadata.py index a192fa1341..4ae87fdbca 100644 --- a/scripts/generate_metadata.py +++ b/scripts/generate_metadata.py @@ -242,6 +242,7 @@ def model_meta_from_hf_hub(model_name: str) -> ModelMeta: license=None, open_weights=True, public_training_code=None, + public_training_data=None, similarity_fn_name=None, use_instructions=None, training_datasets=None, diff --git a/tests/test_tasks/test_mteb_rerank.py b/tests/test_tasks/test_mteb_rerank.py index 4a535bebbd..7705de4d3f 100644 --- a/tests/test_tasks/test_mteb_rerank.py +++ b/tests/test_tasks/test_mteb_rerank.py @@ -374,6 +374,7 @@ def test_reranker_same_ndcg1(tmp_path: Path): embed_dim=None, license=None, public_training_code=None, + public_training_data=None, reference=None, similarity_fn_name=None, use_instructions=None,